summaryrefslogblamecommitdiffstats
path: root/tickets-per-day-of-month.py
blob: 4dbb9b20cc74b7e29dad9e2e9cf36c7058cf4011 (plain) (tree)































































                                                                         
#
# How many tickets are issued in Boston per day of month?
#
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import utils

# Ticket table exposed by the project's utils module.
data = utils.data
#data.info()

# Count tickets per calendar day, bucketing on the "Issued" column.
daily = pd.Grouper(key="Issued", freq="D")
bdom = data.groupby(daily)["Issued"].count()

# Day 31 only occurs in 7 of the 12 months, so leave it out.
b31 = bdom[bdom.index.day == 31]
bdom = bdom[bdom.index.day != 31]

# Split off the 2020 days into `covid` and keep every other year in `bdom`.
covid = bdom[bdom.index.year == 2020]
bdom = bdom[bdom.index.year != 2020]

fig, ax = plt.subplots(figsize=(10, 4))

# Per day-of-month median of the daily counts, and their standard deviation.
by_day = bdom.groupby(bdom.index.day)
med = by_day.median()
std = by_day.std()

# White band spanning median - std to median + std.
lower, upper = med - std, med + std
ax.fill_between(med.index, lower, upper, facecolor="white")
# plt.plot(med.index, med, color="white")

# fit trend line
# fit = np.poly1d(np.polyfit(bdom.index.day, bdom.values, 1))
# plt.plot(bdom.index.day, fit(bdom.index.day), color='black')

# One translucent dot per calendar day: 2020 in red, all other years in black.
ax.scatter(covid.index.day, covid.values, color="tab:red", alpha=0.2)
ax.scatter(bdom.index.day, bdom.values, color="black", alpha=0.2)

# Ticks for days 1 through 30 (day 31 was dropped from the data).
ax.set_xticks(range(1, 31))
ax.set(title="Tickets Issued on Day of Month", ylabel="Tickets Issued")

fig.tight_layout()
fig.savefig(utils.FIG_DIR / "tickets-by-day-of-month.svg", transparent=True)


# Why so many low values? Pull out the days with fewer than 1000 tickets.
is_low = bdom.values < 1000
low = bdom[is_low]

# fairly evenly spaced out between months
#low.groupby(low.index.month).count()

# and by year
#low.groupby(low.index.year).count()

# However, grouping by day of week shows that 80% (459) of the days
# fall on a Sunday, when a majority of meters are inactive
#low.groupby(low.index.dayofweek).count()