summaryrefslogblamecommitdiffstats
path: root/utils.py
blob: 17e52e51835179c8877301b682357f4da754103a (plain) (tree)























                                                                             
from pathlib import Path
import pandas as pd
import csv

# Project directory layout; all paths are relative to the working directory.
DATA_DIR = Path("data")
RAW_DIR = DATA_DIR.joinpath("raw")  # raw input files live under here
FIG_DIR = Path("reports", "figures")

# Keyword arguments handed to ``pd.read_csv`` for each raw FOIA text file.
csv_args = {
    # Python parser engine: copes with Windows' \r\n line endings and is
    # what makes "skipfooter" (below) available.
    "engine": "python",
    # The files are tab-delimited.
    "sep": "\t",
    # Take the column names from row index 2, which skips the
    # "to Bryan on Date" preamble above them.
    "header": 2,
    # Drop the last row (total tickets in file).
    "skipfooter": 1,
    # Treat double quotes (") as ordinary characters; they occur in the
    # Location column.
    "quoting": csv.QUOTE_NONE,
    # Merge the date and time columns into a single parsed "Issued" column.
    # NOTE(review): recent pandas releases deprecate this nested-dict form
    # of parse_dates -- confirm against the pandas version in use.
    "parse_dates": {"Issued": ["Ticket Issue Date", "Issue Time"]},
    # Per the original author's note, the "python" engine does not honour
    # dtypes, so per-column conversions belong in "converters" instead.
    # None are defined yet.
    "converters": {},
}

# Read every raw FOIA export found (recursively) under RAW_DIR and stack
# them into one DataFrame with a fresh 0..n-1 index.
#
# The paths are sorted before reading: Path.glob yields matches in an
# order that depends on the filesystem, so without sorting the row order
# (and therefore the index assigned by ignore_index=True) could differ
# from one machine or run to the next.
#
# pd.concat raises ValueError if no file matches the pattern.
data = pd.concat(
    (
        pd.read_csv(raw_file, **csv_args)
        for raw_file in sorted(RAW_DIR.glob("**/FOIA*.txt"))
    ),
    ignore_index=True,
)