In [1]:
import dataclasses
import datetime

import altair
import apple_mobility_trends.data
import statsmodels.tsa.exponential_smoothing.ets
from IPython.display import Markdown
In [2]:
@dataclasses.dataclass
class QuestionParameters:
    """Location and date range that together describe one forecasting question.

    Instances are plain value records: the dataclass decorator generates
    ``__init__``, ``__repr__`` and ``__eq__`` from the four fields below, in
    the order they are declared.
    """

    # Passed as the ``region`` argument of the mobility data lookup.
    region: str
    # Passed as the ``sub_region`` argument of the mobility data lookup.
    sub_region: str
    # First day of the question's date range.
    start: datetime.datetime
    # Last day of the question's date range; the forecast is read at this date.
    end: datetime.datetime


# Location selectors; each one is reused by two questions below.
_NEW_YORK_CITY = {"region": "New York City", "sub_region": "New York"}
_SAN_FRANCISCO = {"region": "San Francisco - Bay Area", "sub_region": "California"}
_PHOENIX = {"region": "Phoenix", "sub_region": "Arizona"}

# The two date ranges the questions cover (both bounds are stored as given).
_LATE_OCTOBER = {
    "start": datetime.datetime(2020, 10, 25),
    "end": datetime.datetime(2020, 10, 31),
}
_LATE_NOVEMBER = {
    "start": datetime.datetime(2020, 11, 22),
    "end": datetime.datetime(2020, 11, 28),
}

# Parameters keyed by question id. The ids are not contiguous (there is no
# 5326 entry) and the locations appear in reverse order for the second date
# range, so the table is spelled out entry by entry rather than generated.
question_parameters = {
    5323: QuestionParameters(**_NEW_YORK_CITY, **_LATE_OCTOBER),
    5324: QuestionParameters(**_SAN_FRANCISCO, **_LATE_OCTOBER),
    5325: QuestionParameters(**_PHOENIX, **_LATE_OCTOBER),
    5327: QuestionParameters(**_PHOENIX, **_LATE_NOVEMBER),
    5328: QuestionParameters(**_SAN_FRANCISCO, **_LATE_NOVEMBER),
    5329: QuestionParameters(**_NEW_YORK_CITY, **_LATE_NOVEMBER),
}

# Key of the entry the following cells look up in question_parameters.
question_id = 5323
In [3]:
# Look the selected question up once and unpack the fields the notebook uses.
selected_question = question_parameters[question_id]
region = selected_question.region
sub_region = selected_question.sub_region
end = selected_question.end

# Transit series for the selected city.
series = apple_mobility_trends.data.get_series(
    transportation_type="transit",
    geo_type="city",
    country="United States",
    sub_region=sub_region,
    region=region,
)

# NOTE(review): 7 equals the length in days of every start..end range in
# question_parameters, so the rolling mean at `end` would cover exactly that
# range - presumably deliberate; confirm before changing either one.
window = 7
title = f"Transit activity in {region}"

# Fill gaps by interpolation, take the rolling mean over `window` observations,
# drop the rows the rolling window leaves as NaN, and set a daily frequency.
smoothed_series = series.interpolate().rolling(window).mean().dropna().asfreq("D")

# Draw the raw series, then overlay the smoothed one on the same axes. Passing
# the axes explicitly avoids depending on matplotlib's implicit "current axes".
axes = series.plot(title=title, xlabel="Date", ylabel="Transit activity")
smoothed_series.plot(ax=axes)
Out[3]:
<AxesSubplot:title={'center':'Transit activity in New York City'}, xlabel='Date', ylabel='Transit activity'>
In [4]:
# Build an ETS model with the library's default settings on the smoothed
# series, fit it, and show the fit summary.
ets_model = statsmodels.tsa.exponential_smoothing.ets.ETSModel(smoothed_series)
fit_results = ets_model.fit()
display(fit_results.summary())

# Predict from start=-1 (presumably the last in-sample observation - confirm
# against the statsmodels docs) through the question's end date.
prediction_results = fit_results.get_prediction(start=-1, end=end)

# alpha=0.5 produces the pi_lower / pi_upper columns that the next cell
# reports as the 50% prediction interval.
predictions = prediction_results.summary_frame(alpha=0.5)
ETS Results
Dep. Variable: New York City No. Observations: 261
Model: ETS(ANN) Log Likelihood -471.150
Date: Tue, 06 Oct 2020 AIC 948.300
Time: 17:56:01 BIC 958.994
Sample: 01-19-2020 HQIC 952.599
- 10-05-2020 Scale 2.165
Covariance Type: approx
coef std err z P>|z| [0.025 0.975]
smoothing_level 0.9999 0.060 16.749 0.000 0.883 1.117
initial_level 98.0753 1.475 66.491 0.000 95.184 100.966
Ljung-Box (Q): 405.52 Jarque-Bera (JB): 2979.33
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.07 Skew: -3.69
Prob(H) (two-sided): 0.00 Kurtosis: 17.82


Warnings:
[1] Covariance matrix calculated using numerical (complex-step) differentiation.
In [5]:
# Forecast row for the question's end date.
prediction = predictions.loc[end]

# ".3g" keeps three significant digits in fixed notation for values below
# 1000. The bare ".3" spec used previously switches to scientific notation at
# 100 and above (e.g. 123.4 is rendered as "1.23e+02").
display(Markdown(f"Mean: {prediction['mean']:.3g}"))
display(
    Markdown(
        f"50% prediction interval: {prediction['pi_lower']:.3g} to {prediction['pi_upper']:.3g}"
    )
)

# Build the plotting frame as a new object instead of renaming and resetting
# the index of `predictions` in place: the lookup by `end` at the top of this
# cell needs the original index, so in-place mutation made a second run fail.
forecast_frame = predictions.rename_axis("date").reset_index()

# Line for the mean forecast.
forecast_line = (
    altair.Chart(forecast_frame)
    .mark_line()
    .encode(x="date", y="mean")
    .properties(title=title + " forecast")
)

# Band spanning the lower and upper prediction-interval bounds.
interval_band = (
    altair.Chart(forecast_frame)
    .mark_errorband()
    .encode(
        x="date",
        y=altair.Y("pi_lower", title="Transit activity (moving average)"),
        y2="pi_upper",
        tooltip=["date", "mean", "pi_lower", "pi_upper"],
    )
)

# The layered chart is the cell's last expression, so it is what gets displayed.
forecast_line + interval_band

Mean: 53.3

50% prediction interval: 48.2 to 58.3

Out[5]: