import dataclasses
import datetime
import altair
import apple_mobility_trends.data
import statsmodels.tsa.exponential_smoothing.ets
from IPython.display import Markdown
@dataclasses.dataclass
class QuestionParameters:
    """Location and date range that define one forecasting question."""

    # Name of the region whose series is requested from the data source.
    region: str
    # Name of the enclosing sub-region, passed alongside ``region``.
    sub_region: str
    # First date of the question's period.
    start: datetime.datetime
    # Last date of the question's period.
    end: datetime.datetime
# Parameters for every supported question, keyed by question id.  Each row
# below is (question id, region, sub-region, (month, first day, last day));
# all periods fall in 2020 and cover either 25-31 October or 22-28 November.
question_parameters = {
    number: QuestionParameters(
        region=city,
        sub_region=state,
        start=datetime.datetime(2020, month, first_day),
        end=datetime.datetime(2020, month, last_day),
    )
    for number, city, state, (month, first_day, last_day) in (
        (5323, "New York City", "New York", (10, 25, 31)),
        (5324, "San Francisco - Bay Area", "California", (10, 25, 31)),
        (5325, "Phoenix", "Arizona", (10, 25, 31)),
        (5327, "Phoenix", "Arizona", (11, 22, 28)),
        (5328, "San Francisco - Bay Area", "California", (11, 22, 28)),
        (5329, "New York City", "New York", (11, 22, 28)),
    )
}
# Question to forecast; must be one of the keys of question_parameters.
question_id = 5323

# Look the question up once and pull out the fields used further down.
selected_question = question_parameters[question_id]
region = selected_question.region
sub_region = selected_question.sub_region
end = selected_question.end

# City-level transit series for the selected question's location.
series = apple_mobility_trends.data.get_series(
    region=region,
    sub_region=sub_region,
    country="United States",
    geo_type="city",
    transportation_type="transit",
)
# Number of consecutive observations averaged by the rolling mean below.
window = 7
title = f"Transit activity in {region}"

# Plot the raw series first.
series.plot(title=title, xlabel="Date", ylabel="Transit activity")

# Interpolate missing values, take the rolling mean, drop the NaN rows that
# remain, and convert the result to daily frequency.
smoothed_series = (
    series.interpolate()
    .rolling(window)
    .mean()
    .dropna()
    .asfreq("D")
)
smoothed_series.plot()
<AxesSubplot:title={'center':'Transit activity in New York City'}, xlabel='Date', ylabel='Transit activity'>
# Fit an ETS model to the smoothed series and show the fit summary.
ets_model = statsmodels.tsa.exponential_smoothing.ets.ETSModel(smoothed_series)
fit_results = ets_model.fit()
display(fit_results.summary())

# Predict from position -1 (presumably the last in-sample observation --
# confirm against the statsmodels docs) through the question's end date.
prediction_results = fit_results.get_prediction(start=-1, end=end)
# alpha=0.5 gives the interval that is reported as the 50% prediction
# interval when the forecast is displayed.
predictions = prediction_results.summary_frame(alpha=0.5)
Dep. Variable: | New York City | No. Observations: | 261 |
---|---|---|---|
Model: | ETS(ANN) | Log Likelihood | -471.150 |
Date: | Tue, 06 Oct 2020 | AIC | 948.300 |
Time: | 17:56:01 | BIC | 958.994 |
Sample: | 01-19-2020 | HQIC | 952.599 |
- 10-05-2020 | Scale | 2.165 | |
Covariance Type: | approx |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
smoothing_level | 0.9999 | 0.060 | 16.749 | 0.000 | 0.883 | 1.117 |
initial_level | 98.0753 | 1.475 | 66.491 | 0.000 | 95.184 | 100.966 |
Ljung-Box (Q): | 405.52 | Jarque-Bera (JB): | 2979.33 |
---|---|---|---|
Prob(Q): | 0.00 | Prob(JB): | 0.00 |
Heteroskedasticity (H): | 0.07 | Skew: | -3.69 |
Prob(H) (two-sided): | 0.00 | Kurtosis: | 17.82 |
# Forecast row for the question's end date.
prediction = predictions.loc[end]

# Fixed-point formatting is used deliberately: a bare precision such as
# ":.3" (no presentation type) switches to scientific notation once the value
# reaches 100 (100.5 would render as "1e+02"), which garbles the report for
# series at or above that level.
display(Markdown(f"Mean: {prediction['mean']:.1f}"))
display(
    Markdown(
        "50% prediction interval: "
        f"{prediction['pi_lower']:.1f} to {prediction['pi_upper']:.1f}"
    )
)
# Name the index "date" and move it into a regular column so the charts
# below can refer to it by that name.
predictions.index.rename("date", inplace=True)
predictions.reset_index(level="date", inplace=True)

# Line through the forecast mean; this layer carries the chart title.
forecast_line = (
    altair.Chart(predictions)
    .mark_line()
    .encode(x="date", y="mean")
    .properties(title=f"{title} forecast")
)

# Band between the lower and upper prediction-interval bounds.
interval_band = (
    altair.Chart(predictions)
    .mark_errorband()
    .encode(
        x="date",
        y=altair.Y("pi_lower", title="Transit activity (moving average)"),
        y2="pi_upper",
        tooltip=["date", "mean", "pi_lower", "pi_upper"],
    )
)

# Bare trailing expression: the layered chart is the value of this cell
# (presumably rendered by the notebook front end).
forecast_line + interval_band
Mean: 53.3
50% prediction interval: 48.2 to 58.3