External Data Scraping: FRED API¶
In [ ]:
!pip install fredapi
Collecting fredapi Obtaining dependency information for fredapi from https://files.pythonhosted.org/packages/96/d4/f81fa9f67775a6a4b9e2cd8487239d61a9698cb2b9c02a5a2897d310f7a4/fredapi-0.5.1-py3-none-any.whl.metadata Downloading fredapi-0.5.1-py3-none-any.whl.metadata (5.0 kB) Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from fredapi) (1.4.4) Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (2022.1) Requirement already satisfied: numpy>=1.21.0 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (1.26.0) Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->fredapi) (1.16.0) Downloading fredapi-0.5.1-py3-none-any.whl (11 kB) Installing collected packages: fredapi Successfully installed fredapi-0.5.1 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv [notice] A new release of pip is available: 23.2.1 -> 23.3.1 [notice] To update, run: pip install --upgrade pip
In [ ]:
from fredapi import Fred
import pandas as pd
In [ ]:
fred = Fred(api_key='90c777432f754129f7ca266e2ea35aab')
In [ ]:
gdp = fred.get_series_all_releases('GDP')
gdp = gdp[gdp['date'] > pd.Timestamp('2020-12-31')]
gdp.head(40)
Out[ ]:
realtime_start | date | value | |
---|---|---|---|
3120 | 2021-04-29 | 2021-01-01 | 22048.894 |
3121 | 2021-05-27 | 2021-01-01 | 22061.025 |
3122 | 2021-06-24 | 2021-01-01 | 22061.503 |
3123 | 2021-07-29 | 2021-01-01 | 22038.226 |
3124 | 2022-09-29 | 2021-01-01 | 22313.85 |
3125 | 2023-09-28 | 2021-01-01 | 22600.185 |
3126 | 2021-07-29 | 2021-04-01 | 22722.581 |
3127 | 2021-08-26 | 2021-04-01 | 22731.369 |
3128 | 2021-09-30 | 2021-04-01 | 22740.959 |
3129 | 2022-09-29 | 2021-04-01 | 23046.934 |
3130 | 2023-09-28 | 2021-04-01 | 23292.362 |
3131 | 2021-10-28 | 2021-07-01 | 23173.496 |
3132 | 2021-11-24 | 2021-07-01 | 23187.042 |
3133 | 2021-12-22 | 2021-07-01 | 23202.344 |
3134 | 2022-09-29 | 2021-07-01 | 23550.42 |
3135 | 2023-09-28 | 2021-07-01 | 23828.973 |
3136 | 2022-01-27 | 2021-10-01 | 23992.355 |
3137 | 2022-02-24 | 2021-10-01 | 24008.472 |
3138 | 2022-03-30 | 2021-10-01 | 24002.815 |
3139 | 2022-09-29 | 2021-10-01 | 24349.121 |
3140 | 2023-09-28 | 2021-10-01 | 24654.603 |
3141 | 2022-04-28 | 2022-01-01 | 24382.683 |
3142 | 2022-05-26 | 2022-01-01 | 24384.289 |
3143 | 2022-06-29 | 2022-01-01 | 24386.734 |
3144 | 2022-09-29 | 2022-01-01 | 24740.48 |
3145 | 2023-09-28 | 2022-01-01 | 25029.116 |
3146 | 2022-07-28 | 2022-04-01 | 24851.809 |
3147 | 2022-08-25 | 2022-04-01 | 24882.878 |
3148 | 2022-09-29 | 2022-04-01 | 25248.476 |
3149 | 2023-09-28 | 2022-04-01 | 25544.273 |
3150 | 2022-10-27 | 2022-07-01 | 25663.289 |
3151 | 2022-11-30 | 2022-07-01 | 25698.96 |
3152 | 2022-12-22 | 2022-07-01 | 25723.941 |
3153 | 2023-09-28 | 2022-07-01 | 25994.639 |
3154 | 2023-01-26 | 2022-10-01 | 26132.458 |
3155 | 2023-02-23 | 2022-10-01 | 26144.956 |
3156 | 2023-03-30 | 2022-10-01 | 26137.992 |
3157 | 2023-09-28 | 2022-10-01 | 26408.405 |
3158 | 2023-04-27 | 2023-01-01 | 26465.865 |
3159 | 2023-05-25 | 2023-01-01 | 26486.287 |
In [ ]:
# extract year and month from the date
gdp['year'] = gdp['date'].dt.year
gdp['month'] = gdp['date'].dt.month
# group by year and month, and calculate the mean of the values
gdp_monthly = gdp.groupby(['year', 'month'])['value'].mean().reset_index()
# create a 'year-month' column for the aggregated data
gdp_monthly['date'] = pd.to_datetime(gdp_monthly[['year', 'month']].assign(DAY=1))
# convert 'year_month' to a string without the day
gdp_monthly['date'] = gdp_monthly['date'].dt.strftime('%Y-%m')
gdp_monthly = gdp_monthly.drop(['year', 'month'], axis=1)
print(gdp_monthly)
value date 0 22187.280500 2021-01 1 22906.841000 2021-04 2 23388.455000 2021-07 3 24201.473200 2021-10 4 24584.660400 2022-01 5 25131.859000 2022-04 6 25770.207250 2022-07 7 26205.952750 2022-10 8 26573.881750 2023-01 9 26898.856667 2023-04 10 27623.543000 2023-07
In [ ]:
fed_funds = fred.get_series_all_releases('FEDFUNDS')
fed_funds = fed_funds[fed_funds['realtime_start'] > pd.Timestamp('2020-12-31')]
fed_funds.tail()
Out[ ]:
realtime_start | date | value | |
---|---|---|---|
881 | 2023-07-03 | 2023-06-01 | 5.08 |
882 | 2023-08-01 | 2023-07-01 | 5.12 |
883 | 2023-09-01 | 2023-08-01 | 5.33 |
884 | 2023-10-02 | 2023-09-01 | 5.33 |
885 | 2023-11-01 | 2023-10-01 | 5.33 |
In [ ]:
# extract year and month from the date
fed_funds['year'] = fed_funds['realtime_start'].dt.year
fed_funds['month'] = fed_funds['realtime_start'].dt.month
# group by year and month, and calculate the mean of the values
fed_funds_monthly = fed_funds.groupby(['year', 'month'])['value'].mean().reset_index()
# create a 'year-month' column for the aggregated data
fed_funds_monthly['date'] = pd.to_datetime(fed_funds_monthly[['year', 'month']].assign(DAY=1))
# convert 'year_month' to a string without the day
fed_funds_monthly['date'] = fed_funds_monthly['date'].dt.strftime('%Y-%m')
fed_funds_monthly = fed_funds_monthly.drop(['year', 'month'], axis=1)
print(fed_funds_monthly)
value date 0 0.09 2021-01 1 0.09 2021-02 2 0.08 2021-03 3 0.07 2021-04 4 0.07 2021-05 5 0.06 2021-06 6 0.08 2021-07 7 0.10 2021-08 8 0.09 2021-09 9 0.08 2021-10 10 0.08 2021-11 11 0.08 2021-12 12 0.08 2022-01 13 0.08 2022-02 14 0.08 2022-03 15 0.20 2022-04 16 0.33 2022-05 17 0.77 2022-06 18 1.21 2022-07 19 1.68 2022-08 20 2.33 2022-09 21 2.56 2022-10 22 3.08 2022-11 23 3.78 2022-12 24 4.10 2023-01 25 4.33 2023-02 26 4.57 2023-03 27 4.65 2023-04 28 4.83 2023-05 29 5.06 2023-06 30 5.08 2023-07 31 5.12 2023-08 32 5.33 2023-09 33 5.33 2023-10 34 5.33 2023-11
In [ ]:
unemployment = fred.get_series_all_releases('UNRATE')
unemployment = unemployment[unemployment['date'] > pd.Timestamp('2020-12-31')]
unemployment.shape
unemployment.head()
Out[ ]:
realtime_start | date | value | |
---|---|---|---|
2085 | 2021-02-05 | 2021-01-01 | 6.3 |
2086 | 2022-01-07 | 2021-01-01 | 6.4 |
2087 | 2023-01-06 | 2021-01-01 | 6.3 |
2088 | 2021-03-05 | 2021-02-01 | 6.2 |
2089 | 2021-04-02 | 2021-03-01 | 6.0 |
In [ ]:
# extract year and month from the date
unemployment['year'] = unemployment['date'].dt.year
unemployment['month'] = unemployment['date'].dt.month
# group by year and month, and calculate the mean of the values
unemployment_monthly = unemployment.groupby(['year', 'month'])['value'].mean().reset_index()
# create a 'year-month' column for the aggregated data
unemployment_monthly['date'] = pd.to_datetime(unemployment_monthly[['year', 'month']].assign(DAY=1))
# If you need to drop the day, simply convert 'year_month' to a string without the day
unemployment_monthly['date'] = unemployment_monthly['date'].dt.strftime('%Y-%m')
unemployment_monthly = unemployment_monthly.drop(['year', 'month'], axis=1)
# Show the resulting DataFrame
print(unemployment_monthly)
value date 0 6.333333 2021-01 1 6.200000 2021-02 2 6.050000 2021-03 3 6.066667 2021-04 4 5.800000 2021-05 5 5.900000 2021-06 6 5.400000 2021-07 7 5.200000 2021-08 8 4.766667 2021-09 9 4.550000 2021-10 10 4.200000 2021-11 11 3.900000 2021-12 12 4.000000 2022-01 13 3.800000 2022-02 14 3.600000 2022-03 15 3.600000 2022-04 16 3.600000 2022-05 17 3.600000 2022-06 18 3.500000 2022-07 19 3.700000 2022-08 20 3.500000 2022-09 21 3.700000 2022-10 22 3.650000 2022-11 23 3.500000 2022-12 24 3.400000 2023-01 25 3.600000 2023-02 26 3.500000 2023-03 27 3.400000 2023-04 28 3.700000 2023-05 29 3.600000 2023-06 30 3.500000 2023-07 31 3.800000 2023-08 32 3.800000 2023-09 33 3.900000 2023-10
In [ ]:
real_gdp = fred.get_series_all_releases('GDPC1')
In [ ]:
real_gdp = real_gdp.sort_values(by='date')
In [ ]:
real_gdp.head(10)
Out[ ]:
realtime_start | date | value | year | month | |
---|---|---|---|---|---|
4282 | 2021-04-29 | 2021-01-01 | 19087.568 | 2021 | 1 |
4283 | 2021-05-27 | 2021-01-01 | 19088.064 | 2021 | 1 |
4284 | 2021-06-24 | 2021-01-01 | 19086.375 | 2021 | 1 |
4285 | 2021-07-29 | 2021-01-01 | 19055.655 | 2021 | 1 |
4286 | 2022-09-29 | 2021-01-01 | 19216.224 | 2021 | 1 |
4287 | 2023-09-28 | 2021-01-01 | 20990.541 | 2021 | 1 |
4288 | 2021-07-29 | 2021-04-01 | 19358.176 | 2021 | 4 |
4289 | 2021-08-26 | 2021-04-01 | 19360.6 | 2021 | 4 |
4290 | 2021-09-30 | 2021-04-01 | 19368.31 | 2021 | 4 |
4291 | 2022-09-29 | 2021-04-01 | 19544.248 | 2021 | 4 |
In [ ]:
real_gdp = real_gdp[real_gdp['date'] > pd.Timestamp('2020-12-31')]
# extract year and month from the date
real_gdp['year'] = real_gdp['date'].dt.year
real_gdp['month'] = real_gdp['date'].dt.month
# group by year and month, and calculate the mean of the values
real_gdp_monthly = real_gdp.groupby(['year', 'month'])['value'].min().reset_index()
# create a 'year-month' column for the aggregated data
real_gdp_monthly['date'] = pd.to_datetime(real_gdp_monthly[['year', 'month']].assign(DAY=1))
# convert 'year_month' to a string without the day
real_gdp_monthly['date'] = real_gdp_monthly['date'].dt.strftime('%Y-%m')
real_gdp_monthly = real_gdp_monthly.drop(['year', 'month'], axis=1)
print(real_gdp_monthly)
value date 0 19055.655 2021-01 1 19358.176 2021-04 2 19465.195 2021-07 3 19805.962 2021-10 4 19727.918 2022-01 5 19681.682 2022-04 6 20021.721 2022-07 7 20182.491 2022-10 8 20235.878 2023-01 9 20386.467 2023-04 10 22491.567 2023-07
In [ ]:
real_gdp.head()
Out[ ]:
realtime_start | date | value | |
---|---|---|---|
0 | 1992-12-22 | 1947-01-01 | 1239.5 |
1 | 1996-01-19 | 1947-01-01 | NaT |
2 | 1997-05-07 | 1947-01-01 | 1402.5 |
3 | 1999-10-28 | 1947-01-01 | NaT |
4 | 2000-04-27 | 1947-01-01 | 1481.7 |
In [ ]:
unemployment_monthly.to_csv("../../data/csv/unemployment.csv")
In [ ]:
fed_funds_monthly.to_csv("../../data/csv/fed_funds.csv")
In [ ]:
gdp_monthly.to_csv("../../data/csv/gdp.csv")
In [ ]:
real_gdp_monthly.to_csv("../../data/csv/real_gdp.csv")
In [ ]: