External Data Scraping: FRED API¶

In [ ]:
!pip install fredapi
Collecting fredapi
  Obtaining dependency information for fredapi from https://files.pythonhosted.org/packages/96/d4/f81fa9f67775a6a4b9e2cd8487239d61a9698cb2b9c02a5a2897d310f7a4/fredapi-0.5.1-py3-none-any.whl.metadata
  Downloading fredapi-0.5.1-py3-none-any.whl.metadata (5.0 kB)
Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from fredapi) (1.4.4)
Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (2022.1)
Requirement already satisfied: numpy>=1.21.0 in /opt/conda/lib/python3.10/site-packages (from pandas->fredapi) (1.26.0)
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->fredapi) (1.16.0)
Downloading fredapi-0.5.1-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv

[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: pip install --upgrade pip
In [ ]:
from fredapi import Fred
import pandas as pd
In [ ]:
fred = Fred(api_key='90c777432f754129f7ca266e2ea35aab')
In [ ]:
gdp = fred.get_series_all_releases('GDP')
gdp = gdp[gdp['date'] > pd.Timestamp('2020-12-31')]
gdp.head(40)
Out[ ]:
realtime_start date value
3120 2021-04-29 2021-01-01 22048.894
3121 2021-05-27 2021-01-01 22061.025
3122 2021-06-24 2021-01-01 22061.503
3123 2021-07-29 2021-01-01 22038.226
3124 2022-09-29 2021-01-01 22313.85
3125 2023-09-28 2021-01-01 22600.185
3126 2021-07-29 2021-04-01 22722.581
3127 2021-08-26 2021-04-01 22731.369
3128 2021-09-30 2021-04-01 22740.959
3129 2022-09-29 2021-04-01 23046.934
3130 2023-09-28 2021-04-01 23292.362
3131 2021-10-28 2021-07-01 23173.496
3132 2021-11-24 2021-07-01 23187.042
3133 2021-12-22 2021-07-01 23202.344
3134 2022-09-29 2021-07-01 23550.42
3135 2023-09-28 2021-07-01 23828.973
3136 2022-01-27 2021-10-01 23992.355
3137 2022-02-24 2021-10-01 24008.472
3138 2022-03-30 2021-10-01 24002.815
3139 2022-09-29 2021-10-01 24349.121
3140 2023-09-28 2021-10-01 24654.603
3141 2022-04-28 2022-01-01 24382.683
3142 2022-05-26 2022-01-01 24384.289
3143 2022-06-29 2022-01-01 24386.734
3144 2022-09-29 2022-01-01 24740.48
3145 2023-09-28 2022-01-01 25029.116
3146 2022-07-28 2022-04-01 24851.809
3147 2022-08-25 2022-04-01 24882.878
3148 2022-09-29 2022-04-01 25248.476
3149 2023-09-28 2022-04-01 25544.273
3150 2022-10-27 2022-07-01 25663.289
3151 2022-11-30 2022-07-01 25698.96
3152 2022-12-22 2022-07-01 25723.941
3153 2023-09-28 2022-07-01 25994.639
3154 2023-01-26 2022-10-01 26132.458
3155 2023-02-23 2022-10-01 26144.956
3156 2023-03-30 2022-10-01 26137.992
3157 2023-09-28 2022-10-01 26408.405
3158 2023-04-27 2023-01-01 26465.865
3159 2023-05-25 2023-01-01 26486.287
In [ ]:
# extract year and month from the date
gdp['year'] = gdp['date'].dt.year
gdp['month'] = gdp['date'].dt.month

# group by year and month, and calculate the mean of the values
gdp_monthly = gdp.groupby(['year', 'month'])['value'].mean().reset_index()

# create a 'year-month' column for the aggregated data
gdp_monthly['date'] = pd.to_datetime(gdp_monthly[['year', 'month']].assign(DAY=1))

# convert 'year_month' to a string without the day
gdp_monthly['date'] = gdp_monthly['date'].dt.strftime('%Y-%m')

gdp_monthly = gdp_monthly.drop(['year', 'month'], axis=1)

print(gdp_monthly)
           value     date
0   22187.280500  2021-01
1   22906.841000  2021-04
2   23388.455000  2021-07
3   24201.473200  2021-10
4   24584.660400  2022-01
5   25131.859000  2022-04
6   25770.207250  2022-07
7   26205.952750  2022-10
8   26573.881750  2023-01
9   26898.856667  2023-04
10  27623.543000  2023-07
In [ ]:
fed_funds = fred.get_series_all_releases('FEDFUNDS')
fed_funds = fed_funds[fed_funds['realtime_start'] > pd.Timestamp('2020-12-31')]
fed_funds.tail()
Out[ ]:
realtime_start date value
881 2023-07-03 2023-06-01 5.08
882 2023-08-01 2023-07-01 5.12
883 2023-09-01 2023-08-01 5.33
884 2023-10-02 2023-09-01 5.33
885 2023-11-01 2023-10-01 5.33
In [ ]:
# extract year and month from the date
fed_funds['year'] = fed_funds['realtime_start'].dt.year
fed_funds['month'] = fed_funds['realtime_start'].dt.month

# group by year and month, and calculate the mean of the values
fed_funds_monthly = fed_funds.groupby(['year', 'month'])['value'].mean().reset_index()

# create a 'year-month' column for the aggregated data
fed_funds_monthly['date'] = pd.to_datetime(fed_funds_monthly[['year', 'month']].assign(DAY=1))

#  convert 'year_month' to a string without the day
fed_funds_monthly['date'] = fed_funds_monthly['date'].dt.strftime('%Y-%m')

fed_funds_monthly = fed_funds_monthly.drop(['year', 'month'], axis=1)

print(fed_funds_monthly)
    value     date
0    0.09  2021-01
1    0.09  2021-02
2    0.08  2021-03
3    0.07  2021-04
4    0.07  2021-05
5    0.06  2021-06
6    0.08  2021-07
7    0.10  2021-08
8    0.09  2021-09
9    0.08  2021-10
10   0.08  2021-11
11   0.08  2021-12
12   0.08  2022-01
13   0.08  2022-02
14   0.08  2022-03
15   0.20  2022-04
16   0.33  2022-05
17   0.77  2022-06
18   1.21  2022-07
19   1.68  2022-08
20   2.33  2022-09
21   2.56  2022-10
22   3.08  2022-11
23   3.78  2022-12
24   4.10  2023-01
25   4.33  2023-02
26   4.57  2023-03
27   4.65  2023-04
28   4.83  2023-05
29   5.06  2023-06
30   5.08  2023-07
31   5.12  2023-08
32   5.33  2023-09
33   5.33  2023-10
34   5.33  2023-11
In [ ]:
unemployment = fred.get_series_all_releases('UNRATE')
unemployment = unemployment[unemployment['date'] > pd.Timestamp('2020-12-31')]
unemployment.shape
unemployment.head()
Out[ ]:
realtime_start date value
2085 2021-02-05 2021-01-01 6.3
2086 2022-01-07 2021-01-01 6.4
2087 2023-01-06 2021-01-01 6.3
2088 2021-03-05 2021-02-01 6.2
2089 2021-04-02 2021-03-01 6.0
In [ ]:
# extract year and month from the date
unemployment['year'] = unemployment['date'].dt.year
unemployment['month'] = unemployment['date'].dt.month

# group by year and month, and calculate the mean of the values
unemployment_monthly = unemployment.groupby(['year', 'month'])['value'].mean().reset_index()

# create a 'year-month' column for the aggregated data
unemployment_monthly['date'] = pd.to_datetime(unemployment_monthly[['year', 'month']].assign(DAY=1))

# If you need to drop the day, simply convert 'year_month' to a string without the day
unemployment_monthly['date'] = unemployment_monthly['date'].dt.strftime('%Y-%m')

unemployment_monthly = unemployment_monthly.drop(['year', 'month'], axis=1)

# Show the resulting DataFrame
print(unemployment_monthly)
       value     date
0   6.333333  2021-01
1   6.200000  2021-02
2   6.050000  2021-03
3   6.066667  2021-04
4   5.800000  2021-05
5   5.900000  2021-06
6   5.400000  2021-07
7   5.200000  2021-08
8   4.766667  2021-09
9   4.550000  2021-10
10  4.200000  2021-11
11  3.900000  2021-12
12  4.000000  2022-01
13  3.800000  2022-02
14  3.600000  2022-03
15  3.600000  2022-04
16  3.600000  2022-05
17  3.600000  2022-06
18  3.500000  2022-07
19  3.700000  2022-08
20  3.500000  2022-09
21  3.700000  2022-10
22  3.650000  2022-11
23  3.500000  2022-12
24  3.400000  2023-01
25  3.600000  2023-02
26  3.500000  2023-03
27  3.400000  2023-04
28  3.700000  2023-05
29  3.600000  2023-06
30  3.500000  2023-07
31  3.800000  2023-08
32  3.800000  2023-09
33  3.900000  2023-10
In [ ]:
real_gdp = fred.get_series_all_releases('GDPC1')
In [ ]:
real_gdp = real_gdp.sort_values(by='date')
In [ ]:
real_gdp.head(10)
Out[ ]:
realtime_start date value year month
4282 2021-04-29 2021-01-01 19087.568 2021 1
4283 2021-05-27 2021-01-01 19088.064 2021 1
4284 2021-06-24 2021-01-01 19086.375 2021 1
4285 2021-07-29 2021-01-01 19055.655 2021 1
4286 2022-09-29 2021-01-01 19216.224 2021 1
4287 2023-09-28 2021-01-01 20990.541 2021 1
4288 2021-07-29 2021-04-01 19358.176 2021 4
4289 2021-08-26 2021-04-01 19360.6 2021 4
4290 2021-09-30 2021-04-01 19368.31 2021 4
4291 2022-09-29 2021-04-01 19544.248 2021 4
In [ ]:
real_gdp = real_gdp[real_gdp['date'] > pd.Timestamp('2020-12-31')]
# extract year and month from the date
real_gdp['year'] = real_gdp['date'].dt.year
real_gdp['month'] = real_gdp['date'].dt.month

# group by year and month, and calculate the mean of the values
real_gdp_monthly = real_gdp.groupby(['year', 'month'])['value'].min().reset_index()

# create a 'year-month' column for the aggregated data
real_gdp_monthly['date'] = pd.to_datetime(real_gdp_monthly[['year', 'month']].assign(DAY=1))

# convert 'year_month' to a string without the day
real_gdp_monthly['date'] = real_gdp_monthly['date'].dt.strftime('%Y-%m')

real_gdp_monthly = real_gdp_monthly.drop(['year', 'month'], axis=1)

print(real_gdp_monthly)
        value     date
0   19055.655  2021-01
1   19358.176  2021-04
2   19465.195  2021-07
3   19805.962  2021-10
4   19727.918  2022-01
5   19681.682  2022-04
6   20021.721  2022-07
7   20182.491  2022-10
8   20235.878  2023-01
9   20386.467  2023-04
10  22491.567  2023-07
In [ ]:
real_gdp.head()
Out[ ]:
realtime_start date value
0 1992-12-22 1947-01-01 1239.5
1 1996-01-19 1947-01-01 NaT
2 1997-05-07 1947-01-01 1402.5
3 1999-10-28 1947-01-01 NaT
4 2000-04-27 1947-01-01 1481.7
In [ ]:
unemployment_monthly.to_csv("../../data/csv/unemployment.csv")
In [ ]:
fed_funds_monthly.to_csv("../../data/csv/fed_funds.csv")
In [ ]:
gdp_monthly.to_csv("../../data/csv/gdp.csv")
In [ ]:
real_gdp_monthly.to_csv("../../data/csv/real_gdp.csv")
In [ ]: