In [ ]:
pip install pandas_datareader
Collecting pandas_datareader Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB) ---------------------------------------- 0.0/109.5 kB ? eta -:--:-- --- ------------------------------------ 10.2/109.5 kB ? eta -:--:-- ---------- -------------------------- 30.7/109.5 kB 262.6 kB/s eta 0:00:01 ------------------------ ------------ 71.7/109.5 kB 435.7 kB/s eta 0:00:01 ------------------------------------ 109.5/109.5 kB 576.9 kB/s eta 0:00:00 Collecting lxml (from pandas_datareader) Obtaining dependency information for lxml from https://files.pythonhosted.org/packages/80/2e/49751104148b03ad880aaf381cc24d67b7d8f401f7d074ad7db4f6d95597/lxml-4.9.3-cp39-cp39-win_amd64.whl.metadata Downloading lxml-4.9.3-cp39-cp39-win_amd64.whl.metadata (3.9 kB) Requirement already satisfied: pandas>=0.23 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from pandas_datareader) (2.1.1) Collecting requests>=2.19.0 (from pandas_datareader) Obtaining dependency information for requests>=2.19.0 from https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl.metadata Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB) Requirement already satisfied: numpy>=1.22.4 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from pandas>=0.23->pandas_datareader) (1.26.0) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from pandas>=0.23->pandas_datareader) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from pandas>=0.23->pandas_datareader) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from pandas>=0.23->pandas_datareader) (2023.3) Collecting charset-normalizer<4,>=2 (from requests>=2.19.0->pandas_datareader) Obtaining dependency information for 
charset-normalizer<4,>=2 from https://files.pythonhosted.org/packages/b3/c5/edc62435a27b017a5826d215f25ef3ab02b8b68d37b6e64cf5b602f1b55d/charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl.metadata Downloading charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl.metadata (33 kB) Collecting idna<4,>=2.5 (from requests>=2.19.0->pandas_datareader) Using cached idna-3.4-py3-none-any.whl (61 kB) Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->pandas_datareader) Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/26/40/9957270221b6d3e9a3b92fdfba80dd5c9661ff45a664b47edd5d00f707f5/urllib3-2.0.6-py3-none-any.whl.metadata Downloading urllib3-2.0.6-py3-none-any.whl.metadata (6.6 kB) Collecting certifi>=2017.4.17 (from requests>=2.19.0->pandas_datareader) Obtaining dependency information for certifi>=2017.4.17 from https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl.metadata Using cached certifi-2023.7.22-py3-none-any.whl.metadata (2.2 kB) Requirement already satisfied: six>=1.5 in c:\users\dengzl\.conda\envs\data_analysis\lib\site-packages (from python-dateutil>=2.8.2->pandas>=0.23->pandas_datareader) (1.16.0) Using cached requests-2.31.0-py3-none-any.whl (62 kB) Using cached lxml-4.9.3-cp39-cp39-win_amd64.whl (3.9 MB) Using cached certifi-2023.7.22-py3-none-any.whl (158 kB) Downloading charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl (98 kB) ---------------------------------------- 0.0/98.2 kB ? eta -:--:-- ---------------------------------------- 98.2/98.2 kB 2.7 MB/s eta 0:00:00 Downloading urllib3-2.0.6-py3-none-any.whl (123 kB) ---------------------------------------- 0.0/123.8 kB ? 
eta -:--:-- ---------------------------------------- 123.8/123.8 kB 3.7 MB/s eta 0:00:00 Installing collected packages: urllib3, lxml, idna, charset-normalizer, certifi, requests, pandas_datareader Successfully installed certifi-2023.7.22 charset-normalizer-3.3.0 idna-3.4 lxml-4.9.3 pandas_datareader-0.10.0 requests-2.31.0 urllib3-2.0.6 Note: you may need to restart the kernel to use updated packages.
In [ ]:
import pandas as pd
import pandas_datareader as pdr
In [ ]:
# GS10 is FRED's monthly 10-year U.S. Treasury constant-maturity series: a yield
# quoted in percent, not an exchange rate. With no start/end given, the reader
# returns roughly the most recent five years of observations.
data=pdr.get_data_fred('GS10')
data
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-11-01 | 3.12 |
2018-12-01 | 2.83 |
2019-01-01 | 2.71 |
2019-02-01 | 2.68 |
2019-03-01 | 2.57 |
2019-04-01 | 2.53 |
2019-05-01 | 2.40 |
2019-06-01 | 2.07 |
2019-07-01 | 2.06 |
2019-08-01 | 1.63 |
2019-09-01 | 1.70 |
2019-10-01 | 1.71 |
2019-11-01 | 1.81 |
2019-12-01 | 1.86 |
2020-01-01 | 1.76 |
2020-02-01 | 1.50 |
2020-03-01 | 0.87 |
2020-04-01 | 0.66 |
2020-05-01 | 0.67 |
2020-06-01 | 0.73 |
2020-07-01 | 0.62 |
2020-08-01 | 0.65 |
2020-09-01 | 0.68 |
2020-10-01 | 0.79 |
2020-11-01 | 0.87 |
2020-12-01 | 0.93 |
2021-01-01 | 1.08 |
2021-02-01 | 1.26 |
2021-03-01 | 1.61 |
2021-04-01 | 1.64 |
2021-05-01 | 1.62 |
2021-06-01 | 1.52 |
2021-07-01 | 1.32 |
2021-08-01 | 1.28 |
2021-09-01 | 1.37 |
2021-10-01 | 1.58 |
2021-11-01 | 1.56 |
2021-12-01 | 1.47 |
2022-01-01 | 1.76 |
2022-02-01 | 1.93 |
2022-03-01 | 2.13 |
2022-04-01 | 2.75 |
2022-05-01 | 2.90 |
2022-06-01 | 3.14 |
2022-07-01 | 2.90 |
2022-08-01 | 2.90 |
2022-09-01 | 3.52 |
2022-10-01 | 3.98 |
2022-11-01 | 3.89 |
2022-12-01 | 3.62 |
2023-01-01 | 3.53 |
2023-02-01 | 3.75 |
2023-03-01 | 3.66 |
2023-04-01 | 3.46 |
2023-05-01 | 3.57 |
2023-06-01 | 3.75 |
2023-07-01 | 3.90 |
2023-08-01 | 4.17 |
2023-09-01 | 4.38 |
In [ ]:
# Check what kind of object the FRED reader handed back (a pandas DataFrame).
type(data)
Out[ ]:
pandas.core.frame.DataFrame
In [ ]:
# Yearly mean: group the monthly observations into calendar-year bins
# (each bin is labelled with its year-end date) and average within each bin.
# The former `type(data.index)` line was dropped: it was not the last expression
# of the cell, so its value was never displayed and it had no effect.
data.resample(rule='Y').mean()
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 2.975000 |
2019-12-31 | 2.144167 |
2020-12-31 | 0.894167 |
2021-12-31 | 1.442500 |
2022-12-31 | 2.951667 |
2023-12-31 | 3.796667 |
In [ ]:
# Count: how many monthly observations fall into each calendar-year bin.
# The first and last years are partial, so their counts are below 12.
count = data.resample('Y').count()
count
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 2 |
2019-12-31 | 12 |
2020-12-31 | 12 |
2021-12-31 | 12 |
2022-12-31 | 12 |
2023-12-31 | 9 |
In [ ]:
# Sum: total of the monthly values inside each calendar-year bin.
sum1 = data.resample('Y').sum()
sum1
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 5.95 |
2019-12-31 | 25.73 |
2020-12-31 | 10.73 |
2021-12-31 | 17.31 |
2022-12-31 | 35.42 |
2023-12-31 | 34.17 |
In [ ]:
# Cross-check: yearly sum divided by yearly count reproduces the yearly mean.
sum1.div(count)
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 2.975000 |
2019-12-31 | 2.144167 |
2020-12-31 | 0.894167 |
2021-12-31 | 1.442500 |
2022-12-31 | 2.951667 |
2023-12-31 | 3.796667 |
In [ ]:
# Standard deviation: how much the monthly values vary within each calendar year.
data.resample('Y').std()
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 0.205061 |
2019-12-31 | 0.410996 |
2020-12-31 | 0.362152 |
2021-12-31 | 0.178230 |
2022-12-31 | 0.734276 |
2023-12-31 | 0.306023 |
In [ ]:
# Maximum: reduce the yearly standard deviations to the single largest value
# per column, i.e. the spread of the most volatile year.
data.resample('Y').std().max()
Out[ ]:
GS10 0.734276 dtype: float64
In [ ]:
# Product: multiply together the monthly values inside each calendar-year bin.
data.resample('Y').prod()
Out[ ]:
GS10 | |
---|---|
DATE | |
2018-12-31 | 8.829600 |
2019-12-31 | 7709.579518 |
2020-12-31 | 0.129870 |
2021-12-31 | 74.203795 |
2022-12-31 | 300600.586272 |
2023-12-31 | 159859.118429 |
In [ ]:
# Load a second monthly FRED series, GS5 (the 5-year counterpart of GS10),
# using the reader's same default date window.
data2 = pdr.get_data_fred('GS5')
data2
Out[ ]:
GS5 | |
---|---|
DATE | |
2018-11-01 | 2.95 |
2018-12-01 | 2.68 |
2019-01-01 | 2.54 |
2019-02-01 | 2.49 |
2019-03-01 | 2.37 |
2019-04-01 | 2.33 |
2019-05-01 | 2.19 |
2019-06-01 | 1.83 |
2019-07-01 | 1.83 |
2019-08-01 | 1.49 |
2019-09-01 | 1.57 |
2019-10-01 | 1.53 |
2019-11-01 | 1.64 |
2019-12-01 | 1.68 |
2020-01-01 | 1.56 |
2020-02-01 | 1.32 |
2020-03-01 | 0.59 |
2020-04-01 | 0.39 |
2020-05-01 | 0.34 |
2020-06-01 | 0.34 |
2020-07-01 | 0.28 |
2020-08-01 | 0.27 |
2020-09-01 | 0.27 |
2020-10-01 | 0.34 |
2020-11-01 | 0.39 |
2020-12-01 | 0.39 |
2021-01-01 | 0.45 |
2021-02-01 | 0.54 |
2021-03-01 | 0.82 |
2021-04-01 | 0.86 |
2021-05-01 | 0.82 |
2021-06-01 | 0.84 |
2021-07-01 | 0.76 |
2021-08-01 | 0.77 |
2021-09-01 | 0.86 |
2021-10-01 | 1.11 |
2021-11-01 | 1.20 |
2021-12-01 | 1.23 |
2022-01-01 | 1.54 |
2022-02-01 | 1.81 |
2022-03-01 | 2.11 |
2022-04-01 | 2.78 |
2022-05-01 | 2.87 |
2022-06-01 | 3.19 |
2022-07-01 | 2.96 |
2022-08-01 | 3.03 |
2022-09-01 | 3.70 |
2022-10-01 | 4.18 |
2022-11-01 | 4.06 |
2022-12-01 | 3.76 |
2023-01-01 | 3.64 |
2023-02-01 | 3.94 |
2023-03-01 | 3.82 |
2023-04-01 | 3.54 |
2023-05-01 | 3.59 |
2023-06-01 | 3.95 |
2023-07-01 | 4.14 |
2023-08-01 | 4.31 |
2023-09-01 | 4.49 |
In [ ]:
# Combine the two samples: add the 5-year series as a new column of `data`.
# The column is selected explicitly so the assignment is Series-to-column and
# aligned on the shared DATE index. Note that this mutates `data` in place.
data['GS5'] = data2['GS5']
In [ ]:
# Display the combined frame to confirm GS10 and GS5 now sit side by side per date.
data
Out[ ]:
GS10 | GS5 | |
---|---|---|
DATE | ||
2018-11-01 | 3.12 | 2.95 |
2018-12-01 | 2.83 | 2.68 |
2019-01-01 | 2.71 | 2.54 |
2019-02-01 | 2.68 | 2.49 |
2019-03-01 | 2.57 | 2.37 |
2019-04-01 | 2.53 | 2.33 |
2019-05-01 | 2.40 | 2.19 |
2019-06-01 | 2.07 | 1.83 |
2019-07-01 | 2.06 | 1.83 |
2019-08-01 | 1.63 | 1.49 |
2019-09-01 | 1.70 | 1.57 |
2019-10-01 | 1.71 | 1.53 |
2019-11-01 | 1.81 | 1.64 |
2019-12-01 | 1.86 | 1.68 |
2020-01-01 | 1.76 | 1.56 |
2020-02-01 | 1.50 | 1.32 |
2020-03-01 | 0.87 | 0.59 |
2020-04-01 | 0.66 | 0.39 |
2020-05-01 | 0.67 | 0.34 |
2020-06-01 | 0.73 | 0.34 |
2020-07-01 | 0.62 | 0.28 |
2020-08-01 | 0.65 | 0.27 |
2020-09-01 | 0.68 | 0.27 |
2020-10-01 | 0.79 | 0.34 |
2020-11-01 | 0.87 | 0.39 |
2020-12-01 | 0.93 | 0.39 |
2021-01-01 | 1.08 | 0.45 |
2021-02-01 | 1.26 | 0.54 |
2021-03-01 | 1.61 | 0.82 |
2021-04-01 | 1.64 | 0.86 |
2021-05-01 | 1.62 | 0.82 |
2021-06-01 | 1.52 | 0.84 |
2021-07-01 | 1.32 | 0.76 |
2021-08-01 | 1.28 | 0.77 |
2021-09-01 | 1.37 | 0.86 |
2021-10-01 | 1.58 | 1.11 |
2021-11-01 | 1.56 | 1.20 |
2021-12-01 | 1.47 | 1.23 |
2022-01-01 | 1.76 | 1.54 |
2022-02-01 | 1.93 | 1.81 |
2022-03-01 | 2.13 | 2.11 |
2022-04-01 | 2.75 | 2.78 |
2022-05-01 | 2.90 | 2.87 |
2022-06-01 | 3.14 | 3.19 |
2022-07-01 | 2.90 | 2.96 |
2022-08-01 | 2.90 | 3.03 |
2022-09-01 | 3.52 | 3.70 |
2022-10-01 | 3.98 | 4.18 |
2022-11-01 | 3.89 | 4.06 |
2022-12-01 | 3.62 | 3.76 |
2023-01-01 | 3.53 | 3.64 |
2023-02-01 | 3.75 | 3.94 |
2023-03-01 | 3.66 | 3.82 |
2023-04-01 | 3.46 | 3.54 |
2023-05-01 | 3.57 | 3.59 |
2023-06-01 | 3.75 | 3.95 |
2023-07-01 | 3.90 | 4.14 |
2023-08-01 | 4.17 | 4.31 |
2023-09-01 | 4.38 | 4.49 |
In [ ]:
# Row-wise average of the 10-year and 5-year values, stored as a new 'mean' column.
# Access it as data['mean']: attribute access (data.mean) is the DataFrame method.
data['mean'] = data['GS10'].add(data['GS5']).div(2)
data
Out[ ]:
GS10 | GS5 | mean | |
---|---|---|---|
DATE | |||
2018-11-01 | 3.12 | 2.95 | 3.035 |
2018-12-01 | 2.83 | 2.68 | 2.755 |
2019-01-01 | 2.71 | 2.54 | 2.625 |
2019-02-01 | 2.68 | 2.49 | 2.585 |
2019-03-01 | 2.57 | 2.37 | 2.470 |
2019-04-01 | 2.53 | 2.33 | 2.430 |
2019-05-01 | 2.40 | 2.19 | 2.295 |
2019-06-01 | 2.07 | 1.83 | 1.950 |
2019-07-01 | 2.06 | 1.83 | 1.945 |
2019-08-01 | 1.63 | 1.49 | 1.560 |
2019-09-01 | 1.70 | 1.57 | 1.635 |
2019-10-01 | 1.71 | 1.53 | 1.620 |
2019-11-01 | 1.81 | 1.64 | 1.725 |
2019-12-01 | 1.86 | 1.68 | 1.770 |
2020-01-01 | 1.76 | 1.56 | 1.660 |
2020-02-01 | 1.50 | 1.32 | 1.410 |
2020-03-01 | 0.87 | 0.59 | 0.730 |
2020-04-01 | 0.66 | 0.39 | 0.525 |
2020-05-01 | 0.67 | 0.34 | 0.505 |
2020-06-01 | 0.73 | 0.34 | 0.535 |
2020-07-01 | 0.62 | 0.28 | 0.450 |
2020-08-01 | 0.65 | 0.27 | 0.460 |
2020-09-01 | 0.68 | 0.27 | 0.475 |
2020-10-01 | 0.79 | 0.34 | 0.565 |
2020-11-01 | 0.87 | 0.39 | 0.630 |
2020-12-01 | 0.93 | 0.39 | 0.660 |
2021-01-01 | 1.08 | 0.45 | 0.765 |
2021-02-01 | 1.26 | 0.54 | 0.900 |
2021-03-01 | 1.61 | 0.82 | 1.215 |
2021-04-01 | 1.64 | 0.86 | 1.250 |
2021-05-01 | 1.62 | 0.82 | 1.220 |
2021-06-01 | 1.52 | 0.84 | 1.180 |
2021-07-01 | 1.32 | 0.76 | 1.040 |
2021-08-01 | 1.28 | 0.77 | 1.025 |
2021-09-01 | 1.37 | 0.86 | 1.115 |
2021-10-01 | 1.58 | 1.11 | 1.345 |
2021-11-01 | 1.56 | 1.20 | 1.380 |
2021-12-01 | 1.47 | 1.23 | 1.350 |
2022-01-01 | 1.76 | 1.54 | 1.650 |
2022-02-01 | 1.93 | 1.81 | 1.870 |
2022-03-01 | 2.13 | 2.11 | 2.120 |
2022-04-01 | 2.75 | 2.78 | 2.765 |
2022-05-01 | 2.90 | 2.87 | 2.885 |
2022-06-01 | 3.14 | 3.19 | 3.165 |
2022-07-01 | 2.90 | 2.96 | 2.930 |
2022-08-01 | 2.90 | 3.03 | 2.965 |
2022-09-01 | 3.52 | 3.70 | 3.610 |
2022-10-01 | 3.98 | 4.18 | 4.080 |
2022-11-01 | 3.89 | 4.06 | 3.975 |
2022-12-01 | 3.62 | 3.76 | 3.690 |
2023-01-01 | 3.53 | 3.64 | 3.585 |
2023-02-01 | 3.75 | 3.94 | 3.845 |
2023-03-01 | 3.66 | 3.82 | 3.740 |
2023-04-01 | 3.46 | 3.54 | 3.500 |
2023-05-01 | 3.57 | 3.59 | 3.580 |
2023-06-01 | 3.75 | 3.95 | 3.850 |
2023-07-01 | 3.90 | 4.14 | 4.020 |
2023-08-01 | 4.17 | 4.31 | 4.240 |
2023-09-01 | 4.38 | 4.49 | 4.435 |
In [ ]:
# Frequency conversion: resample() by itself only returns a Resampler object,
# not a DataFrame; an aggregation (mean, sum, first, ...) must be applied to it
# to obtain values.
data.resample(rule='Y')
Out[ ]:
<pandas.core.resample.DatetimeIndexResampler object at 0x000002642B876B20>
In [ ]:
# First observation of each calendar-year bin, for every column.
# The 2018 bin is partial, so its "first" value is the November reading.
data.resample('Y').first()
Out[ ]:
GS10 | GS5 | mean | |
---|---|---|---|
DATE | |||
2018-12-31 | 3.12 | 2.95 | 3.035 |
2019-12-31 | 2.71 | 2.54 | 2.625 |
2020-12-31 | 1.76 | 1.56 | 1.660 |
2021-12-31 | 1.08 | 0.45 | 0.765 |
2022-12-31 | 1.76 | 1.54 | 1.650 |
2023-12-31 | 3.53 | 3.64 | 3.585 |