In [ ]:
import pandas as pd
import numpy as np
In [ ]:
# 创建一个dataframe:带时间戳的价格数据
dates = pd.date_range("20210101",periods=30,freq="M")
dates
Out[ ]:
DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30', '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31', '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31', '2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30', '2022-05-31', '2022-06-30', '2022-07-31', '2022-08-31', '2022-09-30', '2022-10-31', '2022-11-30', '2022-12-31', '2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30', '2023-05-31', '2023-06-30'], dtype='datetime64[ns]', freq='M')
In [ ]:
data = pd.DataFrame(np.random.randn(30,3),columns=list('ABC'),index=dates)
data
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | -1.005821 | -0.747159 | -0.590444 |
2021-02-28 | 0.106087 | -0.611014 | -2.492806 |
2021-03-31 | 0.923487 | -1.901083 | -1.139865 |
2021-04-30 | 0.045023 | -0.501125 | 0.834619 |
2021-05-31 | -0.015439 | -0.328349 | 0.905197 |
2021-06-30 | 0.366951 | -0.421883 | 1.579878 |
2021-07-31 | 1.337484 | 1.290041 | -0.466970 |
2021-08-31 | -0.373738 | -0.220213 | -0.529416 |
2021-09-30 | 0.740679 | -0.795566 | -0.392513 |
2021-10-31 | -0.759147 | 0.166461 | 2.225352 |
2021-11-30 | 0.120085 | -0.969381 | 0.050001 |
2021-12-31 | -1.328895 | 0.311472 | 0.237954 |
2022-01-31 | 0.211936 | 0.477653 | -0.097692 |
2022-02-28 | 0.135520 | 0.445589 | 1.909404 |
2022-03-31 | 0.876071 | 1.117198 | 0.629551 |
2022-04-30 | 0.863037 | -1.707017 | 0.470066 |
2022-05-31 | -0.979964 | 0.257285 | 0.898436 |
2022-06-30 | -1.423223 | 0.259646 | -0.650481 |
2022-07-31 | 1.580251 | -0.314205 | 0.639193 |
2022-08-31 | 1.954733 | -1.515528 | 0.143653 |
2022-09-30 | -0.722134 | 0.845884 | -0.299418 |
2022-10-31 | -0.448377 | -1.045969 | 0.244326 |
2022-11-30 | -0.092980 | -1.089742 | 0.561777 |
2022-12-31 | 2.820850 | -0.080729 | 0.770422 |
2023-01-31 | -1.482163 | 0.365914 | 1.351397 |
2023-02-28 | -0.364066 | -0.182885 | -0.922139 |
2023-03-31 | -0.589401 | 0.592518 | -0.119778 |
2023-04-30 | 0.705069 | 0.808626 | 2.058423 |
2023-05-31 | 0.659801 | 1.853893 | 1.030405 |
2023-06-30 | 0.363107 | -0.512096 | 0.169748 |
In [ ]:
# 头部数据
data.head()
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | -1.005821 | -0.747159 | -0.590444 |
2021-02-28 | 0.106087 | -0.611014 | -2.492806 |
2021-03-31 | 0.923487 | -1.901083 | -1.139865 |
2021-04-30 | 0.045023 | -0.501125 | 0.834619 |
2021-05-31 | -0.015439 | -0.328349 | 0.905197 |
In [ ]:
# 头部前3条
data.head(3)
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | -1.005821 | -0.747159 | -0.590444 |
2021-02-28 | 0.106087 | -0.611014 | -2.492806 |
2021-03-31 | 0.923487 | -1.901083 | -1.139865 |
In [ ]:
# 尾部数据
data.tail()
Out[ ]:
A | B | C | |
---|---|---|---|
2023-02-28 | -0.364066 | -0.182885 | -0.922139 |
2023-03-31 | -0.589401 | 0.592518 | -0.119778 |
2023-04-30 | 0.705069 | 0.808626 | 2.058423 |
2023-05-31 | 0.659801 | 1.853893 | 1.030405 |
2023-06-30 | 0.363107 | -0.512096 | 0.169748 |
In [ ]:
# 尾部3条
data.tail(3)
Out[ ]:
A | B | C | |
---|---|---|---|
2023-04-30 | 0.705069 | 0.808626 | 2.058423 |
2023-05-31 | 0.659801 | 1.853893 | 1.030405 |
2023-06-30 | 0.363107 | -0.512096 | 0.169748 |
In [ ]:
# 索引
data.index
Out[ ]:
DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30', '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31', '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31', '2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30', '2022-05-31', '2022-06-30', '2022-07-31', '2022-08-31', '2022-09-30', '2022-10-31', '2022-11-30', '2022-12-31', '2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30', '2023-05-31', '2023-06-30'], dtype='datetime64[ns]', freq='M')
In [ ]:
# 列名
data.columns
Out[ ]:
Index(['A', 'B', 'C'], dtype='object')
In [ ]:
# 查看数值(array)
data.values
Out[ ]:
array([[-1.00582066, -0.74715913, -0.59044421], [ 0.10608719, -0.61101411, -2.49280605], [ 0.92348709, -1.901083 , -1.13986527], [ 0.04502284, -0.50112509, 0.83461856], [-0.01543869, -0.32834889, 0.90519706], [ 0.36695114, -0.42188302, 1.57987849], [ 1.33748358, 1.29004108, -0.46697028], [-0.37373787, -0.22021339, -0.52941598], [ 0.74067882, -0.79556616, -0.39251251], [-0.75914684, 0.16646147, 2.22535186], [ 0.12008512, -0.96938058, 0.05000075], [-1.32889545, 0.31147169, 0.23795395], [ 0.2119362 , 0.47765278, -0.0976922 ], [ 0.13551963, 0.44558949, 1.90940387], [ 0.87607052, 1.11719757, 0.62955101], [ 0.8630371 , -1.70701661, 0.47006564], [-0.97996414, 0.25728477, 0.89843618], [-1.42322307, 0.25964647, -0.65048082], [ 1.580251 , -0.3142048 , 0.63919311], [ 1.95473317, -1.51552846, 0.1436534 ], [-0.7221338 , 0.84588397, -0.29941785], [-0.44837652, -1.04596934, 0.24432642], [-0.0929797 , -1.08974158, 0.56177711], [ 2.82084974, -0.08072931, 0.77042241], [-1.48216309, 0.36591366, 1.35139741], [-0.3640665 , -0.18288453, -0.92213874], [-0.58940142, 0.59251817, -0.11977769], [ 0.70506868, 0.80862606, 2.05842293], [ 0.65980106, 1.85389319, 1.03040533], [ 0.36310659, -0.51209611, 0.16974761]])
In [ ]:
data.to_numpy()
Out[ ]:
array([[-1.00582066, -0.74715913, -0.59044421], [ 0.10608719, -0.61101411, -2.49280605], [ 0.92348709, -1.901083 , -1.13986527], [ 0.04502284, -0.50112509, 0.83461856], [-0.01543869, -0.32834889, 0.90519706], [ 0.36695114, -0.42188302, 1.57987849], [ 1.33748358, 1.29004108, -0.46697028], [-0.37373787, -0.22021339, -0.52941598], [ 0.74067882, -0.79556616, -0.39251251], [-0.75914684, 0.16646147, 2.22535186], [ 0.12008512, -0.96938058, 0.05000075], [-1.32889545, 0.31147169, 0.23795395], [ 0.2119362 , 0.47765278, -0.0976922 ], [ 0.13551963, 0.44558949, 1.90940387], [ 0.87607052, 1.11719757, 0.62955101], [ 0.8630371 , -1.70701661, 0.47006564], [-0.97996414, 0.25728477, 0.89843618], [-1.42322307, 0.25964647, -0.65048082], [ 1.580251 , -0.3142048 , 0.63919311], [ 1.95473317, -1.51552846, 0.1436534 ], [-0.7221338 , 0.84588397, -0.29941785], [-0.44837652, -1.04596934, 0.24432642], [-0.0929797 , -1.08974158, 0.56177711], [ 2.82084974, -0.08072931, 0.77042241], [-1.48216309, 0.36591366, 1.35139741], [-0.3640665 , -0.18288453, -0.92213874], [-0.58940142, 0.59251817, -0.11977769], [ 0.70506868, 0.80862606, 2.05842293], [ 0.65980106, 1.85389319, 1.03040533], [ 0.36310659, -0.51209611, 0.16974761]])
In [ ]:
# 查看统计摘要
data.describe()
Out[ ]:
A | B | C | |
---|---|---|---|
count | 30.000000 | 30.000000 | 30.000000 |
mean | 0.140827 | -0.138392 | 0.300276 |
std | 1.009620 | 0.888840 | 1.014414 |
min | -1.482163 | -1.901083 | -2.492806 |
25% | -0.554145 | -0.713123 | -0.369239 |
50% | 0.113086 | -0.201549 | 0.241140 |
75% | 0.731776 | 0.425671 | 0.882482 |
max | 2.820850 | 1.853893 | 2.225352 |
In [ ]:
# 列数据
data[['A','B']]
Out[ ]:
A | B | |
---|---|---|
2021-01-31 | -1.005821 | -0.747159 |
2021-02-28 | 0.106087 | -0.611014 |
2021-03-31 | 0.923487 | -1.901083 |
2021-04-30 | 0.045023 | -0.501125 |
2021-05-31 | -0.015439 | -0.328349 |
2021-06-30 | 0.366951 | -0.421883 |
2021-07-31 | 1.337484 | 1.290041 |
2021-08-31 | -0.373738 | -0.220213 |
2021-09-30 | 0.740679 | -0.795566 |
2021-10-31 | -0.759147 | 0.166461 |
2021-11-30 | 0.120085 | -0.969381 |
2021-12-31 | -1.328895 | 0.311472 |
2022-01-31 | 0.211936 | 0.477653 |
2022-02-28 | 0.135520 | 0.445589 |
2022-03-31 | 0.876071 | 1.117198 |
2022-04-30 | 0.863037 | -1.707017 |
2022-05-31 | -0.979964 | 0.257285 |
2022-06-30 | -1.423223 | 0.259646 |
2022-07-31 | 1.580251 | -0.314205 |
2022-08-31 | 1.954733 | -1.515528 |
2022-09-30 | -0.722134 | 0.845884 |
2022-10-31 | -0.448377 | -1.045969 |
2022-11-30 | -0.092980 | -1.089742 |
2022-12-31 | 2.820850 | -0.080729 |
2023-01-31 | -1.482163 | 0.365914 |
2023-02-28 | -0.364066 | -0.182885 |
2023-03-31 | -0.589401 | 0.592518 |
2023-04-30 | 0.705069 | 0.808626 |
2023-05-31 | 0.659801 | 1.853893 |
2023-06-30 | 0.363107 | -0.512096 |
In [ ]:
# 行数据
data.iloc[0:10]
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | -1.005821 | -0.747159 | -0.590444 |
2021-02-28 | 0.106087 | -0.611014 | -2.492806 |
2021-03-31 | 0.923487 | -1.901083 | -1.139865 |
2021-04-30 | 0.045023 | -0.501125 | 0.834619 |
2021-05-31 | -0.015439 | -0.328349 | 0.905197 |
2021-06-30 | 0.366951 | -0.421883 | 1.579878 |
2021-07-31 | 1.337484 | 1.290041 | -0.466970 |
2021-08-31 | -0.373738 | -0.220213 | -0.529416 |
2021-09-30 | 0.740679 | -0.795566 | -0.392513 |
2021-10-31 | -0.759147 | 0.166461 | 2.225352 |
In [ ]:
# 行列切片
data.loc['20210101':'20220101','A':'B'] # 20210101到20220101的A/B两列数据
Out[ ]:
A | B | |
---|---|---|
2021-01-31 | -1.005821 | -0.747159 |
2021-02-28 | 0.106087 | -0.611014 |
2021-03-31 | 0.923487 | -1.901083 |
2021-04-30 | 0.045023 | -0.501125 |
2021-05-31 | -0.015439 | -0.328349 |
2021-06-30 | 0.366951 | -0.421883 |
2021-07-31 | 1.337484 | 1.290041 |
2021-08-31 | -0.373738 | -0.220213 |
2021-09-30 | 0.740679 | -0.795566 |
2021-10-31 | -0.759147 | 0.166461 |
2021-11-30 | 0.120085 | -0.969381 |
2021-12-31 | -1.328895 | 0.311472 |
In [ ]:
# 按值筛选
## 小数取2位
data = round(data,2)
## A列中数值是0.74的那一行
data[data['A']==0.74]
## A列中数值是0.74的那一行的A列
data[data['A']==0.74]['A']
Out[ ]:
2021-09-30 0.74 Freq: M, Name: A, dtype: float64
In [ ]:
# 按条件筛选(布尔值)
data[data['A']>0.5]
Out[ ]:
A | B | C | |
---|---|---|---|
2021-03-31 | 0.92 | -1.90 | -1.14 |
2021-07-31 | 1.34 | 1.29 | -0.47 |
2021-09-30 | 0.74 | -0.80 | -0.39 |
2022-03-31 | 0.88 | 1.12 | 0.63 |
2022-04-30 | 0.86 | -1.71 | 0.47 |
2022-07-31 | 1.58 | -0.31 | 0.64 |
2022-08-31 | 1.95 | -1.52 | 0.14 |
2022-12-31 | 2.82 | -0.08 | 0.77 |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
In [ ]:
data[data>0.5] # 不满足条件的那一个数值会变成nan
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | NaN | NaN | NaN |
2021-02-28 | NaN | NaN | NaN |
2021-03-31 | 0.92 | NaN | NaN |
2021-04-30 | NaN | NaN | 0.83 |
2021-05-31 | NaN | NaN | 0.91 |
2021-06-30 | NaN | NaN | 1.58 |
2021-07-31 | 1.34 | 1.29 | NaN |
2021-08-31 | NaN | NaN | NaN |
2021-09-30 | 0.74 | NaN | NaN |
2021-10-31 | NaN | NaN | 2.23 |
2021-11-30 | NaN | NaN | NaN |
2021-12-31 | NaN | NaN | NaN |
2022-01-31 | NaN | NaN | NaN |
2022-02-28 | NaN | NaN | 1.91 |
2022-03-31 | 0.88 | 1.12 | 0.63 |
2022-04-30 | 0.86 | NaN | NaN |
2022-05-31 | NaN | NaN | 0.90 |
2022-06-30 | NaN | NaN | NaN |
2022-07-31 | 1.58 | NaN | 0.64 |
2022-08-31 | 1.95 | NaN | NaN |
2022-09-30 | NaN | 0.85 | NaN |
2022-10-31 | NaN | NaN | NaN |
2022-11-30 | NaN | NaN | 0.56 |
2022-12-31 | 2.82 | NaN | 0.77 |
2023-01-31 | NaN | NaN | 1.35 |
2023-02-28 | NaN | NaN | NaN |
2023-03-31 | NaN | 0.59 | NaN |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
2023-06-30 | NaN | NaN | NaN |
In [ ]:
# 针对上面的结果去除NaN
data[data>0.5].dropna()
Out[ ]:
A | B | C | |
---|---|---|---|
2022-03-31 | 0.88 | 1.12 | 0.63 |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
In [ ]:
# 去除重复值
data.drop_duplicates()
Out[ ]:
A | B | C | |
---|---|---|---|
2021-01-31 | -1.01 | -0.75 | -0.59 |
2021-02-28 | 0.11 | -0.61 | -2.49 |
2021-03-31 | 0.92 | -1.90 | -1.14 |
2021-04-30 | 0.05 | -0.50 | 0.83 |
2021-05-31 | -0.02 | -0.33 | 0.91 |
2021-06-30 | 0.37 | -0.42 | 1.58 |
2021-07-31 | 1.34 | 1.29 | -0.47 |
2021-08-31 | -0.37 | -0.22 | -0.53 |
2021-09-30 | 0.74 | -0.80 | -0.39 |
2021-10-31 | -0.76 | 0.17 | 2.23 |
2021-11-30 | 0.12 | -0.97 | 0.05 |
2021-12-31 | -1.33 | 0.31 | 0.24 |
2022-01-31 | 0.21 | 0.48 | -0.10 |
2022-02-28 | 0.14 | 0.45 | 1.91 |
2022-03-31 | 0.88 | 1.12 | 0.63 |
2022-04-30 | 0.86 | -1.71 | 0.47 |
2022-05-31 | -0.98 | 0.26 | 0.90 |
2022-06-30 | -1.42 | 0.26 | -0.65 |
2022-07-31 | 1.58 | -0.31 | 0.64 |
2022-08-31 | 1.95 | -1.52 | 0.14 |
2022-09-30 | -0.72 | 0.85 | -0.30 |
2022-10-31 | -0.45 | -1.05 | 0.24 |
2022-11-30 | -0.09 | -1.09 | 0.56 |
2022-12-31 | 2.82 | -0.08 | 0.77 |
2023-01-31 | -1.48 | 0.37 | 1.35 |
2023-02-28 | -0.36 | -0.18 | -0.92 |
2023-03-31 | -0.59 | 0.59 | -0.12 |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
2023-06-30 | 0.36 | -0.51 | 0.17 |
In [ ]:
# 转置
data.T
Out[ ]:
2021-01-31 | 2021-02-28 | 2021-03-31 | 2021-04-30 | 2021-05-31 | 2021-06-30 | 2021-07-31 | 2021-08-31 | 2021-09-30 | 2021-10-31 | ... | 2022-09-30 | 2022-10-31 | 2022-11-30 | 2022-12-31 | 2023-01-31 | 2023-02-28 | 2023-03-31 | 2023-04-30 | 2023-05-31 | 2023-06-30 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
A | -1.01 | 0.11 | 0.92 | 0.05 | -0.02 | 0.37 | 1.34 | -0.37 | 0.74 | -0.76 | ... | -0.72 | -0.45 | -0.09 | 2.82 | -1.48 | -0.36 | -0.59 | 0.71 | 0.66 | 0.36 |
B | -0.75 | -0.61 | -1.90 | -0.50 | -0.33 | -0.42 | 1.29 | -0.22 | -0.80 | 0.17 | ... | 0.85 | -1.05 | -1.09 | -0.08 | 0.37 | -0.18 | 0.59 | 0.81 | 1.85 | -0.51 |
C | -0.59 | -2.49 | -1.14 | 0.83 | 0.91 | 1.58 | -0.47 | -0.53 | -0.39 | 2.23 | ... | -0.30 | 0.24 | 0.56 | 0.77 | 1.35 | -0.92 | -0.12 | 2.06 | 1.03 | 0.17 |
3 rows × 30 columns
In [ ]:
# 排序
data.sort_values(by='A',ascending=False) # A列降序
Out[ ]:
A | B | C | |
---|---|---|---|
2022-12-31 | 2.82 | -0.08 | 0.77 |
2022-08-31 | 1.95 | -1.52 | 0.14 |
2022-07-31 | 1.58 | -0.31 | 0.64 |
2021-07-31 | 1.34 | 1.29 | -0.47 |
2021-03-31 | 0.92 | -1.90 | -1.14 |
2022-03-31 | 0.88 | 1.12 | 0.63 |
2022-04-30 | 0.86 | -1.71 | 0.47 |
2021-09-30 | 0.74 | -0.80 | -0.39 |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
2021-06-30 | 0.37 | -0.42 | 1.58 |
2023-06-30 | 0.36 | -0.51 | 0.17 |
2022-01-31 | 0.21 | 0.48 | -0.10 |
2022-02-28 | 0.14 | 0.45 | 1.91 |
2021-11-30 | 0.12 | -0.97 | 0.05 |
2021-02-28 | 0.11 | -0.61 | -2.49 |
2021-04-30 | 0.05 | -0.50 | 0.83 |
2021-05-31 | -0.02 | -0.33 | 0.91 |
2022-11-30 | -0.09 | -1.09 | 0.56 |
2023-02-28 | -0.36 | -0.18 | -0.92 |
2021-08-31 | -0.37 | -0.22 | -0.53 |
2022-10-31 | -0.45 | -1.05 | 0.24 |
2023-03-31 | -0.59 | 0.59 | -0.12 |
2022-09-30 | -0.72 | 0.85 | -0.30 |
2021-10-31 | -0.76 | 0.17 | 2.23 |
2022-05-31 | -0.98 | 0.26 | 0.90 |
2021-01-31 | -1.01 | -0.75 | -0.59 |
2021-12-31 | -1.33 | 0.31 | 0.24 |
2022-06-30 | -1.42 | 0.26 | -0.65 |
2023-01-31 | -1.48 | 0.37 | 1.35 |
In [ ]:
data.sort_index(ascending=False)
Out[ ]:
A | B | C | |
---|---|---|---|
2023-06-30 | 0.36 | -0.51 | 0.17 |
2023-05-31 | 0.66 | 1.85 | 1.03 |
2023-04-30 | 0.71 | 0.81 | 2.06 |
2023-03-31 | -0.59 | 0.59 | -0.12 |
2023-02-28 | -0.36 | -0.18 | -0.92 |
2023-01-31 | -1.48 | 0.37 | 1.35 |
2022-12-31 | 2.82 | -0.08 | 0.77 |
2022-11-30 | -0.09 | -1.09 | 0.56 |
2022-10-31 | -0.45 | -1.05 | 0.24 |
2022-09-30 | -0.72 | 0.85 | -0.30 |
2022-08-31 | 1.95 | -1.52 | 0.14 |
2022-07-31 | 1.58 | -0.31 | 0.64 |
2022-06-30 | -1.42 | 0.26 | -0.65 |
2022-05-31 | -0.98 | 0.26 | 0.90 |
2022-04-30 | 0.86 | -1.71 | 0.47 |
2022-03-31 | 0.88 | 1.12 | 0.63 |
2022-02-28 | 0.14 | 0.45 | 1.91 |
2022-01-31 | 0.21 | 0.48 | -0.10 |
2021-12-31 | -1.33 | 0.31 | 0.24 |
2021-11-30 | 0.12 | -0.97 | 0.05 |
2021-10-31 | -0.76 | 0.17 | 2.23 |
2021-09-30 | 0.74 | -0.80 | -0.39 |
2021-08-31 | -0.37 | -0.22 | -0.53 |
2021-07-31 | 1.34 | 1.29 | -0.47 |
2021-06-30 | 0.37 | -0.42 | 1.58 |
2021-05-31 | -0.02 | -0.33 | 0.91 |
2021-04-30 | 0.05 | -0.50 | 0.83 |
2021-03-31 | 0.92 | -1.90 | -1.14 |
2021-02-28 | 0.11 | -0.61 | -2.49 |
2021-01-31 | -1.01 | -0.75 | -0.59 |