In [ ]:
import pandas as pd
import numpy as np
data = pd.DataFrame(np.random.randn(1000,3),columns=["a",'b','c'],index=pd.date_range('20200101',periods=1000))
data
Out[ ]:
a | b | c | |
---|---|---|---|
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:
# 数据存储
data.to_csv('txt.csv')
In [ ]:
pd.read_csv('txt.csv')
Out[ ]:
Unnamed: 0 | a | b | c | |
---|---|---|---|---|
0 | 2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
1 | 2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2 | 2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
3 | 2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
4 | 2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... | ... |
995 | 2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
996 | 2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
997 | 2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
998 | 2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
999 | 2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 4 columns
In [ ]:
## 上面读取时候索引变了,有两种方法可以变成我们原来要的样子
## 方法一:读取时候设置索引
pd.read_csv('txt.csv',index_col=['Unnamed: 0'])
Out[ ]:
a | b | c | |
---|---|---|---|
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:
## 方法二:存储时对数据索引进行命名: date
data.index
Out[ ]:
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10', ... '2022-09-17', '2022-09-18', '2022-09-19', '2022-09-20', '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24', '2022-09-25', '2022-09-26'], dtype='datetime64[ns]', length=1000, freq='D')
In [ ]:
data.index.names = ['date']
In [ ]:
data
Out[ ]:
a | b | c | |
---|---|---|---|
date | |||
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:
data.to_csv('txt.csv') # 完全覆盖/替换
In [ ]:
# 对已有文件进行数据追加
data2=data.tail()
data2
Out[ ]:
a | b | c | |
---|---|---|---|
date | |||
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
In [ ]:
data2.to_csv('txt.csv',mode='a',header=False) # a追加模式且不写入列名
In [ ]:
# excel的存储和读取
filename = 'excel.xlsx'
data.to_excel(filename,sheet_name='a')
In [ ]:
pd.read_excel(filename)
Out[ ]:
date | a | b | c | |
---|---|---|---|---|
0 | 2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
1 | 2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2 | 2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
3 | 2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
4 | 2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... | ... |
995 | 2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
996 | 2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
997 | 2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
998 | 2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
999 | 2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 4 columns
In [ ]:
# 一次写入多个sheet
with pd.ExcelWriter('writer.xlsx') as writer:
data.to_excel(writer,sheet_name='a')
data.to_excel(writer,sheet_name='b')
data.to_excel(writer,sheet_name='c')
In [ ]:
# 追加新sheet
with pd.ExcelWriter('writer.xlsx',mode='a',engine='openpyxl') as writer:
data2.to_excel(writer,sheet_name="d")