HDFS 转 DataFrame

发布时间 2024-01-07 16:08:39 作者: myrj
import pyarrow.parquet as pq
from pyarrow import csv as pa_csv
from pyarrow import fs
 
# Location of the CSV file on HDFS; replace the placeholders before running.
hadoop_path = "hdfs://<your-hdfs-address>/<csv-file>"

# Resolve the filesystem from the URI scheme instead of hard-coding
# LocalFileSystem: for an hdfs:// URI, from_uri returns a Hadoop filesystem
# object together with the path inside that filesystem. Using a distinct
# name also avoids shadowing the imported `fs` module.
hdfs, csv_path = fs.FileSystem.from_uri(hadoop_path)

# Parse the file with the CSV reader (the Parquet reader cannot read CSV
# data), and close the stream as soon as the table has been loaded.
with hdfs.open_input_stream(csv_path) as stream:
    table = pa_csv.read_csv(stream)

# Convert the Arrow table to a pandas DataFrame and display it.
dataframe = table.to_pandas()
print(dataframe)