pyspark list[dict]转pyspark df

发布时间：2023-04-25 10:17:47　作者：Young_Mo

数据处理：把嵌套字典的 list（即 list[dict]）转成 PySpark DataFrame。

# coding=utf-8
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import Row


class SparkContext:
    """Context manager that owns a Hive-enabled ``SparkSession``.

    The session is obtained with ``getOrCreate`` when the object is
    constructed, handed to the ``with`` body on entry, and stopped on exit.

    NOTE(review): this locally defined class has the same name as
    ``pyspark.SparkContext``; take care not to import both into one module.
    """

    def __init__(self, name="cleaner"):
        """Build the session and store it on ``self.spark``.

        Args:
            name: Application name passed to ``appName``.
        """
        # Hive options applied to the builder, in this order.
        hive_settings = (
            ("hive.exec.dynamic.partition", True),
            ("hive.exec.dynamic.partition.mode", "nonstrict"),
        )

        builder = SparkSession.builder.appName(name)
        for key, value in hive_settings:
            builder = builder.config(key, value)
        self.spark = builder.enableHiveSupport().getOrCreate()

        # Set the Spark log level to ERROR.
        self.spark.sparkContext.setLogLevel("ERROR")

    def __enter__(self):
        """Return the underlying session as the ``with ... as`` target."""
        return self.spark

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the session; returns ``None``, so exceptions propagate."""
        self.spark.stop()

def main():
    """Convert a hard-coded list of dicts to a DataFrame and print it.

    Each dict is expanded into a ``Row`` (keys become field names), the rows
    are passed to ``createDataFrame`` and the result is displayed with
    ``show``. The session comes from the ``SparkContext`` context manager
    under the application name ``'test_df'``.
    """
    records = [
        {'ent_name': '百度', 'credit_code': '1234567890'},
        {'ent_name': 'abc', 'credit_code': '121212222'},
    ]
    # Dict keys are unpacked as keyword arguments, one Row per record.
    rows = [Row(**record) for record in records]

    with SparkContext('test_df') as session:
        session.createDataFrame(rows).show()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()