Ingesting TXT file data into Hive with DataX

1. Create the Hive table structure in advance

DROP TABLE IF EXISTS ods.ods_log_1diu;
CREATE TABLE IF NOT EXISTS ods.ods_log_1diu
(
    SI_NO      STRING, -- varchar(10) not null, primary key
    SEND_TABLE STRING, -- varchar(30) not null, primary key
    SEQ        STRING, -- varchar(11) not null, primary key
    SEND_DATE  STRING, -- datetime
    SEND_TIME  STRING  -- varchar(6)
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '^'
    STORED AS ORC
    TBLPROPERTIES ('orc.compress' = 'SNAPPY');
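
The DDL above only needs to be applied once before the first DataX run. A minimal sketch from the command line follows; the script file name and the HiveServer2 address are placeholders, not values from the original post:

# apply the DDL with the Hive CLI
hive -f create_ods_log_1diu.sql
# or through beeline against HiveServer2
beeline -u "jdbc:hive2://<hiveserver2-host>:10000" -f create_ods_log_1diu.sql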

2. DataX ingestion job JSON

{
    "job": {
        "content": [
            {
                "reader": {
                    "parameter": {
                        "path": [
                            "${inputFilePath}/LOG_${BeforeDay}*.txt"
                        ],
                        "column": [
                            {
                                "index": 0,
                                "type": "string"
                            },
                            {
                                "index": 1,
                                "type": "string"
                            },
                            {
                                "index": 2,
                                "type": "string"
                            },
                            {
                                "index": 3,
                                "type": "string"
                            },
                            {
                                "index": 4,
                                "type": "string"
                            }
                        ],
                        "skipHeader": "true",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\t"
                    },
                    "name": "txtfilereader"
                },
                "writer": {
                    "parameter": {
                        "path": "/user/hive/warehouse/ods.db/ods_log_1diu",
                        "fileName": "ods_log_1diu",
                        "compress": "SNAPPY",
                        "column": [
                            {
                                "name": "SI_NO",
                                "type": "STRING"
                            },
                            {
                                "name": "SEND_TABLE",
                                "type": "STRING"
                            },
                            {
                                "name": "SEQ",
                                "type": "STRING"
                            },
                            {
                                "name": "SEND_DATE",
                                "type": "STRING"
                            },
                            {
                                "name": "SEND_TIME",
                                "type": "STRING"
                            }
                        ],
                        "defaultFS": "hdfs://master-:9000",
                        "writeMode": "truncate",
                        "fieldDelimiter": "^",
                        "fileType": "orc"
                    },
                    "name": "hdfswriter"
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": 3
            },
            "errorLimit": {
                "record": 0,
                "percentage": 0.02
            }
        }
    }
}
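
The reader path references two job variables, ${inputFilePath} and ${BeforeDay}, which are supplied at launch time through datax.py's -p option. A minimal launch sketch, assuming a /opt/datax install, a /data/export source directory, and that BeforeDay means "yesterday" in yyyyMMdd form (all of these are assumptions, not from the original post):

# yesterday in yyyyMMdd form (GNU date)
BeforeDay=$(date -d "-1 day" +%Y%m%d)
# pass the job variables referenced in the reader path via -p
python /opt/datax/bin/datax.py \
    -p "-DinputFilePath=/data/export -DBeforeDay=${BeforeDay}" \
    ods_log_1diu.json

Because writeMode is set to truncate, hdfswriter first removes any files under the target path whose names start with the configured fileName prefix, so rerunning the job for the same day replaces the previous load rather than appending to it.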