Hive建表语句示例

发布时间: 2024-01-09 08:51:31  作者: 粒子先生
-- ods_baidu_news
--
-- External, Parquet-backed table partitioned by the string column `pt`.
-- Because the table is EXTERNAL, Hive only records metadata for the files
-- under LOCATION; dropping the table does not delete them.
--
-- IF NOT EXISTS makes the script safe to re-run. Note that when a table with
-- this name already exists the statement is skipped entirely, even if the
-- existing definition differs from the one below.
--
-- `gathertime` and `pubdate` are stored as bigint; presumably epoch
-- timestamps (unit not visible here -- TODO confirm with the producer).
--
-- The 'transient_lastDdlTime' property that SHOW CREATE TABLE prints is
-- deliberately not set here: it is bookkeeping maintained by the metastore,
-- and replaying an old value would stamp the new table with a stale DDL time.
CREATE EXTERNAL TABLE IF NOT EXISTS `ods_baidu_news`(
  `domain` string,
  `sitename` string,
  `sourceurl` string,
  `casedatatype` string,
  `fetchtype` int,
  `casename` string,
  `content` string,
  `gathertime` bigint,
  `author` string,
  `pubdate` bigint,
  `searchword` string,
  `detailpage` string)
COMMENT 'ods'
PARTITIONED BY (
  `pt` string COMMENT 'partition')
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'hdfs://hdfs.dcpro.jcinfo.com:9000/dw/ods/ods_baidu_news'
TBLPROPERTIES (
  'bucketing_version'='2');

 

-- dwd_baidu_news
--
-- External, Parquet-backed table partitioned by the string column `pt`.
-- Compared with the column list of ods_baidu_news it adds `uniqid`,
-- `createdate`, `contentnotag`, `punishdate`, `punishyear`,
-- `caseaddrdistrict` and three array<string> columns (`keywords`,
-- `maintype`, `casetype`).
--
-- IF NOT EXISTS makes the script safe to re-run. Note that when a table with
-- this name already exists the statement is skipped entirely, even if the
-- existing definition differs from the one below.
--
-- `createdate`, `gathertime` and `pubdate` are bigint while `punishdate` and
-- `punishyear` are string; the encodings are not visible here -- TODO confirm.
--
-- The 'transient_lastDdlTime' property that SHOW CREATE TABLE prints is
-- deliberately not set here: it is bookkeeping maintained by the metastore,
-- and replaying an old value would stamp the new table with a stale DDL time.
CREATE EXTERNAL TABLE IF NOT EXISTS `dwd_baidu_news`(
  `uniqid` string,
  `createdate` bigint,
  `domain` string,
  `sitename` string,
  `sourceurl` string,
  `casedatatype` string,
  `fetchtype` int,
  `casename` string,
  `content` string,
  `contentnotag` string,
  `gathertime` bigint,
  `author` string,
  `pubdate` bigint,
  `punishdate` string,
  `punishyear` string,
  `searchword` string,
  `detailpage` string,
  `caseaddrdistrict` int,
  `keywords` array<string>,
  `maintype` array<string>,
  `casetype` array<string>)
COMMENT 'dwdBaiduNews'
PARTITIONED BY (
  `pt` string COMMENT 'partition')
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'hdfs://hdfs.dcpro.jcinfo.com:9000/dw/dwd/dwd_baidu_news'
TBLPROPERTIES (
  'bucketing_version'='2');

 

-- dw.judgmentDocumentods
--
-- External Parquet table in database `dw`, partitioned by the string column
-- `pt`. All scalar columns are string; `lawFirm` is array<string> and
-- `lawClauseList` is an array of structs, each carrying a nested `Items`
-- array of (content, name) structs.
--
-- Column names are backtick-quoted. This is required for `procedure`, which
-- is a reserved keyword in Hive 1.2+ and makes the statement fail to parse
-- when left unquoted (unless hive.support.sql11.reserved.keywords=false).
-- The remaining columns are quoted for uniformity.
--
-- NOTE(review): `fristReply` looks like a misspelling of "firstReply". It is
-- kept as-is because renaming it would break existing readers and any
-- Parquet files already written with that field name.
--
-- NOTE(review): LOCATION has no scheme/authority, so it is presumably
-- resolved against the cluster's default filesystem; the directory name
-- (judgmentDocumentFromFile) also differs from the table name -- confirm
-- both are intended.
CREATE EXTERNAL TABLE IF NOT EXISTS dw.judgmentDocumentods (
  `odsId` string,
  `caseTitle` string,
  `plaintiff` string,
  `caseTypeShow` string,
  `judgementResult` string,
  `court` string,
  `docContent` string,
  `collegiateBench` string,
  `provinceId` string,
  `ascertain` string,
  `lawyer` string,
  `defendantLawyer` string,
  `litigant` string,
  `caseNo` string,
  `reason` string,
  `judge` string,
  `firstClaim` string,
  `courtHeld` string,
  `procedure` string,
  `defendant` string,
  `instrumentType` string,
  `judgementDate` string,
  `trialProcess` string,
  `courtClerk` string,
  `description` string,
  `plaintiffWords` string,
  `defendantWords` string,
  `fristReply` string,
  `claim` string,
  `secondJudgementResult` string,
  `firstCourtHeld` string,
  `firstDefendantWords` string,
  `appellantWords` string,
  `appelleeWords` string,
  `keywords` string,
  `caseId` string,
  `reasonId` string,
  `releaseDate` string,
  `caseSource` string,
  `firstAscertain` string,
  `lawFirm` array<string>,
  `lawClauseList` array<
    struct<
      attribute:string,
      lawName:string,
      Items:array<
        struct<
          content:string,
          name:string
        >
      >
    >
  >)
COMMENT 'cpws-ods'
PARTITIONED BY (
  `pt` string COMMENT 'partition')
STORED AS PARQUET
LOCATION '/dw/ods/judgmentDocumentFromFile';