hive数据清洗,导入mysql

发布时间 2023-10-12 21:41:50作者: 傲世小苦瓜

-- 用于清洗的表
create table data1
(
`ip` string comment '城市',
`date1` string comment '日期',
`day` string comment '天数',
`traffic` double comment '流量',
`type` string comment '类型:视频video或文章article',
`id` string comment '视频或者文章的id'
)
row format delimited
fields terminated by ','
lines terminated by '\n';


load data local inpath '/home/hadoop/result.txt' into table data1;


select * from data1 limit 1000;


insert overwrite table data1
select ip,
date_format(from_unixtime(unix_timestamp(date1, 'dd/MMM/yyyy:HH:mm:ss Z'), 'yyyy-MM-dd HH:mm:ss'),
'yyyy-MM-dd HH:mm:ss') as date1,
day,
traffic,
type,
id
from data1;


select * from data1 limit 1000;


CREATE TABLE top_visits AS
SELECT type, id, COUNT(*) AS visit_count
FROM data1
GROUP BY type, id
ORDER BY visit_count DESC
LIMIT 10;


select * from top_visits;


CREATE TABLE top_courses_by_city AS
SELECT ip , type, id, COUNT(*) AS visit_count
FROM data1
GROUP BY ip, type, id
ORDER BY visit_count DESC
LIMIT 10;

 

select * from top_courses_by_city;

 

 

CREATE TABLE top_courses_by_traffic AS
SELECT type, id, SUM(traffic) AS total_traffic
FROM data1
GROUP BY type, id
ORDER BY total_traffic DESC
LIMIT 10;


select * from top_courses_by_traffic;

 

bin/sqoop export \
--connect "jdbc:mysql://node1:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table top_visits \
--export-dir /user/hive/warehouse/top_visits \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'

 


bin/sqoop export \
--connect "jdbc:mysql://node1:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table top_courses_by_city \
--export-dir /user/hive/warehouse/top_courses_by_city \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'

 


bin/sqoop export \
--connect "jdbc:mysql://node1:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table top_courses_by_traffic \
--export-dir /user/hive/warehouse/top_courses_by_traffic \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'