大数据分析案例--日志分析

发布时间 2023-10-14 01:08:00作者: 王庆园

具体操作和上次课堂测试类似,可视化代码更是比上次简单

这里只展示datagrip里的sql语句

这里不包括日期的清洗,日期的清洗有些困难,请教同学后知道怎样清洗,具体写在下一节

create table `past`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)
row format delimited
fields terminated by "," ; --字段之间分隔符
select * from past LIMIT 3;--查询前三条数据
--建立一个新的表是清洗过后的数据
create table `now1`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)row format delimited
fields terminated by "," ;
select * from now1 LIMIT 3;--查询前三条数据
--修改时间信息
--提取文章为11325
create table `article`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)row format delimited
fields terminated by "," ;
insert overwrite table article select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.type like 'article%' group by id,ip,`Date`,day ,traffic,type;
select * from article LIMIT 3;--查询前三条数据
--提取文章为video
create table `video`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)row format delimited
fields terminated by "," ;
select * from video LIMIT 3;--查询前三条数据
insert overwrite table video select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.type like 'video%' group by id,ip,`Date`,day ,traffic,type;

--清洗时间是2016-11-10 00:01:03
create table `time`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)row format delimited
fields terminated by "," ;
select * from `time` LIMIT 10;--查询前三条数据
insert overwrite table `time` select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.`Date` like '2016-11-10 00:01:03%' group by id,ip,`Date`,day ,traffic,type;


--清洗流量--暂时还不知道咋整
create table `liuliang`(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)row format delimited fields terminated by "," ;
insert overwrite table `liuliang` select ip as ip ,`Date` as `Date`,day as day ,sum(traffic) as traffic,type as type,id as id from now1 where now1.`Date` like '2016-11-10 00:01:03%' group by id,ip,`Date`,day ,type;

--统计最受欢迎的视频文章top10访问次数
create table `table1`(
`traffic` string,
`id` string,

`type` string,
`times` string
)row format delimited fields terminated by "," ;
insert overwrite table table1 select sum( traffic ) AS traffic,id as id ,count(id) as times,type as type from now1 group by  id ,type order by traffic desc ;
select * from `table1` LIMIT 20;--查询前三条数据



--按照流量统计最受欢迎的课程10
create table `table2`(
`ip` string,
`id` string,
`type` string,
`traffic` string
)row format delimited fields terminated by "," ;
select * from `table2` LIMIT 100;--查询前三条数据
insert overwrite table table2 select ip as ip ,id as id, type as type ,sum(traffic) as traffic  from now1 group by  ip,id ,type order by sum(traffic),id desc ;

--按照地市统计最受欢迎的课程10
create table `table3`(
`ip` string,
`id` string,
`type` string,
`ips` string
)row format delimited fields terminated by "," ;
select * from `table3` LIMIT 50;--查询前三条数据
insert overwrite table table3 select ip as ip ,id as id, type as type ,count(ip) as ips  from now1 group by  ip,id ,type order by count(ip) desc ;