0001.MySQL案例练习

发布时间 2023-04-12 16:55:16作者: 興華

案例数据分析链接:https://pan.baidu.com/s/1NLclAEo8ZoV8FGesyNkVfw?pwd=rgg3
提取码:rgg3

1. 案例中首先要厘清表间关系及其表关键字段等信息

 

 

 2. 厘清第1项内容后,进入MySQL软件进行对应表数据建立

①首先建立数据库并应用

create database Kdd99;
use Kdd99;

②创建各表数据并将对应数据进行上传

创建表

# drop table sale;  # 删除表
create table sale( 
year integer,
market varchar(6),
sale long,
profit long
);
select * from sale;    # 单表数据查询

上传数据

show variables like "%secure%";   # 查询MySQl数据需要存放的路径

load data infile'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\sale.csv'     # 即需要将待分析数据放入MySQL规定的路径下进行上传
    into table sale
    character set gb2312
    fields terminated by ','
    optionally enclosed by '"' escaped by '"'
    lines terminated by '\r\n'
    ignore 1 lines;
    
select * from sale;

其他表建立以及数据上传如下:

# drop table kdd99_trans;
create table kdd99_trans(
trans_id integer,
account_id integer,
date date,
type varchar(2),
operation varchar(20),
amount DECIMAL,
balance DECIMAL,
k_symbol varchar(20),
bank varchar(4),
account long
);

select * from kdd99_trans;

load data infile'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_trans.csv'
    into table kdd99_trans
    character set gb2312
    fields terminated by ','
    optionally enclosed by '"' escaped by '"'
    lines terminated by '\r\n'
    ignore 1 lines;

select * from kdd99_trans;

# drop table kdd99_accounts;
create table kdd99_accounts ( 
account_id integer, 
district_id integer,
frequency varchar(20),
date DATE
);

select * from kdd99_accounts;

load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\kdd99_accounts.csv'
into table kdd99_accounts 
character set gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;
  
  select * from kdd99_accounts;

# drop table kdd99_card;
create table Kdd99_card( 
card_id integer, 
disp_id integer,
issued  DATE,
type varchar(10)
);

  select * from kdd99_card;
  
load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_card.csv'
    into table Kdd99_card
    CHARACTER SET gb2312
    fields terminated by ','
    optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

 select card_id from kdd99_card;

# drop table kdd99_clients;
create table Kdd99_clients( 
client_id integer, 
sex varchar(2),
birth_date DATE,
district_id integer
);

 select * from kdd99_clients;
 
load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_clients.csv'
into table Kdd99_clients
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from kdd99_clients;

# drop table kdd99_disp;
create table Kdd99_disp( 
disp_id integer, 
client_id integer,
account_id integer,
type varchar(6)
);

load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_disp.csv'
into table Kdd99_disp
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from kdd99_disp;

# drop table kdd99_district;
create table Kdd99_district( 
A1 integer,
GDP long,
A4 DOUBLE,
A10 DOUBLE,
A11 DOUBLE,
A12 DOUBLE,
A13 DOUBLE,
A14 DOUBLE,
A15 DOUBLE,
A16 DOUBLE
); 

load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_district.csv'
into table Kdd99_district
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from kdd99_district;

# drop table kdd99_loans;
create table Kdd99_loans( 
loan_id integer,
account_id integer,
date date,
amount DECIMAL,
duration integer,
payments DECIMAL,
status varchar(2)
); 

load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_loans.csv'
into table Kdd99_loans
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from kdd99_loans;

# drop table kdd99_order;
create table Kdd99_order( 
order_id integer,
account_id integer,
bank_to varchar(2),
account_to integer,
amount DECIMAL,
k_symbol varchar(20)
); 

load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_order.csv'
into table Kdd99_order
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from kdd99_order;

# drop table One;
create table One ( 
X integer, 
A VARCHAR(4)
);

load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\One.csv"
into table One
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from One;

# drop table Two;
create table Two ( 
X integer, 
A VARCHAR(4)
);

load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Two.csv"
into table Two
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from two;

# drop table table1;
create table Table1 ( 
id integer, 
A VARCHAR(4)
);

load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Table1.csv"
into table Table1
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from table1;

# drop table table2;
create table Table2 ( 
id integer, 
B VARCHAR(4)
);
load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Table2.csv"
into table Table2
CHARACTER SET gb2312
fields terminated by ','
optionally enclosed by '"' escaped by '"' 
lines terminated by '\r\n'
ignore 1 lines;

select * from table2 limit 5;

show tables;

select year as '年份',market,sale,profit as money from kdd99.sale;

select * from sale;

3. 根据案例需求进行对应数据分析

数据的基本查询方式
### 单表查询
#语法:select * from 表名
select * from kdd99.sale;

#### 1)查询指定列
#语法:select 字段1[,字段2,…] from 表名;
select year,market,sale,profit
from kdd99.sale;

#### 2)指定列命名 as 
#语法:select 字段名 [as] 列别名 from 原表名 [[as] 表别名];
select year as `年份`,market,sale,profit
from kdd99.sale;

select year as `年份`,market,sale,profit as money
from kdd99.sale;

#### 3)表命名 
#语法:select 字段名 [as] 列别名 from 原表名 [[as] 表别名];
SELECT t.year,t.market,t.sale,t.profit
from kdd99.sale t;

#### 4)删除重复 DISTINCT
#语法:select distinct 字段名[,字段名2,…] from 表名;
select DISTINCT year
from kdd99.sale;

#### 5)排序 order by
#语法:select 字段1[,字段2,…] from 表名 order by 字段1[ 排序方向,字段2 排序方向,…];
select year,market,sale,profit
from kdd99.sale
order by year asc,sale;

#### 6)限制结果 limit 
#语法:select 字段1[,字段2,…] from 表名 limit [偏移量,] 行数;
select * from kdd99.sale
limit 10;

#### 7)条件过滤 where
# 算数运算 
select * FROM KDD99.KDD99_TRANS
where amount * 12 < 8000;

select * FROM KDD99.KDD99_TRANS
where amount / 12 * 1.10 >= 8500;

select * FROM KDD99.KDD99_TRANS
where (amount / 12 ) * 1.10 >= 8500;

select * FROM KDD99.KDD99_TRANS
where amount + 1000 <= 10000;

#比较运算
select * FROM KDD99.KDD99_TRANS
Where k_symbol = '房屋贷款';

select * FROM KDD99.KDD99_TRANS
where date >= '1998-12-01';

#逻辑运算 
select * FROM KDD99.KDD99_TRANS
where date <= 1998-12-31 AND k_symbol = '房屋贷款';

#### 8)空值查询 null
#语法:select 字段1[,字段2,…] from 表名 where 空值字段 is [not] null;

#### 9)模糊查询 like
#语法:select 字段1[,字段2,…] from 表名 where 字段 [not] like 通配符;
select * FROM KDD99.KDD99_TRANS
where date <= 1998-12-31 AND k_symbol like '%贷款';

select * FROM KDD99.KDD99_TRANS
where date <= 1998-12-31 AND k_symbol = '房屋%';

select * FROM KDD99.KDD99_TRANS
where date <= 1998-12-31 AND k_symbol like '%贷%';

#### 10)创建新列
select a.*,profit/sale as rate
from kdd99.sale a;

select a.*,profit/sale as rate 
from kdd99.sale
where profit/sale <0.07;
案例情景分析
## 商业情景应用-1
# Bank公司的领导希望得到1998年12月发生的“房屋贷款”交易,
# 包括帐户号、交易日期、对方银行和金额,
# 按照交易日期和交易序号进行升序排序。
SELECT t1.account_id,t1.date,t1.bank,t1.amount
FROM KDD99.KDD99_TRANS t1
WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' 
AND t1.k_symbol = '房屋贷款'
ORDER BY t1.date,t1.account_id;

#### 11)聚合 group by
#语法:select 字段1[,字段2,…] from 表名[ where 查询条件] group by 分组字段1[,分组字段2,…];
select count(*)  from  KDD99.kdd99_tRANS;
select date,count(*)  from  KDD99.kdd99_tRANS
group by date;

## 商业情景应用-2
# 汇总每个交易日的交易次数、日均交易额、日总交易额。
SELECT t1.date, 
(COUNT(t1.account_id)) AS COUNT_of_account_id, 
(AVG(t1.amount)) AS avg_of_amount, 
(SUM(t1.amount)) AS SUM_of_amount
FROM KDD99.KDD99_TRANS t1
GROUP BY t1.date;

#### 12) 分组筛选 having
#语法:select 字段1[,字段2,…] from 表名[ where 筛选条件] group by 分组字段1[,分组字段2,…] having 筛选条件;

## 商业情景应用-3
#在刚才的需求基础上,筛选出交易频次大于300的交易日记录。
SELECT t1.date, 
COUNT(t1.account_id) AS COUNT_of_account_id, 
AVG(t1.amount) AS avg_of_amount, 
SUM(t1.amount) AS SUM_of_amount
FROM KDD99.KDD99_TRANS t1
GROUP BY t1.date
HAVING COUNT_of_account_id>300;

#### 13)日期时间函数
select now();
select CURRENT_DATE();
select CURRENT_TIME();
select YEAR(CURRENT_DATE());
select WEEKDAY(CURRENT_DATE());

## 商业情景应用-4
#在商业情景应用-1的基础之上,需要新生成一个代表交易日星期的变量。
SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday
FROM KDD99.KDD99_TRANS t1
WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' 
AND t1.k_symbol = '房屋贷款'
ORDER BY t1.date,t1.account_id;

#### 14)条件判断 case when then/if
#语法:IF(condition, value_if_true, value_if_false)
#语法:CASE expressing WHEN condition THEN return

## 商业情景应用-5
#根据交易日星期的信息生成是否为工作日的新变量。周一至周五为工作日,周六、周日为非工作日
# if
SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday,
IF(weekday(t1.date)>=5,'周末','工作日') AS Weekday_2
FROM KDD99.KDD99_TRANS t1
WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31'
AND t1.k_symbol = '房屋贷款'
ORDER BY t1.date,t1.account_id;

#case when 
SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday,
(CASE WHEN weekday(t1.date)>=5 THEN '周末'
WHEN weekday(t1.date) <5 THEN '工作日' END) AS Weekday_2
FROM KDD99.KDD99_TRANS t1
WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31'
AND t1.k_symbol = '房屋贷款'
ORDER BY t1.date,t1.account_id;

#### 15)条件求和/条件计数 sumif /countif
#语法:sumif(range,criteria,sum_range) Sumif(条件区域,求和条件,实际求和区域)
#语法:countif(range,criteria) COUNTIF(订数区域,计数条件)
SELECT t1.date,
SUM(t1.amount) AS SUM_of_amount
FROM KDD99.KDD99_TRANS t1
WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31'
group by t1.date;

SELECT t1.date,
SUM(if(t1.date BETWEEN '1998-12-01' AND '1998-12-31',t1.amount,0)) AS SUM_of_amount
FROM KDD99.KDD99_TRANS t1
group by t1.date;

#### 16)时间间隔
select CURRENT_DATE()-DATE('2022-04-01');
select datediff(CURRENT_DATE(),'2022-04-01');
select DATE_ADD(CURRENT_DATE(),INTERVAL +1 day);

## 商业情景应用-6
# 假如现在是1997年5月16日,交易数据截止1997年5月15日(T-1),
# 汇总每个账户号借贷类型为“贷”的贷款金额的年度同比,季度同/环比情况。
SET @昨日 = date ('1997-05-15');
SET @本季初 = date ('1997-04-01');
SET @环季初 = date ('1997-01-01');
SET @同季初 = date ('1996-04-01');
SET @本年初 = date ('1997-01-01');
SET @同年初 = date ('1996-01-01');
SET @季度天数 = DATEDIFF(@昨日,@本季初)+1;
SET @年度天数 = DATEDIFF(@昨日,@本年初)+1;
SET @环季末 = DATE_ADD(@环季初,INTERVAL @季度天数-1 DAY);
SET @同季末 = DATE_ADD(@同季初,INTERVAL @季度天数-1 DAY);
SET @同年末 = DATE_ADD(@同年初,INTERVAL @年度天数-1 DAY);
    
select @昨日,@本季初,@环季初,@同季初,@本年初,@同年初,@季度天数,@环季末,@同季末,@同年末;

select t1.account_id,
sum(if(t1.date between @本年初 and @昨日,t1.amount,0)) as `本年`,
sum(if(t1.date between @同年初  and @同年末,t1.amount,0)) as `同年`,
sum(if(t1.date between @本年初 and @昨日,t1.amount,0)) /sum(if(t1.date between @同年初  and @同年末,t1.amount,0)) -1 as `年同比`,
sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) as `本季`,
sum(if(t1.date between @同季初 and @同季末,t1.amount,0)) as `同季`,
sum(if(t1.date between @环季初 and @环季末,t1.amount,0)) as `环季`,
sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) /sum(if(t1.date between @同季初 and @同季末,t1.amount,0))-1 as `季同比`,
sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) /sum(if(t1.date between @环季初 and @环季末,t1.amount,0))-1 as `季环比`
FROM KDD99.KDD99_TRANS t1
where t1.type='' 
GROUP BY t1.account_id;

### 多表查询
#### 1)union 去重
#语法:select 字段1[,字段2,…] from 表名 union select 字段1[,字段2,…] from 表名;
select * from one 
union 
select * from two;

#### 2)union all 不去重
#语法:select 字段1[,字段2,…] from 表名 union all select 字段1[,字段2,…] from 表名;
select * from one 
union all
select * from two;

#### 3)笛卡尔积 cross join
#语法:select 字段1[,…] from 表1, 表2 ;
select * from table1, table2;

#### 4)内连接 inner join
#语法:select 字段1[,…] from 表1[ inner] join 表2 on 连接条件;
select * from table1, table2
Where table1.id= table2.id;

select * from table1 
inner join table2 on table1.id= table2.id;

#### 5)左连接 left join
#语法:select 字段1[,…] from 表1 left join 表2 on 连接条件;
select * from Table1 
left join Table2 on Table1.id = Table2.id;

#### 6)右链接 right join
#语法:select 字段1[,…] from 表1 right join 表2 on 连接条件;
select * from Table1 
right join Table2 on Table1.id = Table2.id;

## 商业情景应用-7
#希望得到信用卡持卡人的客户信息
SELECT t1.card_id,t1.disp_id,t1.issued,t1.type,t3.sex,t3.birth_date,t3.district_id
from kdd99.kdd99_card t1 
left join kdd99.kdd99_disp t2 on t1.disp_id=t2.disp_id
left join kdd99.kdd99_clients t3 on t2.client_id =t3.client_id;

## 商业情景应用-8
# Bank公司的贷款风险分析人员希望得到贷款客户在发放贷款前一年的交易数据。
SELECT t1.loan_id,t1.account_id,t1.date,t1.amount,t1.duration, 
t1.payments, t1.status,t2.trans_id, t2.date AS date1, 
t2.type, t2.operation,t2.amount AS amount1,t2.balance, 
t2.k_symbol, t2.bank, t2.account
FROM KDD99.KDD99_LOANS t1
LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id 
AND t1.date>t2.date AND t1.date <= t2.date+365
ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date;

#### 7)子查询

## 商业情景应用-9
# 获取合同终止,但贷款没有支付的订单明细
select * from kdd99.kdd99_order 
where account_id in
(select account_id from kdd99.kdd99_loans where status="B")
order by account_id;

select a.* from kdd99.kdd99_order a 
left join kdd99.kdd99_loans b on a.account_id=b.account_id
where b.status="B"
ORDER BY account_id;

### 查询创建表/视图
# 创建表
#语法:CREATE TABLE <table-name> AS (select...)
# 创建视图
#语法:CREATE VIEW <view-name> AS (select...)

## 商业情景应用-10
# 将上述应用9中的数据存入表名为“loan_tran”中。
CREATE TABLE loan_tran AS 
SELECT t1.loan_id, t1.account_id, t1.date,t1.amount, t1.duration, 
t1.payments, t1.status, t2.trans_id, t2.date AS date1, 
t2.type, t2.operation,t2.amount AS amount1,t2.balance, 
t2.k_symbol, t2.bank,t2.account
FROM KDD99.KDD99_LOANS t1
LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id 
AND t1.date>t2.date AND t1.date <= t2.date+365
ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date;

#将上述应用9中的查询创建名为“loan_tran_view”的视图。
CREATE view loan_tran_view AS 
SELECT t1.loan_id, t1.account_id, t1.date,t1.amount, t1.duration, 
t1.payments, t1.status, t2.trans_id, t2.date AS date1, 
t2.type, t2.operation,t2.amount AS amount1,t2.balance, 
t2.k_symbol, t2.bank,t2.account
FROM KDD99.KDD99_LOANS t1
LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id 
AND t1.date>t2.date AND t1.date <= t2.date+365
ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date;




### 窗口函数  top N 问题
#rank(),dense_rank() 和 row_number()
#语法:<窗口函数> over (partition by <用于分组的列名> order by <用于排序的列名>)

## 商业情景应用
# 获取信贷类型为"贷"且交易类型为"信贷资金"的每一天交易金额最高的一笔交易记录
SELECT * FROM 
(select t1.trans_id,t1.account_id,T1.date,T1.amount,
rank() over (PARTITION by date ORDER BY amount desc) as rank1,
dense_rank() over (PARTITION by date ORDER BY amount desc) as rank2,
row_number() over (PARTITION by date ORDER BY amount desc) as rank3
from KDD99.kdd99_tRANS t1
WHERE t1.type='' and t1.operation='信贷资金' 
#and t1.date='1993-02-08'
) t 
WHERE rank3=1;