使用presto 进行跨库数据对比

发布时间 2023-06-21 15:21:37作者: linbo.yang

 

明细+汇总

 

 1 SELECT 
 2 concat(
 3 'select a.* from ('
 4 ,concat(' ' ,array_join (TRANSFORM(table_catalog_schema ,_->concat('select id, map(array[' ,column_name_str ,'],split(' ,array_join(TRANSFORM(column_name_list ,_->concat('coalesce(cast('    ,CASE WHEN element_at(column_type_list,array_position(column_name_list,_)) LIKE 'decimal%' OR element_at(column_type_list,array_position(column_name_list,_)) ='double' THEN  concat('REGEXP_REPLACE(cast(',_,' as varchar),','''([0]+$)''',',','''''',')') WHEN element_at(column_type_list,array_position(column_name_list,_)) LIKE 'timestam%' OR element_at(column_type_list,array_position(column_name_list,_)) LIKE 'datetime' THEN  concat('substring( cast(',_,' as varchar) ,1,19)') WHEN element_at(column_type_list,array_position(column_name_list,_)) = 'json'  THEN  concat('json_format(json ','''{"":""}''',')') ELSE _  END    ,' as varchar)',',',chr(39),'NULL',chr(39),')')),concat('||','''^''','||'))  ,' ,''^''))' ,' from ',_,'.',table_name  ,' where '  , CASE WHEN contains(column_name_list,'create_time')  THEN concat( 'create_time',' between ' ,'date_trunc(''day'',current_timestamp -INTERVAL ''1'' DAY  )' ,' and ' ,'date_add(''HOUR'',-1,cast(date_format(current_timestamp,''%Y-%m-%d %H:00:00'') AS timestamp))') ELSE '1=1' END  ,CASE WHEN array_position(table_catalog_schema,_)<> dcnt THEN ' union all ' ELSE ' ' END  )),' '))                                            
 5 ,') a left join  ('
 6 ,concat(' ',concat('select id, map(array[',column_name_str ,'],split(' ,array_join(TRANSFORM(column_name_list ,_->concat('coalesce(cast('    ,CASE WHEN element_at(column_type_list,array_position(column_name_list,_)) LIKE 'decimal%' OR element_at(column_type_list,array_position(column_name_list,_)) ='double' THEN  concat('REGEXP_REPLACE(cast(',_,' as varchar),','''([0]+$)''',',','''''',')') WHEN element_at(column_type_list,array_position(column_name_list,_)) LIKE 'timestam%' OR element_at(column_type_list,array_position(column_name_list,_)) LIKE 'datetime' THEN  concat('substring( cast(',_,' as varchar) ,1,19)') WHEN element_at(column_type_list,array_position(column_name_list,_)) = 'json'  THEN  concat('json_format(json ','''{"":""}''',')') ELSE _  END    ,' as varchar)',',',chr(39),'NULL',chr(39),')')),concat('||','''^''','||')) ,' ,''^''))' ,' from doris.',doris_tabName  ,' where ' , CASE WHEN contains(column_name_list,'create_time') THEN concat( 'create_time',' between ','date_trunc(''day'',current_timestamp -INTERVAL ''1'' DAY  )',' and ','date_add(''HOUR'',-1,cast(date_format(current_timestamp,''%Y-%m-%d %H:00:00'') AS timestamp))') ELSE '1=1' END ,' and msg_is_valid =1 ' ))
 7 ,') b on a.id=b.id WHERE b.id IS NULL '  
 8 ) detal_SQL  -- 根据明细对比的SQL
 9 ,concat(
10     'select a.*,b.*,a.cnt-b.cnt from (' 
11     ,array_join (TRANSFORM(table_catalog_schema,_->concat('select  ''',substr(_,strpos(_,'.')+1)||'.'||table_name,''' cate_log_schema, count(1) cnt  from ',_,'.',table_name ,' where ' , CASE WHEN contains(column_name_list,'create_time') THEN concat( 'create_time',' between ','date_trunc(''day'',current_timestamp -INTERVAL ''1'' DAY  )',' and ','date_add(''HOUR'',-1,cast(date_format(current_timestamp,''%Y-%m-%d %H:00:00'') AS timestamp))') ELSE '1=1' END ,CASE WHEN array_position(table_catalog_schema,_)<> dcnt THEN ' group by 1 union all ' ELSE ' group by 1' END )),' ')
12     ,') a left join ( '    
13     ,concat('select  concat(msg_source_db,''.'',msg_source_table) cate_log_schema',', count(1) cnt  from doris.',doris_tabName ,' where ' , CASE WHEN contains(column_name_list,'create_time') THEN concat( 'create_time',' between ','date_trunc(''day'',current_timestamp -INTERVAL ''1'' DAY  )',' and  ','date_add(''HOUR'',-1,cast(date_format(current_timestamp,''%Y-%m-%d %H:00:00'') AS timestamp))') ELSE '1=1' END ,' and msg_is_valid =1  group by 1' )
14     ,') b on a.cate_log_schema=b.cate_log_schema '
15 ) agg_Sql_Str  -- 根据汇总数据对比的Sql
16 FROM (
17 SELECT table_name
18 ,max(doris_tabName) doris_tabName
19 --,array_agg(DISTINCT doris_tabName) doris_tabName
20 ,array_agg(DISTINCT  concat(table_catalog,'.',table_schema)) table_catalog_schema
21 ,count(DISTINCT  concat(table_catalog,'.',table_schema)) dcnt
22 ,max(column_name_list) column_name_list
23 ,max(column_name_str) column_name_str
24 ,max(columnCnt) columnCnt
25 ,max(column_type_list) column_type_list
26 FROM (
27 SELECT table_catalog,table_schema,table_name, array_join(array_agg(concat('''',column_name,'''')),',') column_name_str,array_agg(column_name ORDER BY ordinal_position) column_name_list ,count(column_name) columnCnt ,array_agg(data_type ORDER BY ordinal_position ) column_type_list FROM oms4_som01.information_schema.columns WHERE table_schema NOT IN ('information_schema','sys','pt','maxwell','db_oms4_som998','db_ecs_oms4_som_nike_prod') GROUP BY 1,2,3
28 UNION ALL  SELECT table_catalog,table_schema,table_name, array_join(array_agg(concat('''',column_name,'''')),',') column_name_str,array_agg(column_name ORDER BY ordinal_position) column_name_list ,count(column_name) columnCnt ,array_agg(data_type ORDER BY ordinal_position ) column_type_list  FROM oms4_som02.information_schema.columns  WHERE table_schema NOT IN ('information_schema','sys','pt','maxwell','db_oms4_som999') GROUP BY 1,2,3
29 UNION ALL  SELECT table_catalog,table_schema,table_name, array_join(array_agg(concat('''',column_name,'''')),',') column_name_str,array_agg(column_name ORDER BY ordinal_position) column_name_list ,count(column_name) columnCnt ,array_agg(data_type ORDER BY ordinal_position ) column_type_list  FROM oms4_baseinfo.information_schema.columns  WHERE table_schema NOT IN ('db_sec_compare_prod') GROUP BY 1,2,3
30 UNION ALL  SELECT table_catalog,table_schema,table_name, array_join(array_agg(concat('''',column_name,'''')),',') column_name_str,array_agg(column_name ORDER BY ordinal_position) column_name_list ,count(column_name) columnCnt ,array_agg(data_type ORDER BY ordinal_position ) column_type_list FROM oms4_finance.information_schema.columns   WHERE table_schema NOT IN ('db_sec_compare_prod') GROUP BY 1,2,3
31 )t INNER JOIN (
32 SELECT *
33 FROM (VALUES -- (doris表名, 源库表明)
34 ('db_ods.oms4_db_base_info_bi_brand','bi_brand'),
35 ('db_ods.oms4_db_base_info_bi_sales_entity_brand_ref','bi_sales_entity_brand_ref'),
36 ('db_ods.oms4_db_base_info_bi_sys_goods_tax_rate','bi_sys_goods_tax_rate'),
37 ('db_ods.oms4_rf_refund','rf_refund'),
38 ('db_ods.oms4_db_oms4_som_trade_order_guide_info','trade_order_guide_info'),
39 ('db_ods.oms4_db_oms4_som_trade_order_item','trade_order_item'),
40 ('db_ods.oms4_db_oms4_som_trade_order_member_info','trade_order_member_info'),
41 ('db_ods.oms4_db_oms4_som_trade_order_payment_info','trade_order_payment_info'),
42 ('db_ods.oms4_db_oms4_som_trade_return_order','trade_return_order')
43 --('db_ods.oms4_db_oms4_som_trade_order_guide_info','trade_order_guide_info'),
44 --('db_ods.oms4_db_oms4_som_trade_order_delivery_info','trade_order_delivery_info'),
45 --('db_ods.oms4_db_oms4_som_so_order_timing_promise','so_order_timing_promise')
46 ) AS tab (doris_tabName,src_db_tabName)
47 ) s ON t.table_name=s.src_db_tabName
48 GROUP BY table_name
49 ) tt
View Code