大表 left join 小表时产生数据倾斜怎么处理?

drop table if exists tableS;

create table tableS as rcfile as
select t.order_app_tp_cd as site_id,
count(distinct t.order_id) cnt_order_all,

count(distinct case when t.order_date = '${hivevar:statis_date}' then t.order_id else null end) as cnt_order,
sum(case when t.order_date = '${hivevar:statis_date}' then t.pay_amt else 0.0 end) as amt_order,

count(distinct case when t.pay_date = '${hivevar:statis_date}'
and t.status = '30'
then t.order_id else null end) as cnt_eff,

sum(case when t.pay_date = '${hivevar:statis_date}'
and t.status in ('30','50','60','70')
then t.pay_amt else 0.0 end) as amt_eff,

sum(case when t.pay_date = '${hivevar:statis_date}'
and t.status in ('50','60','70')
then t.pay_amt else 0.0 end) as amt_back,

count(distinct case when t.pay_date = '${hivevar:statis_date}'
and t.status = '30' then t.member_id
else null end) as cnt_buyer,

count(distinct case when c.member_id is not null
and t.pay_date = '${hivevar:statis_date}'
and t.status = '30' then t.member_id
else null end) as cnt_buyer_new,

count(distinct case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),14) and '${hivevar:statis_date}'
and t.status='30'
then t.order_id else null end) as cnt_eff_15,
sum(case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),14) and '${hivevar:statis_date}'
and t.status in('30','50','70')
then t.pay_amt else 0.0 end) as amt_eff_15,
count(distinct case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),14) and '${hivevar:statis_date}'
and t.status='30'
then t.member_id else null end) as cnt_buyer_15,
count(distinct case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),29) and '${hivevar:statis_date}'
and t.status='30'
then t.order_id else null end) as cnt_eff_30,
sum(case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),29) and '${hivevar:statis_date}'
and t.status in('30','50','70')
then t.pay_amt else 0.0 end) as amt_eff_30,
count(distinct case when t.pay_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),29) and '${hivevar:statis_date}'
and t.status='30'
then t.member_id else null end) as cnt_buyer_30
from (select *
from tableA t
where statis_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),29) and '${hivevar:statis_date}'
and order_apply_tp = 4
and order_dtl_apply_cd = '-1'
and order_source = 'B2C'
and gmv_Flg = 1
and chnl_cd = '50') t
left join (select c1.member_id
from tableB c1
where site_id not in ('PC','WAP','APP')
and first_pay_time = '${hivevar:statis_date}'
group by c1.member_id) c
on t.member_id = c.member_id
join (select name,code
from tableC
where statis_date between date_sub(from_unixtime(to_unix_timestamp('${hivevar:statis_date}', 'yyyyMMdd'), 'yyyy-MM-dd'),29) and '${hivevar:statis_date}'
and status = 0) td
on td.code = t.order_app_tp_cd
group by t.order_app_tp_cd;
其中tableA是大表,tableB是小表,tableC是小表
运行后发现GC运行很频繁,应该是产生了数据倾斜?请问各位大佬怎么解决?

sql
Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问
相关内容推荐