爸爸在哪儿2

爸爸在哪儿2
文章图片
image.png
需求说明
【爸爸在哪儿2】复杂的继承路径,非常类似中国古代的姓氏制度。
有些子孙封到某地后,便以此地为姓,其变为此姓的始祖。
存在2支或多支同姓的,需要合并,以最早的始祖为祖。

  1. 路径中,当气泡不为sr或repeat(空白未标的都是repeat)时,或背景颜色不同时,需在此处切断——切断处的节点即为新姓的始祖
  2. 同姓合并:图中第2刀处的501应与第1刀处的027归为一支,以027作为改姓始祖
实现逻辑
  1. 父子关系数据构建(同时关联取potype和tinyclass)
-- Step 1: build the parent/child base table ods.sku_path.
-- Combines po_type (ods.f_po_head), the repeat-hierarchy columns
-- (ods.dim_style_repeat_hierarchy) and tiny_class (ods.dim_sku).
drop table if exists ods.sku_path;

create table ods.sku_path as
with sku as (
    -- distinct (product_code, tiny_class) pairs, used to attach tiny_class to a style
    select distinct
        product_code,
        tiny_class
    from ods.dim_sku
),
road as (
    select
        a.style_id,
        a.po_type,
        b.productyearseason_id,
        b.repeat_style_id,
        b.root_style_id,
        b.style_path,
        -- disabled alternative: coalesce(b.style_path, a.style_id) as style_path
        -- rn = 1 marks, per hierarchy style_id, the row with the highest repeat_level_style
        row_number() over (
            partition by b.style_id
            order by repeat_level_style desc
        ) as rn
    from ods.f_po_head a
    inner join ods.dim_style_repeat_hierarchy b
        on a.style_id = b.style_id
    where b.style_path is not null
        -- hard-coded prefix: only paths starting with 120150515 are processed
        and b.style_path like '120150515%'
)
-- a.* is kept on purpose so the table keeps every road column (including rn)
select
    a.*,
    b.product_code,
    b.tiny_class
from road a
inner join sku b
    on a.style_id = b.product_code
where a.rn = 1
;

  2. 找新的“宗主”
-- Step 2: flag the styles that start a new lineage.
-- my_root is the style's own id when it is a lineage head, otherwise NULL.
drop table if exists ods.sku_path_root;

create table ods.sku_path_root as
select
    a.style_id,
    a.repeat_style_id,
    a.root_style_id,
    a.style_path,
    a.po_type,
    a.tiny_class,
    b.po_type as parent_po_type,
    b.tiny_class as parent_tiny_class,
    case
        -- no parent row matched: this style is the first of its line
        when b.style_id is null
            -- its own PO type is neither of the two repeat types
            or a.po_type not in ('Fast-track Repeat', 'Standard Repeat')
            -- its tiny class differs from the parent's
            or a.tiny_class <> b.tiny_class
            then a.style_id
    end as my_root
from ods.sku_path a
-- self-join to the parent row; matching on root_style_id as well is what the
-- original author relies on to make the parent unique
left join ods.sku_path b
    on a.repeat_style_id = b.style_id
    and a.root_style_id = b.root_style_id
;

  3. 补空白(找同宗)
-- Step 3: fill the blanks, i.e. give every style a my_root taken from the
-- flagged lineage heads that occur in its style_path.
-- "if exists" keeps the script re-runnable when the table is not there yet,
-- matching the other steps.
drop table if exists ods.sku_path_all_root;

create table ods.sku_path_all_root as
select
    a.style_id,
    a.style_path,
    a.repeat_style_id,
    a.po_type,
    a.tiny_class,
    -- depth of the style in its path: number of '/' separators + 1
    length(a.style_path) - length(replace(a.style_path, '/', '')) + 1 as lvl,
    -- NOTE(review): max() picks the largest flagged id found in the path; this
    -- presumably relies on ids growing along the path -- confirm.
    max(b.my_root) as my_root
from ods.sku_path_root a
inner join ods.sku_path_root b
    -- rows with a NULL my_root never satisfy this predicate, so only flagged heads join.
    -- NOTE(review): instr() is a plain substring test; an id that is a prefix or
    -- substring of another id in the path would also match -- verify ids are fixed-width.
    on instr(a.style_path, b.my_root) > 0
    and a.root_style_id = b.root_style_id
group by
    a.style_id,
    a.style_path,
    a.repeat_style_id,
    a.po_type,
    a.tiny_class
;

  4. 合并同宗
-- Step 4: merge lineages that belong together and write the final new_root.
drop table if exists ods.sku_path_result;

create table ods.sku_path_result as
select
    a.style_id,
    a.style_path,
    a.po_type,
    a.tiny_class,
    case
        -- repeat-type styles deeper than level 2 take the smallest head id among
        -- all heads whose parent carries the same my_root
        when a.po_type in ('Fast-track Repeat', 'Standard Repeat')
            and a.lvl > 2
            then min(b.my_root) over (partition by c.my_root)
        -- disabled alternative: when a.po_type = 'Fast-track Repeat' and a.lvl = 2 then a.repeat_style_id
        else a.my_root
    end as new_root
from ods.sku_path_all_root a
-- b: the row of a's current lineage head (the "minor branch")
inner join ods.sku_path_all_root b
    on a.my_root = b.style_id
-- c: the parent row of that head
left join ods.sku_path_all_root c
    on b.repeat_style_id = c.style_id
-- same hard-coded path prefix as in step 1
where a.style_path like '120150515%'
order by a.style_path
;

# Export HADOOP_USER_NAME=hive into the environment of this shell and its child processes.
# NOTE(review): presumably so that Hadoop/Hive client commands run as user "hive" -- confirm.
export HADOOP_USER_NAME=hive;

    推荐阅读