hive-1


## Daily nginx access-log report for the mpi-web application.
## Flow:
##   1. aggregate the run date's requests per (server_ip, normalized url) into a staging table
##   2. aggregate the previous day's requests the same way into a second staging table
##   3. left-join today against yesterday and overwrite the run date's partition of
##      dprpt_mpi_nginx_log_daily, with yesterday's figures in the *_cmp columns
##   4. drop both staging tables
use bi;

##set ($suffix = "${env.YYYY}${env.MM}${env.DD}");

## ---------------------------------------------------------------------------
## Staging table: aggregates for the run date.
## NOTE(review): the delimiter clauses are kept from the original; they are
## presumably ignored for ORC storage - confirm before removing them.
## ---------------------------------------------------------------------------
drop table if exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};
create table if not exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}
(
    server_ip             string,
    url                   string,
    times                 int,
    avg_response_today    int,
    avg_body_size         int,
    response_line95_today int,
    count_50x             int,
    count_40x             int,
    count_30x             int,
    count_20x             int
)
row format delimited
    fields terminated by '\005'
    collection items terminated by '\002'
    map keys terminated by '\003'
    lines terminated by '\n'
stored as orc;

## ---------------------------------------------------------------------------
## Staging table: aggregates for the previous day.
## Column names intentionally mirror the "today" table so both sides of the
## final join expose the same schema.
## ---------------------------------------------------------------------------
drop table if exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};
create table if not exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}
(
    server_ip             string,
    url                   string,
    times                 int,
    avg_response_today    int,
    avg_body_size         int,
    response_line95_today int,
    count_50x             int,
    count_40x             int,
    count_30x             int,
    count_20x             int
)
row format delimited
    fields terminated by '\005'
    collection items terminated by '\002'
    map keys terminated by '\003'
    lines terminated by '\n'
stored as orc;

## ---------------------------------------------------------------------------
## Aggregate the run date.
##  * The grouping key is HOST + PATH with digits, '+' and '*' removed from the
##    path, so URLs that differ only by numeric ids collapse into one row.
##    NOTE(review): inside the character class '|' is a literal, so pipe
##    characters are stripped too - confirm that is intended.
##  * Rows whose url yields a NULL HOST or PATH are dropped, because a <>
##    comparison against NULL is never true.
##  * response_time is multiplied by 1000 before rounding (presumably
##    seconds to milliseconds - confirm against the log format).
##  * percentile() only accepts integral input, so the rounded value is cast
##    to bigint explicitly instead of relying on the return type of round().
##  * No ORDER BY here: the rows are joined and re-sorted by the final insert,
##    so sorting the staging data would only add a needless total sort.
##  * application is fixed by the WHERE clause, so it is not a grouping key.
## ---------------------------------------------------------------------------
insert into table dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}
select
    server_ip,
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')) as url,
    count(url) as times,
    round(avg(response_time) * 1000) as avg_response_today,
    round(avg(body_bytes_sent)) as avg_body_size,
    round(percentile(cast(round(response_time * 1000) as bigint), 0.95)) as response_line95_today,
    sum(if(status like '50%', 1, 0)) as count_50x,
    sum(if(status like '40%', 1, 0)) as count_40x,
    sum(if(status like '30%', 1, 0)) as count_30x,
    sum(if(status like '20%', 1, 0)) as count_20x
from dpdw_nginx_log_all
where application = 'mpi-web'
  and hp_stat_date = '${env.YYYYMMDD}'
  and parse_url(url, 'PATH') <> '/index.jsp'
  and parse_url(url, 'PATH') <> '/heartbeat.html'
  and parse_url(url, 'HOST') <> 'dp'
  and parse_url(url, 'HOST') <> 'www.dianping.com'
group by
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')),
    server_ip;

## ---------------------------------------------------------------------------
## Aggregate the previous day: same query as above, reading the partition
## selected by the YYYYMMDD_P1D placeholder.
## ---------------------------------------------------------------------------
insert into table dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}
select
    server_ip as server_ip,
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')) as url,
    count(url) as times,
    round(avg(response_time) * 1000) as avg_response_today,
    round(avg(body_bytes_sent)) as avg_body_size,
    round(percentile(cast(round(response_time * 1000) as bigint), 0.95)) as response_line95_today,
    sum(if(status like '50%', 1, 0)) as count_50x,
    sum(if(status like '40%', 1, 0)) as count_40x,
    sum(if(status like '30%', 1, 0)) as count_30x,
    sum(if(status like '20%', 1, 0)) as count_20x
from dpdw_nginx_log_all
where application = 'mpi-web'
  and hp_stat_date = '${env.YYYYMMDD_P1D}'
  and parse_url(url, 'PATH') <> '/index.jsp'
  and parse_url(url, 'PATH') <> '/heartbeat.html'
  and parse_url(url, 'HOST') <> 'dp'
  and parse_url(url, 'HOST') <> 'www.dianping.com'
group by
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')),
    server_ip;

## ---------------------------------------------------------------------------
## Final report: one row per (server_ip, url) seen on the run date.
## The left outer join keeps rows that had no traffic on the previous day;
## their *_cmp columns are filled with 0 via coalesce.
## ---------------------------------------------------------------------------
insert overwrite table dprpt_mpi_nginx_log_daily partition (hp_cal_dt = '${env.YYYYMMDD}')
select
    t.server_ip as server_ip,
    t.url as url,
    t.times as times,
    coalesce(y.times, 0) as times_cmp,
    t.avg_response_today as avg_response_today,
    coalesce(y.avg_response_today, 0) as avg_response_cmp,
    t.response_line95_today as response_line95_today,
    coalesce(y.response_line95_today, 0) as response_line95_today_cmp,
    t.avg_body_size as avg_body_size,
    t.count_20x as count_20x,
    coalesce(y.count_20x, 0) as count_20x_cmp,
    t.count_30x as count_30x,
    coalesce(y.count_30x, 0) as count_30x_cmp,
    t.count_40x as count_40x,
    coalesce(y.count_40x, 0) as count_40x_cmp,
    t.count_50x as count_50x,
    coalesce(y.count_50x, 0) as count_50x_cmp
from dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD} t
left outer join dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD} y
    on t.url = y.url
   and t.server_ip = y.server_ip
order by times desc;

## Clean up the staging tables.
drop table if exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};
drop table if exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};

    推荐阅读