@JeemyJohn
2020-01-01T13:23:28.000000Z
字数 4868
阅读 938
SUBSTR(from_unixtime(unix_timestamp()),12,2)
项目:易购APP UV流量预测
本周工作:
1. 天级UV流量预测模型T+2模型离线效果评估(MAPE:0.0472)、上线、生成事件并与业务方联调;
2. 业务方核验过程中天级T+2模型线上效果(平均MAPE:,有一天超过20%)与误差分析,原因定位;
3. 业务方小时级模型从预测每小时UV改成预测每小时净增UV、到当前小时累计UV以及动态天级UV,小时级模型方案重新设计;
4. 新的小时模型特征设计、样本与特征跑数据落表、数据验证;
下周计划:
1. 新的小时级模型上线、生成事件并与业务方联调;
2. 对已上线模型天级模型进行效果观察与优化;
USE YYADMIN;
drop table if exists temp_TEST_NYM_D;
create table temp_TEST_NYM_D STORED AS RCFILE AS
SELECT
if(utm_src is null or utm_src = '', 'direct', utm_src) utm_src,
if(utm_src is null or utm_src = '', 'direct', utm_medium) utm_medium,
visitor_id
FROM
( SELECT
concat(TRMNL_TP_ID, CLNT_ID) AS visitor_id,
case when t.utm_src is not null and trim(t.utm_src)<>'' then t.utm_src
when t5.wap_source is not null then t5.wap_source
else t.utm_src
end as utm_src,
case when t.utm_medium is not null and trim(t.utm_medium)<>'' then t.utm_medium
when t5.wap_medium is not null then t5.wap_medium
else t.utm_medium
end as utm_medium
FROM bi_sor.tsor_br_trmnl_base_pageview_d t
left join (select * from mobdss.tsor_br_trmnl_event_appreport_d a
where a.statis_date = '20191204') t5
on t.clnt_id = t5.client_uid
and t.visit_id = t5.session_id
WHERE t.statis_date = '20191204'
AND t.intfc_tp = 1
)T ;
drop table if exists temp_TEST_NYM_D_1;
create table temp_TEST_NYM_D_1 STORED AS RCFILE AS
select utm_src_cate_id_1,
utm_src_cate_nm_1,
utm_src_cate_id,
utm_src_cate_nm,
utm_src_id,
utm_src_nm
from bi_td.tdm_utm_src_td;
drop table if exists temp_TEST_NYM_D_2;
create table temp_TEST_NYM_D_2 STORED AS RCFILE AS
select CASE WHEN (case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) = '60'
THEN '直接流量'
WHEN (case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) <> '60' and
(case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) <> '99' and
(case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) IS NOT NULL
THEN '站外流量'
ELSE '其他' end is_direct,
count(distinct visitor_id) uv
from temp_TEST_NYM_D t
left join temp_TEST_NYM_D_1 t9
on if(t.utm_src = '' or t.utm_src is null or utm_src = 'direct', -- add by 20190103
concat(t.utm_src, rand()), concat(utm_src , '/', utm_medium )) = t9.utm_src_nm
group by CASE WHEN (case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) = '60'
THEN '直接流量'
WHEN (case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) <> '60' and
(case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) <> '99' and
(case when t.utm_src = 'direct' then '60' else nvl(t9.utm_src_cate_id_1, 99) end ) IS NOT NULL
THEN '站外流量'
ELSE '其他' end;
sfe.tsfe_Hisense_warehouse_sku_sales
sfe.tsfe_Hisense_warehouse_order_base_feature
sfe.tsfe_Hisense_warehouse_order_detail_feature
sfe.tsfe_Hisense_warehouse_sale_detail_feature
SELECT
statis_date
, SUM(sale_cnt) AS sum_cnt
, SUM(IF(sale_cnt >= 1, 1, 0)) AS pos -- 非零销sku数
, SUM(IF(sale_cnt < 1, 1, 0)) AS neg -- 零销sku数
FROM
(
SELECT
statis_date
, gds_cd
, SUM(sale_cnt) AS sale_cnt
FROM
sfe.tsfe_Hisense_warehouse_sku_sales
WHERE
statis_date < '20190930'
GROUP BY
statis_date
, gds_cd
) t
GROUP BY statis_date
ORDER BY statis_date
;
所有店的属性信息表
brock_dim.t_shp_str_inf_sw_ed
sfe.tsfe_ar_v0_store_sales_base_d
订单明细表
BROCK_DWD.t_ord_retail_grp_ord_dtl_d
scpdm.TDM_ORDER_DETAIL_D
sfe.tsfe_mid_total_es_sales_base_info_d
商品属性信息表
brock_dim.t_prd_gds_inf_ed
全渠道库存表
bi_dpa.tdpa_inv_dtl_d
海信线上sku查询逻辑
select
*
from
sfe.tsfe_ar_order_detail_org_Hisense_d
where
bill_type='1'
and chnl_cd='50'
and order_source in ('B2C', 'TMALL')
and vendor_tp='1'
and purchase_f in ('0','1')
and depot_id in ('0001', '0002', '0003')
and status = '30'
and statis_date = '${hivevar:statis_date}'
and gds_cd in (select gds_cd from sfe.tsfe_salespredict_v3_gds_info_td_hisense)
select
*
from
sfe.tsfe_ods_auto_rep_ie_salespredict_order_detail_offline_d
where
statis_date = 20190920
and instr(brand_nm,'Hisense') != 0
and plant_id != fh_dd
SELECT
sale_cnt
, SUM(IF(val < 0.1, 1, 0)) AS p1
, SUM(IF(val >= 0.1 AND val < 0.2, 1, 0)) AS p2
, SUM(IF(val >= 0.2 AND val < 0.3, 1, 0)) AS p3
, SUM(IF(val >= 0.3 AND val < 0.4, 1, 0)) AS p4
, SUM(IF(val >= 0.4 AND val < 0.5, 1, 0)) AS p5
, SUM(IF(val >= 0.5 AND val < 0.6, 1, 0)) AS p6
, SUM(IF(val >= 0.6 AND val < 0.7, 1, 0)) AS p7
, SUM(IF(val >= 0.7 AND val < 0.8, 1, 0)) AS p8
, SUM(IF(val >= 0.8 AND val < 0.9, 1, 0)) AS p9
, SUM(IF(val >= 0.9 AND val < 1.0, 1, 0)) AS p10
, SUM(IF(val > 1.0, 1, 0)) AS p11
FROM
(
SELECT
if(label <= 10, label, 11) AS sale_cnt
, abs(label - prediction) AS val
FROM
sfe.tsfe_hisense_eshop_sale_predict_results
WHERE
label > 0
) t
GROUP BY
sale_cnt
ORDER BY
sale_cnt
;
SELECT
sale_cnt
, SUM(IF(pred_cnt <=1, 1, 0)) AS cnt1
, SUM(IF(pred_cnt = 2, 1, 0)) AS cnt2
, SUM(IF(pred_cnt = 4, 1, 0)) AS cnt3
, SUM(IF(pred_cnt = 5, 1, 0)) AS cnt4
, SUM(IF(pred_cnt = 6, 1, 0)) AS cnt5
, SUM(IF(pred_cnt = 7, 1, 0)) AS cnt7
, SUM(IF(pred_cnt = 8, 1, 0)) AS cnt8
, SUM(IF(pred_cnt = 9, 1, 0)) AS cnt9
, SUM(IF(pred_cnt =10, 1, 0)) AS cnt10
, SUM(IF(pred_cnt =11, 1, 0)) AS cnt11
FROM
(
SELECT
IF(label <= 10, label, 11) AS sale_cnt
, IF(round(prediction) <= 10, round(prediction), 11) AS pred_cnt
FROM
sfe.tsfe_hisense_eshop_sale_predict_results
WHERE
label > 0
) t
GROUP BY
sale_cnt
ORDER BY
sale_cnt
;
select
statis_date
, count(1) AS cnt
, sum(if(sale_cnt > 0, 1, 0)) AS pos
, sum(if(sale_cnt > 0, 1, 0))/count(1) AS p1
FROM
sfe.tsfe_Hisense_warehouse_sku_sales
--sfe.tsfe_Hisense_warehouse_order_base_feature
WHERE
statis_date >= '20190901'
group by statis_date
order by statis_date