当前位置:网站首页>连续登陆问题
连续登陆问题
2022-07-28 05:21:00 【xiexiexie0520】
连续登陆问题
连续登陆问题
在电商、物流和银行等业务中,经常会遇到这样的需求:统计用户连续交易的总额、连续登录天数、连续登录的开始和结束时间、间隔天数等。
数据:
注意:每个用户每天可能会有多条记录
id datestr amount
1,2019-02-08,6214.23
1,2019-02-08,6247.32
1,2019-02-09,85.63
1,2019-02-09,967.36
1,2019-02-10,85.69
1,2019-02-12,769.85
1,2019-02-13,943.86
1,2019-02-14,538.42
1,2019-02-15,369.76
1,2019-02-16,369.76
1,2019-02-18,795.15
1,2019-02-19,715.65
1,2019-02-21,537.71
2,2019-02-08,6214.23
2,2019-02-08,6247.32
2,2019-02-09,85.63
2,2019-02-09,967.36
2,2019-02-10,85.69
2,2019-02-12,769.85
2,2019-02-13,943.86
2,2019-02-14,943.18
2,2019-02-15,369.76
2,2019-02-18,795.15
2,2019-02-19,715.65
2,2019-02-21,537.71
3,2019-02-08,6214.23
3,2019-02-08,6247.32
3,2019-02-09,85.63
3,2019-02-09,967.36
3,2019-02-10,85.69
3,2019-02-12,769.85
3,2019-02-13,943.86
3,2019-02-14,276.81
3,2019-02-15,369.76
3,2019-02-16,369.76
3,2019-02-18,795.15
3,2019-02-19,715.65
3,2019-02-21,537.71
建表语句
-- Raw transaction log backed by a comma-delimited text file.
-- A user may have several rows for the same day.
create table deal_tb(
    id string                -- user identifier
    ,datestr string          -- transaction date as yyyy-MM-dd text
    ,amount decimal(18,2)    -- transaction amount, exact decimal so that sums are not computed in floating point
)row format delimited fields terminated by ',';
计算逻辑
- 先按用户和日期分组求和,使每个用户每天只有一条数据
-- Collapse each user's transactions into one row per calendar day.
-- The amount is cast to decimal before summing so the daily total is exact
-- instead of going through an implicit string-to-double conversion.
select id
    ,datestr
    ,sum(cast(amount as decimal(18,2))) as sum_amount
from deal_tb
group by id,datestr;
- 根据用户ID分组并按日期排序,用日期减去组内序号得到一个基准日期(grp);同一段连续登录中每一天算出的基准日期都相同,因此基准日期相同即说明这些日期是连续的(基准日期只用作分组标识,并不等于该段的开始日期)
-- Tag every active day of a user with a streak key (grp).
-- Within one user, consecutive dates and consecutive row numbers both advance
-- by one, so datestr minus rn stays constant across an unbroken run of days
-- and changes as soon as a day is skipped.
with daily_total as (
    -- one row per user per day, summed exactly as decimal
    select id
        ,datestr
        ,sum(cast(amount as decimal(18,2))) as sum_amount
    from deal_tb
    group by id,datestr
),
numbered_day as (
    -- rn is the position of the day among the user's active days
    select id
        ,datestr
        ,sum_amount
        ,row_number() over(partition by id order by datestr) as rn
    from daily_total
)
select id
    ,datestr
    ,sum_amount
    ,date_sub(datestr,rn) as grp
from numbered_day;
- 统计用户连续交易的总额、连续登录天数、连续登录开始和结束时间、间隔天数(与上一段连续登录之间中断的天数,每个用户的第一段为NULL)
-- Streak report: one output row per user per run of consecutive active days.
--   sc_sum_amount  total amount transacted during the streak
--   sc_days        number of days in the streak
--   sc_start_date  first day of the streak
--   sc_end_date    last day of the streak
--   iv_days        days skipped between the previous streak and this one,
--                  null for a user's first streak
with daily_total as (
    -- one row per user per day, summed exactly as decimal
    select id
        ,datestr
        ,sum(cast(amount as decimal(18,2))) as sum_amount
    from deal_tb
    group by id,datestr
),
numbered_day as (
    -- rn is the position of the day among the user's active days
    select id
        ,datestr
        ,sum_amount
        ,row_number() over(partition by id order by datestr) as rn
    from daily_total
),
streak_day as (
    -- datestr minus rn is identical for every day of one unbroken run
    select id
        ,datestr
        ,sum_amount
        ,date_sub(datestr,rn) as grp
    from numbered_day
)
select sd.id
    ,sd.grp
    ,round(sum(sd.sum_amount),2) as sc_sum_amount
    ,count(1) as sc_days
    ,min(sd.datestr) as sc_start_date
    ,max(sd.datestr) as sc_end_date
    -- lag runs over the grouped rows (one per streak), and the difference
    -- between two successive streak keys equals the number of skipped days
    ,datediff(sd.grp,lag(sd.grp,1) over(partition by sd.id order by sd.grp)) as iv_days
from streak_day sd
group by sd.id,sd.grp;
- 精简版
-- Compact form of the streak report: the daily roll-up and the streak key are
-- computed together in a single subquery.
-- Amounts are summed as decimal and rounded to 2 places, matching the
-- precision of the input data and of the long-form query.
select t1.id
    ,t1.grp
    ,round(sum(t1.sum_amount),2) as total_amount  -- total amount over the streak
    ,count(1) as total_days                       -- number of days in the streak
    ,min(t1.datestr) as start_date                -- first day of the streak
    ,max(t1.datestr) as end_date                  -- last day of the streak
    ,datediff(t1.grp,lag(t1.grp,1) over(partition by t1.id order by t1.grp)) as interval_days  -- days skipped since the previous streak, null for the first
from(
    select id
        ,datestr
        ,sum(cast(amount as decimal(18,2))) as sum_amount
        -- row_number is applied to the grouped rows, so it numbers each
        -- user's distinct days and datestr minus that number is the streak key
        ,date_sub(datestr,row_number() over(partition by id order by datestr)) as grp
    from deal_tb
    group by id,datestr
) t1
group by t1.id,t1.grp;
- 结果
1 2019-02-07 13600.23 3 2019-02-08 2019-02-10 NULL
1 2019-02-08 2991.650 5 2019-02-12 2019-02-16 1
1 2019-02-09 1510.8 2 2019-02-18 2019-02-19 1
1 2019-02-10 537.71 1 2019-02-21 2019-02-21 1
2 2019-02-07 13600.23 3 2019-02-08 2019-02-10 NULL
2 2019-02-08 3026.649 4 2019-02-12 2019-02-15 1
2 2019-02-10 1510.8 2 2019-02-18 2019-02-19 2
2 2019-02-11 537.71 1 2019-02-21 2019-02-21 1
3 2019-02-07 13600.23 3 2019-02-08 2019-02-10 NULL
3 2019-02-08 2730.04 5 2019-02-12 2019-02-16 1
3 2019-02-09 1510.8 2 2019-02-18 2019-02-19 1
3 2019-02-10 537.71 1 2019-02-21 2019-02-21 1
边栏推荐
猜你喜欢
随机推荐
[interview question] anti shake and throttling
抖音-视频步骤
南京邮电大学CTF题目writeup (一) 含题目地址
文旅头部结合数字藏品效应显著,但如何支撑用户持续购买力
数字藏品以虚强实,赋能实体经济发展
数据处理之增删改;约束
Microsoft edge browser plug-in (1)
Child parent thread interaction
Flex elastic box item properties
ArcMap地图投影相关操作
mysql视图,存储过程与存储函数
7月7日国风廿四节气《小暑》来袭!!附..合..成..预..告..
基于php小区疫情出入管理系统(php毕业设计)
CAD-GIS数据转换
ctfshow单身狗 -- web
蓝桥代码 翻硬币(我这样写也通过了,官网测试是不是有问题)
扩展欧几里得定理
Cad-gis data conversion
变量,流程控制与游标
结果填空 购物单(教你用Excel解决)









