1.题目
现有用户登录日志表 t_login_log,包含用户ID(user_id),登录日期(login_date)。数据已经按照用户日期去重,请查出连续登录超过4天的用户ID

2.考点

可以看到,连续时diff值相同,不连续时diff值就出现变化。这也就说明,只要统计diff值相同的行数,就能计算出每次连续登录的天数。
3.SQL
step1:处理日期格式,计算登录日期与月初日期('2022-01-01')差date_diff
select
user_id,
login_date,
datediff(from_unixtime(unix_timestamp(login_date,'yyyyMMdd'),'yyyy-MM-dd'),'2022-01-01') as date_diff
from t_login_log;执行结果

step2:row_number()开窗,计算每个用户每个登录日期的排序 row_num
select
user_id,
login_date,
datediff(from_unixtime(unix_timestamp(login_date,'yyyyMMdd'),'yyyy-MM-dd'),'2022-01-01') as date_diff,
row_number()over(partition by user_id order by login_date asc) as row_num
from t_login_log;查询结果

step3:计算date_diff和row_num的差值
select
user_id,
login_date,
datediff(from_unixtime(unix_timestamp(login_date,'yyyyMMdd'),'yyyy-MM-dd'),'2022-01-01') as date_diff,
row_number()over(partition by user_id order by login_date asc) as row_num,
row_number()over(partition by user_id order by login_date asc) - datediff(from_unixtime(unix_timestamp(login_date,'yyyyMMdd'),'yyyy-MM-dd'),'2022-01-01') as diff
from t_login_log;查询结果

step4:统计连续登录大于等于3天的用户
select
user_id
from
(
select
user_id,
diff,
count(1) as login_days
from
(
select
user_id,
login_date,
row_number()over(partition by user_id order by login_date asc) - datediff(from_unixtime(unix_timestamp(login_date,'yyyyMMdd'),'yyyy-MM-dd'),'2022-01-01') as diff
from t_login_log
) t
group by user_id,diff
)tt
where login_days >=4
group by user_id执行结果

4.建表语句
建表语句
create table t_login_log
(
user_id string comment '用户ID',
login_date string comment '登录日期'
);数据插入语句
insert into t_login_log values
('0001','20220101'),
('0001','20220102'),
('0001','20220103'),
('0001','20220104'),
('0001','20220105'),
('0001','20220107'),
('0001','20220108'),
('0001','20220109'),
('0002','20220101'),
('0002','20220102'),
('0002','20220103'),
('0002','20220107'),
('0002','20220108'),
('0003','20220107'),
('0003','20220108'),
('0003','20220109');