问题遇到的现象和发生背景
我的代码
-- Asker's attempt: per user, for exams started in 2021, report days_window and
-- avg_exam_cnt = total * days_window / diff_time rounded to 2 decimals.
-- NOTE(review): days_window below is NOT the largest gap between two
-- consecutive attempts; see the comment on that column.
SELECT uid,days_window,ROUND(total* days_window/diff_time,2) avg_exam_cnt
FROM (
SELECT
uid,
COUNT(start_time) total, -- number of attempts by the user in 2021
DATEDIFF(MAX(start_time),MIN(start_time))+1 diff_time, -- days from first to last attempt, inclusive
-- Labelled "max gap in days", but MAX/MIN are taken over the whole group, so
-- this is the first-to-last span again (diff_time without the +1). The
-- reference query in this post instead uses
-- MAX(DATEDIFF(next_time, start_time)) + 1 over consecutive attempts, so for a
-- user with exactly two attempts this value is one day smaller, and for users
-- with more attempts it can be larger than the true adjacent gap.
DATEDIFF(MAX(start_time),MIN(start_time)) days_window -- first-to-last span in days (no +1)
FROM exam_record
WHERE YEAR(start_time)=2021 -- 2021 data only
group by uid
)t1
-- diff_time is 1 when the first and last attempt share a calendar date, so
-- this keeps only users whose attempts span at least two dates.
WHERE diff_time>1
ORDER BY days_window DESC,avg_exam_cnt DESC
;
运行结果及报错内容
我发现我的结果和正确答案总是差1天
正确代码
-- Per user, for exams started in 2021:
--   days_window  = largest day gap between two consecutive attempts, counted
--                  inclusively (+1);
--   avg_exam_cnt = total * days_window / diff_time, rounded to 2 decimals.
-- Only users whose attempts span at least two calendar dates are returned.
WITH attempt_pairs AS (
    -- Pair every 2021 attempt with the same user's next attempt.
    SELECT
        uid,
        start_time,
        LEAD(start_time, 1) OVER (
            PARTITION BY uid
            ORDER BY start_time
        ) AS next_time  -- NULL on each user's last attempt
    FROM exam_record
    -- Half-open range instead of YEAR(start_time) = 2021: the column is
    -- compared directly, so an index on start_time (if one exists) can be used.
    WHERE start_time >= '2021-01-01'
      AND start_time < '2022-01-01'
),
user_stats AS (
    SELECT
        uid,
        COUNT(start_time) AS total,  -- number of attempts in 2021
        -- Days from first to last attempt, inclusive.
        DATEDIFF(MAX(start_time), MIN(start_time)) + 1 AS diff_time,
        -- Largest gap between consecutive attempts, inclusive. MAX skips the
        -- NULL produced for the last attempt, which has no next_time.
        MAX(DATEDIFF(next_time, start_time)) + 1 AS days_window
    FROM attempt_pairs
    GROUP BY uid
)
SELECT
    uid,
    days_window,
    ROUND(total * days_window / diff_time, 2) AS avg_exam_cnt
FROM user_stats
-- diff_time is 1 when first and last attempt share a calendar date.
WHERE diff_time > 1
ORDER BY
    days_window DESC,
    avg_exam_cnt DESC
;
我的问题
在计算“最大时间间隔”时，为什么我不能直接用
DATEDIFF(MAX(start_time), MIN(start_time))
得到呢？为什么求间隔一定要用 LEAD 窗口函数？非常感谢！