V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
vvtf
V2EX  ›  数据库

问个 SQL 查询问题.

  •  
  •   vvtf · 31 天前 · 1611 次点击

    现在有一张表如下:

    create table iot (
      client_id bigint not null,
      device_time datetime not null,
      runstate int not null comment '运行状态:0-停止,1-运行'
    );
    

    需求是查询疲劳运行的设备, 规则是持续运行60s时为疲劳, 然后停止运行持续120s时为解除疲劳.

    这是我的 sql, 这个正确不?

    WITH t0 as (
        SELECT 1 client_id, '2025-02-01 00:00:00' device_time, 1 runstate
        UNION ALL SELECT 1, '2025-02-01 00:00:30', 1
        UNION ALL SELECT 1, '2025-02-01 00:01:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:01:30', 0
        UNION ALL SELECT 1, '2025-02-01 00:02:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:02:30', 1
        UNION ALL SELECT 1, '2025-02-01 00:03:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:03:30', 0
        UNION ALL SELECT 1, '2025-02-01 00:04:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:04:30', 0
        UNION ALL SELECT 1, '2025-02-01 00:05:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:05:30', 0
        UNION ALL SELECT 1, '2025-02-01 00:06:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:06:30', 0
        UNION ALL SELECT 1, '2025-02-01 00:07:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:08:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:09:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:10:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:11:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:12:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:13:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:14:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:15:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:16:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:17:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:18:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:19:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:20:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:21:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:22:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:23:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:24:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:25:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:26:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:27:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:28:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:29:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:30:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:31:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:32:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:33:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:34:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:35:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:36:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:37:00', 0
        UNION ALL SELECT 1, '2025-02-01 00:38:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:39:00', 1
        UNION ALL SELECT 1, '2025-02-01 00:40:00', 1
    )
    , t1 AS (
        -- 这里的 run 和 rest 会去查询配置表
        SELECT
            client_id,
            -- 运行时长: 60 秒
            60 run, 
            -- 休息时长: 120 秒
            120 rest,
            device_time,
            -- 运行状态: 0-停止,1-运行
            runstate,
            ROW_NUMBER() OVER (ORDER BY device_time) AS rn,
            ROW_NUMBER() OVER (
              PARTITION BY client_id, runstate 
              ORDER BY device_time) AS grp
        FROM t0
        order by device_time
    )
    ,t2 AS (
        SELECT
            *,
            MIN(device_time) OVER (
              PARTITION BY client_id, runstate, rn - grp
              order by device_time
              ) AS start_time,
            MAX(device_time) OVER (
              PARTITION BY client_id,runstate, rn - grp
              order by device_time
              ) AS end_time,
              rn - grp gap
        FROM
            t1
    )
    ,t3 AS (
        SELECT
            *,
            -- 持续时长
            TIMESTAMPDIFF(second, start_time, end_time) AS duration,
            -- 前一个时长
            lag(
              TIMESTAMPDIFF(second, start_time, end_time),1,0
            ) over(
              partition by client_id,runstate, rn - grp
              order by device_time
            ) prev_duration
        FROM
            t2
    )
    ,t4 AS (
        SELECT
            *,
            case 
              -- 触发疲劳时,设置状态为当前行号
              when runstate=1 and duration>=run and prev_duration<run then rn
              else 0
            end fatigue
        FROM
            t3
    )
    , t5 as (
        SELECT
            *,
            case 
              -- 触发解除疲劳是,设置状态为前面所有的疲劳的负值
              when runstate=0 and duration>=rest and prev_duration<rest then 
              -sum(fatigue) over(partition by client_id order by device_time)
              else fatigue
            end fatigue1
        FROM
            t4
    )
    
    select 
      *,
      -- >0 为疲劳, <=0 为非疲劳
      sum(fatigue1) over(partition by client_id order by device_time) fatigue_final
    from t5
    order by device_time
    
    第 1 条附言  ·  28 天前

    改成这样就行了,
    使用 row_number 有问题,
    就是当发生疲劳了,
    而后面紧跟着的几个状态都是解除疲劳的触发就会出现错误.
    所以使用 last_value 解决.

    WITH t0 as (...), 
    t1 AS (
      -- 这里的 run 和 rest 会去查询配置表
      SELECT 
        client_id, 
        -- 运行时长: 60 秒
        60 run, 
        -- 休息时长: 120 秒
        120 rest, 
        device_time, 
        -- 运行状态: 0-停止,1-运行
        runstate, 
        ROW_NUMBER() OVER (
          ORDER BY device_time
        ) AS rn, 
        ROW_NUMBER() OVER (
          PARTITION BY client_id, runstate 
          ORDER BY device_time
        ) AS grp 
      FROM 
        t0 
      order by 
        device_time
    ), 
    t2 AS (
      SELECT 
        *, 
        MIN(device_time) OVER (
          PARTITION BY client_id, runstate, rn - grp 
          order by device_time
        ) AS start_time, 
        rn - grp gap 
      FROM 
        t1
    ), 
    t3 AS (
      SELECT 
        *, 
        -- 持续时长
        TIMESTAMPDIFF(second, start_time, device_time) AS duration 
      FROM 
        t2
    ), 
    t4 AS (
      SELECT 
        *, 
        case -- 触发疲劳时,设置 1
        -- 解除疲劳设置 0
        -- 其他设置 null
        when runstate = 1 
        and duration >= run then 1 when runstate = 0 
        and duration >= rest then 0 else null end fatigue 
      FROM 
        t3
    ) 
    select 
      *, 
      -- 使用前面最后非 null 的状态为疲劳状态
      ifnull(
        last_value(fatigue) ignore nulls over(
          partition by client_id 
          order by device_time 
          rows between unbounded preceding and current row
        ), 
        0
      ) fatigue_final 
    from 
      t4 
    order by device_time
    
    
    4 条回复    2025-02-24 08:54:40 +08:00
    pyang6984
        1
    pyang6984  
       31 天前
    WITH t0 as (
    SELECT 1 client_id, '2025-02-01 00:00:00' device_time, 1 runstate
    UNION ALL SELECT 1, '2025-02-01 00:00:30', 1
    UNION ALL SELECT 1, '2025-02-01 00:01:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:01:30', 0
    UNION ALL SELECT 1, '2025-02-01 00:02:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:02:30', 1
    UNION ALL SELECT 1, '2025-02-01 00:03:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:03:30', 0
    UNION ALL SELECT 1, '2025-02-01 00:04:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:04:30', 0
    UNION ALL SELECT 1, '2025-02-01 00:05:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:05:30', 0
    UNION ALL SELECT 1, '2025-02-01 00:06:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:06:30', 0
    UNION ALL SELECT 1, '2025-02-01 00:07:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:08:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:09:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:10:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:11:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:12:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:13:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:14:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:15:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:16:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:17:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:18:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:19:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:20:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:21:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:22:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:23:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:24:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:25:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:26:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:27:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:28:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:29:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:30:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:31:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:32:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:33:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:34:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:35:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:36:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:37:00', 0
    UNION ALL SELECT 1, '2025-02-01 00:38:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:39:00', 1
    UNION ALL SELECT 1, '2025-02-01 00:40:00', 1
    ),
    t1 AS (
    -- 这里的 run 和 rest 会去查询配置表
    SELECT
    client_id,
    -- 运行时长: 60 秒
    60 run,
    -- 休息时长: 120 秒
    120 rest,
    device_time,
    -- 运行状态: 0-停止,1-运行
    runstate,
    ROW_NUMBER() OVER (ORDER BY device_time) AS rn,
    ROW_NUMBER() OVER (
    PARTITION BY client_id, runstate
    ORDER BY device_time) AS grp
    FROM t0
    ORDER BY device_time
    ),
    t2 AS (
    SELECT
    *,
    MIN(device_time) OVER (
    PARTITION BY client_id, runstate, rn - grp
    ORDER BY device_time
    ) AS start_time,
    MAX(device_time) OVER (
    PARTITION BY client_id, runstate, rn - grp
    ORDER BY device_time
    ) AS end_time,
    rn - grp gap
    FROM
    t1
    ),
    t3 AS (
    SELECT
    *,
    -- 持续时长
    --在计算持续时长时,TIMESTAMPDIFF 函数使用 end_time 和 start_time 进行计算,然而 end_time 和 start_time 是同---一组内的最大和最小时间,若组内只有一条记录,该计算结果会为 0 。所以,应当使用当前行的 device_time 减去组
    --内的起始时间来计算持续时长。
    TIMESTAMPDIFF(SECOND, start_time, device_time) AS duration,
    -- 前一个时长
    LAG(TIMESTAMPDIFF(SECOND, start_time, device_time), 1, 0) OVER (
    PARTITION BY client_id
    ORDER BY device_time
    ) prev_duration
    FROM
    t2
    ),
    t4 AS (
    SELECT
    *,
    CASE
    -- 触发疲劳时,设置状态为当前行号
    WHEN runstate = 1 AND duration >= run AND prev_duration < run THEN rn
    ELSE 0
    END fatigue
    FROM
    t3
    ),
    t5 as (
    SELECT
    *,
    CASE
    -- 触发解除疲劳是,设置状态为前面所有的疲劳的负值
    WHEN runstate = 0 AND duration >= rest AND prev_duration < rest THEN
    -SUM(fatigue) OVER (PARTITION BY client_id ORDER BY device_time)
    ELSE fatigue
    END fatigue1
    FROM
    t4
    )
    SELECT
    *,
    -- >0 为疲劳, <=0 为非疲劳
    SUM(fatigue1) OVER (PARTITION BY client_id ORDER BY device_time) fatigue_final
    FROM t5
    ORDER BY device_time;
    Nooooobycat
        2
    Nooooobycat  
       31 天前
    WITH state_groups AS (
    SELECT
    client_id,
    device_time,
    runstate,
    SUM(change_flag) OVER (PARTITION BY client_id ORDER BY device_time) AS grp
    FROM (
    SELECT
    client_id,
    device_time,
    runstate,
    CASE WHEN LAG(runstate) OVER (PARTITION BY client_id ORDER BY device_time) = runstate THEN 0 ELSE 1 END AS change_flag
    FROM iot
    ) AS tmp
    ),
    group_durations AS (
    SELECT
    client_id,
    grp,
    runstate,
    MIN(device_time) AS start_time,
    MAX(device_time) AS end_time,
    TIMESTAMPDIFF(SECOND, MIN(device_time), MAX(device_time)) AS duration_seconds
    FROM state_groups
    GROUP BY client_id, grp, runstate
    ),
    ranked_runs AS (
    SELECT
    client_id,
    end_time AS last_run_end,
    duration_seconds,
    ROW_NUMBER() OVER (PARTITION BY client_id ORDER BY end_time DESC) AS rn
    FROM group_durations
    WHERE runstate = 1
    )
    SELECT DISTINCT rr.client_id
    FROM ranked_runs rr
    WHERE rr.rn = 1
    AND rr.duration_seconds >= 60
    AND NOT EXISTS (
    SELECT 1
    FROM group_durations gd
    WHERE gd.client_id = rr.client_id
    AND gd.runstate = 0
    AND gd.start_time >= rr.last_run_end
    AND gd.duration_seconds >= 120
    );
    Rache1
        3
    Rache1  
       31 天前
    果然还是 V2 提问者靠谱一些

    一些社区里面那些提问的,问个数据库问题,数据库版本、表结构、填充数据啥都没有,让补充问题吧,有的就上个图片就来了
    vvtf
        4
    vvtf  
    OP
       28 天前
    改成这样就行了,
    使用 row_number 有问题就是当发生疲劳了,
    而后面紧跟着的几个状态都是解除疲劳的触发就会出现错误.
    所以使用 last_value 解决.

    ```sql

    WITH t0 as (
    ...
    )
    , t1 AS (
    -- 这里的 run 和 rest 会去查询配置表
    SELECT
    client_id,
    -- 运行时长: 60 秒
    60 run,
    -- 休息时长: 120 秒
    120 rest,
    device_time,
    -- 运行状态: 0-停止,1-运行
    runstate,
    ROW_NUMBER() OVER (ORDER BY device_time) AS rn,
    ROW_NUMBER() OVER (
    PARTITION BY client_id, runstate
    ORDER BY device_time) AS grp
    FROM t0
    order by device_time
    )
    ,t2 AS (
    SELECT
    *,
    MIN(device_time) OVER (
    PARTITION BY client_id, runstate, rn - grp
    order by device_time
    ) AS start_time ,
    rn - grp gap
    FROM
    t1
    )
    ,t3 AS (
    SELECT
    *,
    -- 持续时长
    TIMESTAMPDIFF(second, start_time, device_time) AS duration
    FROM
    t2
    )

    ,t4 AS (
    SELECT
    *,
    case
    -- 触发疲劳时,设置 1
    -- 解除疲劳设置 0
    -- 其他设置 null
    when runstate=1 and duration>=run then 1
    when runstate=0 and duration>=rest then 0
    else null
    end fatigue
    FROM
    t3
    )

    select
    *,
    -- 使用前面最后非 null 的状态为疲劳状态
    ifnull(last_value(fatigue) ignore nulls over(
    partition by client_id
    order by device_time
    rows between unbounded preceding and current row
    ),0) fatigue_final
    from t4
    order by device_time


    ```
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   2381 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 24ms · UTC 16:03 · PVG 00:03 · LAX 09:03 · JFK 12:03
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.