今日の学び@2020/3/2
SQL&BigQuery
SELECT product_id, score, -- ランク:一意 ROW_NUMBER() OVER(ORDER BY score DESC) AS ROW, -- ランク:同順許容&飛ばす RANK() OVER(ORDER BY score DESC) AS rank, -- ランク:同順許容&飛ばさない DENSE_RANK() OVER(ORDER BY score DESC) AS dense_rank, -- LAG(対象列,n):n行前参照 LAG(product_id) OVER(ORDER BY score DESC) AS lag1, LAG(product_id,2) OVER(ORDER BY score DESC) AS lag2, -- LEAD(対象列,n):n行後参照 LEAD(product_id) OVER(ORDER BY score DESC) AS lead1, LEAD(product_id,2) OVER(ORDER BY score DESC) AS lead2, -- ランク上位からの累計 SUM(score) OVER(ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS cum_score, -- 現在行と前後1行の平均 AVG(score) OVER( ORDER BY score DESC ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING ) AS local_avg, -- ランク最上位 FIRST_VALUE(product_id) OVER( ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS first_value, -- ランク最下位 LAST_VALUE(product_id) OVER( ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS last_value FROM `popular_products`
WITH test_table AS ( SELECT category, product_id, ROW_NUMBER() OVER( PARTITION BY category ORDER BY score DESC ) AS rank, FROM `popular_products` ) SELECT * FROM test_table -- ウインドウ関数結果の絞り込みは、関数内部で不可の為、サブクエリにして絞り込む WHERE rank <= 2 ORDER BY category, rank
SELECT -- 上位1件のランク抽出は、DISTINCT使用 DISTINCT category, FIRST_VALUE(product_id) OVER( PARTITION BY category ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS product_id, FROM `popular_products`
SELECT dt, -- 横持ち変換 MAX(CASE WHEN indicator = 'impressions' THEN val END) AS impressions, MAX(CASE WHEN indicator = 'sessions' THEN val END) AS sessions, MAX(CASE WHEN indicator = 'users' THEN val END) AS users FROM `daily_kpi` GROUP BY dt ORDER BY dt
-- めちゃくちゃメンドクサイw SELECT -- ラベル用列作成 CASE WHEN p.idx = 1 THEN 'q1' WHEN p.idx = 2 THEN 'q2' WHEN p.idx = 3 THEN 'q3' WHEN p.idx = 4 THEN 'q4' END AS quarter, -- 売上横持ちを縦持ち変換 CASE WHEN p.idx = 1 THEN q.q1 WHEN p.idx = 2 THEN q.q2 WHEN p.idx = 3 THEN q.q3 WHEN p.idx = 4 THEN q.q4 END AS sales FROM `sql-bigdata-recipe.rec.quarterly_sales` AS q CROSS JOIN ( SELECT -- インデックス用ピボットテーブル 1 AS idx UNION ALL SELECT 2 AS idx UNION ALL SELECT 3 AS idx UNION ALL SELECT 4 AS idx ) AS p ORDER BY quarter
-- これは慣れるべき? WITH p AS ( SELECT purchase_id, STRING_AGG(product_id) AS product_ids, FROM `purchase_detail_log` GROUP BY purchase_id ) SELECT purchase_id, product_id FROM p CROSS JOIN UNNEST(SPLIT(product_ids,',')) AS product_id
ウインドウ関数は便利だし、難しくもないと感じる
縦持ち⇔横持ち変換は、ちょっと面倒くさい
UDFを作れないかと考えた
3章の基本は、かなり時間掛かっている
4章以降の実践で、縦持ち横持ちを実践していけば、慣れるはず
数学
所感
思ったより全体的にペースが遅いが、少しずつ改善していこう
SQLの3章は基礎練なので、4章以降の実践で楽しめる様、地道にいこう