TO_JSON_STRINGを使って行全体をまとめて比較する
概要
- バッチで実行するクエリを変更した後に、前後のテーブルで1行全体を比較し、変化があった行を調べたい。
- 複数列をまとめてJOINのキーにしてしまいたい。
- ARRAY型、STRUCT型でも手軽にJOINしたい。
ということが最近あった。愚直やると、
- 対象のすべての列を書き出す
- またNULL->非NULL(もしくはその逆)の変化を検知するためには、NULLを含む値の比較はNULLで返ってくるため、NULLIFなどで回避する
といった対応が必要です。
こういったとき、TO_JSON_STRINGが使えます。
今回のキーワード
TO_JSON_STRING
クエリ
サブクエリrace1_unnestとrace2_unnestを比較対象とします。
race2_unnest に存在しない、race1_unnestの行を、LEFT JOINで出力します。
race2_unnestは、race1_unnestの値を書き換える形で生成しています。(クエリのコメント部分参照)
WITH race1 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name,"aiueo" as splits), STRUCT("Makhloufi" as name,"aiueo" as splits), STRUCT("Murphy" as name,NULL as splits), STRUCT("Bosse" as name,"aiueo" as splits), STRUCT("Rotich" as name,"aiueo" as splits), STRUCT("Lewandowski" as name,"aiueo" as splits), STRUCT("Kipketer" as name,"aiueo" as splits), STRUCT("Berian" as name,"aiueo" as splits), STRUCT("Nathan" as name,"aiueo" as splits), STRUCT("David" as name,NULL as splits)] AS participants), race1_unnest AS ( SELECT race, p.*, FROM race1,race1.participants AS p ), race2 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name, "aiueo" as splits), STRUCT("Makhloufi" as name,NULL as splits), #変更 非NULL -> NULL STRUCT("Murphy" as name,NULL as splits), #変化なし STRUCT("Bosse" as name,"aiueo" as splits), STRUCT("Rotich" as name,"aiueo" as splits), STRUCT("Lewandowski" as name,"aiueo" as splits), STRUCT("Kipketer" as name,"aiueo" as splits), STRUCT("Berian" as name,"aiueo" as splits), STRUCT("Nathan" as name,"kakikukeko" as splits), #変更 STRUCT("David" as name,"yey" as splits) #変更 NULL -> 非NULL ] AS participants), race2_unnest AS ( SELECT race, p.*, FROM race2,race2.participants AS p ) SELECT t1, t2 FROM race1_unnest AS t1 LEFT JOIN race2_unnest AS t2 ON TO_JSON_STRING(t1) = TO_JSON_STRING(t2) WHERE t2.race IS NULL
アウトプット
解説
JOINに1列づつ書くのではなく、TO_JSON_STRING(t1) = TO_JSON_STRING(t2)
と書くことで、行をまとめて比較しています。
愚直に書いた場合との比較
アウトプット
NULL = NULL の返り値はNULL(TRUEでもFALSEでもない)なので、比較する列にNULLが含まれる行は、上記のように変化がくてもも出力してしまいます。
クエリは以下です。
WITH race1 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name,"aiueo" as splits), STRUCT("Makhloufi" as name,"aiueo" as splits), STRUCT("Murphy" as name,NULL as splits),#変化なし###########この行のこと STRUCT("Bosse" as name,"aiueo" as splits), STRUCT("Rotich" as name,"aiueo" as splits), STRUCT("Lewandowski" as name,"aiueo" as splits), STRUCT("Kipketer" as name,"aiueo" as splits), STRUCT("Berian" as name,"aiueo" as splits), STRUCT("Nathan" as name,"aiueo" as splits), STRUCT("David" as name,NULL as splits)] AS participants), race1_unnest AS ( SELECT race, p.*, FROM race1,race1.participants AS p ), race2 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name, "aiueo" as splits), STRUCT("Makhloufi" as name,NULL as splits), #変更 非NULL -> NULL STRUCT("Murphy" as name,NULL as splits), #変化なし###########この行のこと STRUCT("Bosse" as name,"aiueo" as splits), STRUCT("Rotich" as name,"aiueo" as splits), STRUCT("Lewandowski" as name,"aiueo" as splits), STRUCT("Kipketer" as name,"aiueo" as splits), STRUCT("Berian" as name,"aiueo" as splits), STRUCT("Nathan" as name,"kakikukeko" as splits), #変更 STRUCT("David" as name,"yey" as splits) #変更 NULL -> 非NULL ] AS participants), race2_unnest AS ( SELECT race, p.*, FROM race2,race2.participants AS p ) SELECT t1, t2 FROM race1_unnest AS t1 LEFT JOIN race2_unnest AS t2 ON t1.race = t2.race AND t1.name = t2.name AND t1.splits = t2.splits WHERE t2.race IS NULL
参考
ARRAY型、STRUCT型にも対応しています。
WITH race1 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits), STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits), STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits), STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits), STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits), STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits), STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits), STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits), STRUCT("Nathan" as name, ARRAY<FLOAT64>[] as splits), STRUCT("David" as name, NULL as splits)] AS participants), race1_unnest AS ( SELECT race, p.* FROM race1,race1.participants AS p ), race2 AS ( SELECT "800M" AS race, [STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits), STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits), STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits), STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits), STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits), STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits), STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits), STRUCT("Berian" as name, [23.7, 26.1, 27.0, 293.0] as splits), #この値が変化している STRUCT("Nathan" as name, ARRAY<FLOAT64>[] as splits), STRUCT("David" as name, NULL as splits)] AS participants), race2_unnest AS ( SELECT race, p.* FROM race2,race2.participants AS p ) SELECT t1, t2 FROM race1_unnest AS t1 LEFT JOIN race2_unnest AS t2 ON TO_JSON_STRING(t1) = TO_JSON_STRING(t2) WHERE t2.race IS NULL