Skip to content

Instantly share code, notes, and snippets.

@yoshiokatsuneo
Created December 22, 2024 16:42
Show Gist options
  • Save yoshiokatsuneo/99249444892638b6ca1df3fd6d76ea34 to your computer and use it in GitHub Desktop.
Save yoshiokatsuneo/99249444892638b6ca1df3fd6d76ea34 to your computer and use it in GitHub Desktop.
get diff of rows using BigQuery JavaScript UDF
-- 行の差分を取得
create temp function row_diff(obj1 json, obj2 json)
returns ARRAY<STRUCT<key STRING, value STRING, old_value STRING>>
language js as """
const diffs = [];
if (obj1 === null || obj2 === null ){
return null;
}
const excludeFields = ['updated_at'];
const keys = [...new Set(Object.keys(obj1).concat(Object.keys(obj2)))].filter(key => !excludeFields.includes(key));
keys.forEach((key) => {
if (obj1[key] !== obj2[key]) {
diffs.push({key, value: obj1[key], old_value: obj2[key]})
}
});
return diffs;
""";
with
my_table AS (
SELECT 1 AS id, 'Alice' AS name, 25 AS age, TIMESTAMP('2022-01-01') AS updated_at UNION ALL
SELECT 1 AS id, 'Alice' AS name, 26 AS age, TIMESTAMP('2022-01-02') AS updated_at UNION ALL
SELECT 2 AS id, 'Bob' AS name, 30 AS age, TIMESTAMP('2022-01-02') AS updated_at UNION ALL
SELECT 2 AS id, 'BobBob' AS name, 31 AS age, TIMESTAMP('2022-01-03') AS updated_at UNION ALL
SELECT 2 AS id, 'BobBob' AS name, 31 AS age, TIMESTAMP('2022-01-04') AS updated_at UNION ALL
SELECT 3 AS id, 'Charlie' AS name, 35 as age, TIMESTAMP('2022-01-03') AS updated_at
)
SELECT
id,
row_diff(
to_json(my_table),
to_json(lag(my_table) OVER (partition by id order by updated_at))
) as diffs,
my_table,
FROM
my_table
qualify array_length(diffs) <> 0
order by id, updated_at
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment