Bilbottom · December 4, 2024 09:29
diff --git a/csv-1.csv b/csv-1.csv
diff --git a/csv-2.csv b/csv-2.csv
diff --git a/csv-diff.sql b/csv-diff.sql
 /*
     Row-level diff between 'csv-1.csv' (file_1) and 'csv-2.csv' (file_2),
     matched on id.

     Each id gets a diff_type:
       - 'insert': id is only present in file_2
       - 'delete': id is only present in file_1
       - 'update': id is in both files and the row hashes differ
       - 'same':   id is in both files and the row hashes match

     Every column is returned twice, suffixed __f1 / __f2, so both versions
     of a row can be read side by side. The helper hash columns are dropped
     from the final output.
 */
 with

 files as (
     /* Union to align their data types; `by name` matches columns on name */
         select 'file_1' as file, *
         from 'csv-1.csv'
     union all by name
         select 'file_2' as file, *
         from 'csv-2.csv'
 ),

 file_1 as (
     /*
         Hash the whole row as a struct rendered to text. concat_ws skips
         NULL inputs, so ('a', NULL) and (NULL, 'a') used to produce the
         same hash; in the struct text every field keeps its name and
         position, so a value moving between columns changes the hash.
         NOTE(review): a literal string 'NULL' may still render like a real
         NULL depending on the DuckDB version -- confirm if that matters.
     */
     select
         file_1_row.*,
         sha256(cast(file_1_row as varchar)) as hash
     from (
         select * exclude (file)
         from files
         where file = 'file_1'
     ) as file_1_row
 ),

 file_2 as (
     /* Same row hash as file_1, so the two hashes are directly comparable */
     select
         file_2_row.*,
         sha256(cast(file_2_row as varchar)) as hash
     from (
         select * exclude (file)
         from files
         where file = 'file_2'
     ) as file_2_row
 ),

 both_files as (
     select
         /* A null id on one side means the full join found no match there */
         case
             when file_1.id is null          then 'insert'
             when file_2.id is null          then 'delete'
             when file_1.hash != file_2.hash then 'update'
                                             else 'same'
         end as diff_type,
         id,
         /* "\0" is the original column name, giving e.g. name__f1 / name__f2 */
         columns(file_1.*) as "\0__f1",
         columns(file_2.*) as "\0__f2"
     from file_1
         full join file_2
             using (id)  /* whatever column(s) uniquely identify records */
 )

 select * exclude (hash__f1, hash__f2)
 from both_files
 order by id
 ;
	/*
	    Row-level diff between 'csv-1.csv' (file_1) and 'csv-2.csv' (file_2),
	    matched on id.

	    Each id gets a diff_type:
	      - 'insert': id is only present in file_2
	      - 'delete': id is only present in file_1
	      - 'update': id is in both files and the row hashes differ
	      - 'same':   id is in both files and the row hashes match

	    Every column is returned twice, suffixed __f1 / __f2; the helper hash
	    columns are dropped from the final output.
	*/
	with

	files as (
	    /* Union to align their data types; `by name` matches columns on name */
	        select 'file_1' as file, *
	        from 'csv-1.csv'
	    union all by name
	        select 'file_2' as file, *
	        from 'csv-2.csv'
	),

	file_1 as (
	    /*
	        Hash the whole row as a struct rendered to text. This replaces a
	        concat_ws-based hash whose column list had lost its `*` characters
	        (making it invalid) and whose separator had a stray backslash.
	        concat_ws also skips NULL inputs, so ('a', NULL) and (NULL, 'a')
	        would collide; in the struct text every field keeps its name and
	        position.
	        NOTE(review): a literal string 'NULL' may still render like a real
	        NULL depending on the DuckDB version -- confirm if that matters.
	    */
	    select
	        file_1_row.*,
	        sha256(cast(file_1_row as varchar)) as hash
	    from (
	        select * exclude (file)
	        from files
	        where file = 'file_1'
	    ) as file_1_row
	),

	file_2 as (
	    /* Same row hash as file_1, so the two hashes are directly comparable */
	    select
	        file_2_row.*,
	        sha256(cast(file_2_row as varchar)) as hash
	    from (
	        select * exclude (file)
	        from files
	        where file = 'file_2'
	    ) as file_2_row
	),

	both_files as (
	    select
	        /* A null id on one side means the full join found no match there */
	        case
	            when file_1.id is null          then 'insert'
	            when file_2.id is null          then 'delete'
	            when file_1.hash != file_2.hash then 'update'
	                                            else 'same'
	        end as diff_type,
	        id,
	        /* "\0" is the original column name, giving e.g. name__f1 / name__f2 */
	        columns(file_1.*) as "\0__f1",
	        columns(file_2.*) as "\0__f2"
	    from file_1
	        full join file_2
	            using (id)  /* whatever column(s) uniquely identify records */
	)

	select * exclude (hash__f1, hash__f2)
	from both_files
	order by id
	;