Skip to content

Instantly share code, notes, and snippets.

@koturn
Last active October 4, 2025 11:08
Show Gist options
  • Save koturn/a47e4376f5fa3061027a966b7867c9f5 to your computer and use it in GitHub Desktop.
Save koturn/a47e4376f5fa3061027a966b7867c9f5 to your computer and use it in GitHub Desktop.
SQLite3でNULLを考慮してCSVインポートを行うためのラッパースクリプト
#!/bin/bash
set -eu
import_schema='imp'
##
## Show help text of this script.
##
## stdout: Help text of this program.
## stderr: Nothing
##
show_help() {
cat << __EOF__
[USAGE]
$0 {OPTIONS...} DBFILE CSVFILE {TABLENAME}
[OPTIONS]
-a, --analyze
Do ANALYZE after import.
-c SQLITE3_CMD, --command=SQLITE3_CMD
Specify SQLite3 command.
DEFAULT VALUE: sqlite3
-d, --delete
DELETE all records from target table before importing.
-h, --help
Show help and exit program.
-r, --reindex
Do REINDEX after import.
-s N, --skip=N
Specify the number of header lines to skip.
-t DBFILE, --temporary-database=DBFILE
Specify temporary database name.
DEFAULT VALUE: :memory:
-u, --upsert
Do INSERT/UPDATE instead of INSERT.
-U, --update
Do UPDATE instead of INSERT.
-v, --verbose
Show verbose messages.
-V, --vacuum
Do VACUUM after import.
__EOF__
}
unset GETOPT_COMPATIBLE
opt=$(getopt -o 'ac:dhrs:t:uUvV' -l 'analyze,command;,delete,help,reindex,skip,temporary-database:,update,upsert,verbose,vacuum' -- "$@")
eval set -- "${opt}"
while [ $# -gt 0 ]; do
case "$1" in
'-a' | '--analyze')
do_analyze=1
;;
'-c' | '--command')
sqlite3="$2"
;;
'-d' | '--delete')
mode='delete'
;;
'-h' | '--help')
show_help
exit 0
;;
'-r' | '--reindex')
do_reindex=1
;;
'-s' | '--skip')
if [ "$2" -eq 0 ]; then
unset opt_skip
else
opt_skip="--skip $2"
fi
;;
'-t' | '--temporary-database')
tempdb="$2"
;;
'-u' | '--upsert')
mode='upsert'
;;
'-U' | '--update')
mode='update'
;;
'-v' | '--verbose')
opt_verbose='-v'
;;
'-V' | '--vacuum')
do_vacuum=1
;;
--)
shift
break
;;
esac
shift
done
if [ $# -ne 2 -a $# -ne 3 ]; then
echo >&2 'Invalid arguments'
show_help >&2
exit 1
fi
dbfile="$1"
csv="$2"
if [ -z "${3:-}" ]; then
tblname=${2##*/}
tblname=${tblname%.*}
else
tblname="$3"
fi
[ -z "${sqlite3:-}" ] && sqlite3='sqlite3' || :
[ -z "${tempdb:-}" ] && tempdb=':memory:' || :
if "${sqlite3}" "${dbfile}" "SELECT 1 FROM "${tblname}" LIMIT 1" > /dev/null 2>&1; then
# Aggregate columns.
cols=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '"', ', ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
;
__EOF__
)
# Aggregate columns for CREATE TABLE statement.
cols_for_create=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '" TEXT', ', ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
;
__EOF__
)
# Aggregate columns for SELECT part in INSERT statement.
cols_for_select=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG(
CASE
WHEN "notnull" = 1 OR pk = 1 THEN name_with_cast
ELSE 'IIF("' || name || '" = '''', NULL, ' || name_with_cast || ')'
END || ' AS "' || name || '"',
', '
ORDER BY cid) AS concat_cols_for_select
FROM
(
SELECT
cid,
name,
CASE
WHEN type NOT LIKE '%INT%' AND (type LIKE '%TEXT%' OR type LIKE '%CHAR%' OR type LIKE '%CLOB%') THEN '"' || name || '"'
ELSE 'CAST("' || name || '" AS ' || type || ')'
END AS name_with_cast,
"notnull",
pk
FROM
pragma_table_info('${tblname}')
)
;
__EOF__
)
if [ "${mode:-}" = 'upsert' ]; then
# Aggregate primary key columns for ON CONFLICT clause in INSERT statement.
cols_pk=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '"', ', ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 1
;
__EOF__
)
if [ -n "${cols_pk}" ]; then
# Aggregate non-primary key columns for UPDATE part in INSERT statement (UPSERT).
cols_update=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '" = EXCLUDED."' || name || '"', ', ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 0
;
__EOF__
)
if [ -n "${cols_update}" ]; then
# Aggregate primary key columns for WHERE clause in UPDATE part in INSERT statement (UPSERT).
cond_where=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '" IS DISTINCT FROM EXCLUDED."' || name || '"', ' OR ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 0
;
__EOF__
)
upsert_part=" WHERE TRUE ON CONFLICT (${cols_pk}) DO UPDATE SET ${cols_update} WHERE ${cond_where}"
fi
fi
elif [ "${mode:-}" = 'delete' ]; then
stmt_delete="DELETE FROM \"${tblname}\";"
fi
if [ "${mode:-}" = 'update' ]; then
if [ "${tblname}" = 't' -o "${tblname}" = 'T' ]; then
alias_imptbl='t2'
else
alias_imptbl='t'
fi
# Aggregate non-primary key columns for SET clause in UPDATE statement.
cols_set=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"' || name || '" = ${alias_imptbl}."' || name || '"', ', ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 0
;
__EOF__
)
if [ -z "${cols_set}" ]; then
echo >&2 'Cannot import CSV on UPDATE mode for the table without non primary key rows'
exit 2
fi
# Aggregate primary key columns for WHERE clause in UPDATE statement to check primary key equivalents.
cond_where=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"${tblname}"."' || name || '" = ' || '${alias_imptbl}."' || name || '"', ' AND ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 1
;
__EOF__
)
if [ -z "${cond_where}" ]; then
echo >&2 'Cannot import CSV on UPDATE mode for the table without primary key'
exit 3
fi
# Aggregate non-primary key columns for WHERE clause in UPDATE statement to check non-primary key differences.
cond_where_diff=$("${sqlite3}" "${dbfile}" << __EOF__
SELECT
STRING_AGG('"${tblname}"."' || name || '" IS DISTINCT FROM ${alias_imptbl}."' || name || '"', ' OR ' ORDER BY cid) AS concat_cols
FROM
pragma_table_info('${tblname}')
WHERE
pk = 0
;
__EOF__
)
stmt_insert_or_update="UPDATE \"${tblname}\" SET ${cols_set} FROM (SELECT ${cols_for_select} FROM ${import_schema}.\"${tblname}\") ${alias_imptbl} WHERE ${cond_where} AND (${cond_where_diff});"
else
stmt_insert_or_update="INSERT INTO \"${tblname}\" (${cols}) SELECT ${cols_for_select} FROM ${import_schema}.\"${tblname}\"${upsert_part:-};"
fi
[ -f "${tempdb:-}" ] && rm -f "${tempdb:-}" || :
"${sqlite3}" "${dbfile}" << __EOF__
${opt_verbose:+.echo on}
.bail on
ATTACH DATABASE '${tempdb:-:memory:}' AS ${import_schema};
BEGIN;
CREATE TABLE ${import_schema}."${tblname}" (${cols_for_create});
.import --csv --schema ${import_schema} ${opt_skip:-} ${opt_verbose:-} '${csv}' "${tblname}"
${opt_verbose:+.changes on}
${stmt_delete:-}
${stmt_insert_or_update}
COMMIT; ${do_reindex:+"REINDEX \"${tblname}\";"} ${do_vacuum:+VACUUM;} ${do_analyze:+"ANALYZE \"${tblname}\""}
__EOF__
else
if [ "${mode:-}" = 'update' ]; then
echo >&2 'Cannot import CSV on UPDATE mode for non-existing table'
exit 4
fi
if [ "${mode:-}" = 'delete' ]; then
echo >&2 'Meaningless DELETE mode for non-existing table'
fi
"${sqlite3}" "${dbfile}" << __EOF__
.bail on
${opt_verbose:+.echo on}
${opt_verbose:+.changes on}
.import --csv ${opt_skip:-} ${opt_verbose:-} ${csv} "${tblname}"
${do_reindex:+"REINDEX \"${tblname}\";"} ${do_vacuum:+VACUUM;} ${do_analyze:+"ANALYZE \"${tblname}\""}
__EOF__
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment