Last active
October 4, 2025 11:08
-
-
Save koturn/a47e4376f5fa3061027a966b7867c9f5 to your computer and use it in GitHub Desktop.
SQLite3でNULLを考慮してCSVインポートを行うためのラッパースクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -eu | |
import_schema='imp' | |
## | |
## Show help text of this script. | |
## | |
## stdout: Help text of this program. | |
## stderr: Nothing | |
## | |
show_help() { | |
cat << __EOF__ | |
[USAGE] | |
$0 {OPTIONS...} DBFILE CSVFILE {TABLENAME} | |
[OPTIONS] | |
-a, --analyze | |
Do ANALYZE after import. | |
-c SQLITE3_CMD, --command=SQLITE3_CMD | |
Specify SQLite3 command. | |
DEFAULT VALUE: sqlite3 | |
-d, --delete | |
DELETE all records from target table before importing. | |
-h, --help | |
Show help and exit program. | |
-r, --reindex | |
Do REINDEX after import. | |
-s N, --skip=N | |
Specify the number of header lines to skip. | |
-t DBFILE, --temporary-database=DBFILE | |
Specify temporary database name. | |
DEFAULT VALUE: :memory: | |
-u, --upsert | |
Do INSERT/UPDATE instead of INSERT. | |
-U, --update | |
Do UPDATE instead of INSERT. | |
-v, --verbose | |
Show verbose messages. | |
-V, --vacuum | |
Do VACUUM after import. | |
__EOF__ | |
} | |
unset GETOPT_COMPATIBLE | |
opt=$(getopt -o 'ac:dhrs:t:uUvV' -l 'analyze,command;,delete,help,reindex,skip,temporary-database:,update,upsert,verbose,vacuum' -- "$@") | |
eval set -- "${opt}" | |
while [ $# -gt 0 ]; do | |
case "$1" in | |
'-a' | '--analyze') | |
do_analyze=1 | |
;; | |
'-c' | '--command') | |
sqlite3="$2" | |
;; | |
'-d' | '--delete') | |
mode='delete' | |
;; | |
'-h' | '--help') | |
show_help | |
exit 0 | |
;; | |
'-r' | '--reindex') | |
do_reindex=1 | |
;; | |
'-s' | '--skip') | |
if [ "$2" -eq 0 ]; then | |
unset opt_skip | |
else | |
opt_skip="--skip $2" | |
fi | |
;; | |
'-t' | '--temporary-database') | |
tempdb="$2" | |
;; | |
'-u' | '--upsert') | |
mode='upsert' | |
;; | |
'-U' | '--update') | |
mode='update' | |
;; | |
'-v' | '--verbose') | |
opt_verbose='-v' | |
;; | |
'-V' | '--vacuum') | |
do_vacuum=1 | |
;; | |
--) | |
shift | |
break | |
;; | |
esac | |
shift | |
done | |
if [ $# -ne 2 -a $# -ne 3 ]; then | |
echo >&2 'Invalid arguments' | |
show_help >&2 | |
exit 1 | |
fi | |
dbfile="$1" | |
csv="$2" | |
if [ -z "${3:-}" ]; then | |
tblname=${2##*/} | |
tblname=${tblname%.*} | |
else | |
tblname="$3" | |
fi | |
[ -z "${sqlite3:-}" ] && sqlite3='sqlite3' || : | |
[ -z "${tempdb:-}" ] && tempdb=':memory:' || : | |
if "${sqlite3}" "${dbfile}" "SELECT 1 FROM "${tblname}" LIMIT 1" > /dev/null 2>&1; then | |
# Aggregate columns. | |
cols=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '"', ', ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
; | |
__EOF__ | |
) | |
# Aggregate columns for CREATE TABLE statement. | |
cols_for_create=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '" TEXT', ', ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
; | |
__EOF__ | |
) | |
# Aggregate columns for SELECT part in INSERT statement. | |
cols_for_select=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG( | |
CASE | |
WHEN "notnull" = 1 OR pk = 1 THEN name_with_cast | |
ELSE 'IIF("' || name || '" = '''', NULL, ' || name_with_cast || ')' | |
END || ' AS "' || name || '"', | |
', ' | |
ORDER BY cid) AS concat_cols_for_select | |
FROM | |
( | |
SELECT | |
cid, | |
name, | |
CASE | |
WHEN type NOT LIKE '%INT%' AND (type LIKE '%TEXT%' OR type LIKE '%CHAR%' OR type LIKE '%CLOB%') THEN '"' || name || '"' | |
ELSE 'CAST("' || name || '" AS ' || type || ')' | |
END AS name_with_cast, | |
"notnull", | |
pk | |
FROM | |
pragma_table_info('${tblname}') | |
) | |
; | |
__EOF__ | |
) | |
if [ "${mode:-}" = 'upsert' ]; then | |
# Aggregate primary key columns for ON CONFLICT clause in INSERT statement. | |
cols_pk=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '"', ', ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 1 | |
; | |
__EOF__ | |
) | |
if [ -n "${cols_pk}" ]; then | |
# Aggregate non-primary key columns for UPDATE part in INSERT statement (UPSERT). | |
cols_update=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '" = EXCLUDED."' || name || '"', ', ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 0 | |
; | |
__EOF__ | |
) | |
if [ -n "${cols_update}" ]; then | |
# Aggregate primary key columns for WHERE clause in UPDATE part in INSERT statement (UPSERT). | |
cond_where=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '" IS DISTINCT FROM EXCLUDED."' || name || '"', ' OR ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 0 | |
; | |
__EOF__ | |
) | |
upsert_part=" WHERE TRUE ON CONFLICT (${cols_pk}) DO UPDATE SET ${cols_update} WHERE ${cond_where}" | |
fi | |
fi | |
elif [ "${mode:-}" = 'delete' ]; then | |
stmt_delete="DELETE FROM \"${tblname}\";" | |
fi | |
if [ "${mode:-}" = 'update' ]; then | |
if [ "${tblname}" = 't' -o "${tblname}" = 'T' ]; then | |
alias_imptbl='t2' | |
else | |
alias_imptbl='t' | |
fi | |
# Aggregate non-primary key columns for SET clause in UPDATE statement. | |
cols_set=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"' || name || '" = ${alias_imptbl}."' || name || '"', ', ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 0 | |
; | |
__EOF__ | |
) | |
if [ -z "${cols_set}" ]; then | |
echo >&2 'Cannot import CSV on UPDATE mode for the table without non primary key rows' | |
exit 2 | |
fi | |
# Aggregate primary key columns for WHERE clause in UPDATE statement to check primary key equivalents. | |
cond_where=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"${tblname}"."' || name || '" = ' || '${alias_imptbl}."' || name || '"', ' AND ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 1 | |
; | |
__EOF__ | |
) | |
if [ -z "${cond_where}" ]; then | |
echo >&2 'Cannot import CSV on UPDATE mode for the table without primary key' | |
exit 3 | |
fi | |
# Aggregate non-primary key columns for WHERE clause in UPDATE statement to check non-primary key differences. | |
cond_where_diff=$("${sqlite3}" "${dbfile}" << __EOF__ | |
SELECT | |
STRING_AGG('"${tblname}"."' || name || '" IS DISTINCT FROM ${alias_imptbl}."' || name || '"', ' OR ' ORDER BY cid) AS concat_cols | |
FROM | |
pragma_table_info('${tblname}') | |
WHERE | |
pk = 0 | |
; | |
__EOF__ | |
) | |
stmt_insert_or_update="UPDATE \"${tblname}\" SET ${cols_set} FROM (SELECT ${cols_for_select} FROM ${import_schema}.\"${tblname}\") ${alias_imptbl} WHERE ${cond_where} AND (${cond_where_diff});" | |
else | |
stmt_insert_or_update="INSERT INTO \"${tblname}\" (${cols}) SELECT ${cols_for_select} FROM ${import_schema}.\"${tblname}\"${upsert_part:-};" | |
fi | |
[ -f "${tempdb:-}" ] && rm -f "${tempdb:-}" || : | |
"${sqlite3}" "${dbfile}" << __EOF__ | |
${opt_verbose:+.echo on} | |
.bail on | |
ATTACH DATABASE '${tempdb:-:memory:}' AS ${import_schema}; | |
BEGIN; | |
CREATE TABLE ${import_schema}."${tblname}" (${cols_for_create}); | |
.import --csv --schema ${import_schema} ${opt_skip:-} ${opt_verbose:-} '${csv}' "${tblname}" | |
${opt_verbose:+.changes on} | |
${stmt_delete:-} | |
${stmt_insert_or_update} | |
COMMIT; ${do_reindex:+"REINDEX \"${tblname}\";"} ${do_vacuum:+VACUUM;} ${do_analyze:+"ANALYZE \"${tblname}\""} | |
__EOF__ | |
else | |
if [ "${mode:-}" = 'update' ]; then | |
echo >&2 'Cannot import CSV on UPDATE mode for non-existing table' | |
exit 4 | |
fi | |
if [ "${mode:-}" = 'delete' ]; then | |
echo >&2 'Meaningless DELETE mode for non-existing table' | |
fi | |
"${sqlite3}" "${dbfile}" << __EOF__ | |
.bail on | |
${opt_verbose:+.echo on} | |
${opt_verbose:+.changes on} | |
.import --csv ${opt_skip:-} ${opt_verbose:-} ${csv} "${tblname}" | |
${do_reindex:+"REINDEX \"${tblname}\";"} ${do_vacuum:+VACUUM;} ${do_analyze:+"ANALYZE \"${tblname}\""} | |
__EOF__ | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment