Last active
February 19, 2025 10:14
-
-
Save gromgit/6965be955d504d53023439efaff196fe to your computer and use it in GitHub Desktop.
Counterexample to https://www.reddit.com/r/sqlite/comments/1irbr0b/column_count_mismatch_during_csv_import_increases/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
cmd() { | |
echo ">>> $(printf '%q ' "$@")" >&2 | |
time "$@" | |
} | |
sqlite=/opt/homebrew/opt/sqlite/bin/sqlite3 | |
csvs=(perfect_0.csv perfect_1.csv imperfect_2.csv) | |
# Generate CSVs | |
for i in $(seq -f %07.0f 9999999); do | |
echo "${i},This,is,line,${i},of,a,nonsense,CSV,that,exists,just,to,debunk,the,assumption,that,missing,columns,cause,performance,collapse,in,SQLite,${RANDOM}" | |
done > perfect_0.csv | |
# 2nd CSV with new PKs | |
sed -e 's/^/1/' < perfect_0.csv > perfect_1.csv | |
# 3rd CSV with new PKs and missing last column | |
sed -e 's/^/2/; s/,[^,]*$//' < perfect_0.csv > imperfect_2.csv | |
for i in "${csvs[@]}"; do | |
echo "===== ${i} ($(wc -l < "${i}") rows) =====" | |
head -5 "$i" | |
done | |
rm -f test.db | |
$sqlite test.db <<EOSQL | |
CREATE TABLE t(a INTEGER PRIMARY KEY, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y INTEGER); | |
CREATE INDEX xtra_1 ON t(a,b,c,f,k,l,q,m,g,u); | |
CREATE INDEX xtra_2 ON t(c,f,k,l); | |
EOSQL | |
for i in "${csvs[@]}"; do | |
cmd bash -c "$sqlite test.db '.import --csv ${i} t' 2>/dev/null" | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Conclusion: performance collapse reported by OP is NOT due to missing column.