Skip to content

Instantly share code, notes, and snippets.

@vinhjaxt
Created June 22, 2022 11:20
Show Gist options
  • Save vinhjaxt/85b8744b44c3967cb28d59cfa5ad7d9d to your computer and use it in GitHub Desktop.
Save vinhjaxt/85b8744b44c3967cb28d59cfa5ad7d9d to your computer and use it in GitHub Desktop.
CSV to SQLite
// npm i -g csv-parse better-sqlite3 psl
const {parse: csvParse} = require('csv-parse')
const fs = require('fs')
const psl = require('psl')
const sqlite3 = require('better-sqlite3')
const db = new sqlite3('orgurl.db', {
readonly: false,
fileMustExist: false,
})
const file = fs.createReadStream(process.argv[2])
const csvParser = csvParse()
csvParser.on('readable', () => {
let row
while ((row = csvParser.read())) {
try {
if (row[1].trim().length == 0) continue
const info = db.prepare('INSERT INTO orgs(name, info, contact) VALUES (?, ?, ?)').run(row[1].trim(), row[3].trim().replace(/\n\s+/g, '\n'), row.slice(4).join('\n\n').trim().replace(/\n +/g, '\n'))
const orgId = info.lastInsertRowid
for (let url of row[2].split(/[\n;\s\/]/)) {
url = url.replace(/^https?:|[\/\.;]$/g, '')
if (url.length == 0 || /@/.test(url)) continue
if (!psl.isValid(url)) {
console.log(url, row)
process.exit(1)
}
console.log(url)
db.prepare('INSERT INTO urls(url, org_id) VALUES (?, ?)').run(url, orgId)
}
}catch(e){
console.error(e, row)
}
}
})
csvParser.on('end', () => {
console.log('Done')
})
let noop = () => {}
!(async () => {
try {
db.exec(`CREATE TABLE IF NOT EXISTS orgs(id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE, info TEXT, contact TEXT);`)
db.exec(`CREATE TABLE IF NOT EXISTS urls(url TEXT UNIQUE, org_id INTEGER, disabled BOOL);`)
file.pipe(csvParser)
}catch(e){
console.error(e)
}
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment