Last active
July 13, 2016 20:01
-
-
Save ers35/3b615a75fa0ed5e6d5cc to your computer and use it in GitHub Desktop.
Convert http://redd.it/3bxlg7 to an SQLite database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- The author disclaims copyright to this source code. | |
-- Convert http://redd.it/3bxlg7 to an SQLite database | |
-- Convert the whole dataset at once like so: | |
-- find reddit_data -type f -exec time lua json2sqlite.lua {} \; | |
local cjson = require"cjson" | |
local sqlite = require"lsqlite3" | |
local db = sqlite.open("reddit_data.db") | |
db:exec[[ | |
PRAGMA JOURNAL_MODE = OFF; | |
PRAGMA SYNCHRONOUS = OFF; | |
PRAGMA temp_store = MEMORY; | |
PRAGMA mmap_size = 268435456; | |
CREATE TABLE IF NOT EXISTS comment | |
( | |
id TEXT UNIQUE ON CONFLICT REPLACE, | |
body TEXT, | |
edited TEXT, | |
subreddit_id TEXT, | |
subreddit TEXT, | |
author_flair_text TEXT, | |
author TEXT, | |
ups TEXT, | |
retrieved_on INTEGER, | |
created_utc INTEGER, | |
link_id TEXT, | |
downs TEXT, | |
distinguished TEXT, | |
author_flair_css_class TEXT, | |
parent_id TEXT, | |
gilded INTEGER, | |
score INTEGER, | |
controversiality INTEGER, | |
archived TEXT, | |
name TEXT | |
); | |
]] | |
print(db:errmsg()) | |
local stmt = db:prepare[[ | |
INSERT INTO comment | |
( | |
id, | |
body, | |
edited, | |
subreddit_id, | |
subreddit, | |
author_flair_text, | |
author, | |
ups, | |
retrieved_on, | |
created_utc, | |
link_id, | |
downs, | |
distinguished, | |
author_flair_css_class, | |
parent_id, | |
gilded, | |
score, | |
controversiality, | |
archived, | |
name | |
) | |
VALUES | |
( | |
:id, | |
:body, | |
:edited, | |
:subreddit_id, | |
:subreddit, | |
:author_flair_text, | |
:author, | |
:ups, | |
:retrieved_on, | |
:created_utc, | |
:link_id, | |
:downs, | |
:distinguished, | |
:author_flair_css_class, | |
:parent_id, | |
:gilded, | |
:score, | |
:controversiality, | |
:archived, | |
:name | |
); | |
]] | |
print(db:errmsg()) | |
print(arg[1]) | |
db:exec"BEGIN TRANSACTION;" | |
local fd = io.open(arg[1], "r") | |
for json in fd:lines() do | |
local tbl = cjson.decode(json) | |
for k, v in pairs(tbl) do | |
if v == cjson.null then | |
tbl[k] = nil | |
end | |
end | |
stmt:bind_names(tbl) | |
stmt:step() | |
stmt:reset() | |
end | |
db:exec"COMMIT;" | |
print(db:errmsg()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment