Created
April 5, 2010 19:14
-
-
Save mjmsmith/356737 to your computer and use it in GitHub Desktop.
munge metafilter infodump for easier querying
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Download import.rb, user.sql, favorite.sql, and site.sql to a directory. | |
Download http://stuff.metafilter.com/infodump/infodump-all.zip and unzip to the same directory. | |
Create a MySQL database. | |
Edit <USERNAME>, <PASSWORD>, and <DATABASE> in import.rb. | |
Run import.rb. The MySQL bin directory needs to be on your path. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS favorite; | |
CREATE TABLE favorite ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
created datetime NOT NULL, | |
favee_id int(11) NOT NULL, | |
faver_id int(11) NOT NULL, | |
post_id int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
type int(11) NOT NULL, | |
PRIMARY KEY (id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'favoritesdata.txt' REPLACE INTO TABLE favorite | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
id,faver_id,favee_id,type,comment_id,post_id,created | |
); | |
DROP TABLE IF EXISTS mefi_favorite; | |
CREATE TABLE mefi_favorite ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
created datetime NOT NULL, | |
favee_id int(11) NOT NULL, | |
faver_id int(11) NOT NULL, | |
post_id int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
PRIMARY KEY (id), | |
KEY favee_id (favee_id), | |
KEY faver_id (faver_id), | |
KEY post_id (post_id), | |
KEY comment_id (comment_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1 | |
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (1,2); | |
UPDATE mefi_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0; | |
DROP TABLE IF EXISTS askme_favorite; | |
CREATE TABLE askme_favorite ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
created datetime NOT NULL, | |
favee_id int(11) NOT NULL, | |
faver_id int(11) NOT NULL, | |
post_id int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
PRIMARY KEY (id), | |
KEY favee_id (favee_id), | |
KEY faver_id (faver_id), | |
KEY post_id (post_id), | |
KEY comment_id (comment_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1 | |
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (3,4); | |
UPDATE askme_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0; | |
DROP TABLE IF EXISTS meta_favorite; | |
CREATE TABLE meta_favorite ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
created datetime NOT NULL, | |
favee_id int(11) NOT NULL, | |
faver_id int(11) NOT NULL, | |
post_id int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
PRIMARY KEY (id), | |
KEY favee_id (favee_id), | |
KEY faver_id (faver_id), | |
KEY post_id (post_id), | |
KEY comment_id (comment_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1 | |
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (5,6); | |
UPDATE meta_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0; | |
DROP TABLE IF EXISTS music_favorite; | |
CREATE TABLE music_favorite ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
created datetime NOT NULL, | |
favee_id int(11) NOT NULL, | |
faver_id int(11) NOT NULL, | |
post_id int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
PRIMARY KEY (id), | |
KEY favee_id (favee_id), | |
KEY faver_id (faver_id), | |
KEY post_id (post_id), | |
KEY comment_id (comment_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1 | |
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (8,9); | |
UPDATE music_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0; | |
DROP TABLE favorite; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby | |
def exec_sql filename | |
`mysql -u<USERNAME> -p<PASSWORD> <DATABASE> < #{filename}` | |
end | |
puts 'importing user' | |
exec_sql 'user.sql' | |
puts 'importing favorite' | |
exec_sql 'favorite.sql' | |
sites = ['mefi', 'askme', 'meta', 'music'] | |
sites.each do |site| | |
puts "importing #{site}" | |
sql = File.new('site.sql').read.gsub('${site}', site) | |
File.open('tmp.sql', 'w') {|f| f.write sql} | |
exec_sql 'tmp.sql' | |
end | |
File.delete('tmp.sql') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS ${site}_post; | |
CREATE TABLE ${site}_post ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
post_id int(11) NOT NULL, | |
user_id int(11) NOT NULL, | |
created datetime NOT NULL, | |
category int(11) NOT NULL, | |
comments_count int(11) NOT NULL, | |
favorites_count int(11) NOT NULL, | |
deleted int(11) NOT NULL, | |
reason varchar(255) NOT NULL, | |
title varchar(255) NOT NULL, | |
PRIMARY KEY (id), | |
KEY user_id (user_id), | |
KEY post_id (post_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'postdata_${site}.txt' REPLACE INTO TABLE ${site}_post | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
post_id,user_id,created,category,comments_count,favorites_count,deleted,reason | |
); | |
DROP TABLE IF EXISTS title; | |
CREATE TABLE title ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
post_id int(11) NOT NULL, | |
title varchar(255) NOT NULL, | |
PRIMARY KEY (id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'posttitles_${site}.txt' REPLACE INTO TABLE title | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
post_id,title | |
); | |
UPDATE ${site}_post p, title t SET p.title = t.title WHERE p.post_id = t.post_id; | |
UPDATE ${site}_post SET title = CONCAT('(',post_id,')') WHERE title = ''; | |
UPDATE ${site}_post SET reason = '' WHERE reason = '[NULL]'; | |
DROP TABLE title; | |
DROP TABLE IF EXISTS ${site}_comment; | |
CREATE TABLE ${site}_comment ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
best int(11) NOT NULL, | |
comment_id int(11) NOT NULL, | |
created datetime NOT NULL, | |
post_id int(11) NOT NULL, | |
user_id int(11) NOT NULL, | |
favorites_count int(11) NOT NULL, | |
length int(11) NOT NULL, | |
PRIMARY KEY (id), | |
KEY comment_id (comment_id), | |
KEY post_id (post_id), | |
KEY user_id (user_id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'commentdata_${site}.txt' REPLACE INTO TABLE ${site}_comment | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
comment_id,post_id,user_id,created,favorites_count,best | |
); | |
DROP TABLE IF EXISTS length; | |
CREATE TABLE length ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
comment_id int(11) NOT NULL, | |
length int(11) NOT NULL, | |
PRIMARY KEY (id) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'commentlength_${site}.txt' REPLACE INTO TABLE length | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
comment_id,length | |
); | |
UPDATE ${site}_comment c, length l SET c.length = l.length WHERE c.comment_id = l.comment_id; | |
DROP TABLE length; | |
DROP TABLE IF EXISTS ${site}_tag; | |
CREATE TABLE ${site}_tag ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
post_id int(11) NOT NULL, | |
created datetime NOT NULL, | |
name varchar(255) NOT NULL, | |
PRIMARY KEY (id), | |
KEY post_id (post_id), | |
KEY name (name) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'tagdata_${site}.txt' REPLACE INTO TABLE ${site}_tag | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
id,post_id,created,name | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS user; | |
CREATE TABLE user ( | |
id int(11) NOT NULL AUTO_INCREMENT, | |
joined datetime NOT NULL, | |
name varchar(255) NOT NULL, | |
PRIMARY KEY (id), | |
KEY name (name) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
LOAD DATA LOCAL INFILE 'usernames.txt' REPLACE INTO TABLE user | |
LINES TERMINATED BY '\r\n' IGNORE 2 LINES ( | |
id,joined,name | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment