Created
January 15, 2012 06:23
-
-
Save mark-cooper/1614700 to your computer and use it in GitHub Desktop.
Open Library authors dump: olid/names to file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.libcode.www.openlibrary; | |
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class OpenLibraryOLIDName { | |
public static final JSONParser parser = new JSONParser(); | |
public static final String authors = "/home/mcooper/OL-Data/ol_dump_authors_2011-12-31.txt"; | |
public static final String authors_olids = "/home/mcooper/OL-Data/olid_authors.txt"; | |
public static void main(String[] args) throws ParseException, IOException { | |
long startTime = System.currentTimeMillis(); | |
processData(authors, authors_olids, "\t", false); | |
long endTime = System.currentTimeMillis(); | |
System.out.println("Total execution time: " + ((endTime - startTime) * 0.001) + "s"); | |
} | |
public static void processData(String file, String output_file, String delimiter, boolean print) throws ParseException, IOException { | |
BufferedReader reader = new BufferedReader(new FileReader(new File(file))); | |
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(output_file))); | |
String line; | |
int count = 0; | |
while((line = reader.readLine()) != null) { | |
count += 1; | |
// Get the OLID | |
String[] parts = line.split("\t"); | |
String id = parts[1]; | |
// Get the JSON | |
String json = parts[4]; | |
JSONObject j = (JSONObject) parser.parse(json); | |
// Get the name | |
String name = ""; | |
if(j != null) { | |
String result = (String) j.get("name"); | |
if(result != null) name = result; | |
} | |
writer.write(id + delimiter + name); | |
writer.newLine(); | |
if(print) System.out.println(count + ": " + line); | |
} | |
writer.close(); | |
System.out.println("ID/NAMES #" + count); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment