Skip to content

Instantly share code, notes, and snippets.

@mark-cooper
Created January 15, 2012 06:23
Show Gist options
  • Save mark-cooper/1614700 to your computer and use it in GitHub Desktop.
Save mark-cooper/1614700 to your computer and use it in GitHub Desktop.
Open Library authors dump: olid/names to file
package net.libcode.www.openlibrary;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Reads a tab-separated Open Library authors dump and writes one
 * {@code <olid><delimiter><name>} record per input line to an output file.
 *
 * <p>Each input line is split on tabs; the field at index 1 is taken as the
 * OLID and the field at index 4 is parsed as JSON, from which the
 * {@code "name"} member is extracted.
 */
public class OpenLibraryOLIDName {
    /** Shared parser used for the JSON field of every input line. */
    public static final JSONParser parser = new JSONParser();
    /** Default path of the authors dump to read. */
    public static final String authors = "/home/mcooper/OL-Data/ol_dump_authors_2011-12-31.txt";
    /** Default path of the OLID/name file to write. */
    public static final String authors_olids = "/home/mcooper/OL-Data/olid_authors.txt";

    /** Index of the tab-separated field holding the OLID. */
    private static final int ID_FIELD = 1;
    /** Index of the tab-separated field holding the JSON record. */
    private static final int JSON_FIELD = 4;

    /**
     * Runs the extraction and prints the total execution time.
     *
     * @param args optional overrides: {@code args[0]} is the input dump path
     *             and {@code args[1]} the output path; either falls back to
     *             {@link #authors} / {@link #authors_olids} when absent
     * @throws ParseException if a JSON field cannot be parsed
     * @throws IOException    if reading or writing fails
     */
    public static void main(String[] args) throws ParseException, IOException {
        String input = args.length > 0 ? args[0] : authors;
        String output = args.length > 1 ? args[1] : authors_olids;
        long startTime = System.currentTimeMillis();
        processData(input, output, "\t", false);
        long endTime = System.currentTimeMillis();
        System.out.println("Total execution time: " + ((endTime - startTime) * 0.001) + "s");
    }

    /**
     * Converts every well-formed line of {@code file} into an
     * {@code id + delimiter + name} line in {@code output_file}.
     *
     * <p>Lines with fewer than five tab-separated fields are skipped and
     * reported on standard error. A record whose JSON is not an object, or
     * whose {@code "name"} member is missing or not a string, is written with
     * an empty name. Both files are handled as UTF-8 and are closed even when
     * processing fails part-way through.
     *
     * <p>NOTE(review): the name is written verbatim, so a name containing the
     * delimiter or a line break would yield an ambiguous output record.
     *
     * @param file        path of the tab-separated dump to read
     * @param output_file path of the file to (over)write
     * @param delimiter   separator placed between id and name in the output
     * @param print       when {@code true}, echoes each processed input line
     *                    to standard output prefixed with its record number
     * @throws ParseException if a JSON field cannot be parsed
     * @throws IOException    if reading or writing fails
     */
    public static void processData(String file, String output_file, String delimiter, boolean print) throws ParseException, IOException {
        int count = 0;       // records written
        int lineNumber = 0;  // lines read, used only for diagnostics
        // try-with-resources closes the writer first, then the reader, on every exit path.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(new File(file)), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(new File(output_file)), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber += 1;
                String[] parts = line.split("\t");
                if (parts.length <= JSON_FIELD) {
                    // Blank or truncated line: indexing it would throw, so report and move on.
                    System.err.println("Skipping line " + lineNumber + ": expected at least "
                            + (JSON_FIELD + 1) + " tab-separated fields, found " + parts.length);
                    continue;
                }
                // Get the OLID
                String id = parts[ID_FIELD];
                // Get the JSON
                Object parsed = parser.parse(parts[JSON_FIELD]);
                // Get the name; anything other than a string member leaves it empty
                String name = "";
                if (parsed instanceof JSONObject) {
                    Object value = ((JSONObject) parsed).get("name");
                    if (value instanceof String) {
                        name = (String) value;
                    }
                }
                writer.write(id + delimiter + name);
                writer.newLine();
                count += 1;
                if (print) {
                    System.out.println(count + ": " + line);
                }
            }
        }
        System.out.println("ID/NAMES #" + count);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment