Open Library data dumps: getting CSV
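Each line of an Open Library dump is tab-separated: record type, key, revision, last-modified timestamp, and the record's JSON. A hypothetical authors-dump line (OLID, dates, and values invented for illustration) looks roughly like:

/type/author    /authors/OL1234567A    3    2011-12-31T00:00:00.000000    {"name": "Jane Example", "key": "/authors/OL1234567A", ...}

The code below leans on that layout: it splits on tabs to pull the OLID out of the key column, and slices from the first "{" to the last "}" to recover the JSON.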
package net.libcode.www.openlibrary;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

import com.google.common.io.Files;
/*
 * Script-style, built incrementally, ready for reorganization.
 * Parsing with JSONObject retains a lot of Strings, so run with:
 * -Xmx1024m -XX:MaxPermSize=256m
 */
public class OpenLibraryData {
    public static final JSONParser parser = new JSONParser();
    public static final int MAX_JSON = 50000; // Parse at most this many records per pass (chunking)

    // Per record type: raw dump file, search-terms file, matched raw lines, and final CSV output
    public static final File authors = new File("/home/mcooper/OL-Data/ol_dump_authors_2011-12-31.txt");
    public static final File authors_search = new File("/home/mcooper/OL-Data/authors_search.txt");
    public static final File authors_output = new File("/home/mcooper/OL-Data/authors_output.txt");
    public static final File authors_found = new File("/home/mcooper/OL-Data/authors_found.csv");

    public static final File works = new File("/home/mcooper/OL-Data/ol_dump_works_2011-12-31.txt");
    public static final File works_search = new File("/home/mcooper/OL-Data/works_search.txt");
    public static final File works_output = new File("/home/mcooper/OL-Data/works_output.txt");
    public static final File works_found = new File("/home/mcooper/OL-Data/works_found.csv");

    public static final File editions = new File("/home/mcooper/OL-Data/ol_dump_editions_2011-12-31.txt");
    public static final File editions_search = new File("/home/mcooper/OL-Data/editions_search.txt");
    public static final File editions_output = new File("/home/mcooper/OL-Data/editions_output.txt");
    public static final File editions_found = new File("/home/mcooper/OL-Data/editions_found.csv");

    // URL prefixes and column headers for each output file
    public static final String author_profile_url = "http://openlibrary.org/authors/";
    public static final String author_cover_url = "http://covers.openlibrary.org/a/olid/";
    public static final String author_csv_hdr = "ID\tNAME\tPROFILE\tIMAGE\tBIRTH\tDEATH\tLINKS";
    public static final String works_profile_url = "http://openlibrary.org/works/";
    public static final String works_cover_url = "http://covers.openlibrary.org/w/id/";
    public static final String works_csv_hdr = "ID\tTITLE\tPROFILE\tIMAGE\tAUTHORS\tPUBLISHED\tDESCRIPTION";
    public static final String editions_profile_url = "http://openlibrary.org/books/";
    public static final String editions_cover_url = "http://covers.openlibrary.org/b/id/";
    public static final String editions_csv_hdr = "ID\tTITLE\tWORKS\tAUTHORS\tOCLC\tLCCN\tISBN10\tISBN13\tPLACES\tPUBLISHERS\tDATE\tFORMAT\tPAGES\tPAGINATION\tDIMENSIONS\tSUBJECTS\tSUBJECT PLACES\tSERIES\tCOVERS\tLIBRARYTHING\tGOODREADS";

    public static final String OLID_DELIM = "\t"; // Dump columns are tab-separated
    public static final String OLID_IDENT = "O";  // The OLID begins at the first 'O' of the key column
    public static final String JSON_S = "{";
    public static final String JSON_E = "}";
    public static final String SMALL_JPG = "-S.jpg";
    public static final String MED_JPG = "-M.jpg";
    public static final String CSV_FIELD_DELIM = "\t";
    public static final String CSV_CELL_DELIM = "|";
    public static final String NL = String.format("%n");
    public static void main(String[] args) throws IOException, ParseException {
        // Stage 1: search the authors dump, then convert matches to CSV
        clearFile(authors_found);
        Set<String> searchTerms = readSearchTermsFile(authors_search, 10);
        long startTime = System.currentTimeMillis();
        searchData(authors, authors_output, searchTerms, false, true);
        searchTerms = null;
        Map<String, JSONObject> authorsData = parseData(authors_output, 0, true);
        List<String> authors_csv = new ArrayList<String>(authorsData.size() + 1);
        authors_csv.add(author_csv_hdr);
        System.out.println();
        System.out.println("AUTHORS FOUND #" + authorsData.size());
        System.out.println();
        authors_csv.addAll(getAuthorsCsv(authorsData, CSV_FIELD_DELIM, CSV_CELL_DELIM, true));
        long endTime = System.currentTimeMillis();
        System.out.println("Total execution time: " + (millisToSeconds(endTime - startTime)) + "s");
        System.out.println();
        appendToFile(authors_found, authors_csv, Charset.defaultCharset());
        authors_csv = null;

        // Stage 2: search the works dump file by author OLID
        clearFile(works_found);
        searchTerms = readSearchTermsFile(works_search, 10);
        startTime = System.currentTimeMillis();
        searchData(works, works_output, searchTerms, false, false);
        searchTerms = null;
        Map<String, JSONObject> worksData = parseData(works_output, 0, false);
        List<String> works_csv = new ArrayList<String>(worksData.size() + 1);
        works_csv.add(works_csv_hdr);
        System.out.println();
        System.out.println("WORKS FOUND #" + worksData.size());
        System.out.println();
        works_csv.addAll(getWorksCsv(worksData, CSV_FIELD_DELIM, CSV_CELL_DELIM, false));
        endTime = System.currentTimeMillis();
        System.out.println("Total execution time: " + (millisToSeconds(endTime - startTime)) + "s");
        System.out.println();
        appendToFile(works_found, works_csv, Charset.defaultCharset());
        works_csv = null;

        // Stage 3: search the editions dump ... this one is huge (~25gb), so parse it in chunks
        clearFile(editions_found);
        // searchTerms = readSearchTermsFile(editions_search, 100000);
        // searchDataByOLID(editions, editions_output, searchTerms, true);
        // searchTerms = null;
        startTime = System.currentTimeMillis();
        Map<String, JSONObject> editionsData = parseData(editions_output, 0, true); // First chunk: lines 1-50,000
        List<String> editions_csv = new ArrayList<String>(editionsData.size() + 1);
        editions_csv.add(editions_csv_hdr);
        System.out.println();
        System.out.println("EDITIONS FOUND #" + editionsData.size());
        System.out.println();
        editions_csv.addAll(getEditionsCsv(editionsData, CSV_FIELD_DELIM, CSV_CELL_DELIM, false));
        editionsData = null;
        endTime = System.currentTimeMillis();
        System.out.println("Total execution time: " + (millisToSeconds(endTime - startTime)) + "s");
        System.out.println();
        appendToFile(editions_found, editions_csv, Charset.defaultCharset());
        editions_csv = null;

        // Second chunk: lines 50,001 onward (MAX_JSON records per pass)
        startTime = System.currentTimeMillis();
        editionsData = parseData(editions_output, 50001, true);
        editions_csv = new ArrayList<String>(editionsData.size() + 1);
        editions_csv.add(editions_csv_hdr);
        System.out.println();
        System.out.println("EDITIONS FOUND #" + editionsData.size());
        System.out.println();
        editions_csv.addAll(getEditionsCsv(editionsData, CSV_FIELD_DELIM, CSV_CELL_DELIM, false));
        editionsData = null;
        endTime = System.currentTimeMillis();
        System.out.println("Total execution time: " + (millisToSeconds(endTime - startTime)) + "s");
        System.out.println();
        appendToFile(editions_found, editions_csv, Charset.defaultCharset());
        editions_csv = null;
    }
    // Truncate a file to zero length
    public static void clearFile(File file) throws IOException {
        Files.write(new byte[]{}, file);
    }

    public static void appendToFile(File file, String data, Charset charset) throws IOException {
        Files.append(data + NL, file, charset);
    }

    public static void appendToFile(File file, List<String> data, Charset charset) throws IOException {
        for(String x : data) {
            Files.append(x + NL, file, charset);
        }
    }

    // Read search terms, one per line, into a Set for constant-time membership tests
    public static Set<String> readSearchTermsFile(File file, int capacity) throws IOException {
        Set<String> lines = new HashSet<String>(capacity);
        BufferedReader reader = Files.newReader(file, Charset.defaultCharset());
        String line;
        while((line = reader.readLine()) != null) {
            lines.add(line);
        }
        reader.close();
        return lines;
    }
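    // A sketch of a search-terms file (IDs invented for illustration): one term per line, e.g.
    //   OL1234567A
    //   OL7654321A
    // Any substring works for searchData() below; searchDataByOLID() expects exact OLIDs.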
    // Brute-force search: every dump line is checked against every term until one matches
    public static int searchData(File file, File output_file, Set<String> search, boolean unique, boolean print) throws IOException, ParseException {
        clearFile(output_file);
        BufferedReader reader = Files.newReader(file, Charset.defaultCharset());
        String line;
        String term;
        int count = 0;
        int found = 0;
        while((line = reader.readLine()) != null) {
            count += 1;
            for (Iterator<String> i = search.iterator(); i.hasNext();) {
                term = i.next();
                if(line.contains(term)) {
                    found += 1;
                    appendToFile(output_file, line, Charset.defaultCharset());
                    if(unique) i.remove(); // Stop looking for a term after its first hit
                    if(print) System.out.println(count + ": " + line);
                    break;
                }
            }
        }
        reader.close();
        return found;
    }
    /*
     * Search by OLID. Fast (6 minutes for 100,000 OLID edition searches on the 25gb editions dump)
     */
    public static int searchDataByOLID(File file, File output_file, Set<String> search, boolean print) throws IOException {
        clearFile(output_file);
        BufferedReader reader = Files.newReader(file, Charset.defaultCharset());
        String line;
        String[] parts;
        String id;
        int count = 0;
        int found = 0;
        while((line = reader.readLine()) != null) {
            count += 1;
            parts = line.split(OLID_DELIM);
            int idStart = parts[1].indexOf(OLID_IDENT); // parts[1] is the record key; the OLID starts at its first 'O'
            id = parts[1].substring(idStart, parts[1].length());
            if(search.contains(id)) {
                found += 1;
                appendToFile(output_file, line, Charset.defaultCharset());
                if(print) System.out.println(count + ": " + line);
            }
        }
        reader.close();
        return found;
    }
    /*
     * Return a Map of OLID => JSON data, parsing up to MAX_JSON records starting at line `begin`
     */
    public static Map<String, JSONObject> parseData(File file, int begin, boolean print) throws IOException, ParseException {
        Map<String, JSONObject> data = new HashMap<String, JSONObject>();
        BufferedReader reader = Files.newReader(file, Charset.defaultCharset());
        String line;
        String[] parts;
        String id;
        String json;
        int lineNumber = 0;
        int count = 0;
        while((line = reader.readLine()) != null && count < MAX_JSON) {
            lineNumber += 1;
            if(lineNumber >= begin) {
                count += 1;
                // Get the OLID
                parts = line.split(OLID_DELIM);
                int idStart = parts[1].indexOf(OLID_IDENT);
                id = parts[1].substring(idStart, parts[1].length());
                // Get the JSON: everything from the first '{' to the last '}'
                int jsonStart = line.indexOf(JSON_S);
                int jsonStop = line.lastIndexOf(JSON_E);
                json = line.substring(jsonStart, jsonStop + 1);
                data.put(id, (JSONObject) parser.parse(json));
                if(print) System.out.println(count + ": " + line);
            }
        }
        reader.close();
        return data;
    }
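    // Given the hypothetical dump line sketched above, parseData() yields an entry like
    //   "OL1234567A" => {"name": "Jane Example", ...}
    // keyed by OLID so the CSV builders below can reconstruct profile and cover URLs.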
    public static List<String> getAuthorsCsv(Map<String, JSONObject> authors, String fieldDelimiter, String cellDelimiter, boolean print) {
        List<String> data = new ArrayList<String>(authors.size());
        for(Map.Entry<String, JSONObject> entry : authors.entrySet()) {
            List<String> field = new ArrayList<String>();
            field.add(entry.getKey());
            JSONObject json = entry.getValue();
            field.add(getJsonString(json, "name"));
            field.add(author_profile_url + field.get(0));
            field.add(author_cover_url + field.get(0) + MED_JPG);
            field.add(getJsonString(json, "birth_date"));
            field.add(getJsonString(json, "death_date"));
            field.add(getJsonHashString(json, "links", "url", cellDelimiter, false));
            data.add(join(field, fieldDelimiter));
            if(print) System.out.println(join(field, fieldDelimiter));
        }
        return data;
    }
    public static List<String> getWorksCsv(Map<String, JSONObject> works, String fieldDelimiter, String cellDelimiter, boolean print) {
        List<String> data = new ArrayList<String>(works.size());
        String title;
        String subtitle;
        String desc;
        for(Map.Entry<String, JSONObject> entry : works.entrySet()) {
            List<String> field = new ArrayList<String>();
            field.add(entry.getKey());
            JSONObject json = entry.getValue();
            title = getJsonString(json, "title");
            subtitle = getJsonString(json, "subtitle");
            if(! subtitle.isEmpty()) title = title + " - " + subtitle;
            field.add(title);
            field.add(works_profile_url + field.get(0));
            field.add(works_cover_url + field.get(0) + MED_JPG);
            field.add(getJsonHashString(json, "authors", "author", "key", cellDelimiter, true));
            field.add(getJsonString(json, "first_publish_date"));
            // "description" may be a bare string or an object with a "value" field
            try {
                JSONObject description = (JSONObject) json.get("description");
                desc = getJsonString(description, "value");
            } catch (ClassCastException e) {
                desc = getJsonString(json, "description");
            }
            field.add(removeNewLines(desc));
            data.add(join(field, fieldDelimiter));
            if(print) System.out.println(join(field, fieldDelimiter));
        }
        return data;
    }
    public static List<String> getEditionsCsv(Map<String, JSONObject> editions, String fieldDelimiter, String cellDelimiter, boolean print) {
        List<String> data = new ArrayList<String>(editions.size());
        String title;
        String subtitle;
        String by;
        for(Map.Entry<String, JSONObject> entry : editions.entrySet()) {
            List<String> field = new ArrayList<String>();
            field.add(entry.getKey());
            JSONObject json = entry.getValue();
            title = getJsonString(json, "title");
            subtitle = getJsonString(json, "subtitle");
            by = getJsonString(json, "by_statement");
            if(! subtitle.isEmpty()) title = title + " - " + subtitle;
            if(! by.isEmpty()) title = title + " - " + by;
            field.add(title);
            field.add(getJsonHashString(json, "works", "key", cellDelimiter, true));
            field.add(getJsonHashString(json, "authors", "key", cellDelimiter, true));
            field.add(getJsonArrayString(json, "oclc_numbers", cellDelimiter));
            field.add(getJsonArrayString(json, "lccn", cellDelimiter));
            field.add(getJsonArrayString(json, "isbn_10", cellDelimiter));
            field.add(getJsonArrayString(json, "isbn_13", cellDelimiter));
            field.add(getJsonArrayString(json, "publish_places", cellDelimiter));
            field.add(getJsonArrayString(json, "publishers", cellDelimiter));
            field.add(getJsonString(json, "publish_date"));
            field.add(getJsonString(json, "physical_format")); // underscored key, per the dump JSON
            field.add(getJsonString(json, "number_of_pages"));
            field.add(getJsonString(json, "pagination"));
            field.add(getJsonString(json, "physical_dimensions"));
            field.add(getJsonArrayString(json, "subjects", cellDelimiter));
            field.add(getJsonArrayString(json, "subject_places", cellDelimiter));
            field.add(getJsonArrayString(json, "series", cellDelimiter));
            field.add(getJsonArrayString(json, "covers", cellDelimiter));
            field.add(getJsonHashArrayString(json, "identifiers", "librarything", cellDelimiter));
            field.add(getJsonHashArrayString(json, "identifiers", "goodreads", cellDelimiter));
            data.add(join(field, fieldDelimiter));
            if(print) System.out.println(join(field, fieldDelimiter));
        }
        return data;
    }
    // Fetch a value as a String; non-String values (e.g. number_of_pages is a Long) are stringified
    public static String getJsonString(JSONObject json, String key) {
        String value = "";
        if(json != null) {
            String result = null;
            try {
                result = (String) json.get(key);
            } catch (ClassCastException e) {
                result = String.valueOf(json.get(key));
            }
            if(result != null) value = result;
        }
        return value;
    }
    public static String getJsonHashArrayString(JSONObject json, String json_obj, String key, String delimiter) {
        String value = "";
        if(json != null) {
            JSONObject a = (JSONObject) json.get(json_obj);
            if(a != null) value = getJsonArrayString(a, key, delimiter);
        }
        return value;
    }

    public static String getJsonArrayString(JSONObject json, String key, String delimiter) {
        String value = "";
        if(json != null) {
            JSONArray result = (JSONArray) json.get(key);
            if(result != null) value = join(result, delimiter);
        }
        return value;
    }
    public static String getJsonHashString(JSONObject json, String key, String sub_key, String delimiter, boolean olid) {
        String value = "";
        JSONArray result = (JSONArray) json.get(key);
        if(result != null) {
            String element;
            for(Object x : result) {
                JSONObject a = (JSONObject) x;
                element = getJsonString(a, sub_key);
                if(olid) {
                    // Strip the key prefix down to the bare OLID
                    int idStart = element.indexOf(OLID_IDENT);
                    element = element.substring(idStart, element.length());
                }
                value = value + element + delimiter;
            }
        }
        // Trim the trailing delimiter
        if(! value.isEmpty()) value = value.substring(0, value.length() - delimiter.length());
        return value;
    }

    public static String getJsonHashString(JSONObject json, String key, String sub_key, String sub_sub_key, String delimiter, boolean olid) {
        String value = "";
        JSONArray result = (JSONArray) json.get(key);
        if(result != null) {
            String element;
            for(Object x : result) {
                JSONObject a = (JSONObject) x;
                JSONObject b = (JSONObject) a.get(sub_key);
                element = getJsonString(b, sub_sub_key);
                if(olid) {
                    int idStart = element.indexOf(OLID_IDENT);
                    element = element.substring(idStart, element.length());
                }
                value = value + element + delimiter;
            }
        }
        if(! value.isEmpty()) value = value.substring(0, value.length() - delimiter.length());
        return value;
    }
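    // Example (invented data): for works JSON like
    //   "authors": [{"author": {"key": "/authors/OL1234567A"}}, {"author": {"key": "/authors/OL7654321A"}}]
    // getJsonHashString(json, "authors", "author", "key", "|", true) returns "OL1234567A|OL7654321A".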
    public static String join(List<?> list, String delimiter) {
        StringBuilder l = new StringBuilder();
        for(Object x : list) {
            if(l.length() != 0) l.append(delimiter);
            l.append(x.toString());
        }
        return l.toString();
    }

    public static String removeNewLines(String text) {
        return text.replaceAll("\\r\\n|\\r|\\n", " ");
    }

    public static long millisToSeconds(long milli) {
        return milli / 1000;
    }
}
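To run this (a sketch, assuming json-simple and Guava are on the classpath and the hard-coded dump paths above are adjusted to your machine): compile the class, then launch it with the heap settings from the header comment, e.g. java -Xmx1024m -XX:MaxPermSize=256m net.libcode.www.openlibrary.OpenLibraryData. Note that each *_found.csv is actually tab-delimited, with multi-valued cells joined by "|".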