|
public static void shardSolrIndex() throws IOException, SolrServerException { |
|
/* |
|
Start by faceting by year so we can include each year in a separate core ! |
|
*/ |
|
SolrQuery yearRangeQuery = new SolrQuery(); |
|
yearRangeQuery.setQuery("*:*"); |
|
yearRangeQuery.setRows(0); |
|
yearRangeQuery.setFacet(true); |
|
yearRangeQuery.add(FacetParams.FACET_RANGE, "time"); |
|
//We go back to 2000 the year 2000, this is a bit overkill but this way we ensure we have everything |
|
//The alternative would be to sort but that isn't recommended since it would be a very costly query ! |
|
yearRangeQuery.add(FacetParams.FACET_RANGE_START, "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS"); |
|
//Add the +0year to ensure that we DO NOT include the current year |
|
yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS"); |
|
yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR"); |
|
yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1)); |
|
|
|
//Create a temp directory to store our files in ! |
|
File tempDirectory = new File(ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator); |
|
tempDirectory.mkdirs(); |
|
|
|
|
|
QueryResponse queryResponse = solr.query(yearRangeQuery); |
|
//We only have one range query ! |
|
List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts(); |
|
for (RangeFacet.Count count : yearResults) { |
|
long totalRecords = count.getCount(); |
|
|
|
//Create a range query from this ! |
|
//We start with out current year |
|
DCDate dcStart = new DCDate(count.getValue()); |
|
Calendar endDate = Calendar.getInstance(); |
|
//Advance one year for the start of the next one ! |
|
endDate.setTime(dcStart.toDate()); |
|
endDate.add(Calendar.YEAR, 1); |
|
DCDate dcEndDate = new DCDate(endDate.getTime()); |
|
|
|
|
|
StringBuilder filterQuery = new StringBuilder(); |
|
filterQuery.append("time:(["); |
|
filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString())); |
|
filterQuery.append(" TO "); |
|
filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString())); |
|
filterQuery.append("]"); |
|
//The next part of the filter query excludes the content from midnight of the next year ! |
|
filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString())); |
|
filterQuery.append(")"); |
|
|
|
|
|
Map<String, String> yearQueryParams = new HashMap<String, String>(); |
|
yearQueryParams.put(CommonParams.Q, "*:*"); |
|
yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000)); |
|
yearQueryParams.put(CommonParams.FQ, filterQuery.toString()); |
|
yearQueryParams.put(CommonParams.WT, "csv"); |
|
|
|
//Start by creating a new core |
|
String coreName = "statistics-" + dcStart.getYear(); |
|
HttpSolrServer statisticsYearServer = createCore(solr, coreName); |
|
|
|
System.out.println("Moving: " + totalRecords + " into core " + coreName); |
|
log.info("Moving: " + totalRecords + " records into core " + coreName); |
|
|
|
List<File> filesToUpload = new ArrayList<File>(); |
|
for(int i = 0; i < totalRecords; i+=10000){ |
|
String solrRequestUrl = solr.getBaseURL() + "/select"; |
|
solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams); |
|
|
|
GetMethod get = new GetMethod(solrRequestUrl); |
|
new HttpClient().executeMethod(get); |
|
InputStream csvInputstream = get.getResponseBodyAsStream(); |
|
//Write the csv ouput to a file ! |
|
|
|
File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear() + "." + i + ".csv"); |
|
CSVWriter bw = new CSVWriter(new FileWriter(csvFile)); |
|
int excl = -1; |
|
|
|
try { |
|
CSVReader reader = new CSVReader(new InputStreamReader(csvInputstream)); |
|
String [] nextLine; |
|
String [] firstLine = new String[0]; |
|
if ((nextLine = reader.readNext()) != null) { |
|
firstLine = nextLine; |
|
for(int pi=0; pi<firstLine.length; pi++) { |
|
String s = firstLine[pi]; |
|
if (s == null) s = ""; |
|
if (s.equals("_version_")) { |
|
excl = pi; |
|
break; |
|
} |
|
} |
|
} |
|
for (; nextLine !=null; nextLine = reader.readNext()) { |
|
int sz = firstLine.length; |
|
if (excl > 0) sz--; |
|
String[] outLine = new String[sz]; |
|
int outIndex = 0; |
|
for(int pi=0; pi<firstLine.length; pi++) { |
|
String s = (pi > nextLine.length - 1) ? "\"\"" : nextLine[pi]; |
|
if (pi == excl) continue; |
|
if (s == null) s = ""; |
|
outLine[outIndex++] = s; |
|
} |
|
bw.writeNext(outLine); |
|
} |
|
reader.close(); |
|
} catch (IOException e) { |
|
e.printStackTrace(); |
|
} |
|
|
|
bw.flush(); |
|
bw.close(); |
|
//FileUtils.copyInputStreamToFile(csvInputstream, csvFile); |
|
filesToUpload.add(csvFile); |
|
|
|
//Add 10000 & start over again |
|
yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000))); |
|
} |
|
|
|
for (File tempCsv : filesToUpload) { |
|
//Upload the data in the csv files to our new solr core |
|
try { |
|
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); |
|
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8"); |
|
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); |
|
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8"); |
|
|
|
statisticsYearServer.request(contentStreamUpdateRequest); |
|
} catch (Exception e) { |
|
// TODO Auto-generated catch block |
|
e.printStackTrace(); |
|
} |
|
} |
|
statisticsYearServer.commit(true, true); |
|
|
|
|
|
//Delete contents of this year from our year query ! |
|
solr.deleteByQuery(filterQuery.toString()); |
|
solr.commit(true, true); |
|
|
|
log.info("Moved " + totalRecords + " records into core: " + coreName); |
|
} |
|
|
|
FileUtils.deleteDirectory(tempDirectory); |
|
} |
SolrTouch2.java performs the same action as SolrTouch.java, but it does not require a separate repository to hold the updated records.