DSpace Special code to run stats-util -s that forces new id/version values
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.statistics;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import java.io.*;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.Group;
import org.dspace.statistics.util.DnsLookup;
import org.dspace.statistics.util.LocationUtils;
import org.dspace.statistics.util.SpiderDetector;
import org.dspace.usage.UsageWorkflowEvent;
import javax.servlet.http.HttpServletRequest;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * GUCODE[[twb27:Special version of SolrLogger designed to repair bad Solr Statistics records]]
 *
 * Static holder for an HttpSolrServer connection used to issue
 * usage logging events to Solr from DSpace libraries, and some static query
 * composers.
 *
 * @author ben at atmire.com
 * @author kevinvandevelde at atmire.com
 * @author mdiggory at atmire.com
 */
public class SolrLoggerSpecial
{
    private static final Logger log = Logger.getLogger(SolrLogger.class);
    private static final HttpSolrServer solr;

    public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
    public static final String DATE_FORMAT_DCDATE = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    private static final LookupService locationService;

    private static final boolean useProxies;

    private static List<String> statisticYearCores = new ArrayList<String>();

    public static enum StatisticsType {
        VIEW ("view"),
        SEARCH ("search"),
        SEARCH_RESULT ("search_result"),
        WORKFLOW("workflow");

        private final String text;

        StatisticsType(String text) {
            this.text = text;
        }

        public String text() { return text; }
    }
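
    // For reference: the enum constant's text() value is what gets stored in
    // the "statistics_type" field of each Solr record, e.g.
    // StatisticsType.VIEW.text() yields "view".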
    static
    {
        log.info("solr-statistics.spidersfile:" + ConfigurationManager.getProperty("solr-statistics", "spidersfile"));
        log.info("solr-statistics.server:" + ConfigurationManager.getProperty("solr-statistics", "server"));
        log.info("usage-statistics.dbfile:" + ConfigurationManager.getProperty("usage-statistics", "dbfile"));

        HttpSolrServer server = null;

        if (ConfigurationManager.getProperty("solr-statistics", "server") != null)
        {
            try
            {
                server = new HttpSolrServer(ConfigurationManager.getProperty("solr-statistics", "server"));

                SolrQuery solrQuery = new SolrQuery()
                        .setQuery("type:2 AND id:1");
                server.query(solrQuery);

                // Attempt to retrieve all the statistic year cores
                File solrDir = new File(ConfigurationManager.getProperty("dspace.dir") + "/solr/");
                File[] solrCoreFiles = solrDir.listFiles(new FileFilter() {
                    @Override
                    public boolean accept(File file) {
                        // Core name example: statistics-2008
                        return file.getName().matches("statistics-\\d\\d\\d\\d");
                    }
                });
                // Base URL should look like: http://localhost:{port.number}/solr
                String baseSolrUrl = server.getBaseURL().replace("statistics", "");
                for (File solrCoreFile : solrCoreFiles) {
                    log.info("Loading core with name: " + solrCoreFile.getName());

                    createCore(server, solrCoreFile.getName());
                    // Add it to our cores list so we can query it!
                    statisticYearCores.add(baseSolrUrl.replace("http://", "").replace("https://", "") + solrCoreFile.getName());
                }
                // Also add the core containing the current year!
                statisticYearCores.add(server.getBaseURL().replace("http://", "").replace("https://", ""));
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
        solr = server;

        // Read in the file so we don't have to do it all the time
        //spiderIps = SpiderDetector.getSpiderIpAddresses();

        LookupService service = null;
        // Get the db file for the location
        String dbfile = ConfigurationManager.getProperty("usage-statistics", "dbfile");
        if (dbfile != null)
        {
            try
            {
                service = new LookupService(dbfile,
                        LookupService.GEOIP_STANDARD);
            }
            catch (FileNotFoundException fe)
            {
                log.error("The GeoLite Database file is missing (" + dbfile + ")! Solr Statistics cannot generate location based reports! Please see the DSpace installation instructions for instructions to install this file.", fe);
            }
            catch (IOException e)
            {
                log.error("Unable to load GeoLite Database file (" + dbfile + ")! You may need to reinstall it. See the DSpace installation instructions for more details.", e);
            }
        }
        else
        {
            log.error("The required 'dbfile' configuration is missing in usage-statistics.cfg!");
        }
        locationService = service;

        if ("true".equals(ConfigurationManager.getProperty("useProxies")))
        {
            useProxies = true;
        }
        else
        {
            useProxies = false;
        }

        log.info("useProxies=" + useProxies);
    }
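
    /*
     * Note on the static initializer above: it assumes the standard DSpace
     * statistics setup -- a "server" property in solr-statistics.cfg pointing
     * at the statistics core, a GeoLite "dbfile" in usage-statistics.cfg, and
     * (optionally) per-year shard cores on disk named statistics-YYYY under
     * {dspace.dir}/solr/.
     */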
    /**
     * Old post method; use the new postView method instead.
     *
     * @deprecated
     * @param dspaceObject the object used.
     * @param request the current request context.
     * @param currentUser the current session's user.
     */
    public static void post(DSpaceObject dspaceObject, HttpServletRequest request,
            EPerson currentUser)
    {
        postView(dspaceObject, request, currentUser);
    }
    /**
     * Store a usage event into Solr.
     *
     * @param dspaceObject the object used.
     * @param request the current request context.
     * @param currentUser the current session's user.
     */
    public static void postView(DSpaceObject dspaceObject, HttpServletRequest request,
            EPerson currentUser)
    {
        if (solr == null || locationService == null)
        {
            return;
        }

        try
        {
            SolrInputDocument doc1 = getCommonSolrDoc(dspaceObject, request, currentUser);
            if (doc1 == null) return;
            if (dspaceObject instanceof Bitstream)
            {
                Bitstream bit = (Bitstream) dspaceObject;
                Bundle[] bundles = bit.getBundles();
                for (Bundle bundle : bundles) {
                    doc1.addField("bundleName", bundle.getName());
                }
            }

            doc1.addField("statistics_type", StatisticsType.VIEW.text());

            solr.add(doc1);
            // commits are executed automatically using the solr autocommit
            // solr.commit(false, false);
        }
        catch (RuntimeException re)
        {
            throw re;
        }
        catch (Exception e)
        {
            log.error(e.getMessage(), e);
        }
    }
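
    /*
     * A minimal usage sketch for postView (caller names are illustrative, not
     * part of this class):
     *
     *   // Inside a servlet or DSpace usage-event consumer:
     *   // Item item = ...; HttpServletRequest request = ...;
     *   // SolrLoggerSpecial.postView(item, request, context.getCurrentUser());
     *
     * The overload below does the same thing for callers that only have the
     * raw ip / userAgent / X-Forwarded-For strings instead of a request.
     */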
    public static void postView(DSpaceObject dspaceObject,
            String ip, String userAgent, String xforwarderfor, EPerson currentUser) {
        if (solr == null || locationService == null) {
            return;
        }

        try {
            SolrInputDocument doc1 = getCommonSolrDoc(dspaceObject, ip, userAgent, xforwarderfor,
                    currentUser);
            if (doc1 == null)
                return;
            if (dspaceObject instanceof Bitstream) {
                Bitstream bit = (Bitstream) dspaceObject;
                Bundle[] bundles = bit.getBundles();
                for (Bundle bundle : bundles) {
                    doc1.addField("bundleName", bundle.getName());
                }
            }
            doc1.addField("statistics_type", StatisticsType.VIEW.text());

            solr.add(doc1);
            // commits are executed automatically using the solr autocommit
            // solr.commit(false, false);
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
    /**
     * Returns a Solr input document containing common information about the statistics,
     * regardless of whether we are logging a search or a view of a DSpace object.
     *
     * @param dspaceObject the object used.
     * @param request the current request context.
     * @param currentUser the current session's user.
     * @return a Solr input document
     * @throws SQLException in case of a database exception
     */
    private static SolrInputDocument getCommonSolrDoc(DSpaceObject dspaceObject, HttpServletRequest request, EPerson currentUser) throws SQLException {
        boolean isSpiderBot = request != null && SpiderDetector.isSpider(request);
        if (isSpiderBot &&
                !ConfigurationManager.getBooleanProperty("usage-statistics", "logBots", true))
        {
            return null;
        }

        SolrInputDocument doc1 = new SolrInputDocument();
        // Save our basic info that we already have
        if (request != null) {
            String ip = request.getRemoteAddr();

            if (isUseProxies() && request.getHeader("X-Forwarded-For") != null) {
                /* This header is a comma delimited list */
                for (String xfip : request.getHeader("X-Forwarded-For").split(",")) {
                    /* The proxy itself will sometimes populate this header with its own
                       address. Ordering in the spec is vague; we'll just take the last
                       entry not equal to the proxy. */
                    if (!xfip.trim().equals(request.getRemoteAddr())) {
                        ip = xfip.trim();
                    }
                }
            }

            doc1.addField("ip", ip);

            // Also store the referrer
            if (request.getHeader("referer") != null) {
                doc1.addField("referrer", request.getHeader("referer"));
            }
            try
            {
                String dns = DnsLookup.reverseDns(ip);
                doc1.addField("dns", dns.toLowerCase());
            }
            catch (Exception e)
            {
                log.error("Failed DNS Lookup for IP:" + ip);
                log.debug(e.getMessage(), e);
            }

            // Save the location information if valid, save the event without
            // location information if not valid
            if (locationService != null)
            {
                Location location = locationService.getLocation(ip);
                if (location != null
                        && !("--".equals(location.countryCode)
                        && location.latitude == -180 && location.longitude == -180))
                {
                    try
                    {
                        doc1.addField("continent", LocationUtils
                                .getContinentCode(location.countryCode));
                    }
                    catch (Exception e)
                    {
                        System.out.println("COUNTRY ERROR: " + location.countryCode);
                    }
                    doc1.addField("countryCode", location.countryCode);
                    doc1.addField("city", location.city);
                    doc1.addField("latitude", location.latitude);
                    doc1.addField("longitude", location.longitude);
                    doc1.addField("isBot", isSpiderBot);

                    if (request.getHeader("User-Agent") != null)
                    {
                        doc1.addField("userAgent", request.getHeader("User-Agent"));
                    }
                }
            }
        }

        if (dspaceObject != null) {
            doc1.addField("id", dspaceObject.getID());
            doc1.addField("type", dspaceObject.getType());
            storeParents(doc1, dspaceObject);
        }

        // Save the current time
        doc1.addField("time", DateFormatUtils.format(new Date(), DATE_FORMAT_8601));
        if (currentUser != null)
        {
            //doc1.addField("epersonid", currentUser.getID());
            doc1.addField("epersonid", 0); // twb27, 4/2014, GU does not track individual events by user
        }

        return doc1;
    }
    private static SolrInputDocument getCommonSolrDoc(DSpaceObject dspaceObject, String ip, String userAgent, String xforwarderfor, EPerson currentUser) throws SQLException {
        boolean isSpiderBot = SpiderDetector.isSpider(ip);
        if (isSpiderBot &&
                !ConfigurationManager.getBooleanProperty("usage-statistics", "logBots", true))
        {
            return null;
        }

        SolrInputDocument doc1 = new SolrInputDocument();
        // Save our basic info that we already have
        if (isUseProxies() && xforwarderfor != null) {
            /* This header is a comma delimited list */
            String remoteAddr = ip;
            for (String xfip : xforwarderfor.split(",")) {
                /* The proxy itself will sometimes populate this header with its own
                   address. Ordering in the spec is vague; we'll just take the last
                   entry not equal to the proxy. */
                if (!xfip.trim().equals(remoteAddr)) {
                    ip = xfip.trim();
                }
            }
        }
        doc1.addField("ip", ip);

        try
        {
            String dns = DnsLookup.reverseDns(ip);
            doc1.addField("dns", dns.toLowerCase());
        }
        catch (Exception e)
        {
            log.error("Failed DNS Lookup for IP:" + ip);
            log.debug(e.getMessage(), e);
        }

        // Save the location information if valid, save the event without
        // location information if not valid
        if (locationService != null)
        {
            Location location = locationService.getLocation(ip);
            if (location != null
                    && !("--".equals(location.countryCode)
                    && location.latitude == -180 && location.longitude == -180))
            {
                try
                {
                    doc1.addField("continent", LocationUtils
                            .getContinentCode(location.countryCode));
                }
                catch (Exception e)
                {
                    System.out.println("COUNTRY ERROR: " + location.countryCode);
                }
                doc1.addField("countryCode", location.countryCode);
                doc1.addField("city", location.city);
                doc1.addField("latitude", location.latitude);
                doc1.addField("longitude", location.longitude);
                doc1.addField("isBot", isSpiderBot);

                if (userAgent != null)
                {
                    doc1.addField("userAgent", userAgent);
                }
            }
        }
        if (dspaceObject != null) {
            doc1.addField("id", dspaceObject.getID());
            doc1.addField("type", dspaceObject.getType());
            storeParents(doc1, dspaceObject);
        }

        // Save the current time
        doc1.addField("time", DateFormatUtils.format(new Date(), DATE_FORMAT_8601));
        if (currentUser != null)
        {
            doc1.addField("epersonid", currentUser.getID());
        }

        return doc1;
    }
    public static void postSearch(DSpaceObject resultObject, HttpServletRequest request, EPerson currentUser,
            List<String> queries, int rpp, String sortBy, String order, int page, DSpaceObject scope) {
        try
        {
            SolrInputDocument solrDoc = getCommonSolrDoc(resultObject, request, currentUser);
            if (solrDoc == null) return;

            for (String query : queries) {
                solrDoc.addField("query", query);
            }

            if (resultObject != null) {
                // We have a search result
                solrDoc.addField("statistics_type", StatisticsType.SEARCH_RESULT.text());
            } else {
                solrDoc.addField("statistics_type", StatisticsType.SEARCH.text());
            }
            // Store the scope
            if (scope != null) {
                solrDoc.addField("scopeId", scope.getID());
                solrDoc.addField("scopeType", scope.getType());
            }

            if (rpp != -1) {
                solrDoc.addField("rpp", rpp);
            }

            if (sortBy != null) {
                solrDoc.addField("sortBy", sortBy);
                if (order != null) {
                    solrDoc.addField("sortOrder", order);
                }
            }

            if (page != -1) {
                solrDoc.addField("page", page);
            }

            solr.add(solrDoc);
        }
        catch (RuntimeException re)
        {
            throw re;
        }
        catch (Exception e)
        {
            log.error(e.getMessage(), e);
        }
    }
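
    /*
     * A minimal usage sketch for postSearch (names are illustrative):
     *
     *   // Log a site-wide search for "climate", 10 results per page,
     *   // default sort, first page, no scoping community/collection:
     *   // SolrLoggerSpecial.postSearch(null, request, currentUser,
     *   //         Arrays.asList("climate"), 10, null, null, 1, null);
     *
     * Pass a non-null resultObject to record a SEARCH_RESULT event instead of
     * a SEARCH event.
     */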
    public static void postWorkflow(UsageWorkflowEvent usageWorkflowEvent) throws SQLException {
        try {
            SolrInputDocument solrDoc = getCommonSolrDoc(usageWorkflowEvent.getObject(), null, null);

            // Log the current collection & the scope!
            solrDoc.addField("owningColl", usageWorkflowEvent.getScope().getID());
            storeParents(solrDoc, usageWorkflowEvent.getScope());

            if (usageWorkflowEvent.getWorkflowStep() != null) {
                solrDoc.addField("workflowStep", usageWorkflowEvent.getWorkflowStep());
            }
            if (usageWorkflowEvent.getOldState() != null) {
                solrDoc.addField("previousWorkflowStep", usageWorkflowEvent.getOldState());
            }
            if (usageWorkflowEvent.getGroupOwners() != null) {
                for (int i = 0; i < usageWorkflowEvent.getGroupOwners().length; i++) {
                    Group group = usageWorkflowEvent.getGroupOwners()[i];
                    solrDoc.addField("owner", "g" + group.getID());
                }
            }
            if (usageWorkflowEvent.getEpersonOwners() != null) {
                for (int i = 0; i < usageWorkflowEvent.getEpersonOwners().length; i++) {
                    EPerson ePerson = usageWorkflowEvent.getEpersonOwners()[i];
                    solrDoc.addField("owner", "e" + ePerson.getID());
                }
            }
            solrDoc.addField("workflowItemId", usageWorkflowEvent.getWorkflowItem().getID());

            EPerson submitter = ((Item) usageWorkflowEvent.getObject()).getSubmitter();
            if (submitter != null) {
                solrDoc.addField("submitter", submitter.getID());
            }
            solrDoc.addField("statistics_type", StatisticsType.WORKFLOW.text());
            if (usageWorkflowEvent.getActor() != null) {
                solrDoc.addField("actor", usageWorkflowEvent.getActor().getID());
            }

            solr.add(solrDoc);
        }
        catch (Exception e)
        {
            // Log the exception; no need to rethrow it, the workflow shouldn't crash because of this!
            log.error(e.getMessage(), e);
        }
    }
    /**
     * Method just used to log the parents.
     * <ul>
     * <li>Community log: parent communities.</li>
     * <li>Collection log: owning communities & their parents.</li>
     * <li>Item log: owning collections/communities.</li>
     * <li>Bitstream log: owning item/collections/communities.</li>
     * </ul>
     *
     * @param doc1
     *            the current SolrInputDocument
     * @param dso
     *            the current DSpace object we want to log
     * @throws java.sql.SQLException
     *             in case of a database exception
     */
    public static void storeParents(SolrInputDocument doc1, DSpaceObject dso)
            throws SQLException
    {
        if (dso instanceof Community)
        {
            Community comm = (Community) dso;
            while (comm != null && comm.getParentCommunity() != null)
            {
                comm = comm.getParentCommunity();
                doc1.addField("owningComm", comm.getID());
            }
        }
        else if (dso instanceof Collection)
        {
            Collection coll = (Collection) dso;
            Community[] communities = coll.getCommunities();
            for (int i = 0; i < communities.length; i++)
            {
                Community community = communities[i];
                doc1.addField("owningComm", community.getID());
                storeParents(doc1, community);
            }
        }
        else if (dso instanceof Item)
        {
            Item item = (Item) dso;
            Collection[] collections = item.getCollections();
            for (int i = 0; i < collections.length; i++)
            {
                Collection collection = collections[i];
                doc1.addField("owningColl", collection.getID());
                storeParents(doc1, collection);
            }
        }
        else if (dso instanceof Bitstream)
        {
            Bitstream bitstream = (Bitstream) dso;
            Bundle[] bundles = bitstream.getBundles();
            for (int i = 0; i < bundles.length; i++)
            {
                Bundle bundle = bundles[i];
                Item[] items = bundle.getItems();
                for (int j = 0; j < items.length; j++)
                {
                    Item item = items[j];
                    doc1.addField("owningItem", item.getID());
                    storeParents(doc1, item);
                }
            }
        }
    }
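
    /*
     * For example, for a bitstream living in an item that sits in one
     * collection under a two-level community hierarchy, storeParents adds
     * fields roughly like:
     *
     *   owningItem: <item id>
     *   owningColl: <collection id>
     *   owningComm: <child community id>
     *   owningComm: <parent community id>
     *
     * (ids shown as placeholders; the recursion walks upward one level at a time).
     */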
    public static boolean isUseProxies()
    {
        return useProxies;
    }

    /**
     * Delete data from the index, as described by a query.
     *
     * @param query description of the records to be deleted.
     * @throws IOException
     * @throws SolrServerException
     */
    public static void removeIndex(String query) throws IOException,
            SolrServerException
    {
        solr.deleteByQuery(query);
        solr.commit();
    }
    public static Map<String, List<String>> queryField(String query,
            List oldFieldVals, String field)
    {
        Map<String, List<String>> currentValsStored = new HashMap<String, List<String>>();
        try
        {
            // Get one document (since all the metadata for all the values
            // should be the same, just get the first one we find)
            Map<String, String> params = new HashMap<String, String>();
            params.put("q", query);
            params.put("rows", "1");
            MapSolrParams solrParams = new MapSolrParams(params);
            QueryResponse response = solr.query(solrParams);
            // Make sure we at least got a document
            if (response.getResults().getNumFound() == 0)
            {
                return currentValsStored;
            }
            // We have at least one document, good
            SolrDocument document = response.getResults().get(0);
            // Get the info we need
        }
        catch (SolrServerException e)
        {
            e.printStackTrace();
        }
        return currentValsStored;
    }
    public static class ResultProcessor
    {
        public void execute(String query) throws SolrServerException, IOException {
            Map<String, String> params = new HashMap<String, String>();
            params.put("q", query);
            params.put("rows", "10");
            if (0 < statisticYearCores.size()) {
                params.put(ShardParams.SHARDS, StringUtils.join(statisticYearCores.iterator(), ','));
            }
            MapSolrParams solrParams = new MapSolrParams(params);
            QueryResponse response = solr.query(solrParams);

            long numbFound = response.getResults().getNumFound();

            // Process the first batch
            process(response.getResults());

            // Run over the rest
            for (int i = 10; i < numbFound; i += 10)
            {
                params.put("start", String.valueOf(i));
                solrParams = new MapSolrParams(params);
                response = solr.query(solrParams);
                process(response.getResults());
            }
        }

        public void commit() throws IOException, SolrServerException {
            solr.commit();
        }

        /**
         * Override to manage pages of documents.
         * @param docs a page of matching documents
         */
        public void process(List<SolrDocument> docs) throws IOException, SolrServerException {
            for (SolrDocument doc : docs) {
                process(doc);
            }
        }

        /**
         * Override to manage individual documents.
         * @param doc a single matching document
         */
        public void process(SolrDocument doc) throws IOException, SolrServerException {
        }
    }
    public static void markRobotsByIP()
    {
        for (String ip : SpiderDetector.getSpiderIpAddresses()) {
            try {
                /* Result processor that rewrites each matching record as a bot */
                ResultProcessor processor = new ResultProcessor() {
                    public void process(SolrDocument doc) throws IOException, SolrServerException {
                        doc.removeFields("isBot");
                        doc.addField("isBot", true);
                        SolrInputDocument newInput = ClientUtils.toSolrInputDocument(doc);
                        solr.add(newInput);
                        log.info("Marked " + doc.getFieldValue("ip") + " as bot");
                    }
                };

                /* Query for the ip, excluding results previously marked as bots. */
                processor.execute("ip:" + ip + "* AND -isBot:true");

                solr.commit();
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    public static void markRobotByUserAgent(String agent) {
        try {
            /* Result processor that rewrites each matching record as a bot */
            ResultProcessor processor = new ResultProcessor() {
                public void process(SolrDocument doc) throws IOException, SolrServerException {
                    doc.removeFields("isBot");
                    doc.addField("isBot", true);
                    SolrInputDocument newInput = ClientUtils.toSolrInputDocument(doc);
                    solr.add(newInput);
                }
            };

            /* Query for the user agent, excluding results previously marked as bots. */
            processor.execute("userAgent:" + agent + " AND -isBot:true");

            solr.commit();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
    public static void deleteRobotsByIsBotFlag()
    {
        try {
            solr.deleteByQuery("isBot:true");
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    public static void deleteIP(String ip)
    {
        try {
            solr.deleteByQuery("ip:" + ip + "*");
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    public static void deleteRobotsByIP()
    {
        for (String ip : SpiderDetector.getSpiderIpAddresses()) {
            deleteIP(ip);
        }
    }

    /*
     * //TODO: below are not used public static void
     * update(String query, boolean addField, String fieldName, Object
     * fieldValue, Object oldFieldValue) throws SolrServerException, IOException
     * { List<Object> vals = new ArrayList<Object>(); vals.add(fieldValue);
     * List<Object> oldvals = new ArrayList<Object>(); oldvals.add(fieldValue);
     * update(query, addField, fieldName, vals, oldvals); }
     */
    public static void update(String query, String action,
            List<String> fieldNames, List<List<Object>> fieldValuesList)
            throws SolrServerException, IOException
    {
        // Since there is NO in-place update,
        // we need to get our documents first
        // QueryResponse queryResponse = solr.query()//query(query, null, -1,
        // null, null, null);
        final List<SolrDocument> docsToUpdate = new ArrayList<SolrDocument>();

        ResultProcessor processor = new ResultProcessor() {
            public void process(List<SolrDocument> docs) throws IOException, SolrServerException {
                docsToUpdate.addAll(docs);
            }
        };
        processor.execute(query);

        // We have all the docs; delete the ones we don't need
        solr.deleteByQuery(query);

        // Add the new (updated) ones
        for (int i = 0; i < docsToUpdate.size(); i++)
        {
            SolrDocument solrDocument = docsToUpdate.get(i);
            // Now loop over our field name actions
            for (int j = 0; j < fieldNames.size(); j++)
            {
                String fieldName = fieldNames.get(j);
                List<Object> fieldValues = fieldValuesList.get(j);

                if (action.equals("addOne") || action.equals("replace"))
                {
                    if (action.equals("replace"))
                    {
                        solrDocument.removeFields(fieldName);
                    }

                    for (Object fieldValue : fieldValues)
                    {
                        solrDocument.addField(fieldName, fieldValue);
                    }
                }
                else if (action.equals("remOne"))
                {
                    // Remove the field
                    java.util.Collection<Object> values = solrDocument
                            .getFieldValues(fieldName);
                    solrDocument.removeFields(fieldName);
                    for (Object value : values)
                    {
                        // Keep all the values besides the one we need to remove
                        if (!fieldValues.contains(value))
                        {
                            solrDocument.addField(fieldName, value);
                        }
                    }
                }
            }

            SolrInputDocument newInput = ClientUtils
                    .toSolrInputDocument(solrDocument);
            solr.add(newInput);
        }
        solr.commit();
        // System.out.println("SolrLogger.update(\""+query+"\"):"+(new
        // Date().getTime() - start)+"ms,"+numbFound+"records");
    }
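
    /*
     * A minimal usage sketch for update (names and ids are illustrative):
     * replace the owningColl value on every record for item 123 with
     * collection 456:
     *
     *   // List<String> fields = Arrays.asList("owningColl");
     *   // List<List<Object>> values = new ArrayList<List<Object>>();
     *   // values.add(Arrays.<Object>asList(456));
     *   // SolrLoggerSpecial.update("id:123 AND type:2", "replace", fields, values);
     *
     * Supported actions are "addOne", "replace" and "remOne"; because there
     * is no in-place update here, matching docs are deleted and re-added.
     */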
    public static void query(String query, int max) throws SolrServerException
    {
        query(query, null, null, 0, max, null, null, null, null, null, false);
    }
    /**
     * Query used to get values grouped by the given facet field.
     *
     * @param query
     *            the query to be used
     * @param filterQuery
     *            an optional filter query to restrict the results
     * @param facetField
     *            the facet field on which to group our values
     * @param max
     *            the maximum number of values returned (e.g. for 10, the top
     *            10 will be given)
     * @param showTotal
     *            a boolean determining whether the total count should be
     *            given back as the last element of the array
     * @param facetQueries
     *            optional facet queries to add to the request
     * @return an array containing our results
     * @throws SolrServerException
     *             if Solr reports an error while querying
     */
    public static ObjectCount[] queryFacetField(String query,
            String filterQuery, String facetField, int max, boolean showTotal,
            List<String> facetQueries) throws SolrServerException
    {
        QueryResponse queryResponse = query(query, filterQuery, facetField,
                0, max, null, null, null, facetQueries, null, false);
        if (queryResponse == null)
        {
            return new ObjectCount[0];
        }

        FacetField field = queryResponse.getFacetField(facetField);
        // At least make sure we have one value
        if (0 < field.getValueCount())
        {
            // Create an array for our result
            ObjectCount[] result = new ObjectCount[field.getValueCount()
                    + (showTotal ? 1 : 0)];
            // Run over our results & store them
            for (int i = 0; i < field.getValues().size(); i++)
            {
                FacetField.Count fieldCount = field.getValues().get(i);
                result[i] = new ObjectCount();
                result[i].setCount(fieldCount.getCount());
                result[i].setValue(fieldCount.getName());
            }
            if (showTotal)
            {
                result[result.length - 1] = new ObjectCount();
                result[result.length - 1].setCount(queryResponse.getResults()
                        .getNumFound());
                result[result.length - 1].setValue("total");
            }
            return result;
        }
        else
        {
            // Return an empty array because we got no data
            return new ObjectCount[0];
        }
    }
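
    /*
     * A minimal usage sketch for queryFacetField (names and ids are
     * illustrative): top 5 countries for views of item 123, plus a trailing
     * "total" entry:
     *
     *   // ObjectCount[] counts = SolrLoggerSpecial.queryFacetField(
     *   //         "id:123 AND type:2", "-isBot:true", "countryCode",
     *   //         5, true, null);
     */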
    /**
     * Query used to get values grouped by the date.
     *
     * @param query
     *            the query to be used
     * @param max
     *            the maximum number of values returned (e.g. for 10, the top
     *            10 will be given)
     * @param dateType
     *            the type to be used (example: DAY, MONTH, YEAR)
     * @param dateStart
     *            the start date, Format: (-3, -2, ..); the date is calculated
     *            relative to today
     * @param dateEnd
     *            the end date, Format: (-2, +1, ..); the date is calculated
     *            relative to today
     * @param showTotal
     *            a boolean determining whether the total count should be
     *            given back as the last element of the array
     * @return an array containing our results
     * @throws SolrServerException
     *             if Solr reports an error while querying
     */
    public static ObjectCount[] queryFacetDate(String query,
            String filterQuery, int max, String dateType, String dateStart,
            String dateEnd, boolean showTotal) throws SolrServerException
    {
        QueryResponse queryResponse = query(query, filterQuery, null, 0, max,
                dateType, dateStart, dateEnd, null, null, false);
        if (queryResponse == null)
        {
            return new ObjectCount[0];
        }

        FacetField dateFacet = queryResponse.getFacetDate("time");
        // TODO: check that this cannot crash. (I checked it; it crashed!!!)
        // Guard against a missing date facet in the response.
        if (dateFacet == null)
        {
            return new ObjectCount[0];
        }
        // Create an array for our result
        ObjectCount[] result = new ObjectCount[dateFacet.getValueCount()
                + (showTotal ? 1 : 0)];
        // Run over our date facet & store all the values
        for (int i = 0; i < dateFacet.getValues().size(); i++)
        {
            FacetField.Count dateCount = dateFacet.getValues().get(i);
            result[i] = new ObjectCount();
            result[i].setCount(dateCount.getCount());
            result[i].setValue(getDateView(dateCount.getName(), dateType));
        }
        if (showTotal)
        {
            result[result.length - 1] = new ObjectCount();
            result[result.length - 1].setCount(queryResponse.getResults()
                    .getNumFound());
            // TODO: Make sure that this total is gotten out of the msgs.xml
            result[result.length - 1].setValue("total");
        }
        return result;
    }
    public static Map<String, Integer> queryFacetQuery(String query,
            String filterQuery, List<String> facetQueries)
            throws SolrServerException
    {
        QueryResponse response = query(query, filterQuery, null, 0, 1, null, null,
                null, facetQueries, null, false);
        return response.getFacetQuery();
    }

    public static ObjectCount queryTotal(String query, String filterQuery)
            throws SolrServerException
    {
        QueryResponse queryResponse = query(query, filterQuery, null, 0, -1, null,
                null, null, null, null, false);
        ObjectCount objCount = new ObjectCount();
        objCount.setCount(queryResponse.getResults().getNumFound());
        return objCount;
    }
    private static String getDateView(String name, String type)
    {
        if (name != null && name.matches("^[0-9]{4}\\-[0-9]{2}.*"))
        {
            /*
             * if("YEAR".equalsIgnoreCase(type)) return name.substring(0, 4);
             * else if("MONTH".equalsIgnoreCase(type)) return name.substring(0,
             * 7); else if("DAY".equalsIgnoreCase(type)) return
             * name.substring(0, 10); else if("HOUR".equalsIgnoreCase(type))
             * return name.substring(11, 13);
             */
            // Get our date
            Date date = null;
            try
            {
                SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT_8601);
                date = format.parse(name);
            }
            catch (ParseException e)
            {
                try
                {
                    // We should use the dcdate (the dcdate is used when
                    // generating random data)
                    SimpleDateFormat format = new SimpleDateFormat(
                            DATE_FORMAT_DCDATE);
                    date = format.parse(name);
                }
                catch (ParseException e1)
                {
                    e1.printStackTrace();
                }
                // e.printStackTrace();
            }
            String dateformatString = "dd-MM-yyyy";
            if ("DAY".equals(type))
            {
                dateformatString = "dd-MM-yyyy";
            }
            else if ("MONTH".equals(type))
            {
                dateformatString = "MMMM yyyy";
            }
            else if ("YEAR".equals(type))
            {
                dateformatString = "yyyy";
            }
            SimpleDateFormat simpleFormat = new SimpleDateFormat(
                    dateformatString);
            if (date != null)
            {
                name = simpleFormat.format(date);
            }
        }
        return name;
    }
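
    /*
     * Worked example for getDateView: given name = "2014-03-05T00:00:00.000Z"
     * (the DATE_FORMAT_8601 form Solr returns for date facets), type "DAY"
     * yields "05-03-2014", "MONTH" yields "March 2014" (in the default
     * locale) and "YEAR" yields "2014".
     */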
    public static QueryResponse query(String query, String filterQuery,
            String facetField, int rows, int max, String dateType, String dateStart,
            String dateEnd, List<String> facetQueries, String sort, boolean ascending)
            throws SolrServerException
    {
        if (solr == null)
        {
            return null;
        }

        SolrQuery solrQuery = new SolrQuery().setRows(rows).setQuery(query)
                .setFacetMinCount(1);
        addAdditionalSolrYearCores(solrQuery);

        // Set the date facet if present
        if (dateType != null)
        {
            solrQuery.setParam("facet.date", "time")
                    // EXAMPLE: NOW/MONTH+1MONTH
                    .setParam("facet.date.end",
                            "NOW/" + dateType + dateEnd + dateType)
                    .setParam("facet.date.gap", "+1" + dateType)
                    // EXAMPLE: NOW/MONTH-" + nbMonths + "MONTHS
                    .setParam("facet.date.start",
                            "NOW/" + dateType + dateStart + dateType + "S")
                    .setFacet(true);
        }
        if (facetQueries != null)
        {
            for (int i = 0; i < facetQueries.size(); i++)
            {
                String facetQuery = facetQueries.get(i);
                solrQuery.addFacetQuery(facetQuery);
            }
            if (0 < facetQueries.size())
            {
                solrQuery.setFacet(true);
            }
        }

        if (facetField != null)
        {
            solrQuery.addFacetField(facetField);
        }

        // Set the top x if present
        if (max != -1)
        {
            solrQuery.setFacetLimit(max);
        }

        // A filter is used instead of a regular query to improve
        // performance and ensure the search result ordering will
        // not be influenced

        // Choose to filter by the legacy spider IP list (may get too long to
        // properly filter all IPs)
        if (ConfigurationManager.getBooleanProperty("solr-statistics", "query.filter.spiderIp", false))
        {
            solrQuery.addFilterQuery(getIgnoreSpiderIPs());
        }

        // Choose to filter by the isBot field; may be overridden in the future
        // to allow views on stats based on bots.
        if (ConfigurationManager.getBooleanProperty("solr-statistics", "query.filter.isBot", true))
        {
            solrQuery.addFilterQuery("-isBot:true");
        }

        if (sort != null) {
            solrQuery.setSortField(sort, (ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc));
        }

        String bundles;
        if ((bundles = ConfigurationManager.getProperty("solr-statistics", "query.filter.bundles")) != null && 0 < bundles.length()) {
            /*
             * The code below creates a query that will allow only records
             * which do not have a bundle name (items, collections, ...) or
             * bitstreams that have a configured bundle name.
             */
            StringBuffer bundleQuery = new StringBuffer();
            // Also add the possibility that if no bundle name is there these results will also be returned!
            bundleQuery.append("-(bundleName:[* TO *]");
            String[] split = bundles.split(",");
            for (int i = 0; i < split.length; i++) {
                String bundle = split[i].trim();
                bundleQuery.append("-bundleName:").append(bundle);
                if (i != split.length - 1) {
                    bundleQuery.append(" AND ");
                }
            }
            bundleQuery.append(")");

            solrQuery.addFilterQuery(bundleQuery.toString());
        }

        if (filterQuery != null)
        {
            solrQuery.addFilterQuery(filterQuery);
        }

        QueryResponse response;
        try
        {
            response = solr.query(solrQuery);
        }
        catch (SolrServerException e)
        {
            System.err.println("Error using query " + query);
            throw e;
        }
        return response;
    }
    /** String of IP and Ranges in IPTable as a Solr Query */
    private static String filterQuery = null;

    /**
     * Returns, as a filterQuery string, all the IP addresses that should be ignored.
     *
     * @return a string query with IP addresses
     */
    public static String getIgnoreSpiderIPs() {
        if (filterQuery == null) {
            StringBuilder query = new StringBuilder();
            boolean first = true;
            for (String ip : SpiderDetector.getSpiderIpAddresses()) {
                if (!first) {
                    query.append(" AND ");
                }
                first = false;

                query.append("NOT(ip: ").append(ip).append(")");
            }
            filterQuery = query.toString();
        }
        return filterQuery;
    }
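
    /*
     * For example, with spider addresses 127.0.0.1 and 10.0.0.1 the generated
     * filter query is:
     *
     *   NOT(ip: 127.0.0.1) AND NOT(ip: 10.0.0.1)
     */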
    /**
     * Maintenance to keep a SOLR index efficient.
     * Note: This might take a long time.
     */
    public static void optimizeSOLR() {
        try {
            long start = System.currentTimeMillis();
            System.out.println("SOLR Optimize -- Process Started:" + start);
            solr.optimize();
            long finish = System.currentTimeMillis();
            System.out.println("SOLR Optimize -- Process Finished:" + finish);
            System.out.println("SOLR Optimize -- Total time taken:" + (finish - start) + " (ms).");
        } catch (SolrServerException sse) {
            System.err.println(sse.getMessage());
        } catch (IOException ioe) {
            System.err.println(ioe.getMessage());
        }
    }
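
    /*
     * The main method below is the "special" repair routine this gist exists
     * for: it shards the statistics core into per-year cores
     * (statistics-YYYY) by exporting each year's records as CSV and
     * re-importing them into a freshly created core. While rewriting the CSV
     * it drops the _version_ column, so Solr assigns new version values on
     * import (hence "forces new id/version values" in the gist description).
     */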
    public static void main(String[] args) throws IOException, SolrServerException {
        /*
         * Start by faceting by year so we can include each year in a separate core!
         */
        SolrQuery yearRangeQuery = new SolrQuery();
        yearRangeQuery.setQuery("*:*");
        yearRangeQuery.setRows(0);
        yearRangeQuery.setFacet(true);
        yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
        // We go back to the year 2000; this is a bit overkill but this way we ensure we have everything
        // The alternative would be to sort, but that isn't recommended since it would be a very costly query!
        yearRangeQuery.add(FacetParams.FACET_RANGE_START, "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
        // Add the +0year to ensure that we DO NOT include the current year
        yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
        yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
        yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

        // Create a temp directory to store our files in!
        File tempDirectory = new File(ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();

        QueryResponse queryResponse = solr.query(yearRangeQuery);
        // We only have one range query!
        List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();
        for (RangeFacet.Count count : yearResults) {
            long totalRecords = count.getCount();

            // Create a range query from this!
            // We start with our current year
            DCDate dcStart = new DCDate(count.getValue());
            Calendar endDate = Calendar.getInstance();
            // Advance one year for the start of the next one!
            endDate.setTime(dcStart.toDate());
            endDate.add(Calendar.YEAR, 1);
            DCDate dcEndDate = new DCDate(endDate.getTime());

            StringBuilder filterQuery = new StringBuilder();
            filterQuery.append("time:([");
            filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
            filterQuery.append(" TO ");
            filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append("]");
            // The next part of the filter query excludes the content from midnight of the next year!
            filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append(")");

            Map<String, String> yearQueryParams = new HashMap<String, String>();
            yearQueryParams.put(CommonParams.Q, "*:*");
            yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
            yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
            yearQueryParams.put(CommonParams.WT, "csv");

            // Start by creating a new core
            String coreName = "statistics-" + dcStart.getYear();
            HttpSolrServer statisticsYearServer = createCore(solr, coreName);

            System.out.println("Moving: " + totalRecords + " into core " + coreName);
            log.info("Moving: " + totalRecords + " records into core " + coreName);

            List<File> filesToUpload = new ArrayList<File>();
            for (int i = 0; i < totalRecords; i += 10000) {
                String solrRequestUrl = solr.getBaseURL() + "/select";
                solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

                GetMethod get = new GetMethod(solrRequestUrl);
                new HttpClient().executeMethod(get);
                InputStream csvInputstream = get.getResponseBodyAsStream();
                // Write the CSV output to a file!
                File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear() + "." + i + ".csv");
                CSVWriter bw = new CSVWriter(new FileWriter(csvFile));
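                // excl will hold the index of the _version_ column in the
                // exported CSV header (or stay -1 if it is absent); that
                // column is skipped when rewriting each row so the re-import
                // gets fresh version values.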
                int excl = -1;
                try {
                    CSVReader reader = new CSVReader(new InputStreamReader(csvInputstream));
                    String[] nextLine;
                    String[] firstLine = new String[0];
                    if ((nextLine = reader.readNext()) != null) {
                        firstLine = nextLine;
                        // Locate the _version_ column in the header row
                        for (int pi = 0; pi < firstLine.length; pi++) {
                            String s = firstLine[pi];
                            if (s == null) s = "";
                            if (s.equals("_version_")) {
                                excl = pi;
                                break;
                            }
                        }
                    }
                    // Copy every row (including the header) minus the _version_ column
                    for (; nextLine != null; nextLine = reader.readNext()) {
                        int sz = firstLine.length;
                        if (excl >= 0) sz--;
                        String[] outLine = new String[sz];
                        int outIndex = 0;
                        for (int pi = 0; pi < firstLine.length; pi++) {
                            // Pad short rows with an empty value
                            String s = (pi > nextLine.length - 1) ? "" : nextLine[pi];
                            if (pi == excl) continue;
                            if (s == null) s = "";
                            outLine[outIndex++] = s;
                        }
                        bw.writeNext(outLine);
                    }
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                bw.flush();
                bw.close();
                //FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
                filesToUpload.add(csvFile);

                // Add 10000 & start over again
                yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
            }
            for (File tempCsv : filesToUpload) {
                // Upload the data in the csv files to our new solr core
                try {
                    ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
                    contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
                    contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
                    contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

                    statisticsYearServer.request(contentStreamUpdateRequest);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            statisticsYearServer.commit(true, true);

            // Delete contents of this year from our year query!
            solr.deleteByQuery(filterQuery.toString());
            solr.commit(true, true);

            log.info("Moved " + totalRecords + " records into core: " + coreName);
        }

        FileUtils.deleteDirectory(tempDirectory);
    }
    private static HttpSolrServer createCore(HttpSolrServer solr, String coreName) throws IOException, SolrServerException {
        String solrDir = ConfigurationManager.getProperty("dspace.dir") + File.separator + "solr" + File.separator;
        String baseSolrUrl = solr.getBaseURL().replace("statistics", "");

        CoreAdminRequest.Create create = new CoreAdminRequest.Create();
        create.setCoreName(coreName);
        create.setInstanceDir("statistics");
        create.setDataDir(solrDir + coreName + File.separator + "data");
        HttpSolrServer solrServer = new HttpSolrServer(baseSolrUrl);
        create.process(solrServer);
        log.info("Created core with name: " + coreName);
        return new HttpSolrServer(baseSolrUrl + "/" + coreName);
    }
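
    /**
     * Repair routine: re-adds a bundleName to bitstream view records that are
     * missing one. Records for bitstreams that no longer exist are tagged
     * with the BITSTREAM_DELETED bundle name unless removeDeletedBitstreams
     * is true; collection and community logos get LOGO-COLLECTION /
     * LOGO-COMMUNITY.
     */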
    public static void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
        Context context = new Context();

        try {
            // First of all, retrieve the total number of records to be updated
            SolrQuery query = new SolrQuery();
            query.setQuery("*:*");
            query.addFilterQuery("type:" + Constants.BITSTREAM);
            // Only retrieve records which do not have a bundle name
            query.addFilterQuery("-bundleName:[* TO *]");
            query.setRows(0);
            addAdditionalSolrYearCores(query);
            long totalRecords = solr.query(query).getResults().getNumFound();

            File tempDirectory = new File(ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
            tempDirectory.mkdirs();
            List<File> tempCsvFiles = new ArrayList<File>();
            for (int i = 0; i < totalRecords; i += 10000) {
                Map<String, String> params = new HashMap<String, String>();
                params.put(CommonParams.Q, "*:*");
                params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
                params.put(CommonParams.WT, "csv");
                params.put(CommonParams.ROWS, String.valueOf(10000));
                params.put(CommonParams.START, String.valueOf(i));

                String solrRequestUrl = solr.getBaseURL() + "/select";
                solrRequestUrl = generateURL(solrRequestUrl, params);

                GetMethod get = new GetMethod(solrRequestUrl);
                new HttpClient().executeMethod(get);
                InputStream csvOutput = get.getResponseBodyAsStream();
                Reader csvReader = new InputStreamReader(csvOutput);
                List<String[]> rows = new CSVReader(csvReader).readAll();
                String[][] csvParsed = rows.toArray(new String[rows.size()][]);

                String[] header = csvParsed[0];
                // Attempt to find the bitstream id index!
                int idIndex = 0;
                for (int j = 0; j < header.length; j++) {
                    if (header[j].equals("id")) {
                        idIndex = j;
                    }
                }

                File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
                tempCsvFiles.add(tempCsv);
                CSVWriter csvp = new CSVWriter(new FileWriter(tempCsv));
                //csvp.setAlwaysQuote(false);

                // Write the header!
                csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));

                Map<Integer, String> bitBundleCache = new HashMap<Integer, String>();
                // Loop over each line (skip the header though)!
                for (int j = 1; j < csvParsed.length; j++) {
                    String[] csvLine = csvParsed[j];
                    // Write the default line!
                    int bitstreamId = Integer.parseInt(csvLine[idIndex]);
                    // Attempt to retrieve our bundle name from the cache!
                    String bundleName = bitBundleCache.get(bitstreamId);
                    if (bundleName == null) {
                        // Nothing found; attempt to retrieve the bitstream
                        Bitstream bitstream = Bitstream.find(context, bitstreamId);
                        if (bitstream != null) {
                            Bundle[] bundles = bitstream.getBundles();
                            if (bundles != null && 0 < bundles.length) {
                                Bundle bundle = bundles[0];
                                bundleName = bundle.getName();
                                context.removeCached(bundle, bundle.getID());
                            } else {
                                // No bundle found; we are either a collection or a community logo, check for it!
                                DSpaceObject parentObject = bitstream.getParentObject();
                                if (parentObject instanceof Collection) {
                                    bundleName = "LOGO-COLLECTION";
                                } else if (parentObject instanceof Community) {
                                    bundleName = "LOGO-COMMUNITY";
                                }
                                if (parentObject != null) {
                                    context.removeCached(parentObject, parentObject.getID());
                                }
                            }
                            // Cache the bundle name
                            bitBundleCache.put(bitstream.getID(), bundleName);
                            // Remove the bitstream from cache
                            context.removeCached(bitstream, bitstreamId);
                        }
                        // Check if we don't have a bundle name:
                        // if we don't have one & we are not deleting the deleted bitstreams,
                        // ensure that a BITSTREAM_DELETED bundle name is given!
                        if (bundleName == null && !removeDeletedBitstreams) {
                            bundleName = "BITSTREAM_DELETED";
                        }
                    }
                    csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
                }
                csvp.flush();
                csvp.close();
            }
            // Add all the separate csv files
            for (File tempCsv : tempCsvFiles) {
                ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
                contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
                contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
                contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

                solr.request(contentStreamUpdateRequest);
            }

            // Now that all our new bitstream stats are in place, delete all the old ones!
            solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            // Commit everything to wrap up
            solr.commit(true, true);
            // Clean up our directory!
            FileUtils.deleteDirectory(tempDirectory);
        } catch (Exception e) {
            log.error("Error while updating the bitstream statistics", e);
            throw e;
        } finally {
            context.abort();
        }
    }
    private static String generateURL(String baseURL, Map<String, String> parameters) throws UnsupportedEncodingException {
        boolean first = true;
        StringBuilder result = new StringBuilder(baseURL);
        for (String key : parameters.keySet())
        {
            if (first)
            {
                result.append("?");
                first = false;
            }
            else
            {
                result.append("&");
            }

            result.append(key).append("=").append(URLEncoder.encode(parameters.get(key), "UTF-8"));
        }

        return result.toString();
    }

    private static void addAdditionalSolrYearCores(SolrQuery solrQuery) {
        // Only add if needed
        if (0 < statisticYearCores.size()) {
            // The shards are a comma separated list of the URLs to the cores
            solrQuery.add(ShardParams.SHARDS, StringUtils.join(statisticYearCores.iterator(), ","));
        }
    }
}