Last active
December 20, 2015 10:48
-
-
Save robsears/6117952 to your computer and use it in GitHub Desktop.
This is just a quick and simple script for importing MySQL data into a Solr index. Very basic and only handles data in a single table; if your data is spread across multiple tables using JOINs, etc, then you're out of luck. Feel free to repurpose any of this code to fit your specific needs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// A quick and dirty script for bulk importing MySQL data into a Solr index | |
// Edit this array with the MySQL fields that will be sent to the SOLR index | |
// The key is the name of the MySQL column and the value is its respective Solr field | |
// | |
// Example: 'mysql_col_name' => 'solr_field' | |
// | |
$field_mappings = array( | |
'id' => 'id', | |
'name' => 'name', | |
'manufacturer' => 'manufacturer', | |
'description' => 'description', | |
'created_at' => 'created_at', | |
); | |
// Edit these fields to match your database: | |
const DB_HOST = 'localhost'; | |
const DB_USERNAME = 'admin'; | |
const DB_PASSWORD = 'password'; | |
const DB_DBNAME = 'database'; | |
const DB_TABLE = 'table'; | |
// Edit these fields to match your Solr index: | |
const SOLR_PROTOCOL = 'http://'; | |
const SOLR_HOST = 'localhost'; | |
const SOLR_PORT = 8080; | |
const SOLR_LOCATION = '/solr'; | |
// Set your batch size. Note that if the fields you're sending have lots of text | |
// it may get truncated or timed out: | |
const BATCH_SIZE = 10; | |
// ---------------------------------------------------- | |
// Begin importation process: | |
// ---------------------------------------------------- | |
$batch = 0; // A counter for the number of items in the batch | |
$total = 0; // A counter for the total items processed | |
$xmlString = "<add>\n"; // An XML string of data to send to the Solr index | |
// A MySQL query to select the indexed fields from the database: | |
$query = "SELECT " . implode(",", $field_mappings) . " FROM " . DB_TABLE; | |
$result = dbquery($query); // The matching data from the db | |
$totals = mysql_num_rows($result); // The total number of matching rows in the db | |
// Iterate through the data and build | |
while ($row = mysql_fetch_assoc($result)) { | |
$xmlString .= "\t<doc>\n"; | |
foreach ($field_mappings as $mysql_col => $solr_field) { | |
$xmlString .= "\t\t<field name=\"" . $solr_field . "\">" . $row[$mysql_col] . "</field>\n"; | |
} | |
$xmlString .= "\t</doc>\n"; | |
$batch++; | |
$total++; | |
if ($batch == BATCH_SIZE || $total == $totals) { | |
$batch = 0; | |
$xmlString .= "</add>\n\n"; | |
$server_status = sendXML($xmlString); | |
if ($server_status['status'] == TRUE) { | |
print "Successfully sent " . $batch . " rows to the Solr index. Server returned: " . $server_status['message'] . "<br />"; | |
$xmlString = "<add>\n"; | |
} | |
else { | |
print "Script reached an error. cURL returned the following message: " . $server_status['message'] . "<br />Stopping"; | |
exit(); | |
} | |
} | |
} | |
/* | |
A helper function for making MySQL queries | |
Input: A MySQL query | |
Return: The query results | |
*/ | |
function dbquery($query) { | |
mysql_connect(DB_HOST, DB_USERNAME, DB_PASSWORD) or die(mysql_error()); | |
mysql_select_db(DB_DBNAME) or die(mysql_error()); | |
return mysql_query($query); | |
} | |
/* | |
A helper function for sending data to the Solr index | |
Input: An XML-formatted string to send to the Solr index | |
Return: An array containing a return status and message | |
Adapted from code by Robert Capra | |
http://www.ils.unc.edu/~rcapra/solr-update-php.php | |
*/ | |
function sendXML($xmlString) { | |
$url = SOLR_PROTOCOL . SOLR_HOST . ":" . SOLR_PORT . SOLR_LOCATION . "/update"; | |
print "Sending the following data to " . $url . ": " . $xmlString; | |
$header = array("Content-type:text/xml; charset=utf-8"); | |
$ch = curl_init(); | |
curl_setopt($ch, CURLOPT_URL, $url); | |
curl_setopt($ch, CURLOPT_HTTPHEADER, $header); | |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | |
curl_setopt($ch, CURLOPT_POST, 1); | |
curl_setopt($ch, CURLOPT_POSTFIELDS, $xmlString); | |
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); | |
curl_setopt($ch, CURLINFO_HEADER_OUT, 1); | |
$data = curl_exec($ch); | |
if (curl_errno($ch)) { | |
return array('status' => FALSE, 'message' => curl_error($ch)); | |
} else { | |
curl_close($ch); | |
return array('status' => TRUE, 'message' => $data); | |
} | |
} | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Websolr customers should use the following settings:
For example: