Skip to content

Instantly share code, notes, and snippets.

@Ortuna
Created September 28, 2012 18:32
Show Gist options
  • Save Ortuna/3801415 to your computer and use it in GitHub Desktop.
Save Ortuna/3801415 to your computer and use it in GitHub Desktop.
Quick and dirty import into CiviCRM from a webcrawl
<?php
/*
 * Quick and dirty command-line import of crawled calendar events into CiviCRM.
 *
 * All include paths below are relative and DRUPAL_ROOT is '.', so the script
 * has to be started from the Drupal root directory.
 */
define('SOURCE', '[redacted]');
define('DRUPAL_ROOT', '.');
// NOTE(review): the IMPORT_* constants are not referenced in the visible part
// of this script; presumably they are CiviCRM option IDs -- confirm before use.
define('IMPORT_LOCATION_MAIN', 3);
define('IMPORT_WEBSITE_MAIN', 6);
define('IMPORT_PHONE_MOBILE', 2);
define('IMPORT_PHONE_FAX', 3);

$stdout = fopen('php://stdout', 'w');
fwrite($stdout, "Import Script\n");

// Site specific variables
$username = "admin";
// The URL is split into components because the code below reads its 'host'
// (and, in the disabled line, its 'path') entry.
$drupal_base_url = parse_url('[redacted]');

// Fake the web-server environment that the Drupal bootstrap reads when the
// script runs from the command line. Falls back to 'localhost' when the
// configured URL has no host component (or could not be parsed at all).
$_SERVER['HTTP_HOST'] = isset($drupal_base_url['host']) ? $drupal_base_url['host'] : 'localhost';
// $_SERVER['PHP_SELF'] = $drupal_base_url['path'] . '/index.php';
$_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
$_SERVER['REMOTE_ADDR'] = NULL;
$_SERVER['REQUEST_METHOD'] = NULL;

require_once 'includes/bootstrap.inc';
require_once 'lib/simple_html_dom.php';
drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
global $user;

// Load CiviCRM, then crawl the source page and import every event found.
init();
crawl_events(SOURCE);
/**
 * Creates a CiviCRM event by calling civicrm_event_create().
 *
 * @param array $params Event fields, passed unchanged to civicrm_event_create().
 *
 * @return string|null The API's error message when civicrm_error() flags the
 *                     response; null otherwise (the response is then dumped
 *                     to standard output with print_r()).
 */
function create_event($params)
{
    // The error check, the error message and the dump must all look at the
    // same variable that received the API response.
    $result = civicrm_event_create($params);

    if (civicrm_error($result)) {
        return $result['error_message'];
    }

    print_r($result);
    return null;
}
/**
 * Crawls a listing page and imports every linked event page.
 *
 * Every anchor whose href matches "calendar_day" (case-insensitive) is handed
 * to parse_event(); one status line per link is printed.
 *
 * @param string $url Address of the page that links to the event pages.
 *
 * @return void
 */
function crawl_events($url)
{
    $html = file_get_html($url);
    // file_get_html() yields false when the page cannot be loaded; calling
    // find() on that would abort the whole script with a fatal error.
    if (!$html) {
        print "Could not fetch event list from $url\n";
        return;
    }

    $crawl_urls = array(); // list of event URLs to crawl
    foreach ($html->find('a') as $element) {
        $link_url = $element->href;
        if (preg_match("/calendar_day/i", $link_url)) {
            $crawl_urls[] = $link_url;
        }
    }

    foreach ($crawl_urls as $crawl_url) {
        if (parse_event($crawl_url)) {
            print "Created event from $crawl_url\n";
        } else {
            print "Could not create event from $crawl_url\n";
        }
    }
}
/**
 * Scrapes one event page and creates the matching CiviCRM event.
 *
 * The date is read from the text following "date(s):" in the page's plain
 * text and must be in "F j, Y" form (e.g. "January 5, 2012"). The page's time
 * field is deliberately not imported: only the date is sent as start_date.
 *
 * @param string $crawl_url Address of the event page.
 *
 * @return bool True when the event was handed to create_event() without an
 *              error being reported; false when the page could not be read,
 *              had no usable date, or create_event() returned an error.
 */
function parse_event($crawl_url)
{
    $data = file_get_html($crawl_url);
    if (!$data) {
        print "Could not import $crawl_url .. skipping\n";
        return false;
    }

    /* Date parse */
    preg_match("/date\(s\)\:(.*)/i", $data->plaintext, $matches);
    if (empty($matches[1])) {
        print "Could not import $crawl_url .. skipping\n";
        return false;
    }
    $date_text = trim(html_entity_decode($matches[1], ENT_QUOTES, "UTF-8"));
    $parsed = date_parse_from_format("F j, Y", $date_text);
    // Unparseable text leaves these fields false, which would otherwise be
    // formatted as the bogus date "00000000".
    if ($parsed["year"] === false || $parsed["month"] === false || $parsed["day"] === false) {
        print "Could not import $crawl_url .. skipping\n";
        return false;
    }
    $date = sprintf('%04d%02d%02d', $parsed["year"], $parsed["month"], $parsed["day"]);

    /* get Content */
    // find() with an index returns null when the element is missing.
    $content_node = $data->find(".content", 2);
    $content = $content_node ? $content_node->outertext : '';
    $title_node = $data->find(".calendar_header2", 0);
    $title = $title_node
        ? trim(html_entity_decode($title_node->innertext, ENT_QUOTES, "UTF-8"))
        : '';

    $event = array(
        'title' => $title,
        'description' => $content,
        'is_public' => true,
        'event_type_id' => 1,
        'start_date' => "$date",
        'is_active' => true,
    );

    // create_event() returns an error message on failure and nothing on
    // success, so any non-null return means the event was not created.
    $error = create_event($event);
    if ($error !== null) {
        print "Event creation failed for $crawl_url: $error\n";
        return false;
    }
    return true;
}
/**
 * Loads the CiviCRM settings and classes this script relies on and switches
 * mbstring to UTF-8.
 *
 * The include order is significant: the site settings are loaded before the
 * configuration singleton is requested.
 *
 * @return void
 */
function init()
{
    require_once 'sites/default/civicrm.settings.php';
    require_once 'CRM/Core/Config.php';
    // Called only for its side effect (presumably initialising CiviCRM's
    // configuration -- confirm); the returned object is not needed here, so
    // no by-reference assignment to a local is kept.
    CRM_Core_Config::singleton();
    require_once 'CRM/Core/Error.php';
    require_once 'api/v2/Event.php';

    mb_language('uni');
    mb_internal_encoding('UTF-8');
}
/**
 * Smoke test: looks up a contact by e-mail address through civicrm_api()
 * (API version 3) and dumps the response with print_r().
 *
 * @return void
 */
function import_test()
{
    $lookup = array(
        'email' => '[email protected]',
        'version' => 3,
    );

    print_r(civicrm_api('contact', 'get', $lookup));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment