Skip to content

Instantly share code, notes, and snippets.

@johnsardine
Created July 23, 2013 15:48
Show Gist options
  • Select an option

  • Save johnsardine/6063480 to your computer and use it in GitHub Desktop.

Select an option

Save johnsardine/6063480 to your computer and use it in GitHub Desktop.
O ficheiro Parse.php corre uma instância da classe ParseFile para cada ficheiro.
<?php namespace Tailor\Carmim;
use log;
use SplFileObject;
use Illuminate\Support\Facades\File;
use Symfony\Component\Finder\SplFileInfo;
class Parse
{
	/*
	 * Maps each known ERP export file (lowercase filename, no extension)
	 * to the database table its rows are imported into.
	 */
	public static $file_table_map = array(
		'detalhe_historico_entregas' => 'transactions', // Delivery movements
		'fichassocios_fa' => 'users',
		'limites_entrega_produtor' => 'limits',
		'resumo_historico_entregas' => 'history', // Producer history
	);

	// Reverse of $file_table_map (table => file); populated by init().
	public static $table_file_map = array();

	/**
	 * Runs the import: scans <base_path>/erp for the mapped CSV files and
	 * feeds each one to a ParseFile instance, then reports the total row
	 * count, processing time and memory usage to the console (when given),
	 * to the log, and — in debug mode without a console — to the browser.
	 *
	 * @param object|null $console Optional CLI command (CarmimImport) used
	 *                             for progress output; null when invoked
	 *                             from the web.
	 * @return void
	 */
	public static function init($console = null)
	{
		$debug = true;

		// When debugging through the browser, stream output as it happens.
		if ($debug && !$console) {
			ob_start();
			echo '<pre>';
			ob_flush();
			flush();
		}

		// Build the reverse lookup (table => file).
		foreach (static::$file_table_map as $file => $table) {
			static::$table_file_map[$table] = $file;
		}

		$files_dir = base_path().'/erp';
		$row_count = 0;

		$memory_start = memory_get_usage(true);
		$start_time = microtime(true);

		// Raise the php.ini limits for the duration of the import,
		// remembering the defaults so they can be restored afterwards.
		$default_execution_time = ini_get('max_execution_time');
		$default_memory_limit = ini_get('memory_limit');
		ini_set('max_execution_time', (60 * 16)); // 16 minutes
		ini_set('memory_limit', '256M');

		// If console exists, send info to cli
		if ($console) {
			$console->info('Import started');
			$console->info('---');
			$console->info('Max execution time: '.ini_get('max_execution_time'));
			$console->info('Memory limit: '.ini_get('memory_limit'));
		}

		// Walk every file in the erp directory, importing the mapped ones.
		$files = File::allFiles($files_dir);
		foreach ($files as $file_info) {
			$file_path = $file_info->getRealPath();
			$file_pathinfo = pathinfo($file_path);

			// The filename (without extension) identifies the target table.
			$file_name = strtolower($file_pathinfo['filename']);

			if ($debug && !$console) {
				echo $file_name." --------------- \n\r";
				ob_flush();
				flush();
			}

			// If the file is not in the table map, ignore it.
			if (!isset(static::$file_table_map[$file_name]))
				continue;

			if ($console) {
				$console->info('');
				$console->info('');
				$console->info('Importing: '.$file_path);
				$console->info('---');
				$console->info('');
			}

			$table_name = static::$file_table_map[$file_name];

			$options = array(
				'table' => $table_name,
				// Both per-file id columns map onto the generic "uniqid" key.
				'column_map' => array(
					'idtmov' => 'uniqid',
					'idtlim' => 'uniqid',
				),
				'parse_dates' => array(
					'data_entrega',
					'datanasc'
				),
				'debug' => $debug,
				'console' => $console
			);

			$file_parser = new ParseFile(new SplFileObject($file_path), $options);
			$file_parser->parse();

			// Accumulate the total row count, then free the parser.
			$row_count += $file_parser->row_count;
			unset($file_parser);
		}

		// Restore the php.ini limits to their previous values.
		ini_set('max_execution_time', $default_execution_time);
		ini_set('memory_limit', $default_memory_limit);

		$memory_end = memory_get_usage(true);
		$end_time = microtime(true);
		$script_time = $end_time - $start_time;
		$process_minutes = floor($script_time / 60);
		$process_seconds = intval((($script_time / 60) - $process_minutes) * 60);

		$processed_in = sprintf("Processing time: %s minutes (%.3f seconds).", ($process_minutes.':'.str_pad($process_seconds, 2, 0, STR_PAD_LEFT)), $script_time);
		$memory_usage = sprintf("Memory usage: %sMb", ($memory_end-$memory_start) / 1048576);
		$processed_rows = sprintf("Processed %s rows", $row_count);

		// If console exists, send info to cli
		if ($console) {
			$console->info('');
			$console->info('');
			$console->info('Import finished');
			$console->info('---');
			$console->info($processed_in);
			$console->info($memory_usage);
			$console->info($processed_rows);
		}

		Log::info('Import Complete', array(
			$processed_in,
			$memory_usage,
			$processed_rows
		));

		if ($debug && !$console) {
			var_dump(array(
				$processed_in,
				$memory_usage,
				$processed_rows
			));

			echo '</pre>';
			ob_flush();
			flush();
		}
	}
}
<?php namespace Tailor\Carmim;
use Config;
use DB;
use Log;
use Illuminate\Support\Facades\File;
use SplFileObject;
use Symfony\Component\Finder\SplFileInfo;
class ParseFile
{
	// Raw PDO handle, used for the prepared insert/update statements.
	public $pdo;

	/*
	 * Excludes certain columns from the file.
	 * NOTE(review): declared but never consulted inside parse() — confirm intent.
	 */
	public $exclude_column = array();

	/*
	 * Specify different column names for existing columns
	 * (file column name => database column name).
	 */
	public $column_map = array();

	// Columns whose values are normalised to 'Y-m-d H:i:s' via strtotime().
	public $parse_dates = array(
		'data_entrega',
	);

	/*
	 * To what table should the data go (without the configured prefix).
	 */
	public $table = '';

	/*
	 * Should the columns be lowercase.
	 * NOTE(review): declared but never consulted inside parse() — confirm intent.
	 */
	public $lowercase_columns = true;

	// When true, created_at (inserts) / updated_at (updates) are written too.
	public $push_timestamps = true;

	/*
	 * The file being parsed.
	 * Instance of SplFileObject.
	 */
	public $file = '';
	public $file_info = '';     // SplFileInfo for the file's directory (getPathInfo()).
	public $file_pathinfo = ''; // Result of pathinfo() on the full pathname.
	public $file_data = array();
	public $row_count = 0;      // Number of CSV data rows processed by parse().
	public $debug = false;
	public $console = ''; // Will hold an instance of CarmimImport (the CLI command)

	/**
	 * @param SplFileObject $file    Semicolon-separated CSV file to import.
	 * @param array         $options Overrides for any of the public properties
	 *                               above (table, column_map, parse_dates, ...).
	 */
	public function __construct(SplFileObject $file, array $options = array())
	{
		$this->pdo = DB::connection()->getPdo();
		$this->file = $file;
		// Copy recognised options onto the matching public property.
		// Note: isset() means options whose property default is null are skipped.
		foreach ($options as $key => $value) {
			if ( !isset($this->{$key}) ) continue;
			$this->{$key} = $value;
		}
		$this->file_info = $this->file->getPathInfo();
		$this->file_pathinfo = pathinfo($this->file->getPathname());
		// get file path $this->file->getPathname();
	}

	/**
	 * Imports the CSV into $this->table as an upsert: the first line is taken
	 * as the header, each following line is keyed on the "uniqid" column
	 * ("n_socio" for the users table) and is UPDATEd when that key already
	 * exists in the table, INSERTed otherwise. Progress percentage is shown
	 * when a console is attached or debug is on.
	 *
	 * Sets $this->row_count to the number of lines processed.
	 *
	 * @return $this
	 */
	public function parse()
	{
		$debug = $this->debug;
		$file = $this->file;
		// Read lines as parsed CSV arrays, skipping blanks and newlines.
		$file->setFlags(SplFileObject::READ_CSV | SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE);
		$file->setCsvControl(';');
		$percentage = 0;
		$old_percentage = 0;
		$count = 0;
		// Query logging would retain every statement in memory — disable it.
		DB::connection()->disableQueryLog();
		$table = $this->table;
		$table_prefix = Config::get('database.connections.mysql.prefix');
		// Key column used to decide insert vs update.
		$table_index = 'uniqid';
		if ($table === 'users')
			$table_index = 'n_socio';
		$column_map = $this->column_map;
		$console = $this->console;
		$parse_dates = $this->parse_dates;
		$push_timestamps = $this->push_timestamps;
		// One timestamp for the whole run, applied to every touched row.
		$datetime_now = date('Y-m-d H:i:s');
		// Calculate line count only if cli/debug, to provide a percentage.
		// (Requires a full pre-pass over the file, then a rewind.)
		$total_line_count = 0;
		if ($console || $debug) {
			while ( $file->valid()) {
				++$total_line_count; $row = $file->fgetcsv();
				unset($row);
			}
			$file->rewind();
		}
		// Grab the first row since it holds the headers.
		$fields = $file->current();
		// Ignore first, since they are the fields
		//$file->next();
		//$file->seek(69290);
		// Normalise header names: utf8-encode, slugify, '-' -> '_', then
		// apply the caller's column_map renames.
		$fields = array_map(function($value) use ($column_map) {
			$field = str_replace('-', '_', Slug::make(utf8_encode($value)));
			return ( isset($column_map[$field]) ) ? $column_map[$field] : $field;
		}, $fields);
		$prepare_columns = array();
		$prepare_values = array();
		$prepare_column_value = array();
		// Build the three SQL fragments per column: `col`, :col, `col` = :col.
		array_map(function($value) use (&$prepare_columns, &$prepare_values, &$prepare_column_value) {
			$prepare_columns[$value] = '`'.$value.'`';
			$prepare_values[$value] = ':'.$value;
			$prepare_column_value[$value] = '`'.$value.'` = :'.$value;
		}, $fields);
		// Push timestamp columns into fields.
		// NOTE(review): the INSERT gets only created_at and the UPDATE gets
		// only updated_at; the per-row code below populates exactly the
		// matching placeholder for each branch.
		if ($push_timestamps === true) {
			$prepare_columns[] = '`created_at`';
			$prepare_values['created_at'] = ':created_at';
			$prepare_column_value['updated_at'] = '`updated_at` = :updated_at';
		}
		// Build insert statement
		$prepare_insert = $this->pdo->prepare(sprintf(
			'INSERT INTO `%s` (%s) VALUES (%s)',
			$table_prefix.$table,
			implode(', ', $prepare_columns),
			implode(', ', $prepare_values)
		));
		// Build update statement; the key column moves from the SET list
		// into the WHERE clause ('%3$s' reuses the third sprintf argument).
		unset($prepare_column_value[$table_index]); // remove it from implode and add manually
		$prepare_update = $this->pdo->prepare(sprintf(
			'UPDATE `%s` SET %s WHERE `%3$s` = :%3$s',
			$table_prefix.$table,
			implode(', ', $prepare_column_value),
			$table_index
		));
		$prepare_columns = null;
		$prepare_values = null;
		$prepare_column_value = null;
		// Get existing key values up-front, flipped into the array keys so
		// existence checks are O(1) isset() lookups instead of queries.
		$existing_uniqid = array();
		$existing_uniqid_raw = DB::table($this->table)->lists($table_index);
		foreach ($existing_uniqid_raw as $eur_uniqid) {
			$existing_uniqid[$eur_uniqid] = $eur_uniqid;
		}
		// Go through each of the csv lines and make a first cleanup and organization
		while ($file->valid())
		{
			// Exit execution, no need while in testing
			//if ($count >= 5) break;
			//if ($count >= 10000) break;
			//if ($count >= 40000) break;
			//if ($count >= 8600) break;
			// Sleep for a while each 1000
			// if ($count % 500 == 0) {
			// //sleep(1);
			// //time_nanosleep(0, 10000000);
			// }
			// Get row index
			//$row_index = $file->key();
			// Get csv line
			$row = $file->fgetcsv();
			// If the first item of the row is not set, the line is empty; skip it.
			if (!isset($row[0]))
				continue;
			// Re-key the row from numeric indices to the header column names,
			// trimming and utf8-encoding each value along the way.
			foreach ($row as $row_index => $row_value) {
				// Insert processed key => value
				$row[$fields[$row_index]] = utf8_encode(trim($row_value));
				// Unset old numeric key and value
				unset($row[$row_index]);
			}
			// Normalise the configured date columns to MySQL datetime format.
			foreach ($parse_dates as $field) {
				if (!isset($row[$field])) continue;
				$row[$field] = date('Y-m-d H:i:s', strtotime($row[$field]));
			}
			// PROCESS ROW INSERTION
			// Storing all data into a huge array will consume lots of memory, check if all code can be done within here
			$uniqid = $row[$table_index];
			$row_exists = isset($existing_uniqid[$uniqid]); // Unsetting this increases memory, in_array is less efficient
			// Login contribuinte
			// Update when the key already exists in the table
			if ($row_exists)
			{
				if ($push_timestamps === true) {
					$row['updated_at'] = $datetime_now;
				}
				$query = $prepare_update->execute($row);
			}
			// Insert otherwise
			else
			{
				if ($push_timestamps === true) {
					$row['created_at'] = $datetime_now;
				}
				$query = $prepare_insert->execute($row);
			}
			// PROCESS ROW INSERTION
			// Null-out per-row temporaries (chosen over unset; see note below).
			$row = null;
			$uniqid = null;
			$row_exists = null;
			$query = null;
			//unset($row, $uniqid, $row_exists, $query); // spends more memory
			// Show progress
			// If console exists, send info to cli
			if ($console || $debug) {
				$percentage = number_format(($count*100) / $total_line_count, 0);
				if ($old_percentage !== $percentage) {
					if ($console)
						$console->comment('Progress: '.$percentage.'%');
					if ($debug && !$console) {
						echo sprintf('Progress %s%%', $percentage)."\n\r";
						ob_flush();
						flush();
					}
				}
				unset($old_percentage);
				$old_percentage = $percentage;
				unset($percentage);
			}
			++$count;
		}
		// Expose the processed line count and release the working data.
		$this->row_count = $count;
		$count = null;
		$existing_uniqid = null;
		$percentage = null;
		$old_percentage = null;
		//unset($count, $existing_uniqid, $percentage, $old_percentage);
		return $this;
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment