Created
July 23, 2013 15:48
-
-
Save johnsardine/6063480 to your computer and use it in GitHub Desktop.
O ficheiro Parse.php corre uma instância da classe ParseFile para cada ficheiro.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php namespace Tailor\Carmim; | |
| use log; | |
| use SplFileObject; | |
| use Illuminate\Support\Facades\File; | |
| use Symfony\Component\Finder\SplFileInfo; | |
class Parse
{
    /**
     * Maps an import file name (lowercased, extension stripped) to the
     * database table its rows should be loaded into.
     */
    public static $file_table_map = array(
        'detalhe_historico_entregas' => 'transactions', // delivery movements
        'fichassocios_fa'            => 'users',
        'limites_entrega_produtor'   => 'limits',
        'resumo_historico_entregas'  => 'history',      // producer history
    );

    /**
     * Reverse of $file_table_map (table => file). Populated by init().
     */
    public static $table_file_map = array();

    /**
     * Scans <base_path>/erp and runs a ParseFile import for every file
     * whose name appears in $file_table_map, reporting progress either to
     * the CLI command ($console) or, in debug mode, to the HTTP response.
     *
     * @param object|null $console Optional CLI command instance (CarmimImport)
     *                             used for progress output; null when invoked
     *                             over HTTP.
     * @return void
     */
    public static function init($console = null)
    {
        $debug = true;

        // When debugging over HTTP, stream progress inside a <pre> block.
        if ($debug && !$console) {
            ob_start();
            echo '<pre>';
            ob_flush();
            flush();
        }

        // Build the reverse lookup (table => file).
        foreach (static::$file_table_map as $file => $table) {
            static::$table_file_map[$table] = $file;
        }

        $files_dir = base_path() . '/erp';

        $row_count    = 0;
        $memory_start = memory_get_usage(true);
        $start_time   = microtime(true);

        // Raise execution limits to prevent timeouts on large imports;
        // keep the defaults so they can be restored when we are done.
        $default_execution_time = ini_get('max_execution_time');
        $default_memory_limit   = ini_get('memory_limit');
        ini_set('max_execution_time', 60 * 16); // 16 minutes
        ini_set('memory_limit', '256M');

        // If console exists, send info to cli
        if ($console) {
            $console->info('Import started');
            $console->info('---');
            $console->info('Max execution time: ' . ini_get('max_execution_time'));
            $console->info('Memory limit: ' . ini_get('memory_limit'));
        }

        // Get all files in erp directory and import the mapped ones.
        $files = File::allFiles($files_dir);
        foreach ($files as $file_info) {
            $file_path     = $file_info->getRealPath();
            $file_pathinfo = pathinfo($file_path);
            $file_name     = strtolower($file_pathinfo['filename']);

            if ($debug && !$console) {
                echo $file_name . " --------------- \n\r";
                ob_flush();
                flush();
            }

            // If file is not in table map, ignore it.
            if (!isset(static::$file_table_map[$file_name])) {
                continue;
            }

            if ($console) {
                $console->info('');
                $console->info('');
                $console->info('Importing: ' . $file_path);
                $console->info('---');
                $console->info('');
            }

            $table_name = static::$file_table_map[$file_name];
            $options = array(
                'table' => $table_name,
                // Rename the ERP id columns to the canonical "uniqid" column.
                'column_map' => array(
                    'idtmov' => 'uniqid',
                    'idtlim' => 'uniqid',
                ),
                // Columns normalised to MySQL datetime format by ParseFile.
                'parse_dates' => array(
                    'data_entrega',
                    'datanasc',
                ),
                'debug'   => $debug,
                'console' => $console,
            );

            $file_parser = new ParseFile(new SplFileObject($file_path), $options);
            $file_parser->parse();

            // Accumulate the total row count across all files.
            $row_count += $file_parser->row_count;
            unset($file_parser); // free memory before the next file
        }

        // Restore the default limits now that the heavy work is done.
        ini_set('max_execution_time', $default_execution_time);
        ini_set('memory_limit', $default_memory_limit);

        $memory_end      = memory_get_usage(true);
        $script_time     = microtime(true) - $start_time;
        $process_minutes = floor($script_time / 60);
        $process_seconds = intval((($script_time / 60) - $process_minutes) * 60);
        $processed_in    = sprintf(
            "Processing time: %s minutes (%.3f seconds).",
            $process_minutes . ':' . str_pad($process_seconds, 2, 0, STR_PAD_LEFT),
            $script_time
        );
        $memory_usage   = sprintf("Memory usage: %sMb", ($memory_end - $memory_start) / 1048576);
        $processed_rows = sprintf("Processed %s rows", $row_count);

        // If console exists, send summary to cli.
        if ($console) {
            $console->info('');
            $console->info('');
            $console->info('Import finished');
            $console->info('---');
            $console->info($processed_in);
            $console->info($memory_usage);
            $console->info($processed_rows);
        }

        Log::info('Import Complete', array(
            $processed_in,
            $memory_usage,
            $processed_rows,
        ));

        if ($debug && !$console) {
            var_dump(array(
                $processed_in,
                $memory_usage,
                $processed_rows,
            ));
            echo '</pre>';
            ob_flush();
            flush();
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php namespace Tailor\Carmim; | |
| use Config; | |
| use DB; | |
| use Log; | |
| use Illuminate\Support\Facades\File; | |
| use SplFileObject; | |
| use Symfony\Component\Finder\SplFileInfo; | |
/**
 * Imports one ERP CSV export into a database table.
 *
 * Reads the file with SplFileObject in semicolon-separated CSV mode, uses
 * the first row as column headers, and upserts every remaining row through
 * two prepared PDO statements (INSERT / UPDATE keyed on a unique-id column).
 */
class ParseFile
{
    // PDO handle for Laravel's default DB connection (set in __construct).
    public $pdo;
    /*
     * Excludes certain columns from the file
     * NOTE(review): declared but never read anywhere in this class.
     */
    public $exclude_column = array();
    /*
     * Specify different column names for existing columns
     * (slugged header name => replacement column name)
     */
    public $column_map = array();
    // Columns whose values are normalised to 'Y-m-d H:i:s' via strtotime().
    public $parse_dates = array(
        'data_entrega',
    );
    /*
     * To what table should the data go
     */
    public $table = '';
    /*
     * Should the columns be lowercase
     * NOTE(review): declared but never read anywhere in this class.
     */
    public $lowercase_columns = true;
    // When true, created_at is set on INSERT and updated_at on UPDATE.
    public $push_timestamps = true;
    /*
     * The file
     * Instance of SplFileObject
     */
    public $file = '';
    public $file_info = '';     // SplFileInfo for the file's directory
    public $file_pathinfo = ''; // pathinfo() array for the file path
    public $file_data = array();
    public $row_count = 0;      // rows processed by the last parse() call
    public $debug = false;      // stream progress output when not on CLI
    public $console = ''; // Will hold an instance of CarmimImport

    /**
     * @param SplFileObject $file    CSV file to import.
     * @param array         $options Values copied onto matching public
     *                               properties; unknown keys are ignored.
     */
    public function __construct(SplFileObject $file, array $options = array())
    {
        $this->pdo = DB::connection()->getPdo();
        $this->file = $file;
        // Only overwrite properties that already exist with a non-null default.
        foreach ($options as $key => $value) {
            if ( !isset($this->{$key}) ) continue;
            $this->{$key} = $value;
        }
        $this->file_info = $this->file->getPathInfo();
        $this->file_pathinfo = pathinfo($this->file->getPathname());
        // get file path $this->file->getPathname();
    }

    /**
     * Parses the CSV file and upserts each row into $this->table.
     *
     * The first row supplies the column names (slugged, then remapped via
     * $column_map); each later row is INSERTed or UPDATEd depending on
     * whether its unique id already exists in the table.
     *
     * @return $this
     */
    public function parse()
    {
        $debug = $this->debug;
        $file = $this->file;
        // Read as semicolon-separated CSV, skipping empty lines.
        $file->setFlags(SplFileObject::READ_CSV | SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE);
        $file->setCsvControl(';');
        $percentage = 0;
        $old_percentage = 0;
        $count = 0;
        // Query logging would accumulate one entry per row — disable it.
        DB::connection()->disableQueryLog();
        $table = $this->table;
        $table_prefix = Config::get('database.connections.mysql.prefix');
        // The users table is keyed by member number rather than 'uniqid'.
        $table_index = 'uniqid';
        if ($table === 'users')
            $table_index = 'n_socio';
        $column_map = $this->column_map;
        $console = $this->console;
        $parse_dates = $this->parse_dates;
        $push_timestamps = $this->push_timestamps;
        $datetime_now = date('Y-m-d H:i:s');
        // Calculate line count only if cli, to provide percentage.
        // This is a full extra pass over the file just to count lines.
        $total_line_count = 0;
        if ($console || $debug) {
            while ( $file->valid()) {
                ++$total_line_count; $row = $file->fgetcsv();
                unset($row);
            }
            $file->rewind();
        }
        // Grab first row since its headers
        $fields = $file->current();
        // Ignore first, since they are the fields
        // NOTE(review): the header line appears to be consumed by current()
        // above — confirm fgetcsv() in the loop starts at the first data row.
        //$file->next();
        //$file->seek(69290);
        // Slug each header into a safe column name, then apply the remap.
        // NOTE(review): Slug is a project helper; utf8_encode() assumes the
        // file is Latin-1 encoded and is deprecated as of PHP 8.2.
        $fields = array_map(function($value) use ($column_map) {
            $field = str_replace('-', '_', Slug::make(utf8_encode($value)));
            return ( isset($column_map[$field]) ) ? $column_map[$field] : $field;
        }, $fields);
        $prepare_columns = array();
        $prepare_values = array();
        $prepare_column_value = array();
        // Prepare columns into sql syntax: `col`, :col and `col` = :col parts.
        array_map(function($value) use (&$prepare_columns, &$prepare_values, &$prepare_column_value) {
            $prepare_columns[$value] = '`'.$value.'`';
            $prepare_values[$value] = ':'.$value;
            $prepare_column_value[$value] = '`'.$value.'` = :'.$value;
        }, $fields);
        // Push timestamp columns into fields
        if ($push_timestamps === true) {
            $prepare_columns[] = '`created_at`';
            $prepare_values['created_at'] = ':created_at';
            $prepare_column_value['updated_at'] = '`updated_at` = :updated_at';
        }
        // Build insert statement
        $prepare_insert = $this->pdo->prepare(sprintf(
            'INSERT INTO `%s` (%s) VALUES (%s)',
            $table_prefix.$table,
            implode(', ', $prepare_columns),
            implode(', ', $prepare_values)
        ));
        // Build update statement keyed on the unique-id column.
        unset($prepare_column_value[$table_index]); // remove it from implode and add manually
        $prepare_update = $this->pdo->prepare(sprintf(
            'UPDATE `%s` SET %s WHERE `%3$s` = :%3$s',
            $table_prefix.$table,
            implode(', ', $prepare_column_value),
            $table_index
        ));
        $prepare_columns = null;
        $prepare_values = null;
        $prepare_column_value = null;
        // Get existing uniqids for faster comparison: flip them into array
        // keys so each lookup below is an O(1) isset() instead of in_array().
        $existing_uniqid = array();
        $existing_uniqid_raw = DB::table($this->table)->lists($table_index);
        foreach ($existing_uniqid_raw as $eur_uniqid) {
            $existing_uniqid[$eur_uniqid] = $eur_uniqid;
        }
        // Go through each of the csv lines and make a first cleanup and organization
        while ($file->valid())
        {
            // Exit execution, no need while in testing
            //if ($count >= 5) break;
            //if ($count >= 10000) break;
            //if ($count >= 40000) break;
            //if ($count >= 8600) break;
            // Sleep for a while each 1000
            // if ($count % 500 == 0) {
            // //sleep(1);
            // //time_nanosleep(0, 10000000);
            // }
            // Get row index
            //$row_index = $file->key();
            // Get csv line
            $row = $file->fgetcsv();
            // If the first item of the row is not set, the line is empty — skip it.
            if (!isset($row[0]))
                continue;
            // Replace numeric keys with the header column names, trimming
            // and re-encoding each value along the way.
            foreach ($row as $row_index => $row_value) {
                // Insert processed key => value
                $row[$fields[$row_index]] = utf8_encode(trim($row_value));
                // Unset old numeric key and value
                unset($row[$row_index]);
            }
            // Parse dates correctly
            foreach ($parse_dates as $field) {
                if (!isset($row[$field])) continue;
                $row[$field] = date('Y-m-d H:i:s', strtotime($row[$field]));
            }
            // PROCESS ROW INSERTION
            // Storing all data into a huge array will consume lots of memory, check if all code can be done within here
            $uniqid = $row[$table_index];
            $row_exists = isset($existing_uniqid[$uniqid]); // Unsetting this increases memory, in_array is less efficient
            // NOTE(review): "Login contribuinte" (taxpayer-number login) —
            // leftover author note; purpose unclear from this file.
            // Update
            if ($row_exists)
            {
                if ($push_timestamps === true) {
                    $row['updated_at'] = $datetime_now;
                }
                // NOTE(review): execute() result is stored but never checked,
                // so per-row failures are silent.
                $query = $prepare_update->execute($row);
            }
            // Insert
            else
            {
                if ($push_timestamps === true) {
                    $row['created_at'] = $datetime_now;
                }
                $query = $prepare_insert->execute($row);
            }
            // PROCESS ROW INSERTION
            // Null the per-row temporaries instead of unset() — see note below.
            $row = null;
            $uniqid = null;
            $row_exists = null;
            $query = null;
            //unset($row, $uniqid, $row_exists, $query); // spends more memory
            // Show progress
            // If console exists, send info to cli
            if ($console || $debug) {
                // number_format() returns a string; only print when it changes.
                $percentage = number_format(($count*100) / $total_line_count, 0);
                if ($old_percentage !== $percentage) {
                    if ($console)
                        $console->comment('Progress: '.$percentage.'%');
                    if ($debug && !$console) {
                        echo sprintf('Progress %s%%', $percentage)."\n\r";
                        ob_flush();
                        flush();
                    }
                }
                unset($old_percentage);
                $old_percentage = $percentage;
                unset($percentage);
            }
            ++$count;
        }
        $this->row_count = $count;
        // Release loop state (author found explicit nulls cheaper than unset()).
        $count = null;
        $existing_uniqid = null;
        $percentage = null;
        $old_percentage = null;
        //unset($count, $existing_uniqid, $percentage, $old_percentage);
        return $this;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment