Created
July 16, 2012 14:46
-
-
Save agarzon/3123118 to your computer and use it in GitHub Desktop.
Extract all email addresses from any text content, remove duplicates, and export the result as TEXT, SQL or CSV.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Extracts every e-mail address found in a file's text content, removes
 * duplicates (case-insensitively) and prints the result as plain text,
 * SQL INSERT statements or CSV rows.
 *
 * @author Alexander Garzon
 */
class emailXtractor {

    /**
     * Reads $file, extracts the unique addresses and echoes them in $format.
     *
     * An unrecognised $format produces no output.
     *
     * @param string $file      Path handed to file_get_contents().
     * @param string $format    One of 'txt', 'sql' or 'csv'.
     * @param string $tablename Table name used by the 'sql' format.
     */
    public function __construct($file, $format = 'txt', $tablename = 'emailmass') {
        $content = file_get_contents($file);
        if ($content === false) {
            // file_get_contents() has already raised its own warning; carry on
            // with empty content so the export is simply empty instead of
            // feeding a boolean to the regex engine.
            $content = '';
        }

        $clean = $this->removeDuplicated($this->extract($content));

        // header() only works while no output has been sent yet.
        if (!headers_sent()) {
            header('Content-Type: text/plain');
        }

        switch ($format) {
            case 'txt':
                echo $this->exportTxt($clean);
                break;
            case 'sql':
                echo $this->exportSql($clean, $tablename);
                break;
            case 'csv':
                echo $this->exportCsv($clean);
                break;
        }
    }

    /**
     * Finds everything in $content that looks like an e-mail address.
     *
     * @param string $content Text to scan.
     * @return array List of matched addresses, in order of appearance,
     *               with their original letter case and possibly duplicated.
     */
    protected function extract($content) {
        $regexp = '/([a-z0-9_\.\-])+\@(([a-z0-9\-])+\.)+([a-z0-9]{2,4})+/i';
        preg_match_all($regexp, $content, $m);

        // $m[0] holds the full-pattern matches; fall back to an empty list
        // if the match array was not populated.
        return isset($m[0]) ? $m[0] : array();
    }

    /**
     * Lower-cases all addresses and drops duplicates.
     *
     * Lower-casing happens BEFORE de-duplication so that addresses differing
     * only in letter case collapse into a single entry.
     *
     * @param array $array List of addresses.
     * @return array Sequentially indexed list of unique lower-case addresses.
     */
    protected function removeDuplicated($array) {
        return array_values(array_unique(array_map('strtolower', $array)));
    }

    /**
     * Renders one CSV row per address: "null","<address>","null".
     *
     * @param array $array List of addresses.
     * @return string CSV text; empty string when $array is empty.
     */
    protected function exportCsv($array) {
        $csv = '';
        foreach ($array as $value) {
            $csv .= '"null","' . $value . '","null"' . PHP_EOL;
        }
        return $csv;
    }

    /**
     * Renders one INSERT IGNORE statement per address.
     *
     * Both $tablename and the addresses are interpolated verbatim. Addresses
     * produced by extract() can only contain letters, digits and "_.-@", but
     * $tablename is not validated and must come from a trusted source.
     *
     * @param array  $array     List of addresses.
     * @param string $tablename Target table name.
     * @return string SQL text; empty string when $array is empty.
     */
    protected function exportSql($array, $tablename) {
        $sql = '';
        foreach ($array as $value) {
            $sql .= "INSERT IGNORE INTO $tablename VALUES (NULL, '$value', '0');" . PHP_EOL;
        }
        return $sql;
    }

    /**
     * Renders one address per line.
     *
     * @param array $array List of addresses.
     * @return string Plain text; empty string when $array is empty.
     */
    protected function exportTxt($array) {
        $txt = '';
        foreach ($array as $value) {
            $txt .= $value . PHP_EOL;
        }
        return $txt;
    }
}
// Example run: scan emailmass2.xml and print the unique addresses as
// INSERT statements targeting the emailmass3 table.
new emailXtractor(
    'emailmass2.xml',
    'sql',
    'emailmass3'
);
You're right. Changing ;)
Bro, but how do I convert his YouTube ID to a Gmail address?
All the other details are there, but no Gmail.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
can be replaced with
However, I would rather see you use the file socket handles to read in chunks at a time.