Created
April 3, 2017 05:35
-
-
Save rizqidjamaluddin/10a8ffa95432ec74c42301f683a7d783 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Converts the free-form address listing in src.txt into result.csv.
 *
 * Every non-blank input line is classified by the first rule that matches:
 *   - starts with "#"            : record separator; the pending record is saved
 *   - starts with "Attn:"        : contact name (stored whole in first_name,
 *                                  last_name is left empty)
 *   - starts with "XX 12345" or
 *     "XX12345"                  : state and zip
 *   - contains "(ddd)" or
 *     "ddd-dddd"                 : phone number, ignored
 *   - anything else              : first of three consecutive address lines
 *
 * A progress message is printed for each line so the classification can be
 * checked by eye while the script runs.
 */

/**
 * Prints the "Saving" banner and writes one record to the CSV.
 *
 * fputcsv() emits fields in array iteration (insertion) order, not key order,
 * and skips nothing for missing keys. The buffer is therefore copied into a
 * fixed six-slot row first, so columns always line up with the header no
 * matter which input lines were seen, or in what order.
 *
 * @param resource $out    Open handle of the CSV being written.
 * @param array    $buffer Fields collected so far, keyed by column index 0-5.
 * @param int      $number Sequence number shown in the banner.
 */
function saveRecord($out, array $buffer, $number)
{
    echo str_repeat('-', 50);
    echo "\n Saving #" . ($number) . "\n";
    echo str_repeat('-', 50) . "\n\n";

    $row = [];
    for ($column = 0; $column < 6; $column++) {
        $row[] = isset($buffer[$column]) ? $buffer[$column] : '';
    }
    fputcsv($out, $row);
}

/**
 * Reads the next line from the handle and trims it.
 *
 * @param resource $handle Open handle of the input file.
 *
 * @return string The trimmed line, or '' when the end of the file was reached.
 */
function readTrimmedLine($handle)
{
    $line = fgets($handle);

    return $line === false ? '' : trim($line);
}

$file = fopen("src.txt", 'r');
if ($file === false) {
    echo "Unable to open src.txt for reading.\n";
    exit(1);
}

$out = fopen("result.csv", 'w');
if ($out === false) {
    fclose($file);
    echo "Unable to open result.csv for writing.\n";
    exit(1);
}

fputcsv($out, ['first_name', 'last_name', 'address', 'city', 'state', 'zip']);

$buffer = [];
$counter = 0;

// Compare against false explicitly: a bare truthiness test would also stop on
// a final line consisting of just "0".
while (($line = fgets($file)) !== false) {
    $text = trim($line);

    if ($text === '') {
        echo "\n";
        continue;
    }

    if (substr($line, 0, 1) === '#') {
        // Line with ML number: marks the start of a new entry, so dump the
        // record collected so far.
        if (!empty($buffer)) {
            $counter++;
            saveRecord($out, $buffer, $counter);
            $buffer = [];
        }
        printf("%-100s %s", $text, "New address detected.\n");
        continue;
    }

    if (substr($line, 0, 5) === 'Attn:') {
        // Name line.
        $name = trim(substr($line, 5));
        // Get rid of a single spare trailing period in the name.
        if (substr($name, -1) === '.') {
            $name = substr($name, 0, -1);
        }
        $buffer[0] = $name;
        $buffer[1] = '';
        printf("%-100s %s", $text, "Name found. {$buffer[0]} \n");
        continue;
    }

    $matches = [];
    // Two capital letters, an optional space, then five digits.
    if (preg_match('/^([A-Z]{2}) ?(\d{5})/', $line, $matches) === 1) {
        $buffer[4] = $matches[1];
        $buffer[5] = $matches[2];
        printf("%-100s %s", $text, "State/zip found. {$buffer[4]} ; {$buffer[5]} \n");
        continue;
    }

    if (preg_match('/\(\d{3}\)/', $line) === 1 || preg_match('/\d{3}-\d{4}/', $line) === 1) {
        // Phone number, ignore.
        printf("%-100s %s", $text, "Phone number; skipping. \n");
        continue;
    }

    // Anything else is taken as the start of an address. The next two lines
    // are consumed unconditionally as the street line and the city line,
    // without being run through the checks above.
    $firstLine = $text;
    $secondLine = readTrimmedLine($file);
    $city = readTrimmedLine($file);

    // Get rid of a single spare trailing comma in the address.
    if (substr($secondLine, -1) === ',') {
        $secondLine = substr($secondLine, 0, -1);
    }

    $buffer[2] = $firstLine . ' ' . $secondLine;
    $buffer[3] = $city;
    printf("%-100s %s", $firstLine, "Company name in address line found.\n");
    printf("%-100s %s", $secondLine, "Main address line found. Full: {$buffer[2]} \n");
    printf("%-100s %s", $city, "City line found. Full: {$buffer[3]} \n");
}

// The last entry has no "#" line after it, so save it here.
if (!empty($buffer)) {
    $counter++;
    saveRecord($out, $buffer, $counter);
    $buffer = [];
}

fclose($file);
fclose($out);

echo "Done, $counter entries.\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment