Created
January 27, 2019 18:04
-
-
Save hallboav/b22e42e2771e820d73243f5c69ff2ec6 to your computer and use it in GitHub Desktop.
Sandman
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
    "require": {
        "ext-openssl": "*",
        "fabpot/goutte": "^3.2",
        "symfony/console": "^4.2"
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
use Goutte\Client; | |
use Symfony\Component\DomCrawler\Crawler; | |
use Symfony\Component\Console\Input\ArgvInput; | |
use Symfony\Component\Console\Question\Question; | |
use Symfony\Component\Console\Output\ConsoleOutput; | |
use Symfony\Component\Console\Helper\QuestionHelper; | |
use Symfony\Component\Console\Output\OutputInterface; | |
require_once 'vendor/autoload.php'; | |
/**
 * Extracts the first phone number found in a text and normalizes it to an
 * 11-digit string (2-digit area code followed by a 9-digit number).
 *
 * Dashes are removed, 8-digit numbers get a leading "9", and numbers without
 * an area code get "61" prepended.
 *
 * @param string $text Free text expected to contain a phone number.
 *
 * @return string The normalized, digits-only phone number.
 *
 * @throws \UnexpectedValueException If no phone number can be found in the text.
 */
function parsePhoneNumber(string $text): string
{
    $pattern = '#\(?0?(?P<area_code>\d{2})?\)?\ ?(?P<phone_number>9?\d{4}\-?\d{4})#';

    $found = preg_match($pattern, $text, $parts);
    if (!$found) {
        throw new \UnexpectedValueException(sprintf('Falha ao extrair número de telefone do texto: "%s"', $text));
    }

    // Drop the separator, then left-pad 8-digit numbers with the leading "9".
    $localDigits = str_pad(str_replace('-', '', $parts['phone_number']), 9, '9', STR_PAD_LEFT);

    // The area code group is empty when absent; in that case pad with "61".
    return str_pad($parts['area_code'] . $localDigits, 11, '61', STR_PAD_LEFT);
}
/**
 * Decrypts a message laid out as base64(IV . payload).
 *
 * The outer base64 layer wraps the raw initialization vector immediately
 * followed by the payload. The payload is passed to openssl_decrypt() with
 * options = 0, so it must itself be base64-encoded ciphertext.
 *
 * @param string $message  Base64-encoded IV followed by the payload.
 * @param string $password Passphrase handed to OpenSSL as the key.
 * @param string $method   OpenSSL cipher method name.
 *
 * @return string The decrypted plaintext.
 *
 * @throws \RuntimeException If the cipher is unknown, the message is too short
 *                           to hold an IV and a payload, or decryption fails.
 */
function decrypt(string $message, string $password, string $method = 'aes-256-cbc'): string
{
    $bytes = base64_decode($message);
    if (false === $bytes) {
        throw new \RuntimeException('Mensagem cifrada inválida');
    }

    $ivlen = openssl_cipher_iv_length($method);
    if (false === $ivlen) {
        throw new \RuntimeException(sprintf('Método de cifra desconhecido: "%s"', $method));
    }

    if (strlen($bytes) <= $ivlen) {
        throw new \RuntimeException('Mensagem cifrada inválida');
    }

    // The payload starts right after the IV. Starting one byte earlier would
    // prepend the last IV byte to the base64 payload and corrupt it whenever
    // that byte happens to be a valid base64 character.
    $iv = substr($bytes, 0, $ivlen);
    $data = substr($bytes, $ivlen);

    $plaintext = openssl_decrypt($data, $method, $password, 0, $iv);
    if (false === $plaintext) {
        // Without this check the failure would be coerced to an empty string.
        throw new \RuntimeException('Falha ao decifrar a mensagem');
    }

    return $plaintext;
}
// Console I/O is created before the try block so the error handler below can
// always rely on $output being defined.
$input = new ArgvInput();
$output = new ConsoleOutput();
$fp = false;
$exitCode = 0;

try {
    // Refuse to overwrite the output of a previous run.
    $filename = '.output.csv';
    if (file_exists($filename)) {
        throw new \RuntimeException(sprintf('Arquivo "%s" já existe', $filename));
    }

    $fp = fopen($filename, 'w');
    if ($fp === false) {
        throw new \RuntimeException(sprintf('Não foi possível criar o arquivo "%s"', $filename));
    }

    // Ask for the password with hidden input and no visible-input fallback.
    $question = new Question('in-pom-pom in-pom-pom: ');
    $question->setHidden(true);
    $question->setHiddenFallback(false);
    $questionHelper = new QuestionHelper();
    // Cast so an empty answer (null) reaches decrypt() as a string instead of
    // raising a TypeError that the \Exception handler would not catch.
    $password = (string) $questionHelper->ask($input, $output, $question);

    // The target URL and the selectors are stored encrypted; the password
    // typed above is what unlocks them.
    $message = 'YZWGu5DByd7jjGlDRit2qXJ1djVEYUIxMkNFbnJ5TnJxUzI1THpiRWMvL2xCU1lMUUt0Q29EY3RCQ1ZiSlFpUUN6K1Njcy95aEllWEEyZkZjMlliQ0lENU9oMXgzcHdXTG5UWnl3PT0=';
    $url = decrypt($message, $password);

    $client = new Client();
    $crawler = $client->request('GET', $url);

    $message = '+AkTQka7WtX6CqGr3HYVjFpWSG9MMkk3TkZ4OUdTZEpoSEhNMWZmVThyaGVrYkw3RC9ONTRuc01hYUx5blpsbi9mamE0ajBxbVdmMTNycVg=';
    $ads = $crawler->filter(decrypt($message, $password));
    $output->writeln(sprintf('%d resultados encontrados', $ads->count()));

    // Header row: 39 columns, matched one-to-one by the $contact array below.
    $headers = 'Name,Given Name,Additional Name,Family Name,Yomi Name,Given Name Yomi,Additional Name Yomi,Family Name Yomi,Name Prefix,Name Suffix,Initials,Nickname,Short Name,Maiden Name,Birthday,Gender,Location,Billing Information,Directory Server,Mileage,Occupation,Hobby,Sensitivity,Priority,Subject,Notes,Language,Photo,Group Membership,E-mail 1 - Type,E-mail 1 - Value,E-mail 2 - Type,E-mail 2 - Value,Phone 1 - Type,Phone 1 - Value,Phone 2 - Type,Phone 2 - Value,Website 1 - Type,Website 1 - Value';
    if (false === fwrite($fp, sprintf('%s%s', $headers, PHP_EOL))) {
        throw new \RuntimeException(sprintf('Não foi possível escrever no arquivo "%s"', $filename));
    }

    $infoDivId = decrypt('f0Ya36+UWgxMf3oNSuPipnI4ZjUvVG8wQitKdDc3SGF4czBTU1E9PQ==', $password);

    // Follow every link of every matched node and export one CSV row per
    // info element found on the linked page.
    $ads->each(function (Crawler $anchor) use ($client, $infoDivId, $fp, $filename) {
        foreach ($anchor->links() as $link) {
            $profile = $client->click($link);

            $profile->filter($infoDivId)->each(function (Crawler $info) use ($link, $fp, $filename) {
                $headerText = $info->children('h1')->text();

                // Only the first line of the header is used as the name. The
                // whitespace strip is UTF-8 aware: a byte-wise trim() on
                // "\xc2\xa0" could cut a multi-byte character in half.
                $firstLine = explode("\n", $headerText)[0];
                $name = preg_replace(
                    '/^[\x20\x09\x0A\x0D\x00\x0B\x{A0}]+|[\x20\x09\x0A\x0D\x00\x0B\x{A0}]+\z/u',
                    '',
                    $firstLine
                );
                if (null === $name) {
                    // Invalid UTF-8: fall back to stripping ASCII whitespace only.
                    $name = trim($firstLine, "\x20\x09\x0A\x0D\x00\x0B");
                }

                // Prefer the first anchor's text as the phone source; fall
                // back to the header text when there is no anchor.
                $infoAnchor = $info->filter('a');
                $phoneNumberText = 0 < $infoAnchor->count() ? $infoAnchor->eq(0)->text() : $headerText;
                $mainPhoneNumber = parsePhoneNumber($phoneNumberText);

                // A subheader containing "ou" is parsed for a second number.
                $additionalPhoneNumber = null;
                $infoSubheader = $info->filter('h2');
                if (0 < $infoSubheader->count()) {
                    $infoSubheaderText = $infoSubheader->eq(0)->text();
                    if (false !== strpos($infoSubheaderText, 'ou')) {
                        $additionalPhoneNumber = parsePhoneNumber($infoSubheaderText);
                    }
                }

                $contact = [
                    md5($name), // name (only its MD5 hash is exported)
                    '', // given name
                    '', // additional name
                    '', // family name
                    '', // yomi name
                    '', // given name yomi
                    '', // additional name yomi
                    '', // family name yomi
                    '', // name prefix
                    '', // name suffix
                    '', // initials
                    '', // nickname
                    '', // short name
                    '', // maiden name
                    '', // birthday
                    '', // gender
                    '', // location
                    '', // billing information
                    '', // directory server
                    '', // mileage
                    '', // occupation
                    '', // hobby
                    '', // sensitivity
                    '', // priority
                    '', // subject
                    '', // notes
                    '', // language
                    '', // photo
                    'foo', // group membership
                    '', // email 1 type
                    '', // email 1 value
                    '', // email 2 type
                    '', // email 2 value
                    'Mobile', // phone 1 type
                    $mainPhoneNumber, // phone 1 value
                    null !== $additionalPhoneNumber ? 'Mobile' : '', // phone 2 type
                    $additionalPhoneNumber ?? '', // phone 2 value
                    'Work', // website 1 type
                    base64_encode($link->getUri()), // website 1 value
                ];

                if (false === fputcsv($fp, $contact)) {
                    throw new \RuntimeException(sprintf('Não foi possível escrever no arquivo "%s"', $filename));
                }
            });
        }
    });

    $output->writeln('done.');
} catch (\Exception $exception) {
    $output->getErrorOutput()->writeln($exception->getMessage());
    $exitCode = -1;
} finally {
    // Close the CSV handle on every path, success included.
    if (is_resource($fp)) {
        fclose($fp);
    }
}

// exit() is called after the finally block so the cleanup above always runs.
if (0 !== $exitCode) {
    exit($exitCode);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chmod +x crawl
./crawl && ls -la
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment