Skip to content

Instantly share code, notes, and snippets.

@motebaya
Created June 12, 2023 11:11
Show Gist options
  • Save motebaya/f0e99dd799e46a5bc2f405a44cc94df7 to your computer and use it in GitHub Desktop.
Save motebaya/f0e99dd799e46a5bc2f405a44cc94df7 to your computer and use it in GitHub Desktop.
Scrape readable words from websites with Guzzle and PHPHtmlParser
<?php
/*
@for: collecting readable words to use as replacement function or variable names
@credit: t.me/dvinchii a.k.a michino
File: word.php
Size: 1966 Blocks: 8 IO Block: 4096 regular file
Device: 10304h/66308d Inode: 243109 Links: 1
Access: (0777/-rwxrwxrwx) Uid: ( 1000/ mochino) Gid: ( 1000/ mochino)
Access: 2023-05-02 12:32:30.134484200 +0700
Modify: 2023-03-27 21:16:44.640398400 +0700
Change: 2023-03-27 21:16:44.640398400 +0700
Birth: -
*/
require_once '../vendor/autoload.php';
use GuzzleHttp\Client;
use PHPHtmlParser\Dom;
/**
 * Scrapes animal and fruit names from two public web pages and appends them,
 * normalised to lowercase words joined by underscores, to a caller-supplied
 * file handle. Each word is also echoed to standard output with a running
 * counter.
 */
class getWord
{
    /** @var Client HTTP client used to download the pages. */
    private $client;

    /** @var array Request options (a browser-like User-Agent header) sent with every download. */
    private $options;

    /** @var Dom HTML parser; reloaded with each downloaded page. */
    private $soup;

    /** @var string[] The letters a-z, used to build the per-letter heading ids. */
    private $alpa;

    /**
     * Number of words written so far, shared by every scraper method.
     *
     * Explicitly starts at 0: an uninitialised (null) property interpolates
     * as an empty string, which left the first progress line without a number.
     *
     * @var int
     */
    private $count = 0;

    /**
     * Builds the HTTP client, the HTML parser and the default request options.
     */
    public function __construct()
    {
        $this->client = new Client();
        $this->soup = new Dom;
        $this->options = [
            "headers" => [
                "User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
            ]
        ];
        $this->alpa = str_split("abcdefghijklmnopqrstuvwxyz");
    }

    /**
     * Normalises the text of a node into a lowercase identifier-like word.
     *
     * Every run of characters outside A-Z / a-z is replaced by a single
     * underscore, then the result is lowercased. Leading or trailing
     * underscores are kept.
     *
     * @param object $string Object exposing a `text` property (a parsed HTML node).
     * @return string The normalised word.
     */
    private function clear_word($string)
    {
        // One pass: a run of non-letters (existing underscores included) becomes one "_".
        return strtolower(preg_replace('/[^A-Za-z]+/', '_', $string->text));
    }

    /**
     * Normalises one node's text, appends it as a line to the output handle,
     * prints a progress line and advances the shared counter.
     *
     * @param resource $f    Writable file handle.
     * @param object   $node Object exposing a `text` property.
     * @return void
     */
    private function save_word($f, $node)
    {
        $word = $this->clear_word($node);
        fwrite($f, $word . "\n");
        print " {$this->count} -> {$word} \n";
        $this->count++;
    }

    /**
     * Downloads the a-z-animals.com animal index and writes every linked name.
     *
     * For each letter a-z, the element with id
     * `h-animals-that-start-with-<letter>` is looked up; when it exists, the
     * text of every `<a>` found in the element following it is written.
     * Letters without a matching heading are skipped.
     *
     * @param resource $f Writable file handle that receives one word per line.
     * @return void
     */
    public function get_animal($f)
    {
        // scrape from a-z animals
        $page = $this->client->get('https://a-z-animals.com/animals/', $this->options);
        $this->soup->loadStr((string) $page->getBody());

        foreach ($this->alpa as $alpha) {
            $heading = $this->soup->find("#h-animals-that-start-with-{$alpha}");
            if (count($heading) === 0) {
                continue;
            }

            foreach ($heading->nextSibling()->find("a") as $link) {
                $this->save_word($f, $link);
            }
        }
    }

    /**
     * Downloads Wikipedia's "List of culinary fruits" page and writes the text
     * of every `<i>` element found inside elements with class `wikitable`.
     *
     * @param resource $f Writable file handle that receives one word per line.
     * @return void
     */
    public function get_fruit($f)
    {
        // scrape from wiki
        $page = $this->client->get("https://en.wikipedia.org/wiki/List_of_culinary_fruits", $this->options);
        $this->soup->loadStr((string) $page->getBody());

        // Iterating an empty result is a no-op, so no separate emptiness check is needed.
        foreach ($this->soup->find(".wikitable") as $table) {
            foreach ($table->find("i") as $italic) {
                $this->save_word($f, $italic);
            }
        }
    }
}
// Entry point: append every scraped word to word.txt in the current directory.
// Mode "a" keeps the existing file content and adds new lines at the end.
$f = fopen("word.txt", "a");
if ($f === false) {
    // Without a valid handle every later fwrite() would fail, so stop here.
    throw new RuntimeException("Unable to open word.txt for appending");
}

try {
    $main = new getWord();
    $main->get_animal($f);
    $main->get_fruit($f);
} finally {
    // Close the handle even when a download or parse step throws.
    fclose($f);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment