Source from the book PHP 7 Programming Cookbook by Doug Bierer
with some improvements.
File Application/Web/Parser.php
:
<?php
namespace Application\Web;
use \DOMDocument, \DOMNodeList, \DOMElement, \DOMAttr;
/**
 * Loads an HTML document from a URL and extracts tags or attribute values from it.
 *
 * The document is fetched lazily on the first call to getContent() and then
 * cached on the instance, so repeated queries do not reload the page.
 */
class Parser
{
    /** @var DOMDocument|null Parsed document; stays null until getContent() is first called. */
    protected $content;

    /** @var string URL of the document to parse (a scheme is added on first load if missing). */
    protected $url;

    /**
     * @param string $url Address of the HTML document; "http://" is prepended
     *                    on first load when no http(s) scheme is present.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Returns the parsed document, loading it from the URL on first use.
     *
     * @return DOMDocument The cached document. If loading fails the document
     *                     is returned empty, because load warnings are suppressed.
     */
    public function getContent()
    {
        if (!$this->content) {
            // Fix: the original wrote stripos($url, 'http' !== 0), which passes
            // a boolean needle to stripos() and never tests the URL prefix.
            // An anchored, case-insensitive scheme match also avoids treating
            // host names such as "httpbin.org" as already having a scheme.
            if (!preg_match('#^https?://#i', (string) $this->url)) {
                $this->url = 'http://' . $this->url;
            }
            $this->content = new DOMDocument('1.0', 'utf-8');
            $this->content->preserveWhiteSpace = false;
            // Real-world HTML is rarely well formed; warnings are deliberately
            // silenced so that parsing stays best-effort.
            @$this->content->loadHTMLFile($this->url);
        }
        return $this->content;
    }

    /**
     * Collects every element with the given tag name.
     *
     * @param string $tag Tag name to look for (e.g. "img").
     *
     * @return array[] One entry per element: 'value' holds the node text with
     *                 whitespace runs collapsed to single spaces and trimmed;
     *                 'attributes' (present only when the element has any)
     *                 maps attribute names to their values.
     */
    public function getTags($tag)
    {
        $result = [];
        /* @var DOMNodeList $elements */
        $elements = $this->getContent()->getElementsByTagName($tag);
        /* @var DOMElement $node */
        foreach ($elements as $node) {
            // Fix: start from a fresh array for every node. The original reused
            // $item across iterations, so attributes of earlier elements leaked
            // into later ones.
            $item = ['value' => trim(preg_replace('/\s+/', ' ', $node->nodeValue))];
            if ($node->hasAttributes()) {
                /* @var DOMAttr $attr */
                foreach ($node->attributes as $name => $attr) {
                    $item['attributes'][$name] = $attr->value;
                }
            }
            $result[] = $item;
        }
        return $result;
    }

    /**
     * Collects the values of one attribute across all elements of the document.
     *
     * @param string      $attr   Attribute name to look for (e.g. "href").
     * @param string|null $domain Optional case-insensitive substring filter;
     *                            when given (and non-empty), only values
     *                            containing it are returned.
     *
     * @return string[] Trimmed attribute values, in document order.
     */
    public function getAttribute($attr, $domain = null)
    {
        $result = [];
        /* @var DOMNodeList $elements */
        $elements = $this->getContent()->getElementsByTagName('*');
        /* @var DOMElement $node */
        foreach ($elements as $node) {
            if (!$node->hasAttribute($attr)) {
                continue;
            }
            $value = $node->getAttribute($attr);
            // No filter means every value qualifies.
            if (!$domain || stripos($value, $domain) !== false) {
                $result[] = trim($value);
            }
        }
        return $result;
    }
}
File index.php
:
<?php
// Demo script: fetches a page and prints its <img> tags and href attributes.

// Forward slashes anchored at __DIR__ resolve on every platform; the original
// backslash-separated relative path only worked on Windows.
require __DIR__ . '/Application/Autoload/Loader.php';
\Application\Autoload\Loader::init(__DIR__);

$url = 'http://oreilly.com';
$parser = new Application\Web\Parser($url);

// The dumped values come from a remote page, so they are escaped before being
// embedded in this page's HTML to keep foreign markup from being injected.
$dump = function ($data) {
    return htmlspecialchars(print_r($data, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo 'All "img" tags:<br><pre>', $dump($parser->getTags('img')), '</pre>';
echo 'All "href" attributes:<br><pre>', $dump($parser->getAttribute('href')), '</pre>';
echo 'All "href" attributes with "css" domain:<br><pre>',
    $dump($parser->getAttribute('href', 'css')),
    '</pre>';
Note: the class is loaded using the autoloader from this gist.
Sample output:
All "img" tags:
Array
(
[0] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/oreilly/promos/ba-security-ny-20161019.png
[width] => 720
[height] => 298
[alt] => O'Reilly Security Conference in New York, NY, October 31 – November 2, 2016. See what you'll learn.
)
)
[1] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/oreilly/promos/ba-sa-ca-20161013.png
[width] => 720
[height] => 298
[alt] => O'Reilly Software Architecture Conference in San Francisco, CA, November 14-16, 2016. See what you'll learn.
)
)
[2] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/oreilly/promos/ba-live-training-cal-20160916.png
[width] => 724
[height] => 298
[alt] => Live training events calendar. See all upcoming events.
)
)
[3] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920047124/rc_thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[4] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/9781680502008/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[5] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/9780994347008/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[6] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920051961/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[7] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920055570/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[8] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920054993/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[9] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/9781593277413/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[10] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920055594/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[11] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920047506/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[12] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920049517/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[13] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/9781593277604/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[14] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920051732/rc_thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[15] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920041504/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[16] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920031833/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[17] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920044079/rc_thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[18] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920047391/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[19] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920042228/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[20] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920031130/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[21] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920052654/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[22] => Array
(
[value] =>
[attributes] => Array
(
[src] => //akamaicovers.oreilly.com/images/0636920052616/thumb.gif
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
)
)
[23] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/oreilly/promos/homepage-newsletter-quote-20160525.png
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
[style] => margin:10px auto 15px;
)
)
[24] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/oreilly/promos/online-training-photo-20160603.jpg
[width] => 724
[height] => 298
[alt] =>
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
)
)
[25] => Array
(
[value] =>
[attributes] => Array
(
[src] => http://cdn.oreillystatic.com/oreilly/promos/safari-logo-202x57.png
[width] => 150
[height] => 298
[alt] => Safari
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
[class] => mb
)
)
[26] => Array
(
[value] =>
[attributes] => Array
(
[src] => http://covers.oreillystatic.com/images/0636920050612/rc_bkt.gif
[width] => 150
[height] => 298
[alt] =>
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
[class] => floatl book-cover media
)
)
[27] => Array
(
[value] =>
[attributes] => Array
(
[src] => http://covers.oreillystatic.com/images/9780128020425/bkt.gif
[width] => 150
[height] => 298
[alt] =>
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
[class] => floatl book-cover media
)
)
[28] => Array
(
[value] =>
[attributes] => Array
(
[src] => http://covers.oreillystatic.com/images/0636920045052/thumb.gif
[width] => 150
[height] => 298
[alt] =>
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
[class] => video-cover media
)
)
[29] => Array
(
[value] =>
[attributes] => Array
(
[src] => //cdn.oreillystatic.com/images/sitewide-headers/tarsier-footer.png
[width] => 150
[height] => 298
[alt] => Tarsier
[itemprop] => image
[style] => position:absolute; z-index:1; top:0; left:0; display:block;
[class] => video-cover media
)
)
)
All "href" attributes:
Array
(
[0] => https://plus.google.com/108442503368488643007
[1] => http://www.oreilly.com
[2] => //www.oreilly.com/favicon.ico
[3] => //cdn.oreillystatic.com/oreilly/ml/css/stylesheet.css
[4] => //cdn.oreillystatic.com/oreilly/ml/css/ml.css
[5] => //www.oreilly.com/css/oreilly.css
[6] => //cdn.oreillystatic.com/assets/css/norm-home-160908.css
[7] => http://feeds.feedburner.com/oreilly/newbooks
[8] => http://feeds.feedburner.com/oreilly/upcomingbooks
[9] => http://feeds.feedburner.com/oreilly/ebookdealoftheday
[10] => //oreilly.com
[11] => //www.oreilly.com/ideas
[12] => //www.oreilly.com/learning
[13] => //www.oreilly.com/conferences/
[14] => //shop.oreilly.com/
[15] => http://members.oreilly.com
[16] => /topics/ai
[17] => /topics/economy
[18] => /topics/business
[19] => /topics/data
[20] => /topics/design
[21] => /topics/operations
[22] => /topics/security
[23] => /topics/software-engineering
[24] => /topics/software-architecture
[25] => https://www.oreilly.com/topics
[26] => #
[27] => #
[28] => #
[29] => http://conferences.oreilly.com/security/network-data-security-ny
[30] => http://conferences.oreilly.com/software-architecture/engineering-business-ca
[31] => http://www.oreilly.com/live-training/
[32] => http://shop.oreilly.com/category/new.do
[33] => //shop.oreilly.com/product/0636920047124.do
[34] => //shop.oreilly.com/product/0636920047124.do
[35] => //shop.oreilly.com/product/9781680502008.do
[36] => //shop.oreilly.com/product/9781680502008.do
[37] => //shop.oreilly.com/product/9780994347008.do
[38] => //shop.oreilly.com/product/9780994347008.do
[39] => //shop.oreilly.com/product/0636920051961.do
[40] => //shop.oreilly.com/product/0636920051961.do
[41] => //shop.oreilly.com/product/0636920055570.do
[42] => //shop.oreilly.com/product/0636920055570.do
[43] => //shop.oreilly.com/product/0636920054993.do
[44] => //shop.oreilly.com/product/0636920054993.do
[45] => //shop.oreilly.com/product/9781593277413.do
[46] => //shop.oreilly.com/product/9781593277413.do
[47] => //shop.oreilly.com/product/0636920055594.do
[48] => //shop.oreilly.com/product/0636920055594.do
[49] => //shop.oreilly.com/product/0636920047506.do
[50] => //shop.oreilly.com/product/0636920047506.do
[51] => //shop.oreilly.com/product/0636920049517.do
[52] => //shop.oreilly.com/product/0636920049517.do
[53] => //shop.oreilly.com/product/9781593277604.do
[54] => //shop.oreilly.com/product/9781593277604.do
[55] => //shop.oreilly.com/product/0636920051732.do
[56] => //shop.oreilly.com/product/0636920051732.do
[57] => //shop.oreilly.com/product/0636920041504.do
[58] => //shop.oreilly.com/product/0636920041504.do
[59] => //shop.oreilly.com/product/0636920031833.do
[60] => //shop.oreilly.com/product/0636920031833.do
[61] => //shop.oreilly.com/product/0636920044079.do
[62] => //shop.oreilly.com/product/0636920044079.do
[63] => //shop.oreilly.com/product/0636920047391.do
[64] => //shop.oreilly.com/product/0636920047391.do
[65] => //shop.oreilly.com/product/0636920042228.do
[66] => //shop.oreilly.com/product/0636920042228.do
[67] => //shop.oreilly.com/product/0636920031130.do
[68] => //shop.oreilly.com/product/0636920031130.do
[69] => //shop.oreilly.com/product/0636920052654.do
[70] => //shop.oreilly.com/product/0636920052654.do
[71] => //shop.oreilly.com/product/0636920052616.do
[72] => //shop.oreilly.com/product/0636920052616.do
[73] => #
[74] => #
[75] => http://www.oreilly.com/emails/newsletters/
[76] => http://www.oreilly.com/emails/newsletters/
[77] => http://www.oreilly.com/jobs/
[78] => http://jobs.jobvite.com/oreilly-media/job/oP683fwQ
[79] => http://jobs.jobvite.com/oreilly-media/job/oPUu3fw0
[80] => http://www.oreilly.com/jobs/
[81] => http://www.oreilly.com/live-training/
[82] => http://shop.oreilly.com/category/videos.do
[83] => http://shop.oreilly.com/category/videos.do
[84] => https://www.safaribooksonline.com/?utm_medium=referral&utm_campaign=publisher&utm_source=oreilly&utm_content=homepage
[85] => https://www.safaribooksonline.com/?utm_medium=referral&utm_campaign=publisher&utm_source=oreilly&utm_content=homepage
[86] => http://shop.oreilly.com/product/0636920050612.do?code=DEAL
[87] => http://shop.oreilly.com/product/0636920050612.do?code=DEAL
[88] => http://shop.oreilly.com/product/9780128020425.do?code=MSDEAL
[89] => http://shop.oreilly.com/product/9780128020425.do?code=MSDEAL
[90] => http://shop.oreilly.com/product/0636920045052.do?code=VDWK
[91] => http://shop.oreilly.com/product/0636920045052.do?code=VDWK
[92] => http://support.oreilly.com/oreilly?from_gsfn=true
[93] => //oreilly.com/about/
[94] => //oreilly.com/work-with-us.html
[95] => //shop.oreilly.com/category/customer-service.do
[96] => //www.oreilly.com/about/contact.html
[97] => http://fb.co/OReilly
[98] => http://twitter.com/oreillymedia
[99] => https://www.youtube.com/user/OreillyMedia
[100] => https://plus.google.com/+oreillymedia
[101] => https://www.linkedin.com/company/o%27reilly-media
[102] => //oreilly.com
[103] => //oreilly.com/terms/
[104] => //oreilly.com/privacy.html
[105] => //www.oreilly.com/about/editorial_independence.html
)
All "href" attributes with "css" domain:
Array
(
[0] => //cdn.oreillystatic.com/oreilly/ml/css/stylesheet.css
[1] => //cdn.oreillystatic.com/oreilly/ml/css/ml.css
[2] => //www.oreilly.com/css/oreilly.css
[3] => //cdn.oreillystatic.com/assets/css/norm-home-160908.css
)