Last active
February 10, 2017 13:05
-
-
Save silvioq/6957a72f279ca7fb729a5ea685ffbcde to your computer and use it in GitHub Desktop.
Proyecto de prueba para obtener datos de una página
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vendor/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ID: 12170236 | |
Installation: | |
composer install | |
Usage: | |
php crawler.php URL-to-process | |
Example: | |
php crawler.php https://www.freelancer.com/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"require": { | |
"fabpot/goutte": "^3.2" | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_readme": [ | |
"This file locks the dependencies of your project to a known state", | |
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", | |
"This file is @generated automatically" | |
], | |
"hash": "defc342ae76c7f0ce5b64dd4004facca", | |
"content-hash": "425aac93d170ece8a1f83c754fbc0f1d", | |
"packages": [ | |
{ | |
"name": "fabpot/goutte", | |
"version": "v3.2.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/FriendsOfPHP/Goutte.git", | |
"reference": "8cc89de5e71daf84051859616891d3320d88a9e8" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/FriendsOfPHP/Goutte/zipball/8cc89de5e71daf84051859616891d3320d88a9e8", | |
"reference": "8cc89de5e71daf84051859616891d3320d88a9e8", | |
"shasum": "" | |
}, | |
"require": { | |
"guzzlehttp/guzzle": "^6.0", | |
"php": ">=5.5.0", | |
"symfony/browser-kit": "~2.1|~3.0", | |
"symfony/css-selector": "~2.1|~3.0", | |
"symfony/dom-crawler": "~2.1|~3.0" | |
}, | |
"type": "application", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.2-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Goutte\\": "Goutte" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "[email protected]" | |
} | |
], | |
"description": "A simple PHP Web Scraper", | |
"homepage": "https://github.com/FriendsOfPHP/Goutte", | |
"keywords": [ | |
"scraper" | |
], | |
"time": "2016-11-15 16:27:29" | |
}, | |
{ | |
"name": "guzzlehttp/guzzle", | |
"version": "6.2.2", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/guzzle.git", | |
"reference": "ebf29dee597f02f09f4d5bbecc68230ea9b08f60" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/guzzle/zipball/ebf29dee597f02f09f4d5bbecc68230ea9b08f60", | |
"reference": "ebf29dee597f02f09f4d5bbecc68230ea9b08f60", | |
"shasum": "" | |
}, | |
"require": { | |
"guzzlehttp/promises": "^1.0", | |
"guzzlehttp/psr7": "^1.3.1", | |
"php": ">=5.5" | |
}, | |
"require-dev": { | |
"ext-curl": "*", | |
"phpunit/phpunit": "^4.0", | |
"psr/log": "^1.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "6.2-dev" | |
} | |
}, | |
"autoload": { | |
"files": [ | |
"src/functions_include.php" | |
], | |
"psr-4": { | |
"GuzzleHttp\\": "src/" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "[email protected]", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "Guzzle is a PHP HTTP client library", | |
"homepage": "http://guzzlephp.org/", | |
"keywords": [ | |
"client", | |
"curl", | |
"framework", | |
"http", | |
"http client", | |
"rest", | |
"web service" | |
], | |
"time": "2016-10-08 15:01:37" | |
}, | |
{ | |
"name": "guzzlehttp/promises", | |
"version": "1.3.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/promises.git", | |
"reference": "2693c101803ca78b27972d84081d027fca790a1e" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/promises/zipball/2693c101803ca78b27972d84081d027fca790a1e", | |
"reference": "2693c101803ca78b27972d84081d027fca790a1e", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.0" | |
}, | |
"require-dev": { | |
"phpunit/phpunit": "~4.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.0-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"GuzzleHttp\\Promise\\": "src/" | |
}, | |
"files": [ | |
"src/functions_include.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "[email protected]", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "Guzzle promises library", | |
"keywords": [ | |
"promise" | |
], | |
"time": "2016-11-18 17:47:58" | |
}, | |
{ | |
"name": "guzzlehttp/psr7", | |
"version": "1.3.1", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/psr7.git", | |
"reference": "5c6447c9df362e8f8093bda8f5d8873fe5c7f65b" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/psr7/zipball/5c6447c9df362e8f8093bda8f5d8873fe5c7f65b", | |
"reference": "5c6447c9df362e8f8093bda8f5d8873fe5c7f65b", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.4.0", | |
"psr/http-message": "~1.0" | |
}, | |
"provide": { | |
"psr/http-message-implementation": "1.0" | |
}, | |
"require-dev": { | |
"phpunit/phpunit": "~4.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.4-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"GuzzleHttp\\Psr7\\": "src/" | |
}, | |
"files": [ | |
"src/functions_include.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "[email protected]", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "PSR-7 message implementation", | |
"keywords": [ | |
"http", | |
"message", | |
"stream", | |
"uri" | |
], | |
"time": "2016-06-24 23:00:38" | |
}, | |
{ | |
"name": "psr/http-message", | |
"version": "1.0.1", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/php-fig/http-message.git", | |
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", | |
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.3.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.0.x-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Psr\\Http\\Message\\": "src/" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "PHP-FIG", | |
"homepage": "http://www.php-fig.org/" | |
} | |
], | |
"description": "Common interface for HTTP messages", | |
"homepage": "https://github.com/php-fig/http-message", | |
"keywords": [ | |
"http", | |
"http-message", | |
"psr", | |
"psr-7", | |
"request", | |
"response" | |
], | |
"time": "2016-08-06 14:39:51" | |
}, | |
{ | |
"name": "symfony/browser-kit", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/browser-kit.git", | |
"reference": "901319a31c9b3cee7857b4aeeb81b5d64dfa34fc" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/901319a31c9b3cee7857b4aeeb81b5d64dfa34fc", | |
"reference": "901319a31c9b3cee7857b4aeeb81b5d64dfa34fc", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9", | |
"symfony/dom-crawler": "~2.8|~3.0" | |
}, | |
"require-dev": { | |
"symfony/css-selector": "~2.8|~3.0", | |
"symfony/process": "~2.8|~3.0" | |
}, | |
"suggest": { | |
"symfony/process": "" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\BrowserKit\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "[email protected]" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony BrowserKit Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-09-06 11:02:40" | |
}, | |
{ | |
"name": "symfony/css-selector", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/css-selector.git", | |
"reference": "a37b3359566415a91cba55a2d95820b3fa1a9658" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/css-selector/zipball/a37b3359566415a91cba55a2d95820b3fa1a9658", | |
"reference": "a37b3359566415a91cba55a2d95820b3fa1a9658", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\CssSelector\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Jean-François Simon", | |
"email": "[email protected]" | |
}, | |
{ | |
"name": "Fabien Potencier", | |
"email": "[email protected]" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony CssSelector Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-11-03 08:04:31" | |
}, | |
{ | |
"name": "symfony/dom-crawler", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/dom-crawler.git", | |
"reference": "1eb3b4d216e8db117218dd2bb7d23dfe67bdf518" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/dom-crawler/zipball/1eb3b4d216e8db117218dd2bb7d23dfe67bdf518", | |
"reference": "1eb3b4d216e8db117218dd2bb7d23dfe67bdf518", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9", | |
"symfony/polyfill-mbstring": "~1.0" | |
}, | |
"require-dev": { | |
"symfony/css-selector": "~2.8|~3.0" | |
}, | |
"suggest": { | |
"symfony/css-selector": "" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\DomCrawler\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "[email protected]" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony DomCrawler Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-11-14 16:20:02" | |
}, | |
{ | |
"name": "symfony/polyfill-mbstring", | |
"version": "v1.3.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/polyfill-mbstring.git", | |
"reference": "e79d363049d1c2128f133a2667e4f4190904f7f4" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/e79d363049d1c2128f133a2667e4f4190904f7f4", | |
"reference": "e79d363049d1c2128f133a2667e4f4190904f7f4", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.3.3" | |
}, | |
"suggest": { | |
"ext-mbstring": "For best performance" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.3-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Polyfill\\Mbstring\\": "" | |
}, | |
"files": [ | |
"bootstrap.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Nicolas Grekas", | |
"email": "[email protected]" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony polyfill for the Mbstring extension", | |
"homepage": "https://symfony.com", | |
"keywords": [ | |
"compatibility", | |
"mbstring", | |
"polyfill", | |
"portable", | |
"shim" | |
], | |
"time": "2016-11-14 01:06:16" | |
} | |
], | |
"packages-dev": [], | |
"aliases": [], | |
"minimum-stability": "stable", | |
"stability-flags": [], | |
"prefer-stable": false, | |
"prefer-lowest": false, | |
"platform": [], | |
"platform-dev": [] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
# Command-line page inspector.
#
# Fetches a single URL with Goutte and prints an array holding the page title,
# the length of the "description" and "keywords" meta tag contents, and the
# time the request took in milliseconds.
#
# Installation:
#   composer install
#
# Usage:
#   php crawler.php URL-to-process
#
# Example
#   php crawler.php https://www.freelancer.com/
#
# Exit status: 0 on success, 1 on bad usage or when the page cannot be fetched.

require "vendor/autoload.php";

# Exactly one argument (the URL) must follow the script name.
if (count($argv) != 2) {
    echo "Usage:\n php crawler.php URL-to-process\n\nExample:\n php crawler.php https://www.freelancer.com/\n\n";
    exit(1);
}

$url = $argv[1];
$client = new Goutte\Client;

try {
    # The measured interval covers the whole request() call.
    $start = microtime(true);
    $crawler = $client->request('GET', $url);
    $end = microtime(true);
} catch (GuzzleHttp\Exception\ConnectException $e) {
    print("Can't connect to " . $url . "\nDetailed error:\n");
    print $e->getMessage() . PHP_EOL;
    exit(1);
} catch (GuzzleHttp\Exception\TransferException $e) {
    # Any other Guzzle transfer failure: report it the same way instead of
    # letting it escape as an uncaught exception.
    print("Can't retrieve " . $url . "\nDetailed error:\n");
    print $e->getMessage() . PHP_EOL;
    exit(1);
}

# Defaults that are reported when the page lacks the corresponding element.
$result = [
    "title" => 'No title',
    "description" => 'No metatag description',
    "keywords" => 'No metatag keywords',
    "time" => sprintf("%d ms.", ($end - $start) * 1000),
];

$crawler->filter("head > meta")->each(function ($node) use (&$result) {
    # attr() yields null for a missing attribute (e.g. <meta charset="...">);
    # cast to string so strtolower()/strlen() never receive null.
    $name = strtolower((string) $node->attr('name'));
    if ($name === 'description' || $name === 'keywords') {
        # Stores the byte length of the content, not the content itself.
        # NOTE(review): presumably the length is the intended output — confirm.
        $result[$name] = strlen((string) $node->attr('content'));
    }
});

# If several <title> elements match, the last one wins.
$crawler->filter("head > title")->each(function ($node) use (&$result) {
    $result['title'] = $node->text();
});

print_r($result);
exit(0);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment