Created
May 11, 2018 07:58
-
-
Save romainnorberg/b4176e2e90717faded9ffffbbfd0c861 to your computer and use it in GitHub Desktop.
PHP performance test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Single combined pattern: one capturing group containing every bot signature
// as an alternative, matched case-insensitively (`i`) in UTF-8 mode (`u`).
// The signatures are inserted unescaped, so a `.` (e.g. in "grub.org") matches
// any character rather than a literal dot.
// NOTE(review): "dotbot" and "Twitterbot" each appear twice in the alternation;
// the list appears to mirror $excludeBotsArray entry for entry — confirm before
// de-duplicating one without the other.
$excludeBotsRegex = '/(googlebot|Googlebot-Mobile|Googlebot-Image|Google favicon|Mediapartners-Google|bingbot|slurp|java|wget|curl|Commons-HttpClient|Python-urllib|libwww|httpunit|nutch|phpcrawl|msnbot|jyxobot|FAST-WebCrawler|FAST Enterprise Crawler|biglotron|teoma|convera|seekbot|gigablast|exabot|ngbot|ia_archiver|GingerCrawler|webmon |httrack|webcrawler|grub.org|UsineNouvelleCrawler|antibot|netresearchserver|speedy|fluffy|bibnum.bnf|findlink|msrbot|panscient|yacybot|AISearchBot|IOI|ips-agent|tagoobot|MJ12bot|dotbot|woriobot|yanga|buzzbot|mlbot|yandexbot|purebot|Linguee Bot|Voyager|CyberPatrol|voilabot|baiduspider|citeseerxbot|spbot|twengabot|postrank|turnitinbot|scribdbot|page2rss|sitebot|linkdex|Adidxbot|blekkobot|ezooms|dotbot|Mail.RU_Bot|discobot|heritrix|findthatfile|europarchive.org|NerdByNature.Bot|sistrix crawler|ahrefsbot|Aboundex|domaincrawler|wbsearchbot|summify|ccbot|edisterbot|seznambot|ec2linkfinder|gslfbot|aihitbot|intelium_bot|facebookexternalhit|yeti|RetrevoPageAnalyzer|lb-spider|sogou|lssbot|careerbot|wotbox|wocbot|ichiro|DuckDuckBot|lssrocketcrawler|drupact|webcompanycrawler|acoonbot|openindexspider|gnam gnam spider|web-archive-net.com.bot|backlinkcrawler|coccoc|integromedb|content crawler spider|toplistbot|seokicks-robot|it2media-domain-crawler|ip-web-crawler.com|siteexplorer.info|elisabot|proximic|changedetection|blexbot|arabot|WeSEE:Search|niki-bot|CrystalSemanticsBot|rogerbot|360Spider|psbot|InterfaxScanBot|Lipperhey SEO Service|CC Metadata Scaper|g00g1e.net|GrapeshotCrawler|urlappendbot|brainobot|fr-crawler|binlar|SimpleCrawler|Livelapbot|Twitterbot|cXensebot|smtbot|bnf.fr_bot|A6-Indexer|ADmantX|Facebot|Twitterbot|OrangeBot|memorybot|AdvBot|MegaIndex|SemanticScholarBot|ltx71|nerdybot|xovibot|BUbiNG|Qwantify|archive.org_bot|Applebot|TweetmemeBot|crawler4j|findxbot|SemrushBot|yoozBot|lipperhey|y!j-asr|Domain Re-Animator Bot|AddThis)/ui'; | |
// Bot signatures as individual strings; the array benchmark wraps each entry
// in '#...#ui' and runs one preg_match() per entry.
// Entries are used as raw pattern fragments (no escaping), so '.' acts as a
// regex wildcard; none of the entries contains the '#' delimiter.
// NOTE(review): 'dotbot' and 'Twitterbot' are each listed twice, and 'webmon '
// carries a trailing space — presumably intentional, verify before cleaning up.
$excludeBotsArray = [ | |
'googlebot', | |
'Googlebot-Mobile', | |
'Googlebot-Image', | |
'Google favicon', | |
'Mediapartners-Google', | |
'bingbot', | |
'slurp', | |
'java', | |
'wget', | |
'curl', | |
'Commons-HttpClient', | |
'Python-urllib', | |
'libwww', | |
'httpunit', | |
'nutch', | |
'phpcrawl', | |
'msnbot', | |
'jyxobot', | |
'FAST-WebCrawler', | |
'FAST Enterprise Crawler', | |
'biglotron', | |
'teoma', | |
'convera', | |
'seekbot', | |
'gigablast', | |
'exabot', | |
'ngbot', | |
'ia_archiver', | |
'GingerCrawler', | |
'webmon ', | |
'httrack', | |
'webcrawler', | |
'grub.org', | |
'UsineNouvelleCrawler', | |
'antibot', | |
'netresearchserver', | |
'speedy', | |
'fluffy', | |
'bibnum.bnf', | |
'findlink', | |
'msrbot', | |
'panscient', | |
'yacybot', | |
'AISearchBot', | |
'IOI', | |
'ips-agent', | |
'tagoobot', | |
'MJ12bot', | |
'dotbot', | |
'woriobot', | |
'yanga', | |
'buzzbot', | |
'mlbot', | |
'yandexbot', | |
'purebot', | |
'Linguee Bot', | |
'Voyager', | |
'CyberPatrol', | |
'voilabot', | |
'baiduspider', | |
'citeseerxbot', | |
'spbot', | |
'twengabot', | |
'postrank', | |
'turnitinbot', | |
'scribdbot', | |
'page2rss', | |
'sitebot', | |
'linkdex', | |
'Adidxbot', | |
'blekkobot', | |
'ezooms', | |
'dotbot', | |
'Mail.RU_Bot', | |
'discobot', | |
'heritrix', | |
'findthatfile', | |
'europarchive.org', | |
'NerdByNature.Bot', | |
'sistrix crawler', | |
'ahrefsbot', | |
'Aboundex', | |
'domaincrawler', | |
'wbsearchbot', | |
'summify', | |
'ccbot', | |
'edisterbot', | |
'seznambot', | |
'ec2linkfinder', | |
'gslfbot', | |
'aihitbot', | |
'intelium_bot', | |
'facebookexternalhit', | |
'yeti', | |
'RetrevoPageAnalyzer', | |
'lb-spider', | |
'sogou', | |
'lssbot', | |
'careerbot', | |
'wotbox', | |
'wocbot', | |
'ichiro', | |
'DuckDuckBot', | |
'lssrocketcrawler', | |
'drupact', | |
'webcompanycrawler', | |
'acoonbot', | |
'openindexspider', | |
'gnam gnam spider', | |
'web-archive-net.com.bot', | |
'backlinkcrawler', | |
'coccoc', | |
'integromedb', | |
'content crawler spider', | |
'toplistbot', | |
'seokicks-robot', | |
'it2media-domain-crawler', | |
'ip-web-crawler.com', | |
'siteexplorer.info', | |
'elisabot', | |
'proximic', | |
'changedetection', | |
'blexbot', | |
'arabot', | |
'WeSEE:Search', | |
'niki-bot', | |
'CrystalSemanticsBot', | |
'rogerbot', | |
'360Spider', | |
'psbot', | |
'InterfaxScanBot', | |
'Lipperhey SEO Service', | |
'CC Metadata Scaper', | |
'g00g1e.net', | |
'GrapeshotCrawler', | |
'urlappendbot', | |
'brainobot', | |
'fr-crawler', | |
'binlar', | |
'SimpleCrawler', | |
'Livelapbot', | |
'Twitterbot', | |
'cXensebot', | |
'smtbot', | |
'bnf.fr_bot', | |
'A6-Indexer', | |
'ADmantX', | |
'Facebot', | |
'Twitterbot', | |
'OrangeBot', | |
'memorybot', | |
'AdvBot', | |
'MegaIndex', | |
'SemanticScholarBot', | |
'ltx71', | |
'nerdybot', | |
'xovibot', | |
'BUbiNG', | |
'Qwantify', | |
'archive.org_bot', | |
'Applebot', | |
'TweetmemeBot', | |
'crawler4j', | |
'findxbot', | |
'SemrushBot', | |
'yoozBot', | |
'lipperhey', | |
'y!j-asr', | |
'Domain Re-Animator Bot', | |
'AddThis', | |
]; | |
// Subject string for both benchmarks: a desktop Chrome 66 on macOS user agent.
// NOTE(review): it does not appear to contain any of the bot signatures, so
// each benchmark would have to try every alternative — confirm if the list changes.
$user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'; | |
/**
 * Difference, in whole milliseconds, between two resource-usage snapshots.
 *
 * Each snapshot is an array keyed like "ru_<index>.tv_sec" and
 * "ru_<index>.tv_usec" (the shape the script obtains from getrusage()).
 * The microsecond part of each snapshot is truncated to milliseconds
 * before the subtraction.
 *
 * @param array  $ru    Later snapshot.
 * @param array  $rus   Earlier snapshot.
 * @param string $index Counter name inserted into the keys, e.g. "utime" or "stime".
 *
 * @return int Later-minus-earlier time in milliseconds.
 */
function rutime($ru, $rus, $index)
{
    $secondsKey = "ru_{$index}.tv_sec";
    $microsKey  = "ru_{$index}.tv_usec";

    $laterMs   = $ru[$secondsKey] * 1000 + intval($ru[$microsKey] / 1000);
    $earlierMs = $rus[$secondsKey] * 1000 + intval($rus[$microsKey] / 1000);

    return $laterMs - $earlierMs;
}
// regex
// Benchmark 1: run the combined alternation pattern against the user agent
// 100000 times and report the CPU time consumed between the two snapshots.
echo "------------------------------\n",
    "REGEX ------------------------\n",
    "------------------------------\n";

$rustart = getrusage();
$i = 0;
while ($i < 100000) {
    // Only the cost of the call is of interest; the match result is unused.
    preg_match($excludeBotsRegex, $user_agent, $matches);
    $i++;
}
$ru = getrusage();

echo "This process used ", rutime($ru, $rustart, "utime"), " ms for its computations\n";
echo "It spent ", rutime($ru, $rustart, "stime"), " ms in system calls\n";
// array
// Benchmark 2: test the user agent against each signature in
// $excludeBotsArray separately, 100000 times, and report the CPU time
// consumed between the two snapshots.
echo "------------------------------\n";
echo "ARRAY ------------------------\n";
echo "------------------------------\n";
$rustart = getrusage();
for ($i = 0; $i < 100000; $i++) {
    // The loop variable must not be called $user_agent: that would overwrite
    // the subject string and make every signature match against itself.
    foreach ($excludeBotsArray as $botSignature) {
        // Signatures are used as raw pattern fragments, exactly as they are in
        // the combined regex, so both benchmarks evaluate the same expressions.
        // (Use preg_quote($botSignature, '#') if the list is ever untrusted.)
        if (preg_match('#' . $botSignature . '#ui', $user_agent, $matches)) {
            // Stop at the first hit, like the alternation in the single regex.
            break;
        }
    }
}
$ru = getrusage();
echo "This process used " . rutime($ru, $rustart, "utime") .
    " ms for its computations\n";
echo "It spent " . rutime($ru, $rustart, "stime") .
    " ms in system calls\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment