Created
January 24, 2013 17:30
-
-
Save robhammond/4625461 to your computer and use it in GitHub Desktop.
Analysis of keywords in different parts of URLs. You'll need to extract the URLs from Google's SERPs yourself and paste into the @urls array.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use Modern::Perl; | |
use Domain::PublicSuffix; | |
use URI; | |
# Keyword to look for in each URL.
my $keyword = 'broadband';

# URLs to classify, one per line and in ranking order. These were collected
# by hand from search-engine result pages; replace them with your own list.
# Only whitespace may separate entries: anything else inside qw() becomes a
# list element.
my @urls = qw(
    http://www.moneysupermarket.com/broadband/
    http://www.o2.co.uk/broadband
    http://www.o2.co.uk/
    http://www.sky.com/shop/broadband-talk/
    http://en.wikipedia.org/wiki/Broadband
    http://www.uswitch.com/broadband/
    http://www.virginmedia.com/
    http://www.thinkbroadband.com/
    http://www.broadbandgenie.co.uk/
    http://www.plus.net/home-broadband/broadband-only/
    http://www.plus.net/business/broadband/
    http://sales.talktalk.co.uk/
    http://www.broadbandspeedchecker.co.uk/
    http://www.moneysavingexpert.com/phones/cheap-broadband
    http://www.broadbandchoices.co.uk/
    http://phone-shop.tesco.com/broadband-and-home-phone/broadband/
    http://broadband.ee.co.uk/
    http://en.wikipedia.org/wiki/Internet_access
    http://www.samknows.com/
    http://www.bethere.co.uk/
    http://www.productsandservices.bt.com/products/broadband
    http://www.telegraph.co.uk/technology/broadband/
    http://www.cable.co.uk/
    http://www.cable.co.uk/compare/broadband/
    http://www.culture.gov.uk/what_we_do/telecommunications_and_online/7763.aspx
    http://store.virginmedia.com/broadband.html
    http://www.moneysupermarket.com/broadband/deals/
    http://www.zen.co.uk/broadband/ZenBroadband.aspx
    http://www.t-mobile.co.uk/shop/mobile-broadband/
    http://www.broadband.co.uk/
    http://www.broadbandworldforum.com/
    http://www.theregister.co.uk/2013/01/11/bt_broadband_ip_profile_throttled/
    http://www.fasthosts.co.uk/broadband/
    http://www.kent.gov.uk/news_and_events/make_kent_quicker.aspx
    http://www.ukbroadband.com/
    http://www.plus.net/
    http://www.guardian.co.uk/technology/broadband
    http://en.wikipedia.org/wiki/Mobile_broadband
    http://www.zen.co.uk/Broadband/athome.aspx
    http://mea.broadbandworldforum.com/
    http://www.broadbandworldforum.com/conference/cloud-summit/
    http://explore.ee.co.uk/fibre-broadband
    http://www.zen.co.uk/business/broadband/
    http://www.allpaybroadband.com/
    http://www.bedford.gov.uk/broadband
    http://www.pcpro.co.uk/broadband
    http://www.eastsussex.gov.uk/business/broadband/default.htm
    http://www.essex.gov.uk/Pages/Superfast-Essex-Broadband.aspx
    http://www.techweekeurope.co.uk/tag/broadband
    http://www.broadbandbananas.com/
    http://www.aolbroadband.co.uk/
    http://www.culture.gov.uk/what_we_do/telecommunications_and_online/8252.aspx
    http://www.culture.gov.uk/what_we_do/telecommunications_and_online/8661.aspx
    http://www.nottinghamshire.gov.uk/broadband/
    http://www.thinkbroadband.com/guide/beginners-guide-to-broadband.html
    http://wales.gov.uk/topics/businessandeconomy/broadbandandict/broadband/ngbw/%3Flang%3Den
    http://www.bbc.co.uk/news/technology-20710796
    https://www3.hants.gov.uk/broadband/broadband-signup.htm
    http://www.thescte.eu/index.php/publications/broadband-journal
    http://www.oxfordshire.gov.uk/broadband
    http://www.xlntelecom.co.uk/business-broadband/
    http://www.kent.gov.uk/community_and_living/community_grants_and_funding/broadband/vote_for_better_broadband.aspx
    http://rdpenetwork.defra.gov.uk/funding-sources/rural-community-broadband-fund
    http://www.westsussex.gov.uk/living/better_faster_broadband.aspx
    http://www.broadband-finder.co.uk/
    http://www.daisygroupplc.com/business-broadband/
    http://www.worcestershire.gov.uk/cms/superfast-broadband.aspx
    http://www.inmarsat.com/services/types/broadband
    http://www.hertsdirect.org/your-community/broadband/
    http://www3.hants.gov.uk/broadband.htm
    http://www.broadband-uk.coop/
    http://www.leics.gov.uk/broadband
    http://www.broadband-notspot.org.uk/
    http://www.broadbandworldforum.com/awards/about-the-awards/
    http://www.simplifydigital.co.uk/broadband/compare-broadband/
    http://help.ee.co.uk/system/web/custom/EE/getArticleDL.jsp%3Fuuid%3D70A6C395-972E-4BBD-A0B7-5C4B04FA28A8
    http://demon.net/
    http://www.o2.co.uk/broadband/mobile
    http://www.daisywholesale.com/data/broadband
    http://broadband.eastriding.gov.uk/
    http://www.bbc.co.uk/news/uk-wales-politics-21015413
    http://www.bbc.co.uk/news/technology-20413324
    http://www.broadbandworldforum.com/broadband-infovision-awards-winners/
    http://www.madasafish.com/broadband/
    http://www.gradwell.com/broadband/
    http://www.aa.net.uk/broadband.html
    http://webarchive.nationalarchives.gov.uk/20110709203937/discuss.bis.gov.uk/bduk/
    http://www.centralbedfordshire.gov.uk/local-business/business-information-and-advice/broadband.aspx
    http://www.samknows.com/broadband/broadband_checker
    http://www.hydro.co.uk/broadband/
    http://www.btplc.com/Innovation/Innovation/Coolbroadband/index.htm
    http://www.staffordshire.gov.uk/broadbandconsultation
    http://www.suffolk.gov.uk/broadband
    http://www.connectingcambridgeshire.co.uk/
    http://latam.broadbandtrafficevent.com/
    http://maps.ofcom.org.uk/broadband/
    http://www.culture.gov.uk/news/news_stories/7621.aspx
    http://www.culture.gov.uk/news/news_stories/8389.aspx
    http://www.cable.co.uk/compare/student-broadband/
    http://www.aolbroadband.co.uk/contact-us.html
);
# Report, for every URL in @urls, the first part of the URL that contains
# $keyword. Exactly one line is printed per URL, in input order:
#   "path"       - the keyword appears in the URL path
#   "sub-domain" - otherwise, it appears in the labels left of the root domain
#   "domain"     - otherwise, it appears in the root domain minus its suffix
#   "none"       - the keyword appears in none of the above

# The suffix lookup object is loop-invariant, so build it once rather than
# once per URL.
my $suffix_list = Domain::PublicSuffix->new();

# Match the keyword literally (\Q..\E) and without regard to case, so that
# e.g. "/wiki/Broadband" counts as a hit for 'broadband'.
my $keyword_re = qr/\Q$keyword\E/i;

foreach my $url (@urls) {
    my $uri  = URI->new($url);
    my $path = $uri->path();

    # Not every URI scheme has a host component; fall back to ''.
    my $host = ($uri->can('host') && $uri->host()) // '';
    $host = lc $host;       # host names are case-insensitive
    $host =~ s/\.\z//;      # drop the trailing dot of a fully-qualified name

    # Fallback used when the host cannot be split: treat the whole host as
    # the domain name so a keyword in it is still reported.
    my ($subdomain, $name) = ('', $host);

    my $root = $suffix_list->get_root_domain($host);
    if (defined $root) {
        my $tld = $suffix_list->suffix();

        # Sub-domain: whatever precedes the root domain in the host.
        # \Q..\E keeps the dots in $root from matching arbitrary characters.
        ($subdomain) = $host =~ m{\A(.*?)\Q$root\E\z};
        $subdomain //= '';

        # Domain name: the root domain without its public suffix.
        ($name = $root) =~ s{\.\Q$tld\E\z}{};
    } else {
        my $reason = $suffix_list->error() // 'unknown error';
        warn "Cannot determine root domain of '$host' ($url): $reason\n";
    }

    if ($path =~ $keyword_re) {
        say "path";
    } elsif ($subdomain =~ $keyword_re) {
        say "sub-domain";
    } elsif ($name =~ $keyword_re) {
        say "domain";
    } else {
        say "none";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment