Last active
July 7, 2022 01:33
-
-
Save Alex3917/fec45e5c2114c2c13b41b30e19887542 to your computer and use it in GitHub Desktop.
Get the PageSpeed Insights data for every Angular site on madewithangular.com, and every Next.js site from the nextjs.org/showcase
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pprint import pprint | |
import csv | |
import os | |
import urllib3 | |
import re | |
import requests | |
import bs4 | |
from bs4 import BeautifulSoup | |
# Google PageSpeed Insights API key. See https://pagespeed.web.dev/ for
# instructions on getting one.
# The key is read from the PAGESPEED_INSIGHTS_KEY environment variable so it
# does not have to be written into this file; when the variable is unset the
# value is the empty string.
PAGESPEED_INSIGHTS_KEY = os.environ.get('PAGESPEED_INSIGHTS_KEY', '')
# Every website from Made With Angular (https://www.madewithangular.com/): | |
# ostensibly_angular_sites = ["https://jsonschema.net/","https://www.24ur.com/","https://voice.google.com/","https://transparencyreport.google.com/","https://productexperts.withgoogle.com/","https://opensource.google/","https://messages.google.com/","https://staykeen.com/","https://issuetracker.google.com.","https://firebase.google.com/","https://firebase.google.com/products/crashlytics","console.cloud.google.com","https://play.google.com/books","https://artsandculture.google.com/","https://educationonair.withgoogle.com/","https://www.samsung-forward.ru/","https://traduora.com","https://sticker.plus/","https://akmirge.github.io/","https://www.myodds.bet/","https://about.google/","https://www.delta.com/","https://codverter.com/src/index","https://www.ptttrade.com/dashboard","https://www.office.com/apps?auth=2","https://www.infinijith.com/","https://brandstory.in/","https://www.aviastore.in/","https://clarity.design/","https://www.santander.com.br/","https://www.primefaces.org/primeng/#/","https://ngxlite.com/","https://www.forbes.com/sites/karstenstrauss/2019/01/22/the-most-sustainable-companies-in-2019","https://ng.ant.design/docs/introduce/zh","https://findadealer.bmw.com.au/#/dlo","https://driveawaypricing.bmw.com.au/#/price","https://www.swiftviews.com/home","https://immi.homeaffairs.gov.au/","https://www.zagat.com/","https://www.luis.ai/home","https://summerofcode.withgoogle.com/","https://marketingplatform.google.com/","https://learndigital.withgoogle.com/digitalgarage","https://families.google.com/familylink","https://marketfinder.thinkwithgoogle.com/intl/en_us/","https://taskify.aveek.io/","https://shop.safeway.com/welcome.html","https://domains.google","https://video.blender.org/videos/trending?sort=-trending&page=1","https://galaxy.ansible.com/","https://account.xbox.com","https://nsmgr8.github.io/git-stats-demo/#/","https://rippertshirts.co.uk/","http://quizbangla.com/","https://www.propertyok.com/","https://primablock.com/","https://lakeshore-rv.com/","ht
tps://kilid.com/","https://jochefina.com/","https://www.idiliz.com/","https://www.findrvparks.com/","https://eltocadordecarlota.com/","https://www.eisedo.com/","https://coursables.com/","https://www.biletkolik.com/","https://2muchcoffee.com/","https://source.cloud.google.com/","https://cloud.google.com/blog/","https://www.expobeds.com","https://www.adaptario.com/","https://www.cpjandmatcomps.tech/","http://www.salonhome.com/#/","https://www.infinitefleet.com/","https://vanna.com/?hl=en","http://parrot-translate.github.io/parrot.github.io/","https://bountechsummit.com/","https://wellbeing.google/","https://rupsinthekitchen.com/#/","http://www.curlify.net/","https://dreyhub.com/","https://singler.biz/deals","http://dictionary.apa.org/","https://www.angular-universal-pwa.maciejtreder.com/","https://www.jachta-chorvatsko.cz","https://www.eat24.com","https://cloud-player.io","https://www.cvs.com/shop","https://ppsr.cloud/","http://www.onefootball.com/en/home","http://ulysseonthecar.altervista.org","https://www.nextontop.com/","http://peinture-passion-emotion.fr/home","https://www.friluftsland.dk/","https://startup.google.com/","https://customers.microsoft.com","https://express.google.com","https://eregistration.elections.on.ca/en/home","https://www.pixm.io/","https://ngx-restangular.com/","https://windycity.devfest.io/home","https://www.omegagi.com/","https://www.halodoc.com","https://code.google.com/codejam/kickstart/","https://teradata.github.io/covalent/#/","http://www.telewebion.com/","http://www.stagecoachfestival.com/","https://www.citytv.com/toronto/","https://online.citi.com/US/ag/contactus","http://beatsloop.com/","https://www.barnesandnoble.com/w/the-maze-runner-series-complete-collection-james-dashner/1118636173#/","https://findadentist.ada.org/","https://chillisauce.com","https://www.zingzing.co.uk/","https://www.yoigo.com/","https://www.winningappliances.com.au/","https://www.winc.com/","https://wildcase.com/","https://m.whitewall.com/","https://www.usefulde
v.com/","https://ttp.cbp.dhs.gov/","http://www.triathlondetoulouse.com/","https://www.toyotacertified.com/","https://tirelibrary.com/","https://blog.google/","https://www.tescomobile.com/","https://www.swenews.info/","http://streamhive.com/","https://www.stanstedexpress.com/","http://slyng.surge.sh/","https://skyeng.tv/","https://chromebusinessdevices.withgoogle.com/","http://m.sears.com/","https://scuola365.com","https://thescope.com/","https://saywhathearing.com/","https://www.satsumaloans.co.uk/","https://rooms.ryanair.com/","https://runrepeat.com/","https://www.rssc.com/","https://www.rappi.com/","http://player.radio.com/","https://fi.google.com/about/","http://powermylearning.org/","https://www.powerspins.com/","https://www.playyourdamnturn.com/","https://pcwa.net/","https://outdooraccess.com/","https://nickys.se/","https://www.nflgamepass.com/","https://docs.nestjs.com/","https://mysterytacklebox.com/","https://www.myjar.com/","https://motor.at/","https://mixer.com/","https://us.memebox.com/","https://www.megaplextheatres.com/","https://www.mcourser.com/","https://www.malabi.co/","http://makeithappenworld.com/","https://magazyny.pl/","https://machinelabs.ai/","https://likebb.com/","https://www.legfi.com/","https://app.kparser.com/","https://www.juniper.net/us/en/","https://www.jpenterprises.com/","https://jackspath.nau.edu/","https://identillect.com/","https://www.hunterdouglas.com/","https://howmuch.net/","https://library.fresnostate.edu/","https://guardian.theater/","https://www.google.org/","https://duo.google.com/","http://diversity.google/","https://www.google.com/cloud/","https://analytics.google.com/analytics/academy/","https://globalweather.tamu.edu/","https://www.flocheer.com/","https://www.flobikes.com/","https://fireface.social/","https://www.erosads.com/","https://www.eponymous.co/","https://seller.dronestock.com/","https://dockstore.org/","https://www.devintent.com/","https://devfest.mn/","https://www.csgolive.com/","https://csgoroll.com/en/","htt
ps://www.cosmunity.com/","https://cloudify.cc/","http://www.ckoi.com/","http://go.cps.edu/","http://www.chasecenter.com/","https://www.carguruclub.com/","https://thebuildcard.com/","https://www.bluekangaroo.com/","https://blox.src.zone/","https://blispay.com/","https://bitmark.com/","https://www.bgo.com/","https://www.avery.com/","http://animationsftw.in/","https://www.amazing.com/","http://www.airvuz.com/","https://www.airasia.com/","https://aiesec.org/","https://actions.cloud/","https://zoeskitchen.com/","https://yatrum.com/","https://www.wunderground.com/","https://waymo.com/","http://vmware.github.io/","https://in.udacity.com/","https://tvcmarketing.com/","http://www.pizzapedalr.com/","https://www.teamsky.com/","https://smackmag.com/","https://www.shopstyle.com/","https://www.sharp.com/san-diego-doctors/search","https://www.shakr.com/","https://savelist.co/","https://www.sapientconsulting.com/","https://secure.royalcaribbean.com/cruises/","http://www.rockstargames.com/bully/","https://redditgrid.com/","https://www.quidco.com/","http://puritii.com/","https://passport2017.ca/","http://ng-boosted.orange.com/","https://onespeed.io/","https://www.octadesk.com/","https://www.obbod.com/cs","https://nrwl.io/","https://health.nokia.com/us/en/","https://nkdpizza.com/","https://play.nativescript.org/","https://market.nativescript.org/","https://www.megaplextheatres.com/","https://ladyleet.com/","https://nit.kaplan.com/","https://iahsp.com/","https://shop.googlemerchandisestore.com/","http://www.google.com/maps/about/behind-the-scenes/streetview/treks/petra/#streetview","https://store.google.com/","https://newsstand.google.com/","https://www.gdeapp.com/","https://adwords.google.com/home/","https://www.goarmyedge.com/","https://www.gentlehandsmassagetherapy.com/","https://www.freelancer.com/","https://foursource.com/","https://www.flogymnastics.com/","https://www.fantrax.com/","http://expium.com/","https://www.ensemble.com","https://launches.endclothing.com/","https://www.em
pireblue.com/","https://devfestkc.com/","https://order.dennys.com/locations","http://www.crunchbase.com/","https://www.cashify.in/","https://www.getcanal.com/","http://www.cafepress.com/","https://www.bose.com/en_us/store_locator.html","https://bkstg.com/","https://bitpay.com/","https://www.bitecatering.net/","http://www.angular-artist.com/#/home","https://workify.io/","https://www.fresh-trim.com/","https://showcase-it.firebaseapp.com/","https://map.sagegrouseinitiative.com/ecosystem/mesic-resources?ll=43.4799,-110.7624","https://www.fxdomains.com/","https://grow.google/","https://donuts.withgoogle.com/","https://madeby.google.com/intl/en_us/askmore/","https://uk.megabus.com/journey-planner/journeys","http://inceptionnotes.com/","https://gradient.google/","https://www.cosmunity.com/","https://www.biznstool.com","https://ai.google","https://chatbase.com/","https://www.aat.org.uk/","https://hire.withgoogle.com","https://material.io/color/#/usr/libexec/java_home","https://diebuchrezension.de/#/","https://www.koppy.io","https://www.playsimilar.com/us","https://codigorefinado.github.io/giphy-search/","https://gsuite.google.com/","http://echoesplayer.com/#/","https://www.urlaubszeit.de/","https://angular2-instagram.firebaseapp.com/","https://assistant.google.com/","https://environment.google/","https://msdb.lapli.fr/#/home","http://www.techprimelab.com/","https://allo.google.com/","https://madeby.google.com/","https://beam.pro/","https://fonts.google.com/","https://blispay.com/","https://beam.pro/","https://splice.com/","https://home.google.com/","https://www.gybo.com/","https://landing.google.com/onedayiwill/","https://landing.google.com/sre/","https://landing.google.com/ads/try/","https://musiclab.chromeexperiments.com/Experiments","https://onhubmakers.withgoogle.com/","https://newslab.withgoogle.com/","https://frightgeist.withgoogle.com/","https://editionsatplay.withgoogle.com/#/","https://www.seamless.com/","https://www.grubhub.com/","https://www.doubleclickbygoogle.c
om/","https://digitalgarage.withgoogle.com/","http://research.google.com/","https://www.portablenorthpole.com/en/","http://formspree-generator.navjinder.com/","http://www.jetblue.com/","https://onetoday.google.com/","https://transformationgallery.withgoogle.com/","https://get.google.com/tips/","https://www.google.com/retail/","https://www.google.com/cast/","https://www.google.com/trends/","http://www.google.com/analytics/","http://learn.googleapps.com/","https://cloud-playground.appspot.com/playground/","https://www.google.com/edu/","https://cloud.google.com/","https://devart.withgoogle.com/","https://www.google.com/intx/en/work/apps/business/index.html","http://www.google.com/intl/en/admob/","https://chinesenewyear.withgoogle.com","https://www.google.com/partners","https://eduproducts.withgoogle.com/","https://atmosphere.withgoogle.com","https://cube.withgoogle.com","http://ics-web.jp/projects/particle-develop/","http://www.nba.com/"] | |
# The subset of the above list, containing only sites that 1) are still on the web 2) actually use Angular, as verified with the Angular DevTools Chrome extension | |
angular_sites = ["https://codigorefinado.github.io/giphy-search/","https://nsmgr8.github.io/git-stats-demo/#/","https://rupsinthekitchen.com/#/","https://diebuchrezension.de/#/","https://www.primefaces.org/primeng/#/","https://teradata.github.io/covalent/#/","https://workify.io/","http://www.salonhome.com/#/","https://akmirge.github.io/","https://msdb.lapli.fr/#/home","https://fonts.google.com/","https://blox.src.zone/","https://www.myjar.com/","https://brandstory.in/","https://www.jachta-chorvatsko.cz","http://www.onefootball.com/en/home","https://www.nflgamepass.com/","https://www.fxdomains.com/","https://transparencyreport.google.com/","http://www.triathlondetoulouse.com/","http://ng-boosted.orange.com/","https://www.halodoc.com","https://play.google.com/books","https://ng.ant.design/docs/introduce/zh","https://angular2-instagram.firebaseapp.com/","http://puritii.com/","https://source.cloud.google.com/","http://inceptionnotes.com/","https://chillisauce.com","https://www.biznstool.com","http://slyng.surge.sh/","https://ngxlite.com/","https://www.freelancer.com/","https://findadentist.ada.org/","https://ngx-restangular.com/","https://taskify.aveek.io/","https://www.nextontop.com/","https://cloud.google.com/blog/","https://savelist.co/","https://productexperts.withgoogle.com/","https://devfest.mn/","https://www.zingzing.co.uk/","https://video.blender.org/videos/trending?sort=-trending&page=1","https://www.gentlehandsmassagetherapy.com/","https://issuetracker.google.com","http://www.airvuz.com/","https://rooms.ryanair.com/","https://iahsp.com/","https://nrwl.io/","https://2muchcoffee.com/","https://dockstore.org/","https://cloud-player.io","https://sticker.plus/","https://www.shopstyle.com/","https://ppsr.cloud/","https://www.usefuldev.com/","https://digitalgarage.withgoogle.com/","https://www.devintent.com/","https://m.whitewall.com/","https://www.hunterdouglas.com/","https://console.cloud.google.com","https://www.24ur.com/","https://galaxy.ansible.com/","https://ww
w.fantrax.com/","https://guardian.theater/","https://showcase-it.firebaseapp.com/","https://material.io/color/#/usr/libexec/java_home","https://staykeen.com/","https://docs.nestjs.com/","https://motor.at/","https://zoeskitchen.com/","https://www.goarmyedge.com/","http://ics-web.jp/projects/particle-develop/","https://redditgrid.com/","https://coursables.com/","https://www.seamless.com/","https://summerofcode.withgoogle.com/","http://peinture-passion-emotion.fr/home","http://quizbangla.com/","http://animationsftw.in/","https://eregistration.elections.on.ca/en/home","https://www.infinitefleet.com/","https://cloudify.cc/","https://uk.megabus.com/journey-planner/journeys","https://www.erosads.com/","http://dictionary.apa.org/","https://nit.kaplan.com/","https://splice.com/","https://educationonair.withgoogle.com/","https://vanna.com/?hl=en","https://www.playyourdamnturn.com/","https://www.infinijith.com/","https://www.yoigo.com/","http://go.cps.edu/","http://www.crunchbase.com/","https://atmosphere.withgoogle.com","https://www.santander.com.br/","https://learndigital.withgoogle.com/digitalgarage","https://primablock.com/","https://kilid.com/","https://www.avery.com/","https://map.sagegrouseinitiative.com/ecosystem/mesic-resources?ll=43.4799,-110.7624","https://www.grubhub.com/","https://www.expobeds.com","https://identillect.com/","https://rippertshirts.co.uk/","https://tirelibrary.com/","http://expium.com/","https://www.winningappliances.com.au/","https://jsonschema.net/","https://marketfinder.thinkwithgoogle.com/intl/en_us/","https://ttp.cbp.dhs.gov/","https://www.juniper.net/us/en","https://www.bitecatering.net/","https://howmuch.net/","https://shop.safeway.com/welcome.html","https://app.kparser.com/","https://www.flocheer.com/","https://www.flobikes.com/","http://www.ckoi.com/","http://www.chasecenter.com/","https://www.wunderground.com/","https://www.flogymnastics.com/","https://www.bose.com/en_us/store_locator.html","https://www.legfi.com/","https://csgoroll.com/e
n/","https://order.dennys.com/locations","https://jackspath.nau.edu/","https://www.csgolive.com/","http://www.jetblue.com/","http://www.telewebion.com/","https://outdooraccess.com/","https://www.megaplextheatres.com/","https://www.idiliz.com/","https://online.citi.com/US/ag/contactus","https://www.megaplextheatres.com/","https://www.mcourser.com/","https://scuola365.com","https://customers.microsoft.com","https://secure.royalcaribbean.com/cruises/","http://www.rockstargames.com/bully/","https://www.samsung-forward.ru/","https://www.myodds.bet/","https://www.delta.com/","https://codverter.com/src/index","https://www.sharp.com/san-diego-doctors/search"] | |
# Every website from the Next.js showcase (https://nextjs.org/showcase): | |
# ostensibly_nextjs_sites = ["https://m.tiktok.com","https://m.twitch.tv","https://jobs.netflix.com","https://copilot.github.com/","https://www.hulu.com","https://www.nike.com/help","https://www.realtor.com","https://www.att.com","https://xw.qq.com","https://www.ign.com","https://www.ticketmaster.com","https://compete.playstation.com","https://www.doordash.com","https://www.audible.com/about","https://www.typeform.com","https://www.hilton.com/en/hilton","https://m.staples.com","https://www.binance.com","https://auth0.com","https://www.hostgator.com/help","https://www.trip.com/travel-guide","https://theculturetrip.com","https://success.docker.com","https://www.invisionapp.com","https://www.lego.com/en-us/kids","https://truecar.com","https://www.elastic.co","https://www.leafly.com","https://www.jwplayer.com","https://worldpopulationreview.com","https://repl.it","https://www.marvel.com","https://www.futurism.com","https://nubank.com.br","https://weedmaps.com","https://deliveroo.co.uk","https://material-ui.com","https://square-enix-games.com","https://friday.kodansha.co.jp","https://www.realself.com/","https://jet.com","https://expo.io","https://plot.ly","https://pusher.com","https://sumup.com","https://hotels.eurostar.com/uk-en/paris","https://www.ferrari.com","https://www.supervielle.com.ar","https://www.eaze.com","https://www.ftd.com","https://gartic.io","https://www.a24.com","https://vercel.com/","https://hashnode.com","https://stv.tv","https://store.framer.com","https://opencollective.com","https://avocode.com","https://www.ohtuleht.ee","https://www.movietickets.com","https://www.bang-olufsen.com","https://www.tvpublica.com.ar","https://order.shakeshack.com","https://www.styled-components.com","https://www.mprnews.org","https://www.mesalva.com","https://www.suburbia.com.mx","https://www.piesync.com","https://www.lightningdesignsystem.com","https://www.hollar.com","https://www.giveindia.org","https://magicleap.com","https://www.designbetter.co","https://dice.fm","ht
tps://scale.com","https://www.iota.org","https://www.vogue.de/fashion-shows","https://www.thehhub.com","https://blendle.com","https://www.aenetworks.com","https://bitscreener.com","https://hyper.is","https://carbon.now.sh","https://onuniverse.com","https://sanity.io","https://www.eltonjohn.com","https://new.artsmia.org","https://www.stargatecommand.co","https://www.institchu.com","https://www.starbucksreserve.com","https://vergecurrency.com","https://www.colorbox.io","https://yicaiglobal.com","https://fontba.se","https://www.ticketswap.uk","https://nilefm.com","https://syntax.fm","https://midrive.com","https://www.idean.com","https://inflect.com","https://allvoices.co","https://archbee.io","https://nteract.io","https://www.givecrypto.org","https://underbelly.is","https://satoshis.place","https://www.heramerica.com"] | |
# The subset of the above list, containing only sites that 1) are still on the web 2) actually use Next.js, as verified with the React Developer Tools Chrome extension. (Presumably few if any sites switched from Next.js to Create React App, or some other React framework.) | |
nextjs_sites = ["https://m.tiktok.com","https://fontba.se","https://hyper.is","https://www.colorbox.io","https://vergecurrency.com","https://xw.qq.com","https://opencollective.com","https://inflect.com","https://www.lightningdesignsystem.com","https://pusher.com","https://vercel.com/","https://avocode.com","https://dice.fm","https://syntax.fm","https://www.thehhub.com","https://m.twitch.tv","https://nteract.io","https://www.invisionapp.com","https://www.typeform.com","https://www.styled-components.com","https://nubank.com.br","https://www.mprnews.org","https://sanity.io","https://www.givecrypto.org","https://www.realself.com/","https://new.artsmia.org","https://material-ui.com","https://deliveroo.co.uk","https://www.starbucksreserve.com","https://hashnode.com","https://bitscreener.com","https://www.realtor.com","https://repl.it","https://expo.io","https://copilot.github.com/","https://midrive.com","https://satoshis.place","https://magicleap.com","https://www.doordash.com","https://www.audible.com/about","https://truecar.com","https://weedmaps.com","https://www.ticketswap.uk","https://www.idean.com","https://carbon.now.sh","https://sumup.com","https://hotels.eurostar.com/uk-en/paris","https://theculturetrip.com","https://www.lego.com/en-us/kids","https://www.leafly.com","https://stv.tv","https://scale.com","https://www.aenetworks.com","https://gartic.io","https://jet.com","https://www.institchu.com","https://www.movietickets.com","https://www.ftd.com","https://www.binance.com","https://www.giveindia.org","https://plot.ly","https://www.tvpublica.com.ar","https://auth0.com","https://square-enix-games.com","https://onuniverse.com","https://www.eltonjohn.com","https://www.ticketmaster.com","https://www.futurism.com","https://jobs.netflix.com","https://www.hulu.com","https://www.ign.com","https://www.att.com","https://friday.kodansha.co.jp","https://www.eaze.com","https://store.framer.com","https://www.elastic.co","https://www.mesalva.com","https://nilefm.com","https://un
derbelly.is","https://www.marvel.com","https://www.hostgator.com/help","https://www.trip.com/travel-guide","https://www.ferrari.com","https://www.ohtuleht.ee","https://order.shakeshack.com","https://www.supervielle.com.ar","https://www.designbetter.co","https://www.vogue.de/fashion-shows","https://www.hilton.com/en/hilton","https://www.bang-olufsen.com","https://www.nike.com/help","https://www.suburbia.com.mx","https://compete.playstation.com"] | |
# CSV column names, in output order. This list is handed to csv.DictWriter
# as its fieldnames when the result files are written.
FIELD_NAMES = [
    'url',
    'performance_score',
    'first_contentful_paint',
    'speed_index',
    'largest_contentful_paint',
    'time_to_interactive',
    'total_blocking_time',
    'cumulative_layout_shift',
]
# Silence urllib3 warnings for the whole run. _detect_ssr() fetches pages
# with verify=False, which presumably would otherwise emit a warning on
# every request.
urllib3.disable_warnings()
def _get_pagespeed_insights_data(i, site_to_fetch):
    """Fetch the mobile PageSpeed Insights report for one site.

    Args:
        i: Position of the site in the list being processed. Only used for
            the progress line printed to stdout.
        site_to_fetch: URL of the page to analyse.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        RuntimeError: If the API answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    print(i, site_to_fetch)

    # Let requests build the query string so the target URL gets
    # percent-encoded. Interpolating it raw meant that any '#', '?' or '&'
    # inside site_to_fetch (e.g. "https://example.com/#/") truncated or
    # corrupted the parameters that followed it.
    params = {'url': site_to_fetch, 'strategy': 'mobile'}
    if PAGESPEED_INSIGHTS_KEY:
        # An unset (empty) key is left out instead of being sent as "key=".
        params['key'] = PAGESPEED_INSIGHTS_KEY

    r = requests.get(
        "https://www.googleapis.com/pagespeedonline/v5/runPagespeed",
        params=params,
        # Generous, because the API runs a full page audit before it
        # answers, but finite so one stuck request cannot hang the crawl.
        timeout=180,
    )
    if r.status_code != 200:
        # A bare `raise` with no active exception only produced an
        # uninformative RuntimeError; keep the type, add the context.
        raise RuntimeError(
            f"PageSpeed Insights returned HTTP {r.status_code} for {site_to_fetch}"
        )
    return r.json()
def _parse_pagespeed_insights_data(site_url, pagespeed_insights_api_resp):
    """Flatten a PageSpeed Insights API response into a single CSV row.

    Args:
        site_url: URL the report belongs to; copied into the 'url' column.
        pagespeed_insights_api_resp: Decoded JSON response. It is read at
            ['lighthouseResult']['categories']['performance']['score'] and
            at ['lighthouseResult']['audits'][<audit id>]['displayValue'].

    Returns:
        A dict with the row's eight columns. The performance score is
        multiplied by 100 and rounded. 'total_blocking_time' is an int
        (unit suffix and thousands separators removed). The remaining
        metrics are the audits' display strings with the trailing
        non-breaking space and unit letter stripped.

    Raises:
        KeyError: If an expected key is missing from the response.
    """
    lighthouse = pagespeed_insights_api_resp['lighthouseResult']
    score = round(lighthouse['categories']['performance']['score'] * 100)
    audits = lighthouse['audits']

    def display_value(audit_id, unit_chars):
        # str.strip() removes any of the given characters from both ends.
        return audits[audit_id]['displayValue'].strip(unit_chars)

    return {
        'url': site_url,
        'performance_score': score,
        'first_contentful_paint': display_value('first-contentful-paint', '\xa0s'),
        'speed_index': display_value('speed-index', '\xa0s'),
        'largest_contentful_paint': display_value('largest-contentful-paint', '\xa0s'),
        'time_to_interactive': display_value('interactive', '\xa0s'),
        'total_blocking_time': int(
            display_value('total-blocking-time', '\xa0ms').replace(',', '')
        ),
        'cumulative_layout_shift': display_value('cumulative-layout-shift', '\xa0s'),
    }
def _write_to_csv(csv_file, csv_dict_list):
    """Append result rows to a CSV file.

    The header row is written only when the file did not exist before this
    call, so repeated runs keep extending the same file.

    Args:
        csv_file: Path of the CSV file to append to.
        csv_dict_list: Row dicts whose keys are the FIELD_NAMES columns.
    """
    # Must be decided before open(): mode 'a' creates the file.
    needs_header = not os.path.exists(csv_file)
    with open(csv_file, 'a', newline='') as out:
        writer = csv.DictWriter(out, fieldnames=FIELD_NAMES, delimiter=',')
        if needs_header:
            writer.writeheader()
        writer.writerows(csv_dict_list)
def _detect_ssr():
    """Print which of ``angular_sites`` look server-side rendered.

    Each site's raw HTML is fetched and searched for an element carrying an
    ``ng-version`` attribute that already has child nodes. The URL of every
    matching site is printed as it is found; for a site that was fetched but
    did not match, its list index is printed instead. Sites that cannot be
    fetched are skipped silently. Finally a ``***`` separator and the full
    list of matching URLs are printed.
    """
    ssr_sites = []
    for i, url in enumerate(angular_sites):
        try:
            r = requests.get(url, verify=False, timeout=10)
        except Exception:
            # Deliberately best-effort: an unreachable site is just skipped.
            continue
        soup = BeautifulSoup(r.text, "lxml")
        # attrs value True matches any tag that has the attribute at all.
        for elem in soup.find_all(attrs={"ng-version": True}):
            # `elem.children` is an iterator and therefore always truthy;
            # `elem.contents` is the actual list of child nodes.
            if elem.contents:
                ssr_sites.append(url)
                print(url)
                # One match is enough. Stopping here keeps the URL from
                # being recorded once per matching element and lets the
                # `else` branch below fire only for non-matching sites.
                break
        else:
            print(i)
    print("***")
    print(ssr_sites)
def _collect_site_data(sites):
    """Fetch and parse PageSpeed Insights data for every URL in ``sites``.

    Args:
        sites: Iterable of site URLs.

    Returns:
        A ``(rows, failed)`` tuple: ``rows`` holds one parsed result dict
        per site that succeeded, ``failed`` the URLs whose fetch or parse
        raised.
    """
    rows = []
    failed = []
    for i, site in enumerate(sites):
        try:
            api_resp = _get_pagespeed_insights_data(i, site)
            rows.append(_parse_pagespeed_insights_data(site, api_resp))
        except Exception as exc:
            # Best-effort crawl: one bad site must not abort the run, but
            # say why it failed instead of swallowing the error.
            print(f"  failed: {exc!r}")
            failed.append(site)
    return rows, failed


if __name__ == "__main__":
    # The Next.js loop used to append the site URL instead of the parsed
    # result dict, which made the CSV writer fail on the first row. Both
    # site lists now go through the same helper.
    nextjs_site_data_dict_list, nextjs_sites_with_errors = _collect_site_data(nextjs_sites)
    angular_site_data_dict_list, angular_sites_with_errors = _collect_site_data(angular_sites)

    _write_to_csv('./nextjs_sites.csv', nextjs_site_data_dict_list)
    _write_to_csv('./angular_sites.csv', angular_site_data_dict_list)

    print("Nextjs sites with errors: ", nextjs_sites_with_errors)
    print("Angular sites with errors: ", angular_sites_with_errors)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment