Last active
April 5, 2024 08:12
-
-
Save teryror/c945d0b5b89e4235368988e5ad2ebc63 to your computer and use it in GitHub Desktop.
Scrape links off TV Tropes to find common tropes in a set of shows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use scraper::{Html, Selector}; | |
use std::collections::HashMap; | |
fn main() { | |
let source_sites: &[&[&str]] = &[ | |
&[ | |
"https://tvtropes.org/pmwiki/pmwiki.php/BreakingBad/TropesAToB", | |
"https://tvtropes.org/pmwiki/pmwiki.php/BreakingBad/TropesCToD", | |
"https://tvtropes.org/pmwiki/pmwiki.php/BreakingBad/TropesEToL", | |
"https://tvtropes.org/pmwiki/pmwiki.php/BreakingBad/TropesMToR", | |
"https://tvtropes.org/pmwiki/pmwiki.php/BreakingBad/TropesSToZ", | |
], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Manga/DeathNote"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Series/HouseOfCardsUS"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Series/Impulse2018"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Film/InsideMan"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Series/IZombie"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Series/LaCasaDePapel"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Series/OrphanBlack"], | |
&["https://tvtropes.org/pmwiki/pmwiki.php/Literature/TheExpanse"], | |
]; | |
let mut occurs = HashMap::new(); | |
for (show_index, &show) in source_sites.iter().enumerate() { | |
for &page in show { | |
let response = reqwest::blocking::get(page).expect("Failed to get page content."); | |
let response_text = response.text().expect("Response doesn't have text."); | |
let html_document = Html::parse_document(&response_text); | |
let selector = Selector::parse("a").unwrap(); | |
for element in html_document.select(&selector) { | |
if let Some(href) = element.value().attr("href") { | |
*occurs.entry(String::from(href)).or_insert(0u64) |= 1 << show_index; | |
} | |
} | |
} | |
} | |
let mut trope_set_with_occurence_count: Vec<(u32, String)> = occurs | |
.into_iter() | |
.filter_map(|(mut href, set)| { | |
let instances = set.count_ones(); | |
let is_unrelated = instances == source_sites.len() as u32; | |
let is_trope = href.starts_with("/pmwiki/pmwiki.php/Main/"); | |
if instances > 1 && !is_unrelated && is_trope { | |
href.drain(0..24); | |
Some((instances, href)) | |
} else { | |
None | |
} | |
}).collect(); | |
trope_set_with_occurence_count.sort_by_key(|(instances, href)| (u32::MAX - instances, href.clone())); | |
for (instances, href) in trope_set_with_occurence_count.iter() { | |
println!("{}; {}", instances, href); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7; AssholeVictim | |
7; Foreshadowing | |
7; ShoutOut | |
6; ChekhovsGun | |
5; AscendedExtra | |
5; BerserkButton | |
5; BlackComedy | |
5; BookEnds | |
5; BoomHeadshot | |
5; CallBack | |
5; DoesThisRemindYouOfAnything | |
5; DownerEnding | |
5; EnemyMine | |
5; EstablishingCharacterMoment | |
5; EvenEvilHasStandards | |
5; KansasCityShuffle | |
5; KarmaHoudini | |
5; ManipulativeBastard | |
5; MeaningfulName | |
5; MercyKill | |
5; OhCrap | |
5; PetTheDog | |
5; SarcasticConfession | |
5; SeriesOfThe2010s | |
5; TakeThat | |
5; TooDumbToLive | |
5; TookALevelInBadass | |
5; UpToEleven | |
5; VillainProtagonist | |
5; WhatTheHellHero | |
4; AbusiveParents | |
4; AffablyEvil | |
4; AlasPoorVillain | |
4; AnyoneCanDie | |
4; AxCrazy | |
4; BatmanGambit | |
4; BigBad | |
4; BittersweetEnding | |
4; BlatantLies | |
4; ButtMonkey | |
4; CelebrityParadox | |
4; CharacterDevelopment | |
4; ContrivedCoincidence | |
4; DramaticIrony | |
4; DrivenToSuicide | |
4; EvenEvilHasLovedOnes | |
4; HeelFaceTurn | |
4; HeelRealization | |
4; HeroicBSOD | |
4; HeroicSacrifice | |
4; HoistByHisOwnPetard | |
4; Hypocrite | |
4; InUniverse | |
4; JumpingOffTheSlipperySlope | |
4; MoodWhiplash | |
4; MoralityPet | |
4; NiceGuy | |
4; NiceJobBreakingItHero | |
4; PapaWolf | |
4; PlayingWithATrope | |
4; PoliceAreUseless | |
4; PoorCommunicationKills | |
4; RealLife | |
4; RedHerring | |
4; SpannerInTheWorks | |
4; StealthPun | |
4; TheDragon | |
4; TheReasonYouSuckSpeech | |
4; TheSociopath | |
4; ThoseTwoGuys | |
4; WhamEpisode | |
4; WhamLine | |
4; WhamShot | |
4; XanatosSpeedChess | |
3; ALighterShadeOfBlack | |
3; AbortedArc | |
3; ActionGirl | |
3; AdultFear | |
3; AndIMustScream | |
3; AntiHero | |
3; ArcWords | |
3; ArmorPiercingQuestion | |
3; ArtisticLicenseLaw | |
3; BadassBoast | |
3; BecomingTheMask | |
3; BilingualBonus | |
3; BloodFromTheMouth | |
3; BreakTheCutie | |
3; BreakingTheFourthWall | |
3; CatchPhrase | |
3; ChekhovsBoomerang | |
3; ChekhovsGunman | |
3; ChekhovsSkill | |
3; ColorCodedForYourConvenience | |
3; ConsummateLiar | |
3; CorruptCorporateExecutive | |
3; CountryMatters | |
3; CurbStompBattle | |
3; DemotedToExtra | |
3; DiscussedTrope | |
3; DisposingOfABody | |
3; DoubleMeaningTitle | |
3; EarlyBirdCameo | |
3; EarlyInstallmentWeirdness | |
3; EurekaMoment | |
3; EvilVersusEvil | |
3; ExactWords | |
3; Expy | |
3; EyeScream | |
3; Fanservice | |
3; ForWantOfANail | |
3; ForegoneConclusion | |
3; ForeignRemake | |
3; FourTemperamentEnsemble | |
3; FreezeFrameBonus | |
3; FriendlyEnemy | |
3; GambitRoulette | |
3; GenreSavvy | |
3; GoneHorriblyRight | |
3; GoryDiscretionShot | |
3; GrandFinale | |
3; HaveYouToldAnyoneElse | |
3; HeroAntagonist | |
3; HiddenDepths | |
3; HollywoodLaw | |
3; HopeSpot | |
3; IgnoredEpiphany | |
3; IronicEcho | |
3; Irony | |
3; JerkJock | |
3; JustForPun | |
3; KickTheDog | |
3; LampshadeHanging | |
3; LeaningOnTheFourthWall | |
3; LoadsAndLoadsOfCharacters | |
3; LoveTriangle | |
3; MagnificentBastard | |
3; MaleGaze | |
3; MamaBear | |
3; ManChild | |
3; MaybeMagicMaybeMundane | |
3; MegaCorp | |
3; MoralEventHorizon | |
3; MyGodWhatHaveIDone | |
3; MythologyGag | |
3; NiceJobFixingItVillain | |
3; NightmareFuel | |
3; NoodleIncident | |
3; ObfuscatingStupidity | |
3; ObliviousGuiltSlinging | |
3; OnceMoreWithClarity | |
3; OohMeAccentsSlipping | |
3; OvertRendezvous | |
3; PacManFever | |
3; PlayedWith | |
3; PlotArmor | |
3; PragmaticAdaptation | |
3; PragmaticVillainy | |
3; PrecisionFStrike | |
3; ProductPlacement | |
3; PunchClockVillain | |
3; PutOnABus | |
3; RealityEnsues | |
3; RealityIsUnrealistic | |
3; RefugeInAudacity | |
3; RuleOfSymbolism | |
3; SerialKiller | |
3; SexyDiscretionShot | |
3; ShownTheirWork | |
3; SmugSnake | |
3; SpinOff | |
3; StagingAnIntervention | |
3; StepfordSmiler | |
3; SympatheticInspectorAntagonist | |
3; TakingYouWithMe | |
3; TheBadGuyWins | |
3; TheDogBitesBack | |
3; TheGhost | |
3; TheMole | |
3; ThereIsNoKillLikeOverkill | |
3; TimeSkip | |
3; UndyingLoyalty | |
3; UnspokenPlanGuarantee | |
3; VillainousBreakdown | |
3; WhatHappenedToTheMouse | |
3; WordOfGod | |
3; WrongGenreSavvy | |
3; XanatosGambit | |
3; YouHaveFailedMe | |
3; YouHaveOutlivedYourUsefulness | |
3; YourDaysAreNumbered | |
2; ADayInTheLimelight | |
2; AMillionIsAStatistic | |
2; AccidentalMurder | |
2; ActorAllusion | |
2; AdaptationExpansion | |
2; AllLoveIsUnrequited | |
2; AllThereInTheManual | |
2; AmbitionIsEvil | |
2; AmericanSeries | |
2; AmoralAttorney | |
2; AnAesop | |
2; AnalogyBackfire | |
2; AndStarring | |
2; AnimalMotifs | |
2; AntiVillain | |
2; ArchEnemy | |
2; ArtisticLicenseChemistry | |
2; ArtisticLicenseMedicine | |
2; ArtisticLicensePhysics | |
2; AsHimself | |
2; AttemptedRape | |
2; AvertedTrope | |
2; AwesomeMcCoolname | |
2; AwesomenessByAnalysis | |
2; AwfulTruth | |
2; BackFromTheDead | |
2; BadBoss | |
2; BaitAndSwitch | |
2; Beat | |
2; BeautyIsNeverTarnished | |
2; BewareTheNiceOnes | |
2; BigDamnHeroes | |
2; BigFancyHouse | |
2; BitchInSheepsClothing | |
2; BlackAndGrayMorality | |
2; Blackmail | |
2; BlueAndOrangeMorality | |
2; BodyHorror | |
2; BottomlessMagazines | |
2; BreakHisHeartToSaveHim | |
2; BreakTheHaughty | |
2; BullyHunter | |
2; BunnyEarsLawyer | |
2; BurnerPhones | |
2; BusCrash | |
2; CallingTheOldManOut | |
2; CameBackWrong | |
2; CaperRationalization | |
2; CaptainObvious | |
2; CassandraTruth | |
2; CastingGag | |
2; Catchphrase | |
2; CentralTheme | |
2; CharactersDroppingLikeFlies | |
2; CigaretteOfAnxiety | |
2; CliffHanger | |
2; Cliffhanger | |
2; ClosetGeek | |
2; Cloudcuckoolander | |
2; ColdOpen | |
2; CompoundTitle | |
2; ConcealmentEqualsCover | |
2; ContinuityNod | |
2; ControlFreak | |
2; CoolOldGuy | |
2; CorporateConspiracy | |
2; CosmicPlaything | |
2; CrapsackWorld | |
2; CrazyPrepared | |
2; CrossReferencedTitles | |
2; CutLexLuthorACheck | |
2; DamselInDistress | |
2; DarkerAndEdgier | |
2; DeadpanSnarker | |
2; DeathByAdaptation | |
2; DeathFakedForYou | |
2; DeathTrap | |
2; DeliberateValuesDissonance | |
2; DescentIntoAddiction | |
2; DespairEventHorizon | |
2; DeusExMachina | |
2; Deuteragonist | |
2; DiabolusExMachina | |
2; DiegeticSoundtrackUsage | |
2; DirtyCop | |
2; DiscOneFinalBoss | |
2; DisproportionateRetribution | |
2; DoomedMoralVictor | |
2; DroppedABridgeOnHim | |
2; DrowningMySorrows | |
2; DrunkWithPower | |
2; DuctTapeForEverything | |
2; DueToTheDead | |
2; DyingMomentOfAwesome | |
2; EnhanceButton | |
2; EpicFail | |
2; EvenBadMenLoveTheirMamas | |
2; EvilCounterpart | |
2; EvilIsPetty | |
2; ExploitedTrope | |
2; FaceDeathWithDignity | |
2; FaceFramedInShadow | |
2; FakingTheDead | |
2; FanDisservice | |
2; FateWorseThanDeath | |
2; FauxAffablyEvil | |
2; FemmeFatale | |
2; FireForgedFriends | |
2; FlippingTheBird | |
2; FoodPorn | |
2; ForScience | |
2; FrameUp | |
2; FreudianExcuse | |
2; FreudianTrio | |
2; FridgeBrilliance | |
2; FriendToAllChildren | |
2; FromBadToWorse | |
2; FromNobodyToNightmare | |
2; FunnyBackgroundEvent | |
2; GambitPileup | |
2; GilliganCut | |
2; GivenNameReveal | |
2; Gorn | |
2; GreaterScopeVillain | |
2; GreenEyedMonster | |
2; GreyAndGrayMorality | |
2; GuileHero | |
2; HandWave | |
2; Handwave | |
2; HappierHomeMovie | |
2; HeKnowsTooMuch | |
2; HeWhoFightsMonsters | |
2; HealingFactor | |
2; HeelFaceDoorSlam | |
2; HeterosexualLifePartners | |
2; HitmanWithAHeart | |
2; HollywoodHacking | |
2; HollywoodHealing | |
2; HoneyTrap | |
2; HowsYourBritishAccent | |
2; HumiliationConga | |
2; IAmTheNoun | |
2; ICannotSelfTerminate | |
2; IKnowYouKnowIKnow | |
2; IdiosyncraticEpisodeNaming | |
2; IdiotBall | |
2; ImagineSpot | |
2; InNameOnly | |
2; IncrediblyLamePun | |
2; IncurableCoughOfDeath | |
2; IndyPloy | |
2; InnerMonologue | |
2; InsideJob | |
2; InsistentTerminology | |
2; InspectorJavert | |
2; InvokedTrope | |
2; ItAmusedMe | |
2; ItHasBeenAnHonor | |
2; ItsAlwaysSpring | |
2; JackBauerInterrogationTechnique | |
2; JerkWithAHeartOfGold | |
2; JerkWithAHeartOfJerk | |
2; JustifiedTrope | |
2; KarmicDeath | |
2; KickTheSonOfABitch | |
2; KillEmAll | |
2; KilledMidSentence | |
2; KilledOffscreen | |
2; Lampshade | |
2; Lampshaded | |
2; LaserGuidedKarma | |
2; LastNameBasis | |
2; LaughingMad | |
2; Leitmotif | |
2; LenoDevice | |
2; LiteralMetaphor | |
2; LiteraryAllusionTitle | |
2; LonelyAtTheTop | |
2; LoopholeAbuse | |
2; LoveInterest | |
2; LoveMakesYouDumb | |
2; LoveMakesYouEvil | |
2; MacGyvering | |
2; MachiavelliWasWrong | |
2; MadeOfIron | |
2; MedicalRapeAndImpregnate | |
2; MemeticMutation | |
2; MexicanStandoff | |
2; MissingMom | |
2; MohsScaleOfSciFiHardness | |
2; MomentKiller | |
2; MoneyIsNotPower | |
2; Mooks | |
2; MundaneMadeAwesome | |
2; MurderByInaction | |
2; MyBelovedSmother | |
2; NakedApron | |
2; NeatFreak | |
2; NeverHurtAnInnocent | |
2; NewscasterCameo | |
2; NextSundayAD | |
2; NiceToTheWaiter | |
2; NoCelebritiesWereHarmed | |
2; NoNameGiven | |
2; NoPreggerSex | |
2; NoSenseOfPersonalSpace | |
2; NobleBigotWithABadge | |
2; NotDistractedByTheSexy | |
2; NotQuiteDead | |
2; NotSoDifferent | |
2; NotSoDifferentRemark | |
2; NotTheFallThatKillsYou | |
2; ObviouslyEvil | |
2; OffingTheOffspring | |
2; OffstageVillainy | |
2; OminousLatinChanting | |
2; OmnidisciplinaryScientist | |
2; OneSteveLimit | |
2; OnlyAFleshWound | |
2; OnlySaneMan | |
2; OurZombiesAreDifferent | |
2; OutGambitted | |
2; OutOfCharacterMoment | |
2; OutOfFocus | |
2; OutsideContextProblem | |
2; PaperThinDisguise | |
2; ParentalSubstitute | |
2; PayEvilUntoEvil | |
2; Pilot | |
2; PlayedForDrama | |
2; PlayedForLaughs | |
2; PlayingSick | |
2; PlayingWithSyringes | |
2; PlotHole | |
2; PoisonousFriend | |
2; PornStash | |
2; PosthumousCharacter | |
2; PowerTrio | |
2; PowersInTheFirstEpisode | |
2; PreMortemOneLiner | |
2; PromotedToOpeningTitles | |
2; ProperlyParanoid | |
2; ProtagonistJourneyToVillain | |
2; PunctuatedForEmphasis | |
2; PunnyName | |
2; RecklessGunUsage | |
2; RedEyesTakeWarning | |
2; RedOniBlueOni | |
2; RedemptionEqualsDeath | |
2; RefusalOfTheCall | |
2; RemovingTheHeadOrDestroyingTheBrain | |
2; ReplacementGoldfish | |
2; RevealingCoverup | |
2; ReverseWhodunnit | |
2; RoaringRampageOfRevenge | |
2; RootingForTheEmpire | |
2; RunningGag | |
2; SWATTeam | |
2; SanitySlippage | |
2; SarcasmMode | |
2; ScareChord | |
2; ScienceFiction | |
2; ScienceHero | |
2; SecretTestOfCharacter | |
2; SelectiveObliviousness | |
2; SelfServingMemory | |
2; SeriousBusiness | |
2; ShootTheDog | |
2; SickbedSlaying | |
2; SignificantAnagram | |
2; SlidingScaleOfIdealismVersusCynicism | |
2; SoundtrackDissonance | |
2; SpiritualSuccessor | |
2; SplashOfColor | |
2; Squick | |
2; StalkerWithACrush | |
2; StepfordSuburbia | |
2; StrangeMindsThinkAlike | |
2; SubvertedTrope | |
2; SuicideByCop | |
2; SuperStrength | |
2; SurprisinglyRealisticOutcome | |
2; Symbolism | |
2; TeethClenchedTeamwork | |
2; TemptingFate | |
2; ThanatosGambit | |
2; TheAtoner | |
2; TheBusCameBack | |
2; TheCameo | |
2; TheCaper | |
2; TheChessmaster | |
2; TheCoronerDothProtestTooMuch | |
2; TheDarkSideWillMakeYouForget | |
2; TheDreaded | |
2; TheDyingWalk | |
2; TheHeroDies | |
2; TheInformant | |
2; TheLancer | |
2; TheLeader | |
2; ThePiratesWhoDontDoAnything | |
2; TheQuietOne | |
2; TheReveal | |
2; TheScapegoat | |
2; TheUnreveal | |
2; TheVoiceless | |
2; TheWoobie | |
2; ThereAreNoTherapists | |
2; Thriller | |
2; TitleDrop | |
2; TomatoInTheMirror | |
2; TookALevelInJerkass | |
2; TrademarkFavoriteFood | |
2; Tragedy | |
2; TraumaCongaLine | |
2; Tykebomb | |
2; Ubermensch | |
2; Understatement | |
2; UnholyMatrimony | |
2; UnwittingPawn | |
2; UrbanFantasy | |
2; VertigoEffect | |
2; VillainWithGoodPublicity | |
2; VillainousFriendship | |
2; VisionaryVillain | |
2; VisualPun | |
2; VitriolicBestBuds | |
2; VomitIndiscretionShot | |
2; WasItAllALie | |
2; WellIntentionedExtremist | |
2; WildCard | |
2; WoobieDestroyerOfWorlds | |
2; WorthyOpponent | |
2; WouldHurtAChild | |
2; WouldntHurtAChild | |
2; WoundedGazelleGambit | |
2; YankTheDogsChain | |
2; YouAreBetterThanYouThinkYouAre | |
2; YouAreTooLate | |
2; YouKillItYouBoughtIt | |
2; ZombieApocalypse |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment