Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.common.action_chains import ActionChains | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.support.ui import Select | |
import time | |
import pandas as pd | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Extract Google Trends topics | |
//Usage: copy and paste into the Chrome JavaScript console with a trending page open, for example:
//https://trends.google.com/trends/trendingsearches/realtime?geo=US&category=all | |
function getTopics(){ | |
topics_anchors = document.querySelectorAll("div.details-wrapper > div.details > div.details-bottom > div.subtitles-text-wrapper.visible > div.summary-text > a"); | |
var topics = []; | |
for (topic of topics_anchors){ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
from sumy.parsers.html import HtmlParser | |
from sumy.parsers.plaintext import PlaintextParser | |
from sumy.nlp.tokenizers import Tokenizer | |
from sumy.summarizers.lsa import LsaSummarizer as Lsa | |
from sumy.summarizers.luhn import LuhnSummarizer as Luhn | |
from sumy.summarizers.text_rank import TextRankSummarizer as TxtRank | |
from sumy.summarizers.lex_rank import LexRankSummarizer as LexRank | |
from sumy.summarizers.sum_basic import SumBasicSummarizer as SumBasic |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# basic scrape and write demonstration used in Goldsmiths digital sandbox 2014 | |
import urllib # fetches raw web pages for us | |
import bs4 # turns raw web pages into an object hierarchy and provides selectors (as CSS and XPath do)
import csv # simplifies the process of writing data to Comma Separated Values in a file | |
# YouTube watch-page URLs that we want to scrape data from.
# One entry per line with a trailing comma so adding a URL is a one-line diff.
pagesToScrape = [
    'http://www.youtube.com/watch?v=9hIQjrMHTv4',
    'https://www.youtube.com/watch?v=Uk8x3V-sUgU',
]
# open a file in append mode, in the same directory this script was run from, to write into