This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the page with a plain HTTP request. The timeout keeps the script from
# hanging indefinitely if the server never answers.
jspagedata = requests.get("https://vuejs.github.io/vue-hackernews/#!/news/1", timeout=10)
# Parse the raw response body. requests does not execute JavaScript, so only
# the markup served by the host is available in this tree.
jspagedataclean = bs4.BeautifulSoup(jspagedata.text, 'html.parser')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html lang="en"> | |
<head> | |
<meta charset="utf-8"/> | |
<title>Vue.js HN Clone</title> | |
<meta content="initial-scale=1, maximum-scale=1, user-scalable=no, minimal-ui" name="viewport"/> | |
<link href="static/logo.png" rel="icon" type="image/x-icon"/> | |
</head> | |
<body> | |
<div id="app"></div> | |
<script src="static/build.js"></script> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
import requests, bs4, re, time |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start a Chrome session controlled by Selenium and load the page in it.
driver = webdriver.Chrome()
driver.get("https://vuejs.github.io/vue-hackernews/#!/news/1")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse the HTML currently held by the Selenium driver into a BeautifulSoup tree.
jspagedata = bs4.BeautifulSoup(driver.page_source, 'html.parser')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Show hidden characters
# Capture the numeric thread ID from every '<a href="#/item/<digits>"' anchor.
# The backslashes before '<' and '/' are accepted by Python's re module but
# are not required; both characters match literally either way.
jsIDsearch = re.compile(r'\<a href="#\/item\/(\d+)"')
jsthreadIDs = jsIDsearch.findall(str(jspagedata))
# Build one comment-page URL per captured ID, preserving document order.
jscommentlinks = [
    'https://vuejs.github.io/vue-hackernews/#/item/' + thread_id
    for thread_id in jsthreadIDs
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiled once at import time so repeated calls do not rebuild the patterns.
_JS_THREAD_LINK_RE = re.compile(r'\<a class="title" href="(.+?)"')
_JS_COMMENTER_ID_RE = re.compile(r'#\/user\/(.+?)"')


def jsscrapethread(jscleanthread):
    """Extract the title link(s) and the first commenter from a thread page.

    Args:
        jscleanthread: The thread page; it is converted with ``str()`` before
            matching, so a parsed document object or a plain string both work.

    Returns:
        A ``(links, first_commenter)`` tuple. ``links`` is the list of every
        ``href`` found on an ``<a class="title" ...>`` anchor (possibly
        empty). ``first_commenter`` is the second ``#/user/<name>`` match on
        the page, or the string ``"No Commenters"`` when fewer than two user
        references exist.
    """
    thread_html = str(jscleanthread)
    jssinglethreadlink = _JS_THREAD_LINK_RE.findall(thread_html)
    jscommenterIDs = _JS_COMMENTER_ID_RE.findall(thread_html)
    try:
        # Index 1, not 0: presumably the first user reference is the thread's
        # submitter rather than a commenter -- confirm against the page markup.
        jsfirstcommenter = jscommenterIDs[1]
    except IndexError:
        # Only a too-short match list is expected here; anything else should
        # surface instead of being silently swallowed.
        jsfirstcommenter = "No Commenters"
    return jssinglethreadlink, jsfirstcommenter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>> pagedata | |
<Response [200]> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
threadIDs = {list} ['19279396', '19279396', '19279396', '19279396', '19277809', '19277809', '19277809', '19277809', '19279003', '19279003', '19279003', '19279003', '19278075', '19278075', '19278075', '19278075', '19273955', '19273955', '19273955', '19273955', '19278555', '19 | |
000 = {str} '19279396' | |
001 = {str} '19279396' | |
002 = {str} '19279396' | |
003 = {str} '19279396' | |
004 = {str} '19277809' | |
005 = {str} '19277809' | |
006 = {str} '19277809' | |
007 = {str} '19277809' | |
008 = {str} '19279003' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
threadIDs = {list} ['19279396', '19277809', '19279003', '19278075', '19273955', '19278555', '19278936', '19274941', '19277846', '19277653', '19274406', '19278891', '19278302', '19276113', '19277263', '19276977', '19277978', '19275755', '19276751', '19277910', '19275738', '19 | |
00 = {str} '19279396' | |
01 = {str} '19277809' | |
02 = {str} '19279003' | |
03 = {str} '19278075' | |
04 = {str} '19273955' | |
05 = {str} '19278555' | |
06 = {str} '19278936' | |
07 = {str} '19274941' | |
08 = {str} '19277846' |