Skip to content

Instantly share code, notes, and snippets.

@5shekel
Last active November 27, 2018 22:11
Show Gist options
  • Save 5shekel/a8d39c193bf6a2eec0516ebeb07d8f7a to your computer and use it in GitHub Desktop.
Save 5shekel/a8d39c193bf6a2eec0516ebeb07d8f7a to your computer and use it in GitHub Desktop.
get yo links
# Script written with guidance from John G. Fisher;
# see his YouTube video: https://www.youtube.com/watch?v=e6xZAISu-5E
# Grabs all the links we talked about (every href found in the exported message.html).
from bs4 import BeautifulSoup
import requests
import re
# The page is fetched over HTTP, so serve the export directory first
# (e.g. run `python -m http.server` there; it listens on port 8000 by default).
inbox_path = 'http://localhost:8000/messages/inbox/ElBootkeh_64b09d1ba5/message.html'

# Query string that follows a ".html" suffix (long AliExpress links).
# The dot is escaped so only a literal ".html?" is trimmed; the unescaped
# form also rewrote e.g. ".../xhtml?a=1" into ".../.html".
_HTML_QUERY = re.compile(r'\.html\?.*$')


def clean_link(href: str) -> str:
    """Return *href* ready to be written to the links file.

    Everything from the ``?`` that follows a ``.html`` suffix is dropped,
    and leading whitespace is stripped. Links without a ``.html?`` part are
    returned unchanged apart from the stripping.

    Raises:
        TypeError: if *href* is not a string.
    """
    return _HTML_QUERY.sub('.html', href).lstrip()


def main() -> None:
    """Fetch the inbox page and write every href to ``idiot_links.txt``.

    Each link is echoed to stdout as found, then written in cleaned form
    followed by a blank line.

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an error status.
    """
    print(inbox_path)
    # A timeout keeps the script from hanging forever when the local server
    # is not running; raise_for_status avoids parsing an error page.
    response = requests.get(inbox_path, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # TODO: remove sticker messages matching '^messages/stickers_used.*png'
    # TODO: shorten deep dirs
    # TODO: remove dups, e.g. awk '!seen[$0]++' idiot_links.txt > idiot_linksU.txt
    # TODO: reverse order
    # TODO: add {{curly braces}} to media
    # TODO: extract date of event
    # TODO: auto upload relevant files to wiki

    # Explicit UTF-8 so non-ASCII links never fail on the platform default codec.
    with open('idiot_links.txt', 'w', encoding='utf-8') as f:
        for tag in soup.find_all(href=True):
            # grab href, see https://stackoverflow.com/q/5815747/184085
            href = tag['href']
            try:
                cleaned = clean_link(href)
            except TypeError:
                # Keep the original best-effort behaviour (skip the entry,
                # carry on), but say so instead of swallowing it silently.
                print('skipping non-text href:', ascii(href))
                continue

            try:
                print(href)
            except UnicodeEncodeError:
                # Some consoles cannot encode every character; fall back to
                # an ASCII-escaped echo so the link is still saved below.
                print(ascii(href))

            f.write(cleaned + '\n\n')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment