Created
November 12, 2019 17:32
-
-
Save werrpy/0f816d49e628ada627fae46a70906723 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-host imagebam images on postimages.org and collect the resulting BBCode.
#
# For every imagebam page URL in `urls` the script fetches the page, reads the
# direct image address from its `og:image` meta tag, submits that address to
# the postimages.org "upload by URL" form through headless Chrome, and stores
# the thumbnail BBCode that postimages.org returns.
#
# find all imagebam urls with this regex
# https?:\/\/(www\.)?imagebam\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)
# gives us something like
# http://www.imagebam.com/image/9e31091131177484
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Seconds to wait for imagebam to answer; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

# headless chrome
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)
# Set once for the whole session: every element lookup below polls for up to
# 10 seconds, which is what lets the script wait for the upload result page.
driver.implicitly_wait(10)

urls = [
    'http://www.imagebam.com/image/9e31091131177484'
]

# Maps each direct image URL to the thumbnail BBCode from postimages.org.
bbcodes = dict()

try:
    for url in urls:
        # One session per URL so cookies set by the first response are sent
        # with the second request.
        with requests.Session() as s:
            try:
                response = s.get(url, timeout=REQUEST_TIMEOUT)
                if response.status_code != 200:
                    print("Failed[1]: " + url)
                    continue
                print("Got[1]: " + url)

                # get it again to bypass redirect (presumably the first hit
                # returns an interstitial page rather than the image page)
                response = s.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Network failure or timeout: report it and move on instead
                # of aborting the remaining URLs.
                print("Failed: " + url + " (" + str(exc) + ")")
                continue

        if response.status_code != 200:
            print("Failed[2]: " + url)
            continue

        # we good!
        print("Got[2]: " + url)

        # parse html for image redirect
        soup = BeautifulSoup(response.text, 'html.parser')
        for tag in soup.find_all('meta', attrs={"property": "og:image"}):
            direct_url = tag.get("content")
            if not direct_url:
                # An og:image tag without a content attribute has nothing to
                # upload (and would break the string concatenation below).
                print("No direct url in og:image tag: " + url)
                continue
            print("direct url: " + direct_url)

            # Submit the direct image URL to the postimages.org web uploader.
            driver.get("https://postimages.org/web")
            driver.find_element(By.ID, "links").send_keys(direct_url)
            driver.find_element(By.CSS_SELECTOR, "a.width100").click()

            # The BBCode field only exists on the result page; the implicit
            # wait configured above makes this lookup poll until it appears.
            bb_code = driver.find_element(By.ID, "code_bb_thumb")
            bbcodes[direct_url] = bb_code.get_attribute("value")
finally:
    # Shut the browser down exactly once, after all URLs are processed and
    # even when an upload raised; closing it inside the loop would break
    # every URL after the first.
    driver.quit()

print(bbcodes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment