Created
September 30, 2019 10:59
-
-
Save MervinPraison/324f8ec5ffb65336ffbfc7d3ac108b7d to your computer and use it in GitHub Desktop.
Ads Detector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import json
import sys
import time
import unittest

from flask import Flask
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
# Module-level browser and web application shared by the route handlers and
# by main(). Chrome is started without a display (headless) and with the
# flags below.
chrome_options = Options()
for _flag in (
    "--disable-extensions",
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
):
    chrome_options.add_argument(_flag)

# The first argument is the chromedriver executable path.
driver = webdriver.Chrome(
    '/usr/bin/chromedriver',
    options=chrome_options,
)

app = Flask(__name__)
@app.route('/')
def hello_world():
    """Serve a fixed greeting at the site root."""
    greeting = 'Hello, World!'
    return greeting
@app.route('/url/<path:urlpath>', methods=['GET', 'POST'])
def api_url(urlpath):
    """Load a page in the shared browser and report the iframes it contains.

    Args:
        urlpath: Host and path of the page to inspect, without a scheme;
            ``https://`` is prepended before loading.

    Returns:
        A JSON-encoded list. The first element is a summary object with the
        keys ``url``, ``title`` and ``total_iframes``; every following
        element is a ``[size, "src:<src>", href]`` list for one iframe.
    """
    print(urlpath)
    if urlpath:
        url = "https://" + urlpath
    else:
        url = 'https://praison.com/django-task-browser-app/'

    driver.get(url)
    driver.maximize_window()
    # Fixed 10-second pause before counting iframes -- presumably to give
    # page scripts time to inject them (TODO confirm the intent).
    time.sleep(10)

    # find_elements(By...) is the locator API available in both Selenium 3
    # and 4; the find_elements_by_* helpers were removed in Selenium 4.3.
    iframes = driver.find_elements(By.TAG_NAME, 'iframe')
    frames = [{"url": url, "title": driver.title, "total_iframes": len(iframes)}]
    for frame in iframes:
        # get_attribute() returns None when the attribute is absent, which
        # made the original string concatenation raise TypeError for
        # iframes that have no src.
        src = frame.get_attribute('src') or ''
        frames.append([frame.size, "src:" + src, frame.get_attribute('href')])

    # The module-level driver is not quit here: later requests reuse it.
    return json.dumps(frames)
def main():
    """Print the size and text of every iframe on a page, then its title.

    The page is taken from the ``-u``/``--url`` command-line option; when the
    option is missing or empty a built-in default URL is used. The
    module-level browser is shut down before returning, including when
    loading or inspecting the page raises.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', required=False, help="Please provide URL")
    args = parser.parse_args()
    url = args.url or 'https://praison.com/django-task-browser-app/'

    try:
        driver.get(url)
        # find_elements(By...) works on Selenium 3 and 4; the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        for frame in driver.find_elements(By.TAG_NAME, 'iframe'):
            print(frame.size)
            print(frame.text)
        print(driver.title)
    finally:
        # Always end the browser session so a failed run does not leave
        # Chrome/chromedriver running.
        driver.quit()
class PythonOrgSearch(unittest.TestCase):
    """Browser test that searches python.org for "pycon"."""

    def setUp(self):
        """Start a dedicated headless Chrome session for the test."""
        chrome_options = Options()
        chrome_options.add_argument("--disable-extensions")
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome('/usr/bin/chromedriver', options=chrome_options)

    def test_search_in_python_org(self):
        """Submit a search for "pycon" and expect at least one result."""
        driver = self.driver
        driver.get("http://www.python.org")
        self.assertIn("Python", driver.title)
        # find_element(By...) works on Selenium 3 and 4; find_element_by_name
        # was removed in Selenium 4.3.
        elem = driver.find_element(By.NAME, "q")
        elem.send_keys("pycon")
        elem.send_keys(Keys.RETURN)
        # assertNotIn instead of a bare assert: it is not stripped under
        # ``python -O`` and reports a useful failure message.
        self.assertNotIn("No results found.", driver.page_source)

    def tearDown(self):
        """Shut the browser session down."""
        # quit() ends the whole session and the chromedriver process;
        # close() only closes the current window.
        self.driver.quit()
if __name__ == "__main__":
    # Runs the Flask development server; the calls below only execute once
    # the server has stopped and app.run() returns.
    app.run()
    main()
    # main() has already parsed the command line with argparse. Hand
    # unittest only the program name so options such as -u/--url are not
    # re-parsed (and rejected) by unittest's own argument parser.
    unittest.main(argv=sys.argv[:1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment