Created June 26, 2015 at 16:18
-
-
Save abhigenie92/7505af94b045d729d62e to your computer and use it in GitHub Desktop.
Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from bs4 import BeautifulSoup | |
import urllib,requests,unidecode,lxml | |
class wait_for_more_than_n_elements_to_be_present(object):
    """Custom Selenium expected condition.

    Succeeds once the number of elements matching ``locator`` exceeds
    ``count``. Intended for infinite-scroll pages: wait until *more*
    items have loaded than were present before scrolling.
    """

    def __init__(self, locator, count):
        # locator: (By.<strategy>, selector) tuple; count: element count to exceed
        self.locator = locator
        self.count = count

    def __call__(self, driver):
        try:
            # Use the public find_elements API instead of the private
            # EC._find_elements helper, which is not a stable interface.
            elements = driver.find_elements(*self.locator)
            return len(elements) > self.count
        except StaleElementReferenceException:
            # Page re-rendered mid-query; report "not yet" so the wait retries.
            return False
def return_html_code(url):
    """Open *url* in Firefox, scroll until no more tweets load, and
    return the fully rendered page source as a string.

    Blocks until the page stops producing new "li[data-item-id]" items
    (each wait times out after 10 seconds). The browser session is always
    shut down, even if a wait or script call raises.
    """
    driver = webdriver.Firefox()
    try:
        driver.maximize_window()
        driver.get(url)
        tweet_locator = (By.CSS_SELECTOR, "li[data-item-id]")
        # Initial wait for the first batch of tweets to render.
        wait = WebDriverWait(driver, 10)
        wait.until(EC.visibility_of_element_located(tweet_locator))
        # Scroll the last tweet into view until no new tweets are loaded.
        while True:
            tweets = driver.find_elements(*tweet_locator)
            number_of_tweets = len(tweets)
            print(number_of_tweets)
            driver.execute_script("arguments[0].scrollIntoView();", tweets[-1])
            try:
                wait.until(wait_for_more_than_n_elements_to_be_present(
                    tweet_locator, number_of_tweets))
            except TimeoutException:
                # No additional tweets appeared within the timeout: done.
                break
        return driver.page_source
    finally:
        # quit() (not close()) ends the whole WebDriver session, so the
        # browser process is not leaked when an exception propagates.
        driver.quit()
url = 'https://twitter.com/thecoolstacks'
# Render the page with a real browser (Selenium) so JS-loaded tweets appear.
html_source = return_html_code(url)
soup = BeautifulSoup(html_source, "lxml")
for tweet in soup.select("div.tweet div.content"):
    # Guard against content divs with no <p> child (would raise AttributeError).
    if tweet.p is not None:
        print(tweet.p.text)
#using request modules | |
# if False: | |
# req = requests.get(url) | |
# soup = BeautifulSoup(req.content) | |
# text_tweet=[] | |
# alltweets = soup.find_all(attrs={'data-item-type' : 'tweet'}) | |
# for tweet in alltweets: | |
# #Text of tweet | |
# html_tweet= tweet.find_all("p", class_="TweetTextSize TweetTextSize--16px js-tweet-text tweet-text") | |
# text_tweet.append(''.join(html_tweet[0].findAll(text=True))) | |
# print text_tweet | |
#finalcode | |
# alltweets_selenium = soup_selenium.find_all(attrs={'data-item-type' : 'tweet'}) | |
# for tweet in alltweets_selenium: | |
# #Text of tweet | |
# html_tweet= tweet.find_all("p", class_="TweetTextSize TweetTextSize--16px js-tweet-text tweet-text") | |
# text_tweet.append(''.join(html_tweet[0].findAll(text=True))) | |
# print text_tweet |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.