AnasAlmasri’s gists

AnasAlmasri / Main.py

Created May 9, 2020 18:05

	# set up our plots: a 2x3 grid of axes, one chart per slot
	fig, axes = plt.subplots(nrows=2, ncols=3)
	# top row: the three cumulative-sum columns, one line chart each
	df['views_cumsum'].plot(ax=axes[0,0], title='Views Over Time', c='blue', grid=True).set(ylabel='# of Views')
	df['reads_cumsum'].plot(ax=axes[0,1], title='Reads Over Time', c='green', grid=True).set(ylabel='# of Reads')
	df['fans_cumsum'].plot(ax=axes[0,2], title='Fans Over Time', c='red', grid=True).set(ylabel='# of Fans')
	# bottom-left: reads and fans per row of df as stacked bars
	df[['reads', 'fans']].plot(ax=axes[1,0], title='Reads/Fans by Story', kind='bar', stacked=True, grid=True).legend(['# of Reads', '# of Fans'])
	# bottom-middle: cumulative reads and fans as an area chart; the title names
	# the columns actually plotted (reads/fans), matching the legend entries
	df[['reads_cumsum', 'fans_cumsum']].plot.area(ax=axes[1,1], title='Reads/Fans Over Time', grid=True).legend(['# of Reads', '# of Fans'])
	# bottom-right: 'views' summed per distinct 'year' value, drawn as bars
	df.groupby('year')['views'].sum().plot.bar(ax=axes[1,2], x='year', y='views', title='Yearly Views', grid=True)

	# set subplot style

AnasAlmasri / Main.py

Created May 9, 2020 18:01

	# add a running-total column to the dataframe for each metric column
	for source_col, total_col in (
		('views', 'views_cumsum'),
		('reads', 'reads_cumsum'),
		('fans', 'fans_cumsum'),
	):
		df[total_col] = df[source_col].cumsum()

AnasAlmasri / Main.py

Created May 9, 2020 17:59

	# cast each of the three count columns to float, replacing the column in place
	for metric in ('views', 'reads', 'fans'):
		df[metric] = df[metric].astype(float)

AnasAlmasri / Main.py

Created May 9, 2020 17:56

	# build the single combined dataframe in one chain:
	#   1. concatenate all yearly dataframes
	#   2. reverse the row order so that the oldest story has the smallest index
	#   3. renumber the rows, dropping the overlapping per-dataframe indices
	df = (
		pd.concat(df_yearly)
		.iloc[::-1]
		.reset_index(drop=True)
	)

AnasAlmasri / Main.py

Created May 9, 2020 17:54

	# create a list of dataframes, one per key of `stats`: each dataframe is
	# built from that key's value and gets a 'year' column holding the key
	df_yearly = [
		pd.DataFrame(stories).assign(year=year)
		for year, stories in stats.items()
	]

AnasAlmasri / stats.dict

Created May 9, 2020 17:49

	# stats maps a year string to a list of per-story dicts.
	# Each story dict shown here has the keys 'title', 'views', 'reads', 'ratio'
	# and 'fans'; all values, including the counts, are stored as strings.
	# NOTE(review): the literal is cut off in this view; further years may follow.
	stats = {
	'2019': [
	{
	'title': 'Creating The Twitter Sentiment Analysis Program in Python with Naive Bayes Classification',
	'views': '89434',
	'reads': '23128',
	'ratio': '26%',
	'fans': '115'
	}
	],

AnasAlmasri / Main.py

Created May 9, 2020 17:43

	# Walks the <tr> rows of stats_table. Rows whose class marks them as a year
	# bucket start a new empty list in `stats`; rows whose class marks them as a
	# story are handled in a branch that is cut off in this view.
	# NOTE(review): nesting has been flattened in this view, so the exact extent of
	# each loop/branch body cannot be confirmed from here.
	# NOTE(review): find_elements_by_tag_name is the legacy Selenium locator API;
	# confirm the installed Selenium version still provides it.
	# placeholder until the first year row is seen; afterwards holds a cell's text
	current_year = 0
	# loop through table rows retrieving the data and adding it to our stats dictionary
	for row in stats_table.find_elements_by_tag_name('tr'):
	# only rows with a non-empty class attribute are considered
	if (len(row.get_attribute('class')) > 0):
	if ('sortableTable-row--dateBucket' in row.get_attribute('class')): # when encountering a year row
	# the text of a <td> in the year row becomes the current key into stats
	for cell in row.find_elements_by_tag_name('td'):
	current_year = cell.text
	# start a fresh (empty) list for this key
	stats[current_year] = []
	elif ('js-statsTableRow' in row.get_attribute('class')): # when encountering a story
	# counter initialised to 1; its use is not visible in this view
	td_idx = 1

AnasAlmasri / Main.py

Created May 9, 2020 17:40

	# get stats table: poll (10 second timeout) until the table element is found.
	# find_element(By.XPATH, ...) is used because the find_element_by_xpath
	# shortcut was removed in Selenium 4.3; this form also works on Selenium 3.
	from selenium.webdriver.common.by import By
	stats_table = WebDriverWait(driver, 10).until(
		lambda d: d.find_element(By.XPATH, '/html/body/div[1]/div[2]/div/div[3]/div/div[4]/table')
	)

	time.sleep(sleep_duration)

	# scroll to the bottom of the page twice, pausing after each scroll,
	# to make sure the whole table is rendered
	for _ in range(2):
		driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
		time.sleep(nap_duration)

AnasAlmasri / Main.py

Created May 9, 2020 17:35

	# fill out login form
	# find_element(By.XPATH, ...) is used because the find_element_by_xpath
	# shortcut was removed in Selenium 4.3; this form also works on Selenium 3.
	from selenium.webdriver.common.by import By

	# each lookup polls (10 second timeout) until its element is found
	username = WebDriverWait(driver, 10).until(lambda d: d.find_element(By.XPATH, '//*[@id="username_or_email"]'))
	password = WebDriverWait(driver, 10).until(lambda d: d.find_element(By.XPATH, '//*[@id="password"]'))
	login = WebDriverWait(driver, 10).until(lambda d: d.find_element(By.XPATH, '//*[@id="allow"]'))

	time.sleep(sleep_duration)
	# empty the username field before typing the credential into it
	username.clear()
	username.send_keys(email_cred)

	time.sleep(nap_duration)

AnasAlmasri / Main.py

Created May 9, 2020 17:30

	# choose twitter: poll (10 second timeout) for the element whose data-action
	# attribute is "twitter-auth", then click it.
	# find_element(By.XPATH, ...) is used because the find_element_by_xpath
	# shortcut was removed in Selenium 4.3; this form also works on Selenium 3.
	from selenium.webdriver.common.by import By
	WebDriverWait(driver, 10).until(lambda d: d.find_element(By.XPATH, '//*[@data-action="twitter-auth"]')).click()

Anas Al-Masri AnasAlmasri