Last active
February 13, 2017 10:00
-
-
Save xecgr/208202d05c21be6ce24c6d01d38de319 to your computer and use it in GitHub Desktop.
For those who want to filter Upwork jobs by some advanced fields (rising talent, minimum average hourly rate, etc.).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser,requests,bs4 | |
from datetime import datetime,timedelta | |
import sys,os,re | |
from time import mktime | |
from smtplib import SMTP_SSL as SMTP # this invokes the secure SMTP protocol (port 465, uses SSL) | |
# from smtplib import SMTP # use this for standard SMTP protocol (port 25, no encryption) | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
## mail config
# Every credential-like setting can be overridden through an environment
# variable, so real secrets never have to be written into this file. The
# literal values are only placeholders used when the variable is not set.
SMTPserver = os.environ.get('UPWORK_SMTP_SERVER', 'authsmtp.mailserver.com')
USERNAME = os.environ.get('UPWORK_SMTP_USERNAME', "[email protected]")
PASSWORD = os.environ.get('UPWORK_SMTP_PASSWORD', "your_password")
sender = os.environ.get('UPWORK_MAIL_SENDER', '[email protected]')
subject = "[UPWORK] Sumary"
# The summary is mailed back to the sender's own address.
destination = sender

# upwork config
feed_url = os.environ.get(
    'UPWORK_FEED_URL',
    'https://www.upwork.com/ab/feed/topics/rss?securityToken=AUTOGENERATED_TOKEN',
)
login_url = 'https://www.upwork.com/ab/account-security/login'
username = os.environ.get('UPWORK_USERNAME', 'upwork_username')
pwd = os.environ.get('UPWORK_PASSWORD', 'upwork_password')

# Cut-off date: feed entries published before this day are skipped.
yesterday = datetime.now().date() - timedelta(days=1)

# Filters evaluated against each job page. Each entry maps a filter name
# (also used as an extra column in the summary mail) to its configuration:
#   'element' / 'kwargs' -- tag name and keyword arguments handed to
#                           BeautifulSoup's find_all() on the job page
#   'not_present'        -- when true, the filter matches if NO such element
#                           is found
#   'avg_hour__gt'       -- hourly-rate threshold compared against the rate
#                           parsed from the text around the found element
# Without 'not_present' or 'avg_hour__gt' a filter just checks presence.
filters = {
    'has_no_job_score': {
        'not_present': True,
        'element': 'span',
        'kwargs': {'text': 'Job Success Score:', 'class_': 'text-muted'},
    },
    #'has_rising_talent': {
    #    'element': 'span',
    #    'kwargs' : {'text' : 'Rising Talent:', 'class_' : 'text-muted'}
    #},
    'is_not_hired': {
        'not_present': True,
        'element': 'span',
        'kwargs': {'text': 'Hired:', 'class_': 'text-muted'},
    },
    'avg_hour': {
        'avg_hour__gt': 15,
        'element': 'span',
        'kwargs': {'text': '/hr', 'class_': 'text-muted'},
    },
    # feel free to implement more filters
}
def _render_jobs_table(jobs, fields):
    """Return an HTML table with a header row for *fields* and one row per job."""
    header = u'\n'.join(u"<th>{}</th>".format(f.capitalize()) for f in fields)
    rows = []
    for job in jobs:
        # Missing or falsy values (None, '', False) are rendered as empty cells.
        # NOTE(review): values are inserted into the HTML without escaping.
        cells = u''.join(
            u"<td>{}</td>".format(job.get(f, None) or u'') for f in fields
        )
        rows.append(u"<tr>" + cells + u"</tr>")
    # Plain concatenation, so braces in header or cell text are never
    # interpreted as format placeholders.
    return (
        u"<table><tr>" + header + u"</tr>" + u'\n'.join(rows) + u"</table>"
    )


def send_mail(jobs, extra_columns=None):
    """Mail an HTML summary table of *jobs* to ``destination``.

    jobs          -- iterable of dict-like job entries (anything with ``.get``)
    extra_columns -- optional iterable of extra keys, shown as additional
                     columns after title, budget, summary and link

    Server, credentials, subject and addresses are read from the module-level
    mail config. Any error raised while connecting, logging in or sending
    propagates to the caller.
    """
    # list() accepts any iterable (e.g. a dict keys view), not only lists.
    fields = ['title', 'budget', 'summary', 'link'] + list(extra_columns or [])
    jobs_table = _render_jobs_table(jobs, fields)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = sender  # some SMTP servers will do this automatically, not all

    # NOTE(review): this plain-text alternative is placeholder text unrelated
    # to the job list; clients that do not render HTML will display it.
    text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.python.org"
    html = u"""\
<html>
<head></head>
<body>
{}
</body>
</html>
""".format(jobs_table)

    # According to RFC 2046, the last part of a multipart message, in this
    # case the HTML message, is best and preferred.
    msg.attach(MIMEText(text, "plain", "utf-8"))
    msg.attach(MIMEText(html, 'html', "utf-8"))

    conn = SMTP(SMTPserver)
    try:
        conn.set_debuglevel(False)
        conn.login(USERNAME, PASSWORD)
        conn.sendmail(sender, destination, msg.as_string())
    finally:
        # Close the connection even when login or sending fails.
        conn.quit()
# --- Log in to Upwork so that job pages are fetched with a logged-in session ---
s = requests.session()
# Get the login page first, to read the token the login form currently expects.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch, br',
    'Accept-Language': 'es,ca;q=0.8,en;q=0.6',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36',
}
login_page = s.get(login_url, headers=headers)
soup = bs4.BeautifulSoup(login_page.text, "html.parser")
token_input = soup.find('input', id='login__token')
if token_input is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError(
        "upwork login failed: no 'login__token' input found at %s" % login_url
    )
login_token = token_input.attrs['value']
login_iovation = ''
payload = {
    'login[username]': username,
    'login[password]': pwd,
    'login[redir]': '/home',
    'login[_token]': login_token,
    'login[iovation]': login_iovation,
}
# login
r = s.post(login_url, payload)

# --- Scan the RSS feed and keep the recent jobs that pass every filter ---
feed = feedparser.parse(feed_url)
jobs = []
for item in feed['items']:
    # NOTE(review): feedparser documents *_parsed dates as UTC while mktime()
    # assumes local time -- confirm the resulting day boundary is acceptable.
    job_dt = datetime.fromtimestamp(mktime(item.published_parsed)).date()
    if job_dt < yesterday:
        continue
    link = item.get('link', '')

    # Get the budget (only found when the description has a "Budget: $N" line)
    # and shorten the description.
    budget = re.findall(r'<b>Budget<\/b>: \$(\d+)', item['description'])
    if budget:
        item['budget'] = budget[0]
    item['description'] = item['description'][:100] + "..."

    # Without a link there is no job page to evaluate, so the job is not kept.
    if not link:
        continue

    r = s.get(link)
    soup = bs4.BeautifulSoup(r.text, "html.parser")
    matched_filters = {}
    for filter_name, config in filters.items():
        element = config.get('element', '')
        kwargs = config.get('kwargs', {})
        not_present = config.get('not_present', False)
        avg_hour__gt = config.get('avg_hour__gt', None)
        if not (element or kwargs):
            # Nothing to search for: this filter is not evaluated.
            continue

        values = soup.find_all(element, **kwargs)
        if not_present:
            # The filter matches when the element is absent from the page.
            matched = not values
        elif not budget and avg_hour__gt:
            # Average hourly rate filtering (only when no budget was found).
            if values:
                avg_rate = values[0].parent.get_text()
                matches = re.findall(r'(\$?\d+\.\d+)', avg_rate)
                if matches:
                    item['budget'] = matches[0] + "/h"
                    rate = float(matches[0].replace("$", ""))
                    # NOTE(review): the job is rejected when the rate is ABOVE
                    # the threshold, which looks inverted for a key named
                    # 'avg_hour__gt'. Behaviour kept as-is -- confirm intent.
                    matched = not (rate > avg_hour__gt)
                else:
                    # No decimal rate in the text: treat it as no rate known
                    # (previously an IndexError on matches[0]).
                    matched = True
            else:
                # No "/hr" element on the page: there is no rate to compare.
                matched = True
        else:
            # Default case: only check presence.
            matched = bool(values)

        matched_filters[filter_name] = matched
        # Store the result on the item so it can appear in the summary mail.
        item[filter_name] = matched

    if all(matched_filters.values()):
        jobs.append(item)

# Send the summary mail with the jobs that matched all filters.
if jobs:
    send_mail(jobs, extra_columns=list(filters))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment