Skip to content

Instantly share code, notes, and snippets.

@xecgr
Last active February 13, 2017 10:00
Show Gist options
  • Save xecgr/208202d05c21be6ce24c6d01d38de319 to your computer and use it in GitHub Desktop.
Save xecgr/208202d05c21be6ce24c6d01d38de319 to your computer and use it in GitHub Desktop.
For those who want to filter Upwork jobs by advanced fields (rising talent, minimum average hourly rate, etc.)
import feedparser,requests,bs4
from datetime import datetime,timedelta
import sys,os,re
from time import mktime
from smtplib import SMTP_SSL as SMTP # this invokes the secure SMTP protocol (port 465, uses SSL)
# from smtplib import SMTP # use this for standard SMTP protocol (port 25, no encryption)
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# --- Outgoing mail settings ---------------------------------------------
# SMTPserver is handed to the SMTP class; USERNAME/PASSWORD are used to log in.
SMTPserver = "authsmtp.mailserver.com"
USERNAME = "[email protected]"
PASSWORD = "your_password"
sender = "[email protected]"
subject = "[UPWORK] Sumary"
# The summary is mailed back to the sending address.
destination = sender

# --- Upwork settings ----------------------------------------------------
# RSS feed of job postings and the login form used to open a session.
feed_url = "https://www.upwork.com/ab/feed/topics/rss?securityToken=AUTOGENERATED_TOKEN"
login_url = "https://www.upwork.com/ab/account-security/login"
username = "upwork_username"
pwd = "upwork_password"

# Jobs published before this date (one day before today) are ignored.
yesterday = (datetime.now() - timedelta(days=1)).date()
# Filters evaluated against every job page; a job is reported only when all of
# them match. Each entry holds:
#   element      -> tag name searched for with BeautifulSoup's find_all
#   kwargs       -> keyword arguments forwarded to that find_all call
#   not_present  -> (optional) the filter matches when nothing is found
#   avg_hour__gt -> (optional) hourly-rate threshold for the rate found on the page
# An entry with neither optional flag matches when the element is present.
filters = {
    'has_no_job_score': dict(
        not_present=True,
        element='span',
        kwargs=dict(text='Job Success Score:', class_='text-muted'),
    ),
    # Optional example, disabled by default:
    # 'has_rising_talent': dict(
    #     element='span',
    #     kwargs=dict(text='Rising Talent:', class_='text-muted'),
    # ),
    'is_not_hired': dict(
        not_present=True,
        element='span',
        kwargs=dict(text='Hired:', class_='text-muted'),
    ),
    'avg_hour': dict(
        avg_hour__gt=15,
        element='span',
        kwargs=dict(text='/hr', class_='text-muted'),
    ),
    # feel free to implement more filters
}
def _build_jobs_table(jobs, fields):
    """Render *jobs* as an HTML table with one column per name in *fields*."""
    header_cells = u'\n'.join(
        u"<th>{}</th>".format(field.capitalize()) for field in fields
    )
    rows = []
    for job in jobs:
        # Missing or falsy values are rendered as an empty cell.
        # NOTE(review): values are inserted without HTML escaping; 'summary'
        # appears to carry feed-provided HTML on purpose -- confirm before
        # escaping the other columns.
        cells = u''.join(
            u"<td>{}</td>".format(job.get(field) or u'') for field in fields
        )
        rows.append(u"<tr>" + cells + u"</tr>")
    return (
        u"<table><tr>" + header_cells + u"</tr>"
        + u'\n'.join(rows)
        + u"</table>"
    )


def _build_plain_summary(jobs):
    """Return a plain-text fallback listing each job's title and link."""
    return u'\n'.join(
        u"{} - {}".format(job.get('title') or u'', job.get('link') or u'')
        for job in jobs
    )


def send_mail(jobs, extra_columns=None):
    """Mail a summary of *jobs* from ``sender`` to ``destination``.

    The message is a multipart/alternative mail: a plain-text list of job
    titles and links, followed by an HTML table holding the columns
    title, budget, summary and link plus any *extra_columns*.

    Args:
        jobs: iterable of dict-like job entries (anything offering ``.get``).
        extra_columns: optional iterable of additional keys to show as
            columns; any iterable is accepted (list, tuple, dict keys view).

    Raises:
        Whatever the SMTP connection, login or delivery raises; nothing is
        swallowed here.
    """
    # Copy into a list so views/tuples work and the caller's object is untouched.
    fields = ['title', 'budget', 'summary', 'link'] + list(extra_columns or ())

    html = (
        u"<html>\n<head></head>\n<body>\n"
        + _build_jobs_table(jobs, fields)
        + u"\n</body>\n</html>\n"
    )
    text = _build_plain_summary(jobs)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = sender  # some SMTP servers will do this automatically, not all
    msg['To'] = destination

    # Per RFC 2046 the last part of a multipart/alternative message (here the
    # HTML one) is the preferred rendering, so attach plain text first.
    msg.attach(MIMEText(text, "plain", "utf-8"))
    msg.attach(MIMEText(html, 'html', "utf-8"))

    conn = SMTP(SMTPserver)
    try:
        conn.set_debuglevel(False)
        conn.login(USERNAME, PASSWORD)
        conn.sendmail(sender, destination, msg.as_string())
    finally:
        # Close the connection even when login or delivery fails.
        conn.quit()
# HTTP session shared with the job-page requests made later in the script, so
# the cookies obtained by logging in here are reused.
s = requests.session()

# Browser-like headers used to fetch the login page, which carries the
# per-request form token needed by the login POST.
headers = {
    'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    # Only advertise encodings that requests decodes out of the box; 'br'
    # needs an optional Brotli package and 'sdch' is not handled at all, so
    # offering them could yield an undecodable login page.
    'Accept-Encoding' : 'gzip, deflate',
    'Accept-Language' : 'es,ca;q=0.8,en;q=0.6',
    'Connection' : 'keep-alive',
    'Upgrade-Insecure-Requests' : '1',
    'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36',
}
login_page = s.get(login_url, headers=headers)
soup = bs4.BeautifulSoup(login_page.text, "html.parser")

# Fail with an explicit message instead of an AttributeError on None when the
# login form is missing or has changed.
token_input = soup.find('input', id='login__token')
if token_input is None or not token_input.get('value'):
    raise RuntimeError(
        "could not find the 'login__token' input on {} (HTTP status {})".format(
            login_url, login_page.status_code
        )
    )
login_token = token_input['value']
login_iovation = ''
payload = {
    'login[username]' : username,
    'login[password]' : pwd,
    'login[redir]' : '/home',
    'login[_token]' : login_token,
    'login[iovation]' : login_iovation,
}

# Log in; the session keeps whatever cookies the response sets.
r = s.post(login_url, payload)
def _rate_filter_result(values, threshold):
    """Judge the average-hourly-rate filter for one job page.

    Args:
        values: elements returned by ``find_all`` for the rate filter.
        threshold: the filter's ``avg_hour__gt`` value.

    Returns:
        A ``(matched, label)`` tuple. ``matched`` is True when the rate read
        from the page is greater than *threshold*, or when no rate can be
        read (no element found, or no decimal number in its parent's text).
        ``label`` is the rate text suffixed with "/h", or None when no rate
        was read.
    """
    if not values:
        # No average rate on the page: do not reject the job.
        return True, None
    rate_text = values[0].parent.get_text()
    rates = re.findall(r'(\$?\d+\.\d+)', rate_text)
    if not rates:
        # Element present but no parsable number: treat like a missing rate
        # instead of failing with an IndexError.
        return True, None
    return float(rates[0].replace("$", "")) > threshold, rates[0] + "/h"


feed = feedparser.parse(feed_url)
jobs = []
for item in feed['items']:
    published = item.get('published_parsed')
    if published is None:
        # Without a publication date the age of the job cannot be checked.
        continue
    job_dt = datetime.fromtimestamp(mktime(published)).date()
    if job_dt < yesterday:
        continue

    # Get the budget and slice the description.
    description = item.get('description', '')
    # Accept thousands separators so "$1,000" is not truncated to "1".
    budget = re.findall(r'<b>Budget</b>: \$(\d+(?:,\d{3})*)', description)
    if budget:
        item['budget'] = budget[0]
    # NOTE(review): presumably feedparser aliases 'description' to 'summary',
    # which is the column shown in the mail -- confirm.
    item['description'] = description[:100] + "..."

    link = item.get('link', '')
    if not link:
        # The filters need the job page, so a job without a link cannot match.
        continue

    r = s.get(link)
    soup = bs4.BeautifulSoup(r.text, "html.parser")
    matched_filters = {}
    for filter_name, config in filters.items():
        element = config.get('element', '')
        kwargs = config.get('kwargs', {})
        if not (element or kwargs):
            continue
        values = soup.find_all(element, **kwargs)
        threshold = config.get('avg_hour__gt')
        if config.get('not_present'):
            # The filter matches when the field is absent from the page.
            matched = not values
        elif threshold and not budget:
            # Hourly job: keep it only when the average rate on the page is
            # above the configured minimum.
            matched, hourly_label = _rate_filter_result(values, threshold)
            if hourly_label:
                item['budget'] = hourly_label
        else:
            # Default case: only check presence.
            # NOTE(review): a rate filter on a job that has a budget also
            # lands here, although the original comment suggested such jobs
            # should always pass -- confirm the intended behaviour.
            matched = bool(values)
        matched_filters[filter_name] = matched
        # Store the result on the job so it can appear in the summary mail.
        item[filter_name] = matched

    if all(matched_filters.values()):
        jobs.append(item)

# Send the summary mail with the jobs that matched every filter.
if jobs:
    send_mail(jobs, extra_columns=list(filters))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment