""" | |
Calculates key performance indicators for the 1 click appliers. | |
Manual validation of 5 cl missing reply link failures (is link actually there?) | |
https://columbus.craigslist.org/eng/5939246199.html', correct | |
'https://seattle.craigslist.org/tac/edu/5946859577.html', correct | |
'https://seattle.craigslist.org/see/egr/5955565817.html', correct | |
'https://minneapolis.craigslist.org/hnp/npo/5961895846.html', correct | |
'https://seattle.craigslist.org/see/hea/5964050043.html correct | |
Manual validation of 5 cl missing reply email failures (is email actually there?) | |
[u'https://columbus.craigslist.org/eng/5939246199.html', correct | |
u'https://seattle.craigslist.org/tac/edu/5946859577.html', correct | |
u'https://seattle.craigslist.org/see/egr/5955565817.html', correct | |
u'https://columbus.craigslist.org/sof/5935009087.html', wrong/has reply email | |
u'https://minneapolis.craigslist.org/hnp/npo/5961895846.html' correct] | |
""" | |
from collections import OrderedDict

import pandas as pd
from sqlalchemy import create_engine


def user_ids_to_ignore():
    """User IDs excluded from all KPI calculations (presumably internal/test accounts)."""
    return {6134, 151, 28808, 32952, 32953, 8177, 32949, 15, 8176, 320, 179, 1, 41269, 41265,
            6152, 375, 6119, 6120, 6084, 278, 4196, 6064, 21718, 14471, 21163, 15267, 9847,
            212, 324, 207, 272, 134, 209, 210, 190, 144, 196, 321, 140, 88, 170, 188, 243,
            169, 120, 15243, 6167, 217, 132, 133, 8184, 15244, 15245, 15246, 8164, 317, 215,
            148, 160, 182, 21717, 6148, 6150, 6135, 6113, 6146, 6166, 216, 202, 203, 197, 219,
            161, 271, 198, 6162, 21171, 359, 386, 8163, 194, 159, 239, 6158, 6161, 6163, 195,
            255, 8178, 51, 41}


def load_job_scouts():
    """Load job_scouts created after 2017-01-01, excluding ignored users and deleted/not_valid rows."""
    pg_user_name = 'sohier'
    pg_database_name = 'talentworks_development'
    engine = create_engine('postgresql://{0}@localhost:5432/{1}'.format(
        pg_user_name, pg_database_name))
    query = "select * from job_scouts where created_at > '20170101'::date"
    df = pd.read_sql_query(query, con=engine)
    return df[~df.user_id.isin(user_ids_to_ignore()) & ~df.aasm_state.isin(
        ['deleted', 'not_valid'])]


def load_postings(job_scout_df):
    """Load all postings, prefixing columns that collide with job_scout_df with 'post_'."""
    pg_user_name = 'sohier'
    pg_database_name = 'talentworks_development'
    engine = create_engine('postgresql://{0}@localhost:5432/{1}'.format(
        pg_user_name, pg_database_name))
    postings = pd.read_sql_query('select * from postings', con=engine)
    postings.rename(columns={x: 'post_' + x for x in postings.columns if x in
                             job_scout_df.columns}, inplace=True)
    return postings


def has_events_in_context(df):
    """Return the rows whose context is present and contains an 'events' key."""
    df = df.dropna(subset=['context'])
    return df[df.context.apply(lambda x: 'events' in x.keys())]
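

# The helpers below walk job_scouts.context which, judging from how it is parsed in
# this script, is a dict shaped roughly like
#     {'events': [{'event': 'Applying...'}, {'msg': "You told us ..."}, ...]}
# (structure inferred from usage here, not from a schema).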


def began_application(context):
    """True if any event shows the application was started."""
    for event in [x for x in context['events'] if 'event' in x.keys()]:
        if "Applying..." in event['event']:
            return True
    return False


def finished_application(context):
    """True if any event shows the application was submitted."""
    for event in [x for x in context['events'] if 'event' in x.keys()]:
        if event['event'].startswith('Applied'):
            return True
    return False


def user_rejected(context):
    """True if the user rejected the job."""
    for event in [x for x in context['events'] if 'event' in x.keys()]:
        if event['event'].startswith('Rejected'):
            return True
    return False


def user_requested_application(context):
    """True if the user asked us to apply to the job on their behalf."""
    for event in [x for x in context['events'] if 'msg' in x.keys()]:
        if "You told us that you'd like us to apply to this job on your behalf." in event['msg']:
            return True
    return False


def times_requested_more_info(context):
    """Count the events where we asked the user for more information."""
    return len([event['event'] for event in [x for x in context['events'] if 'event' in x.keys()]
                if event['event'].startswith("Info Needed")])


def times_user_gave_requested_info(context):
    """Count the events where the user provided the requested info."""
    return len([event['event'] for event in [x for x in context['events'] if 'event' in x.keys()]
                if "Info Provided" in event['event']])


def get_parser(post_context):
    """Return the parser recorded for the posting (used as the ATS breakdown), or None."""
    if not post_context:
        return None
    if 'parser' not in post_context:
        return None
    return post_context['parser']


def has_reply_link(post_context):
    """True if the posting has a non-empty reply link."""
    if not post_context:
        return False
    if 'reply_link' not in post_context:
        return False
    return len(post_context['reply_link']) > 0


def has_reply_email(post_context):
    """True if the posting has a non-empty reply email."""
    if not post_context:
        return False
    if 'reply_email' not in post_context:
        return False
    return len(post_context['reply_email']) > 0


def recent_posts_with_no_parser(df):
    """Posts created in the last two days that have no parser."""
    days_to_pull = 2
    return df[df.parser.isnull() & (df.created_at.dt.date >
                                    (pd.datetime.today() - pd.DateOffset(days_to_pull)).date())]


def calc_kpis(df):
    """Assemble an ordered dict of KPIs, including a per-parser (ATS) success breakdown."""
    kpis = OrderedDict()
    kpis['num_job_scouts'] = len(load_job_scouts())
    kpis['num_applications_started'] = sum(df.began_application)
    kpis['posts_with_no_parser'] = len(df[df.parser.isnull()])
    kpis['application_success_by_ATS'] = df[
        ['parser', 'began_application', 'user_rejected_after_began', 'finished_application',
         'failed', 'user_gave_all_requested_info', 'missing_info',
         'failed_missing_info']].groupby('parser').sum()
    kpis['application_success_by_ATS']['job_scouts'] = df[['parser', 'id']].groupby(
        'parser').count()
    # Append a 'Total' row summing each column across parsers.
    column_totals = kpis['application_success_by_ATS'].sum()
    column_totals.name = "Total"
    kpis['application_success_by_ATS'] = kpis['application_success_by_ATS'].append(column_totals)
    return kpis


def process_job_scouts(df):
    """Derive the per-application flags and counters used by calc_kpis."""
    df['has_events_in_context'] = df.index.isin(has_events_in_context(df).index)
    df = df[df['has_events_in_context']]
    df['application_requested'] = df['context'].apply(user_requested_application)
    df['began_application'] = df['context'].apply(began_application)
    df['user_rejected'] = df['context'].apply(user_rejected)
    df['user_rejected_after_began'] = (df.user_rejected & df.began_application)
    df['finished_application'] = df['context'].apply(finished_application)
    df['failed'] = (df.began_application & ~df.user_rejected & ~df.finished_application)
    df['times_requested_more_info'] = df['context'].apply(times_requested_more_info)
    df['times_user_gave_requested_info'] = df['context'].apply(times_user_gave_requested_info)
    df['missing_info'] = df['times_requested_more_info'] > df['times_user_gave_requested_info']
    df['failed_missing_info'] = (df.failed & df.missing_info)
    df['user_gave_all_requested_info'] = (df.began_application &
                                          (df.times_user_gave_requested_info > 0) &
                                          ~df.failed_missing_info)
    df['parser'] = df['post_context'].apply(get_parser)
    return df


if __name__ == '__main__':
    df = load_job_scouts()
    postings = load_postings(df)
    df = pd.merge(df, postings, left_on='posting_id', right_on='post_id')
    df = process_job_scouts(df)
    kpis = calc_kpis(df)

    print('\n\n')
    for label, kpi in kpis.items():
        if label == 'application_success_by_ATS':
            print('\n')
            print(kpi.to_string())
            print('\n')
        else:
            print(label, kpi)

    # Attribute the Craigslist failures to missing reply links/emails where possible.
    cl_df = df[(df.parser == 'CraigslistParser') & df.failed].copy()
    cl_df['has_reply_link'] = cl_df.post_context.apply(has_reply_link)
    cl_df['has_reply_email'] = cl_df.post_context.apply(has_reply_email)
    cl_df['unexplained_failure'] = (cl_df.has_reply_link & cl_df.has_reply_email)
    print("Craigslist failures due to missing reply link: %i" % sum(~cl_df['has_reply_link']))
    print("Craigslist failures due to missing reply email: %i" % sum(
        ~cl_df['has_reply_email'] & cl_df['has_reply_link']))
    print("Craigslist failures with unknown cause: %i" % sum(cl_df['unexplained_failure']))
    # print("Unexplained failures concentrated in the following user IDs:")
    # print(cl_df[cl_df.unexplained_failure][['unexplained_failure', 'user_id']].groupby('user_id').count().to_string())