Last active
March 15, 2018 15:03
-
-
Save mwek/0f43a9b201d8427efcc5464047c2c518 to your computer and use it in GitHub Desktop.
Greenhouse "Past interviews" view to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Usage:
# 1. Go to https://app.greenhouse.io/interviews?type=past and copy the source code.
# 2. Run "pbpaste | ./greenhouse.py"
# 3. Analyze the CSV in the program of your choice.
from bs4 import BeautifulSoup | |
from itertools import chain | |
import csv | |
from io import StringIO | |
from datetime import datetime, timedelta | |
from sys import stdin | |
def to_date(time_string):
    """Convert a relative Greenhouse timestamp into an absolute date string.

    Understands 'Yesterday', 'N day(s) ago' and 'N hr(s) ago', producing a
    'Mon DD, YYYY' string relative to the current local time. Anything else
    (already-absolute dates, unrecognized units such as weeks) is returned
    unchanged.
    """
    # Normalize the special-cased 'Yesterday' into the generic relative form.
    text = '1 day ago' if time_string == 'Yesterday' else time_string
    if not text.endswith('ago'):
        return text
    amount, unit = text.split(maxsplit=1)
    if unit.startswith('day'):
        delta = timedelta(days=int(amount))
    elif unit.startswith('hr'):
        delta = timedelta(hours=int(amount))
    else:
        # Unknown unit (e.g. 'weeks'): pass the string through untouched.
        return text
    return (datetime.now() - delta).strftime('%b %d, %Y')
def get_status(soup):
    """Return the human-readable feedback status for one interview row.

    Scans the row's markup for known status markers and returns the label of
    the first one found; returns an empty string when no marker is present.
    """
    # Ordered (tag, CSS class, label) triples: the first match wins,
    # preserving the precedence of the checks.
    markers = (
        ('a', 'awaiting-feedback', 'Awaiting feedback'),
        ('span', 'two-thumbs-down', 'Strong no'),
        ('span', 'thumbs-down', 'No'),
        ('span', 'no-decision', 'No decision'),
        ('span', 'thumbs-up', 'Yes'),
        ('span', 'two-thumbs-up', 'Strong yes'),
    )
    for tag, css_class, label in markers:
        if soup.find(tag, class_=css_class):
            return label
    return ''
def parse_interview(soup):
    """Extract one interview row into a flat dict.

    Reads candidate/position from the 'candidate' div and
    date/type/application/location from the 'interview-details' div when
    those containers exist, plus the feedback status. Missing strings
    default to ''. Returns None when every extracted value is empty.
    """
    interview = {}

    candidate_div = soup.find('div', class_='candidate')
    if candidate_div:
        strings = candidate_div.stripped_strings
        for key in ('candidate', 'position'):
            interview[key] = next(strings, '')

    details_div = soup.find('div', class_='interview-details')
    if details_div:
        strings = details_div.stripped_strings
        # The first detail string is a (possibly relative) timestamp.
        interview['date'] = to_date(next(strings, ''))
        for key in ('type', 'application', 'location'):
            interview[key] = next(strings, '')

    interview['status'] = get_status(soup)
    return interview if any(interview.values()) else None
def parse_html(html):
    """Parse the 'Past interviews' page source into a list of interview dicts.

    Restricts the search to the interviews section when present (falls back
    to the whole document otherwise) and drops rows that yielded no data.
    """
    soup = BeautifulSoup(html, 'html.parser')
    section = soup.find('div', id='interviews_section') or soup
    rows = section.find_all('li', class_='row')
    parsed = (parse_interview(row) for row in rows)
    return [interview for interview in parsed if interview]
def to_csv(interviews):
    """Render interview dicts as CSV text with a fixed column set.

    Keys outside the fixed column list (e.g. 'application', 'location') are
    silently dropped via extrasaction='ignore' so the output schema stays
    stable.
    """
    columns = ['candidate', 'position', 'date', 'type', 'status']
    buffer = StringIO()
    try:
        writer = csv.DictWriter(buffer, fieldnames=columns,
                                extrasaction='ignore')
        writer.writeheader()
        writer.writerows(interviews)
        return buffer.getvalue()
    finally:
        buffer.close()
if __name__ == '__main__':
    # Read the copied page source from stdin and emit CSV on stdout.
    # to_csv() already terminates the last row with CRLF, so suppress
    # print()'s own newline to avoid a spurious trailing blank line.
    print(to_csv(parse_html(stdin.read())), end='')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment