Last active
March 15, 2018 15:03
-
-
Save mwek/0f43a9b201d8427efcc5464047c2c518 to your computer and use it in GitHub Desktop.
Greenhouse "Past interviews" view to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Usage:
# 1. Go to https://app.greenhouse.io/interviews?type=past and copy the source code.
# 2. Run "pbpaste | ./greenhouse.py"
# 3. Analyze the CSV in the program of your choice.
from bs4 import BeautifulSoup | |
from itertools import chain | |
import csv | |
from io import StringIO | |
from datetime import datetime, timedelta | |
from sys import stdin | |
def to_date(time_string):
    """Convert a relative Greenhouse timestamp into an absolute date string.

    Understands 'Yesterday', 'N day(s) ago' and 'N hr(s) ago', producing a
    'Mon DD, YYYY' string relative to the current local time. Anything else
    (already-absolute dates, unrecognized units such as weeks) is returned
    unchanged.
    """
    # Normalize the special-cased 'Yesterday' into the generic relative form.
    text = '1 day ago' if time_string == 'Yesterday' else time_string
    if not text.endswith('ago'):
        return text
    amount, unit = text.split(maxsplit=1)
    if unit.startswith('day'):
        delta = timedelta(days=int(amount))
    elif unit.startswith('hr'):
        delta = timedelta(hours=int(amount))
    else:
        # Unknown unit (e.g. 'weeks'): pass the string through untouched.
        return text
    return (datetime.now() - delta).strftime('%b %d, %Y')
def get_status(soup):
    """Return the human-readable feedback status for one interview row.

    Scans the row's markup for known status markers and returns the label of
    the first one found; returns an empty string when no marker is present.
    """
    # Ordered (tag, CSS class, label) triples: the first match wins,
    # preserving the precedence of the checks.
    markers = (
        ('a', 'awaiting-feedback', 'Awaiting feedback'),
        ('span', 'two-thumbs-down', 'Strong no'),
        ('span', 'thumbs-down', 'No'),
        ('span', 'no-decision', 'No decision'),
        ('span', 'thumbs-up', 'Yes'),
        ('span', 'two-thumbs-up', 'Strong yes'),
    )
    for tag, css_class, label in markers:
        if soup.find(tag, class_=css_class):
            return label
    return ''
def parse_interview(soup):
    """Extract one interview row into a flat dict.

    Reads candidate/position from the 'candidate' div and
    date/type/application/location from the 'interview-details' div when
    those containers exist, plus the feedback status. Missing strings
    default to ''. Returns None when every extracted value is empty.
    """
    interview = {}

    candidate_div = soup.find('div', class_='candidate')
    if candidate_div:
        strings = candidate_div.stripped_strings
        for key in ('candidate', 'position'):
            interview[key] = next(strings, '')

    details_div = soup.find('div', class_='interview-details')
    if details_div:
        strings = details_div.stripped_strings
        # The first detail string is a (possibly relative) timestamp.
        interview['date'] = to_date(next(strings, ''))
        for key in ('type', 'application', 'location'):
            interview[key] = next(strings, '')

    interview['status'] = get_status(soup)
    return interview if any(interview.values()) else None
def parse_html(html):
    """Parse the 'Past interviews' page source into a list of interview dicts.

    Restricts the search to the interviews section when present (falls back
    to the whole document otherwise) and drops rows that yielded no data.
    """
    soup = BeautifulSoup(html, 'html.parser')
    section = soup.find('div', id='interviews_section') or soup
    rows = section.find_all('li', class_='row')
    parsed = (parse_interview(row) for row in rows)
    return [interview for interview in parsed if interview]
def to_csv(interviews):
    """Render interview dicts as CSV text with a fixed column set.

    Keys outside the fixed column list (e.g. 'application', 'location') are
    silently dropped via extrasaction='ignore' so the output schema stays
    stable.
    """
    columns = ['candidate', 'position', 'date', 'type', 'status']
    buffer = StringIO()
    try:
        writer = csv.DictWriter(buffer, fieldnames=columns,
                                extrasaction='ignore')
        writer.writeheader()
        writer.writerows(interviews)
        return buffer.getvalue()
    finally:
        buffer.close()
if __name__ == '__main__':
    # Read the copied page source from stdin and emit CSV on stdout.
    # to_csv() already terminates the last row with CRLF, so suppress
    # print()'s own newline to avoid a spurious trailing blank line.
    print(to_csv(parse_html(stdin.read())), end='')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment