Created
June 5, 2019 05:31
-
-
Save Antrikshy/e117f072192357c7e4d01df9bfb47737 to your computer and use it in GitHub Desktop.
Script to re-process Stack Overflow Developer Survey 2019 data to get tech popularity stats by self-reported developer type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
from collections import Counter

# Tallies filled in by main(): each maps a developer type (one answer from the
# 'DevType' column) to a Counter of technology name -> number of respondents.
have_tally = {}
want_tally = {}

# Get the dataset at insights.stackoverflow.com/survey
path_to_datum = '/path/to/downloaded/developer_survey_2019/survey_results_public.csv'

# This *should* work without too much modification with SO survey datasets from other
# years, but might require changing the names of columns.
DEV_TYPE_COLUMN = 'DevType'
HAVE_COLUMNS = (
    'LanguageWorkedWith',
    'WebFrameWorkedWith',
    'DatabaseWorkedWith',
    'PlatformWorkedWith',
    'MiscTechWorkedWith',
)
WANT_COLUMNS = (
    'LanguageDesireNextYear',
    'WebFrameDesireNextYear',
    'DatabaseDesireNextYear',
    'PlatformDesireNextYear',
    'MiscTechDesireNextYear',
)

# Placeholder this script treats as "no answer given".
MISSING_ANSWER = 'NA'

# How many technologies to print per developer type.
TOP_N = 15


def split_answers(cell):
    """Split a semicolon-separated survey cell into its individual answers.

    Args:
        cell: Raw cell value; may be None when a CSV row is shorter than
            the header (csv.DictReader fills missing fields with None).

    Returns:
        The answers in their original order, without the 'NA' placeholder
        and without empty strings.
    """
    if not cell:
        return []
    return [
        answer
        for answer in cell.split(';')
        if answer and answer != MISSING_ANSWER
    ]


def collect_techs(row, columns):
    """Return every technology listed by one respondent across *columns*.

    Raises:
        KeyError: If a column is absent from *row* (e.g. a dataset from a
            different year that uses other column names).
    """
    techs = []
    for column in columns:
        techs.extend(split_answers(row[column]))
    return techs


def tally_rows(rows, have=None, want=None):
    """Count technologies per developer type over an iterable of survey rows.

    Args:
        rows: Iterable of mappings keyed by column name, such as the rows
            yielded by csv.DictReader.
        have: Optional dict to accumulate "worked with" counts into; a new
            dict is created when omitted.
        want: Optional dict to accumulate "desire next year" counts into; a
            new dict is created when omitted.

    Returns:
        A ``(have, want)`` tuple of dicts mapping developer type to a
        Counter of technology -> respondent count.
    """
    have = {} if have is None else have
    want = {} if want is None else want
    for row in rows:
        have_techs = collect_techs(row, HAVE_COLUMNS)
        want_techs = collect_techs(row, WANT_COLUMNS)
        # A respondent may report several developer types; their technologies
        # count once towards each of them.
        for dev_type in split_answers(row[DEV_TYPE_COLUMN]):
            # setdefault registers the developer type even when the
            # respondent listed no technologies at all.
            have.setdefault(dev_type, Counter()).update(have_techs)
            want.setdefault(dev_type, Counter()).update(want_techs)
    return have, want


def print_tally(title, tally, top_n=TOP_N):
    """Print the *top_n* most common technologies for each developer type."""
    print('====== ' + title + ' ======\n')
    for dev_type in tally:
        print(dev_type + '\n')
        for tech, count in tally[dev_type].most_common(top_n):
            print(tech + ' | ' + str(count))
        print('\n')


def main():
    """Read the survey CSV at ``path_to_datum`` and print both reports."""
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded newlines are parsed correctly. The explicit encoding avoids
    # depending on the platform default.
    # NOTE(review): assumes the dataset is UTF-8 encoded -- confirm when
    # using datasets from other years.
    with open(path_to_datum, 'r', encoding='utf-8', newline='') as datum:
        tally_rows(csv.DictReader(datum), have_tally, want_tally)

    print_tally('Haves', have_tally)
    print_tally('Wants', want_tally)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment