This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | domain = 'http://www.igmchicago.org' | |
| response = requests.get(domain + '/igm-economic-experts-panel') | |
| soup = BeautifulSoup(response.text, 'lxml') | |
| polls = soup.findAll('div', {'class': 'poll-listing'}) | |
| topics = {} | |
| for poll in polls: | |
| topic = poll.find('h2').get_text() | |
| handle = poll.find('h3', {'class': 'surveyQuestion'}).a['href'] | |
| topics[topic] = handle | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | data = [] | |
| for topic, handle in topics.items(): | |
| print(handle) | |
| response = requests.get(domain + handle) | |
| soup = BeautifulSoup(response.text, 'lxml') | |
| survey_questions = list('ABCD')[:len(soup.findAll('h3', {'class': 'surveyQuestion'}))] | |
| survey_date = soup.find('h6').get_text() | |
| tables = soup.findAll('table', {'class': 'responseDetail'}) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | for survey_question, table in zip(survey_questions, tables): | |
| rows = table.findAll('tr')#, {'class': 'parent-row'}) | |
| for row in rows: | |
| if row.get('class') == ['parent-row']: | |
| cells = row.findAll('td') | |
| response = cells[2].get_text().strip() | |
| confidence = cells[3].get_text().strip() | |
| comment = cells[4].get_text().strip() | |
| tmp_data = { | |
| 'survey_date': dtutil.parse(survey_date), | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | col_order = ['survey_date', 'topic_name', 'topic_url', 'survey_question', 'economist_name', 'economist_url', 'economist_headshot', 'institution', 'response', 'confidence', 'comment'] | |
| df = pd.DataFrame(data, columns=col_order) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import sqlalchemy | |
| engine = sqlalchemy.create_engine('sqlite:///igmchicago.db') | |
| df.to_sql('igmchicago', engine, index=False, if_exists='replace') | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | cnxn = engine.connect() | |
| r = cnxn.execute(""" | |
| WITH igm AS ( | |
| SELECT economist_name, institution, AVG(confidence) AS avg_conf | |
| FROM igmchicago | |
| GROUP BY economist_name, institution | |
| ) | |
| SELECT * FROM igm i | |
| WHERE avg_conf > | |
| (SELECT AVG(avg_conf) FROM igm g WHERE i.institution = g.institution) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | df.columns | |
| df.shape | |
| df.describe() | |
| df.info() | |
| any(df.duplicated()) | |
| df.head() | |
| df.tail() | |
| df['institution'].value_counts().plot(kind='bar') | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Correct response types | |
| df.loc[df['response'].str.contains(r'did not', case=False) | df['response'].str.contains(r'---'), 'response'] = np.nan | |
| # Convert empty string comments into null types | |
| df['comment'] = df['comment'].replace(r'^$', np.nan, regex=True) | |
| # Assign sex variable to economists | |
| sex = pd.read_csv(os.path.join(os.path.dirname(__file__), 'economist_sex_mapping.csv'), index_col='economist_name') | |
| df['sex'] = df['economist_name'].map(sex['sex']) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | facet_labels = ['economist_name', 'institution', 'sex'] | |
| facets = df.groupby(facet_labels).first().reset_index()[facet_labels] | |
| # Summary statistics | |
| len(df.groupby('topic_name')) # 132 topics | |
| len(df.groupby(['topic_name', 'survey_question'])) # 195 survey questions | |
| len(facets['economist_name'].unique()) # 51 economists | |
| facets.groupby('sex').size() # 11 female, 40 male | |
| facets.groupby('institution').size().sort_values(ascending=False) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | -- Drop tables if they exist to allow for a "CREATE OR REPLACE" like behavior in SQLite | |
| DROP TABLE IF EXISTS "order_details"; | |
| DROP TABLE IF EXISTS "orders"; | |
| DROP TABLE IF EXISTS "products"; | |
| DROP TABLE IF EXISTS "categories"; | |
| DROP TABLE IF EXISTS "customers"; | |
| DROP TABLE IF EXISTS "employees"; | |
| DROP TABLE IF EXISTS "shippers"; | |
| DROP TABLE IF EXISTS "suppliers"; | |
| DROP TABLE IF EXISTS "demo"; |