This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
def strange_zeroes(dataset, drop_columns = None, zero = 0): | |
"""Exploratory Data Analysis function that looks to see if there is any correlation between '0' values occurerence, | |
which would suggest a relationship between the instruments used and their non-detection of a value.""" | |
# Find all zeroes (can be altered to include other common missing / non-response values as well)
zeroes = (dataset == zero) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests

# Airtable request headers: the API key is sent as a bearer token.
# Replace the bracketed placeholder with a real key before running.
headers = {"Authorization": "Bearer [INSERT AIRTABLE API KEY HERE]"}

# Sent as query-string parameters on the GET request below.
# NOTE(review): presumably caps the result at 100 records taken from the
# "Grid view" view -- confirm against the Airtable API documentation.
params = (
    ('maxRecords', '100'),
    ('view', 'Grid view'),
)

# Endpoint for the "URLS to Process" table (the space is percent-encoded).
URL = 'https://api.airtable.com/v0/appgaUkvDLKnYOmdA/URLS%20to%20Process'

# An explicit timeout keeps the script from hanging forever if the server
# never answers; requests applies no timeout by default.
response = requests.get(URL, params=params, headers=headers, timeout=30)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import newspaper

# Build an Article for the first entry of `urls` (defined elsewhere in the
# workflow), then run the three processing steps in order.
article = newspaper.Article(urls[0])
article.download()
article.parse()
# NOTE(review): per the newspaper documentation, nlp() is what fills in
# fields such as `article.keywords` -- confirm for the installed version.
article.nlp()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
upload_dict = { | |
"fields": { | |
"URL" : article.url, | |
"Title" : article.title, | |
"Date" : publish_date(article), | |
"Author(s)" : article.authors, | |
"Site Name" : article.meta_data['og']['site_name'], | |
"Media" : determine_media(article), | |
"Opinion" : determine_opinion(article), | |
"Keywords" : article.keywords, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mapping of the authors that are already known; `create_author_dict` and
# `add_new_author` are defined elsewhere in the workflow.
author_dict = create_author_dict()

for author in article.authors:
    # A dict supports membership tests on its keys directly, so there is no
    # need to build a throwaway list of keys on every iteration.
    if author in author_dict:
        print("Author already exists")
    else:
        print(add_new_author(author))

# NOTE(review): the original indentation was lost; this refresh is assumed to
# run once after the loop so that any newly added authors are picked up.
author_dict = create_author_dict()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_authors_to_ids(authors, id_lookup=None):
    """Translate author names into their corresponding ids.

    Args:
        authors: Iterable of author names to look up.
        id_lookup: Optional mapping of author name -> id. When omitted, the
            module-level ``author_dict`` is used, which matches the original
            behavior of this function.

    Returns:
        A list of ids, in the same order as ``authors``.

    Raises:
        KeyError: If an author is missing from the lookup mapping.
    """
    if id_lookup is None:
        # Fall back to the global mapping so existing callers keep working.
        id_lookup = author_dict
    return [id_lookup[author] for author in authors]
upload_dict = { | |
"fields": { | |
"URL" : article.url, | |
"Title" : article.title, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

# Serialize the record payload (`upload_dict` is built earlier in the
# workflow) into a JSON string for the request body.
upload_data = json.dumps(upload_dict)

# Replace the placeholder with a real Airtable API key before running.
headers = {
    "Authorization": "Bearer INSERT AIRTABLE USER API KEY HERE",
    'Content-Type': 'application/json',
}

# Replace the placeholder segment with the Airtable base (document) key.
URL = 'https://api.airtable.com/v0/INSERT AIRTABLE DOC KEY HERE/Online%20Media%20Hits'

# `requests` is expected to be imported already by an earlier step.
# An explicit timeout keeps the script from hanging forever if the server
# never answers; requests applies no timeout by default.
response = requests.post(URL, headers=headers, data=upload_data, timeout=30)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_locations_in_pandas_dataframe(dataframe, search_term, starts_with_search_term=False): | |
""" Returns an ordered list of (dataframe column, dataframe row, specific string) index values where `search_term` is | |
found in Pandas `dataframe` string data. | |
Keyword arguments: | |
starts_with_search_term: if set to True, then returns only strings that start with `search_term` | |
i.e., "Re" in "Rebecca" but not "González, Rebecca" | |
Source: https://gist.github.com/KalebNyquist/6781634b4ad307576046352696d2d194 | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
def airtable_download(table, params_dict={}, api_key=None, base_id=None, record_id=None): | |
"""Makes a request to Airtable for all records from a single table. | |
Returns data in dictionary format. | |
Keyword Arguments: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas as pd | |
from bs4 import BeautifulSoup | |
# List of States
# source: https://gist.github.com/mshafrir/2646763#gistcomment-1583853
# Maps lowercase two-letter postal abbreviations to lowercase full names
# (the 50 states plus the District of Columbia).
states = {
    "al": "alabama", "ak": "alaska", "az": "arizona", "ar": "arkansas",
    "ca": "california", "co": "colorado", "ct": "connecticut",
    "dc": "district of columbia", "de": "delaware", "fl": "florida",
    "ga": "georgia", "hi": "hawaii", "id": "idaho", "il": "illinois",
    "in": "indiana", "ia": "iowa", "ks": "kansas", "ky": "kentucky",
    "la": "louisiana", "me": "maine", "md": "maryland",
    "ma": "massachusetts", "mi": "michigan", "mn": "minnesota",
    "ms": "mississippi", "mo": "missouri", "mt": "montana",
    "ne": "nebraska", "nv": "nevada", "nh": "new hampshire",
    "nj": "new jersey", "nm": "new mexico", "ny": "new york",
    "nc": "north carolina", "nd": "north dakota", "oh": "ohio",
    "ok": "oklahoma", "or": "oregon", "pa": "pennsylvania",
    "ri": "rhode island", "sc": "south carolina", "sd": "south dakota",
    "tn": "tennessee", "tx": "texas", "ut": "utah", "vt": "vermont",
    "va": "virginia", "wa": "washington", "wv": "west virginia",
    "wi": "wisconsin", "wy": "wyoming",
}
states = |
OlderNewer