This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"config": { | |
"view": {"continuousWidth": 400, "continuousHeight": 500}, | |
"axis": {"labelFontSize": 12, "titleFontSize": 12}, | |
"legend": {"labelFontSize": 12, "titleFontSize": 12} | |
}, | |
"concat": [ | |
{ | |
"mark": "bar", | |
"encoding": { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tempfile import TemporaryFile | |
import requests | |
import shutil | |
import tarfile | |
import csv | |
URL = 'https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/historical_releases/cord-19_{date}.tar.gz' | |
CSV_PATH = '{date}/all_sources_metadata_{date}.csv' | |
def stream_to_file(url, fileobj): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from contextlib import contextmanager | |
import boto3 | |
from io import BytesIO | |
from transformers import PretrainedConfig, PreTrainedModel | |
import json | |
from tempfile import NamedTemporaryFile | |
BUCKET_NAME = "open-jobs-lake" | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import time | |
from functools import lru_cache | |
WINNER_TEXT = "The winner is" | |
@lru_cache() | |
def get_data(): | |
data = pd.read_excel("ProFinda Registration Drive (Responses).xlsx").to_dict(orient="records") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipedia | |
from functools import lru_cache | |
@lru_cache()
def get_page_cats(query):
    """Look up the Wikipedia page for ``query`` and return its categories.

    The page is requested with ``auto_suggest=False``, so ``query`` is
    passed to ``wikipedia.page`` exactly as given. If that lookup raises
    ``wikipedia.PageError`` or ``wikipedia.DisambiguationError``, an empty
    list is returned instead.

    Results are memoised per ``query`` by ``lru_cache``; the returned
    object is the cached one, so callers should not mutate it.
    """
    try:
        page = wikipedia.page(query, auto_suggest=False)
    except (wikipedia.PageError, wikipedia.DisambiguationError):
        # Missing or ambiguous page: report "no categories".
        return []
    else:
        return page.categories
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nesta.core.orms.orm_utils import db_session, get_mysql_engine | |
from nesta.core.orms.arxiv_orm import ArticleVector | |
import numpy as np | |
import json | |
import os | |
os.environ['MYSQLDB'] = "/path/to/innovation-mapping-5712.config" | |
def query_and_bundle(session, fields, start, limit, filter_): | |
q = session.query(*fields) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
ignore = ["2020", "onsOpenGeo", 'worldbank', 'table_updates'] | |
out_data = defaultdict(dict) | |
for dataset, tables in schemas.items(): | |
for name, table in tables.items(): | |
if any(i in name for i in ignore): | |
continue | |
table = [dict(field_name=str(c.key), type=str(c.type), primary_key=c.primary_key, nullable=c.nullable) | |
for c in table.columns] | |
out_data[dataset][name] = table |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from data_getters.core import get_engine | |
def bad_tokenizer(text):
    """Naively tokenize ``text``.

    Lower-cases the input, deletes every ``.`` character and splits the
    result on runs of whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercase tokens; empty if ``text`` has no
        non-whitespace characters left after removing full stops.
    """
    # The original body referenced an undefined name ``x`` here, which
    # raised NameError on every call; the parameter is ``text``.
    return text.lower().replace(".", "").split()
columns=['id', 'created', 'title', 'abstract', 'mag_id', 'citation_count', 'article_source'] | |
con = get_engine("/path/to/innovation-mapping-5712.config") | |
chunks = pd.read_sql_table('arxiv_articles', con, columns=columns, chunksize=1000) | |
keywords = ('covid', 'covid-19', 'coronavirus') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_the_other_thing(run, output):
    """Combine ``run`` and ``output`` into a single one-argument callable.

    The returned function ``wrap(self)`` calls ``run(self)`` first and then
    ``output(self)``. The return values of both calls are discarded, so
    ``wrap`` itself returns ``None``.
    """
    steps = (run, output)

    def wrap(self):
        # Order matters: ``run`` always executes before ``output``.
        for step in steps:
            step(self)

    return wrap
class A: | |
name='a' | |
def run(self): | |
pass |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def assym(a):
    """Return ``1 - det((a + a.T) / 2) / det(a)`` for a square matrix ``a``.

    ``(a + a.T) / 2`` is the symmetric part of ``a``. For a symmetric
    ``a`` it equals ``a``, so the ratio of determinants is 1 and the
    result is 0 (up to floating-point error).

    ``a`` must support ``+``, ``.T`` and scalar multiplication in the way
    NumPy arrays/matrices do. No guard is applied for ``det(a) == 0``.
    """
    symmetric_part = 0.5 * (a + a.T)
    det_ratio = np.linalg.det(symmetric_part) / np.linalg.det(a)
    return 1 - det_ratio
for a in ([[10,123,0],[123,10,0],[0,0,10]], [[10,123,0],[121,10,0],[0,0,10]], | |
[[10,123,0],[50,10,0],[0,0,10]], [[10,123,0],[0,10,0],[23,0,10]], | |
[[10,123,0],[-123,10,0],[5422,0,10]]): | |
a = np.matrix(a) | |
print(a) |
NewerOlder