This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Collection of methods built to assist in data augmentation for extraction datasets | |
""" | |
from ast import literal_eval | |
import json | |
import random | |
from collections import defaultdict | |
from functools import partial | |
from typing import Iterable, Dict, Callable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Demo script for Needle-in-a-haystack problems | |
""" | |
from functools import partial | |
import numpy as np | |
from indicoio.custom import vectorize | |
from scipy.spatial.distance import cdist | |
from scipy.stats import gmean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
def fix_moodle_output(assn_dir, outdir): | |
for (_, _, filenames) in os.walk(assn_dir): | |
for filename in filenames: | |
# Remove spaces so we can use this as a package name | |
name = filename.split("_")[0].replace(" ", "").replace("-", "") | |
if not os.path.exists("%s/%s" % (outdir, name)): | |
os.makedirs("%s/%s" % (outdir, name)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[4.8502882289326867e-14, 8.820403952763587e-14, 1.0250590153123516e-13, 1.6954166246183968e-13, 1.908753789966549e-13, 2.004836996365545e-13, 3.5909784960471013e-13, 5.166693869401628e-13, 7.550572170701337e-13, 9.448695131203987e-13, 1.0156517689399418e-12, 1.1252632977953388e-12, 1.2755835948385307e-12, 1.718926371953829e-12, 5.561410412067812e-12, 5.612637550150243e-12, 8.453230126630751e-12, 8.725636132121303e-12, 1.613513316811227e-11, 1.9752204589154072e-11, 3.992415416880054e-11, 5.064793173473998e-11, 6.907684718699313e-11, 7.573645788325202e-11, 1.9359271945838294e-10, 2.5869603101103545e-10, 3.2604244542604767e-10, 6.107383625965949e-10, 3.054349088416707e-09, 3.948608349010387e-09, 4.447405216487758e-09, 1.0212339772102215e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.3008598108779382e-06, 1.300859810 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def firstNonRepeated(s):
    """Return the first item of ``s`` that occurs exactly once.

    Args:
        s: A string (or other sequence of hashable items) to scan.

    Returns:
        The first item whose total count in ``s`` is 1, or ``""`` when
        every item repeats or ``s`` is empty.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    # Count everything once up front instead of re-slicing and re-searching
    # the sequence for every position (which was quadratic).
    counts = Counter(s)
    return next((letter for letter in s if counts[letter] == 1), "")
def first_non_repeated1(s):
    """Return the first item of ``s`` that occurs exactly once.

    Args:
        s: A string (or other sequence of hashable items) to scan.

    Returns:
        The first item whose total count in ``s`` is 1, or ``""`` when
        every item repeats or ``s`` is empty.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    # One counting pass, then one scan in original order; avoids building a
    # fresh sliced copy of ``s`` for every character.
    counts = Counter(s)
    for letter in s:
        if counts[letter] == 1:
            return letter
    return ""
def first_non_repeated1(s): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from collections import OrderedDict | |
import numpy as np | |
from scipy.spatial.distance import cdist | |
from indicoio.custom import vectorize | |
from nouns import NOUNS | |
FEATURES = json.load(open("features.json"), object_pairs_hook=OrderedDict) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"http://images.neimanmarcus.com/product_assets/T/8/A/P/X/NMT8APX_mz.jpg": [0.0, 0.0, 0.7503045797348022, 0.0, 4.050353527069092, 0.0, 0.0, 3.970454454421997, 2.56343936920166, 0.0, 0.0, 0.0, 0.0, 0.8763924241065979, 2.9059646129608154, 1.7854936122894287, 0.0, 0.0, 0.0, 3.603835105895996, 0.0, 0.0, 0.0, 0.0, 2.9097509384155273, 0.6788361072540283, 0.0, 0.0, 0.0, 0.0, 0.9168252348899841, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21072697639465332, 0.0, 2.585055112838745, 0.0, 0.18340826034545898, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.772193431854248, 0.0, 0.0, 0.0, 0.0, 1.1300804615020752, 0.0, 0.0, 0.0, 1.8126261234283447, 0.0, 0.0, 2.4351160526275635, 1.0090645551681519, 6.7620415687561035, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6218217611312866, 0.0, 3.785642385482788, 0.0, 0.0, 0.0, 0.4094317555427551, 0.0, 1.046784520149231, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5131956338882446, 0.0, 2.341181516647339, 3.7282121181488037, 2.7647757530212402, 0.0, 0.0, 0.2037421613931656, 0.0, 0.0, 2.629900932 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
id title description google_product_category product_type price sale_price sale_price_effective_date link mobile_link image_link additional_image_link brand gtin mpn identifier_exists condition availability availability_date item_group_id color material pattern size size_type size_system gender age_group tax shipping shipping_weight shipping_label multipack is_bundle adult adwords_redirect adwords_grouping adwords_labels custom_label_0 custom_label_1 custom_label_2 custom_label_3 custom_label_4 excluded_destination expiration_date promotion_id display_ads_id display_ads_link display_ads_similar_id display_ads_title display_ads_value shipping_length shipping_width shipping_height | |
sku152300450 Washable-Crepe Straight-Leg Pants, Petite, Size: PS (6/8), BLACK - Eileen Fisher Eileen Fisher Washable-Crepe Straight-Leg Pants, Petite Details From Eileen Fisher, bi-stretch crepe pants with elegant day-to-night texture and remarkable fit memory, great for travel. 29" approx. inseam. Regular rise; yoked waist contours t |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_voter_links(outfile="voter_info.txt"):
    """Fetch a range of listing pages and append the links found to a file.

    For every id in ``[128555, 214545)`` the corresponding
    ``complete.php`` page is downloaded, anchors matching ``tr>td>a`` are
    selected, and each non-empty ``href`` is appended to ``outfile`` on its
    own line. The URL of each processed page is printed as progress output.

    Args:
        outfile: Path of the file to append links to (opened in ``'a'`` mode).
    """
    # ``range`` (rather than ``xrange``) so the function runs on Python 2 and 3.
    start_urls = ("http://usavoters.directory/complete.php?id=%s" % i for i in range(128555, 214545))
    # The selector does not depend on the page, so compile it once.
    link_selector = CSSSelector('tr>td>a')
    with open(outfile, 'a') as sink:
        for url in start_urls:
            document = etree.HTML(requests.get(url).content)
            # The first 14 matches are skipped.
            # NOTE(review): presumably these are navigation links - confirm.
            person_links = link_selector(document)[14:]
            hrefs = [link.get('href') for link in person_links if link.get('href')]
            if hrefs:
                # Terminate each page's batch with a newline; otherwise the
                # last link of one page is fused with the first link of the
                # next page on the same output line.
                sink.write('\n'.join(hrefs) + '\n')
            print(url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import urllib | |
URLS = { | |
'css': 'http://cssminifier.com/raw', | |
'js': 'http://javascript-minifier.com/raw' | |
} | |
def new_filepath(filepath): |
NewerOlder