This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Entropy experiment.""" | |
from dataclasses import dataclass | |
from math import inf | |
from secrets import randbits | |
DEFAULT_STEP: int = 64 # shall we use bigger step? | |
@dataclass | |
class Entropy: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from js2xml import parse, pretty_print as tostring | |
from js2xml.jsonlike import make_dict, getall as get_json_objs | |
MAKE_DICT_TYPES = ( | |
# Types that can be handled by make_dict. | |
'array', | |
'object', | |
'property', | |
'string', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################## | |
# /etc/elasticsearch/elasticsearch.yml | |
# | |
# Base configuration for a write heavy cluster | |
# | |
# Cluster / Node Basics | |
cluster.name: logng | |
# Node can have arbitrary attributes we can use for routing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dask import delayed | |
from elasticsearch import Elasticsearch | |
from elasticsearch.helpers import scan | |
def read_elasticsearch(query=None, npartitions=8, client_cls=None, | |
client_kwargs=None, **kwargs): | |
"""Reads documents from Elasticsearch. | |
By default, documents are sorted by ``_doc``. For more information see the |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A fastavro-based avro reader for Dask. | |
Disclaimer: This code was recovered from dask's distributed project. | |
""" | |
import io | |
import fastavro | |
import json | |
from dask import delayed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py - INFO - Failed to serialize <_io.BufferedReader name='/home/shared/input-01.jl.gz'> | |
Traceback (most recent call last): | |
File "/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py", line 30, in dumps | |
result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) | |
TypeError: cannot serialize '_io.BufferedReader' object | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py", line 43, in dumps |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
settings = {} | |
bot = scrapy.CrawlerBot(name="mybot/1.0", settings=settings) | |
def follow_links(response): | |
for link in response.iter_links(): | |
bot.crawl(link.url, callback=follow_links, referer=response) | |
bot.emit({ | |
"url": response.url, | |
"status": response.status, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sqlite3 | |
from hashlib import md5 | |
from time import time | |
import simplejson as json | |
from flask import Flask | |
from flask.ext import restful | |
from flask import g | |
from flask import request |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
#setting up a comparable dataframe | |
df = pd.DataFrame(np.random.randint(20,100,size=(50, 4)), columns=['A','B','C','D']) | |
#these two columns become a multi-column index | |
df['year_idx'] = np.random.randint(2000,2004,50) | |
df['id_idx'] = np.random.randint(10000,19999,50) | |
df.drop_duplicates(subset=['year_idx','id_idx'],inplace=True) |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
NewerOlder