A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
| from flask import Flask, request, jsonify | |
| from flask_sqlalchemy import SQLAlchemy | |
| from flask_marshmallow import Marshmallow | |
| from flask_restful import Resource, Api | |
| app = Flask(__name__) | |
| api = Api(app) | |
| app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///users.db' | |
| app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False |
| class QueryBuilder: | |
| def __init__(self): | |
| self.select_value = '' | |
| self.from_table_name = '' | |
| self.where_value = '' | |
| self.groupby_value = '' | |
| def select(self, select_arg): | |
| self.select_value = select_arg | |
| return self |
| """A liveness prober dag for monitoring composer.googleapis.com/environment/healthy.""" | |
| import airflow | |
| from airflow import DAG | |
| from airflow.operators.bash_operator import BashOperator | |
| from datetime import timedelta | |
| default_args = { | |
| 'start_date': airflow.utils.dates.days_ago(0), | |
| 'retries': 1, | |
| 'retry_delay': timedelta(minutes=5) |
| import cv2, queue, threading, time | |
| class VideoCapture: | |
| def __init__(self, name): | |
| self.cap = cv2.VideoCapture(name) | |
| self.q = queue.Queue() | |
| t = threading.Thread(target=self._reader) | |
| t.daemon = True | |
| t.start() |
| #!/usr/bin/env python3 | |
| import os | |
| import re | |
| import logging | |
| import click | |
| # don't index these | |
| excludes = { | |
| 'single_files', | |
| 'Icon', |
| class BertEmbeddingBag(nn.Module): | |
| """Construct the embeddings from word, position and token_type embeddings. | |
| """ | |
| def __init__(self, config): | |
| super(BertEmbeddingBag, self).__init__() | |
| # self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) | |
| ngram_matrix=np.load(config.ngram_matrix_path) | |
| self.old_bag = config.old_bag |
| import re, csv | |
| from time import sleep, time | |
| from random import uniform, randint | |
| from selenium import webdriver | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.wait import WebDriverWait | |
| from selenium.webdriver.common.action_chains import ActionChains | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from selenium.common.exceptions import NoSuchElementException |
| s1 = u'ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯưẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ' | |
| s0 = u'AAAAEEEIIOOOOUUYaaaaeeeiioooouuyAaDdIiUuOoUuAaAaAaAaAaAaAaAaAaAaAaAaEeEeEeEeEeEeEeEeIiIiOoOoOoOoOoOoOoOoOoOoOoOoUuUuUuUuUuUuUuYyYyYyYy' | |
| def remove_accents(input_str): | |
| s = '' | |
| print input_str.encode('utf-8') | |
| for c in input_str: | |
| if c in s1: | |
| s += s0[s1.index(c)] | |
| else: | |
| s += c |
| import pandas as pd | |
| def _map_to_pandas(rdds): | |
| """ Needs to be here due to pickling issues """ | |
| return [pd.DataFrame(list(rdds))] | |
| def toPandas(df, n_partitions=None): | |
| """ | |
| Returns the contents of `df` as a local `pandas.DataFrame` in a speedy fashion. The DataFrame is | |
| repartitioned if `n_partitions` is passed. |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)