m_fx nguyenhieuec

Useful Pandas Snippets

A personal diary of DataFrame munging over the years.

Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)

	from flask import Flask, request, jsonify
	from flask_sqlalchemy import SQLAlchemy
	from flask_marshmallow import Marshmallow
	from flask_restful import Resource, Api


	# Create the Flask application and wrap it in a flask_restful Api.
	app = Flask(__name__)
	api = Api(app)
	# SQLAlchemy settings: SQLite database URI pointing at users.db, with
	# modification tracking disabled.
	app.config.update(
	    SQLALCHEMY_DATABASE_URI='sqlite:///users.db',
	    SQLALCHEMY_TRACK_MODIFICATIONS=False,
	)

	"""A liveness prober dag for monitoring composer.googleapis.com/environment/healthy."""
	import airflow
	from airflow import DAG
	from airflow.operators.bash_operator import BashOperator
	from datetime import timedelta

	# Default arguments for the liveness-prober DAG described above:
	# start_date is days_ago(0), one retry, five minutes between attempts.
	# NOTE(review): the DAG/operator that consumes this dict is not part of
	# this excerpt.
	default_args = {
	    'start_date': airflow.utils.dates.days_ago(0),
	    'retries': 1,
	    'retry_delay': timedelta(minutes=5),
	}

	import cv2, queue, threading, time


	class VideoCapture:
	    """Wrap ``cv2.VideoCapture`` and start a background reader thread.

	    NOTE(review): ``self._reader`` is used as the thread target but is not
	    defined in this excerpt; it must exist on the class for construction
	    to succeed.
	    """

	    def __init__(self, name):
	        """Open the capture source and launch the reader thread.

	        Args:
	            name: Passed unchanged to ``cv2.VideoCapture``.
	        """
	        self.cap = cv2.VideoCapture(name)
	        self.q = queue.Queue()
	        reader = threading.Thread(target=self._reader)
	        # Daemon thread, so it does not keep the interpreter alive at exit.
	        reader.daemon = True
	        reader.start()

	# nn.Module subclass whose constructor takes a single `config` object.
	# NOTE(review): this listing appears to be cut off part-way through
	# __init__ and its indentation has been flattened; only the first few
	# constructor statements are visible here.
	class BertEmbeddingBag(nn.Module):
	"""Construct the embeddings from word, position and token_type embeddings.
	"""
	def __init__(self, config):
	super(BertEmbeddingBag, self).__init__()

	# self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
	# Load an array from the file at config.ngram_matrix_path (its later use
	# is not visible in this excerpt).
	ngram_matrix=np.load(config.ngram_matrix_path)

	# Copy the old_bag setting from the config onto the module.
	self.old_bag = config.old_bag

	import re, csv
	from time import sleep, time
	from random import uniform, randint
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.support.wait import WebDriverWait
	from selenium.webdriver.common.action_chains import ActionChains
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.common.exceptions import NoSuchElementException

	# Parallel lookup strings: the character at index i of s1 is replaced by
	# the character at index i of s0.
	s1 = u'ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯưẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ'
	s0 = u'AAAAEEEIIOOOOUUYaaaaeeeiioooouuyAaDdIiUuOoUuAaAaAaAaAaAaAaAaAaAaAaAaEeEeEeEeEeEeEeEeIiIiOoOoOoOoOoOoOoOoOoOoOoOoUuUuUuUuUuUuUuYyYyYyYy'
	# Accented character -> unaccented replacement, built once so each lookup
	# is a dict access instead of a linear scan of s1.
	_ACCENT_MAP = dict(zip(s1, s0))


	def remove_accents(input_str):
	    """Return ``input_str`` with every character found in ``s1`` replaced.

	    Each character listed in ``s1`` is swapped for the character at the same
	    position in ``s0``; all other characters are copied through unchanged.

	    Args:
	        input_str: Text to process.

	    Returns:
	        The text with the accented characters replaced.
	    """
	    # Debug echo of the UTF-8 encoded input; the parenthesised form parses
	    # on both Python 2 and Python 3.
	    print(input_str.encode('utf-8'))
	    return ''.join(_ACCENT_MAP.get(c, c) for c in input_str)

	import pandas as pd

	def _map_to_pandas(rdds):
	""" Needs to be here due to pickling issues """
	return [pd.DataFrame(list(rdds))]

	def toPandas(df, n_partitions=None):
	"""
	Returns the contents of `df` as a local `pandas.DataFrame` in a speedy fashion. The DataFrame is
	repartitioned if `n_partitions` is passed.