This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce | |
import numpy as np | |
import pandas as pd | |
class Counts: | |
"""COUNT ... GROUP BY on every column of a large dataset""" | |
def __init__(self, file, ddl_file, n_cols=None, n_top=10): | |
self.file = file | |
self.columns = get_columns_from_ddl(ddl_file) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
def fix(file, sep, nf, output): | |
"""Checks and fixes prematurely terminated lines in a tabular file. | |
:param file: input file | |
:param sep: delimiter or its ASCII **octal** code | |
:param nf: expected number of fields | |
:param output: output file | |
:return: None |
NewerOlder