Skip to content

Instantly share code, notes, and snippets.

@TomAugspurger
Last active September 12, 2018 09:58
Show Gist options
  • Save TomAugspurger/b9c20a55c996c68d0437e70459345f09 to your computer and use it in GitHub Desktop.
Save TomAugspurger/b9c20a55c996c68d0437e70459345f09 to your computer and use it in GitHub Desktop.
import pandas as pd
import dask.dataframe as dd
import numpy as np
import dask.array as da
import inspect
from numpydoc.docscrape import NumpyDocString
import pydoc
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Module, class or instance on which the attribute is looked up.
    name : str
        Name of the attribute to inspect.

    Returns
    -------
    inspect.Signature or None
        The signature of the attribute, or ``None`` when the attribute does
        not exist (``AttributeError``), is not callable (``TypeError``), or
        has no introspectable signature (``ValueError``).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        # Best effort: callers filter out names that cannot be introspected.
        return None
# Compare the public callables of numpy with their dask.array counterparts.
# For every name both modules expose, record which parameters dask adds
# ("extra"), which it lacks ("missing"), and -- for shared parameters --
# whether the signature and the documented (type, description) agree.
# The results are written to array-differ.csv and array-match.csv.
#
# NOTE: the ``pandas_*`` / ``pd_*`` names are carried over from the DataFrame
# version of this script; here they hold the *numpy* side of the comparison.

# Placeholder (type, description) for a parameter missing from a docstring.
# It must contain two items so star-unpacking always fills both columns;
# unpacking '' would yield nothing and shift every later value in the row.
_NO_DOC = ('', '')

pandas_methods = {name: maybe_build_signature(np, name)
                  for name in dir(np)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}
dask_methods = {name: maybe_build_signature(da, name)
                for name in dir(da)}
# Filter on the signature (value), not on the name (key): a name is always
# truthy, which would let ``None`` signatures through to the loop below.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

records = []
matching = []

# Sorted iteration keeps the CSV row order reproducible between runs.
for method in sorted(public):
    a = dask_methods[method]      # dask signature
    b = pandas_methods[method]    # reference (numpy) signature

    pd_parsed = NumpyDocString(pydoc.getdoc(getattr(np, method)))
    dd_parsed = NumpyDocString(pydoc.getdoc(getattr(da, method)))
    # Map parameter name -> (type, description joined into one string).
    pd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in pd_parsed['Parameters']}
    dd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in dd_parsed['Parameters']}

    dask_params = set(a.parameters)
    ref_params = set(b.parameters)

    # Parameters only dask accepts.
    for parameter in sorted(dask_params - ref_params):
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, _NO_DOC)))
    # Parameters the reference accepts but dask does not.
    for parameter in sorted(ref_params - dask_params):
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, _NO_DOC)))

    for parameter in sorted(dask_params & ref_params):
        ref_entry = pd_doc.get(parameter, _NO_DOC)
        dask_entry = dd_doc.get(parameter, _NO_DOC)
        # True means "needs checking": the two sides disagree.
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = dask_entry != ref_entry
        # Value order follows the column order of ``match`` below:
        # reference values first, dask values second.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *ref_entry,
            *dask_entry,
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])
df.head()  # only displays something in an interactive session
match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])
df.to_csv("array-differ.csv", index=False)
match.to_csv("array-match.csv", index=False)
import pandas as pd
import dask.dataframe as dd
import inspect
from numpydoc.docscrape import NumpyDocString
import pydoc
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Module, class or instance on which the attribute is looked up.
    name : str
        Name of the attribute to inspect.

    Returns
    -------
    inspect.Signature or None
        The signature of the attribute, or ``None`` when the attribute does
        not exist (``AttributeError``), is not callable (``TypeError``), or
        has no introspectable signature (``ValueError``).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        # Best effort: callers filter out names that cannot be introspected.
        return None
# Compare the public methods of pandas.DataFrame with those of
# dask.dataframe.DataFrame. For every method both classes expose, record
# which parameters dask adds ("extra"), which it lacks ("missing"), and --
# for shared parameters -- whether the signature and the documented
# (type, description) agree.
# The results are written to differ.csv and match.csv.

# Placeholder (type, description) for a parameter missing from a docstring.
# It must contain two items so star-unpacking always fills both columns;
# unpacking '' would yield nothing and shift every later value in the row.
_NO_DOC = ('', '')

pandas_methods = {name: maybe_build_signature(pd.DataFrame, name)
                  for name in dir(pd.DataFrame)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}
dask_methods = {name: maybe_build_signature(dd.DataFrame, name)
                for name in dir(dd.DataFrame)}
# Filter on the signature (value), not on the name (key): a name is always
# truthy, which would let ``None`` signatures through to the loop below.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

records = []
matching = []

# Sorted iteration keeps the CSV row order reproducible between runs.
for method in sorted(public):
    a = dask_methods[method]      # dask signature
    b = pandas_methods[method]    # pandas signature

    pd_parsed = NumpyDocString(pydoc.getdoc(getattr(pd.DataFrame, method)))
    dd_parsed = NumpyDocString(pydoc.getdoc(getattr(dd.DataFrame, method)))
    # Map parameter name -> (type, description joined into one string).
    pd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in pd_parsed['Parameters']}
    dd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in dd_parsed['Parameters']}

    dask_params = set(a.parameters)
    pandas_params = set(b.parameters)

    # Parameters only dask accepts.
    for parameter in sorted(dask_params - pandas_params):
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, _NO_DOC)))
    # Parameters pandas accepts but dask does not.
    for parameter in sorted(pandas_params - dask_params):
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, _NO_DOC)))

    for parameter in sorted(dask_params & pandas_params):
        pandas_entry = pd_doc.get(parameter, _NO_DOC)
        dask_entry = dd_doc.get(parameter, _NO_DOC)
        # True means "needs checking": the two sides disagree.
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = dask_entry != pandas_entry
        # Value order follows the column order of ``match`` below:
        # pandas values first, dask values second.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *pandas_entry,
            *dask_entry,
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])
df.head()  # only displays something in an interactive session
match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])
df.to_csv("differ.csv", index=False)
match.to_csv("match.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment