Skip to content

Instantly share code, notes, and snippets.

@andersy005
Forked from TomAugspurger/da_docs.py
Created September 12, 2018 09:58
Show Gist options
  • Save andersy005/8091596fffb6b7f9dd8ee9b251464953 to your computer and use it in GitHub Desktop.
Save andersy005/8091596fffb6b7f9dd8ee9b251464953 to your computer and use it in GitHub Desktop.
import pandas as pd
import dask.dataframe as dd
import numpy as np
import dask.array as da
import inspect
from numpydoc.docscrape import NumpyDocString
import pydoc
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Module, class or instance that is expected to own the attribute.
    name : str
        Name of the attribute to introspect.

    Returns
    -------
    inspect.Signature or None
        The signature of the attribute.  ``None`` is returned when the
        attribute does not exist (``AttributeError``), is not callable
        (``TypeError``), or exposes no introspectable signature
        (``ValueError``).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        return None
# Compare the public callables shared by ``numpy`` and ``dask.array``:
# signature differences go to "array-differ.csv", shared parameters to
# "array-match.csv".
#
# NOTE: the ``pandas_*`` variable names and CSV column labels are kept from
# the DataFrame variant of this script; in this section the reference
# library is numpy.
pandas_methods = {name: maybe_build_signature(np, name) for name in dir(np)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}

dask_methods = {name: maybe_build_signature(da, name) for name in dir(da)}
# Filter on the signature (the value), not on the name: keeping ``None``
# entries makes ``a.parameters`` below fail with AttributeError.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

# Placeholder (type, description) pair for parameters that are absent from a
# docstring, so every row has as many fields as the column lists below.
no_doc = ('', '')

records = []
matching = []
# Sorted so the generated CSV files are reproducible between runs.
for method in sorted(public):
    a = dask_methods[method]    # dask signature
    b = pandas_methods[method]  # reference (numpy) signature

    pd_doc = NumpyDocString(pydoc.getdoc(getattr(np, method)))
    dd_doc = NumpyDocString(pydoc.getdoc(getattr(da, method)))
    # Map parameter name -> (type, description) from the "Parameters" section.
    pd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in pd_doc['Parameters']}
    dd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in dd_doc['Parameters']}

    # Keep signature order instead of iterating over sets.
    extra = [p for p in a.parameters if p not in b.parameters]
    missing = [p for p in b.parameters if p not in a.parameters]
    shared = [p for p in a.parameters if p in b.parameters]

    for parameter in extra:
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, no_doc)))

    for parameter in missing:
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, no_doc)))

    for parameter in shared:
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = (dd_doc.get(parameter, no_doc)
                     != pd_doc.get(parameter, no_doc))
        # Field order follows the column labels below: reference library
        # first, then dask.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *pd_doc.get(parameter, no_doc),
            *dd_doc.get(parameter, no_doc),
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])
match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])
df.to_csv("array-differ.csv", index=False)
match.to_csv("array-match.csv", index=False)
import pandas as pd
import dask.dataframe as dd
import inspect
from numpydoc.docscrape import NumpyDocString
import pydoc
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Module, class or instance that is expected to own the attribute.
    name : str
        Name of the attribute to introspect.

    Returns
    -------
    inspect.Signature or None
        The signature of the attribute.  ``None`` is returned when the
        attribute does not exist (``AttributeError``), is not callable
        (``TypeError``), or exposes no introspectable signature
        (``ValueError``).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        return None
# Compare the public methods shared by ``pandas.DataFrame`` and
# ``dask.dataframe.DataFrame``: signature differences go to "differ.csv",
# shared parameters to "match.csv".
pandas_methods = {name: maybe_build_signature(pd.DataFrame, name)
                  for name in dir(pd.DataFrame)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}

dask_methods = {name: maybe_build_signature(dd.DataFrame, name)
                for name in dir(dd.DataFrame)}
# Filter on the signature (the value), not on the name: keeping ``None``
# entries makes ``a.parameters`` below fail with AttributeError.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

# Placeholder (type, description) pair for parameters that are absent from a
# docstring, so every row has as many fields as the column lists below.
no_doc = ('', '')

records = []
matching = []
# Sorted so the generated CSV files are reproducible between runs.
for method in sorted(public):
    a = dask_methods[method]    # dask signature
    b = pandas_methods[method]  # pandas signature

    pd_doc = NumpyDocString(pydoc.getdoc(getattr(pd.DataFrame, method)))
    dd_doc = NumpyDocString(pydoc.getdoc(getattr(dd.DataFrame, method)))
    # Map parameter name -> (type, description) from the "Parameters" section.
    pd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in pd_doc['Parameters']}
    dd_doc = {name: (type_, ''.join(desc))
              for name, type_, desc in dd_doc['Parameters']}

    # Keep signature order instead of iterating over sets.
    extra = [p for p in a.parameters if p not in b.parameters]
    missing = [p for p in b.parameters if p not in a.parameters]
    shared = [p for p in a.parameters if p in b.parameters]

    for parameter in extra:
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, no_doc)))

    for parameter in missing:
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, no_doc)))

    for parameter in shared:
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = (dd_doc.get(parameter, no_doc)
                     != pd_doc.get(parameter, no_doc))
        # Field order follows the column labels below: pandas first,
        # then dask.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *pd_doc.get(parameter, no_doc),
            *dd_doc.get(parameter, no_doc),
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])
match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])
df.to_csv("differ.csv", index=False)
match.to_csv("match.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment