-
-
Save andersy005/8091596fffb6b7f9dd8ee9b251464953 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import dask.dataframe as dd | |
import numpy as np | |
import dask.array as da | |
import inspect | |
from numpydoc.docscrape import NumpyDocString | |
import pydoc | |
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Namespace (module, class, ...) on which ``name`` is looked up.
    name : str
        Name of the attribute to introspect.

    Returns
    -------
    inspect.Signature or None
        ``None`` when the attribute lookup fails (``AttributeError``) or when
        ``inspect.signature`` rejects the attribute (``TypeError`` for
        non-callables, ``ValueError`` when no signature can be provided).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        return None
# Compare the public callables shared by NumPy and dask.array: report the
# parameters that only one side has ("array-differ.csv") and, for shared
# parameters, whether signature and documentation agree ("array-match.csv").
#
# NOTE: the "pandas_*" names and column labels are inherited from the
# DataFrame variant of this script; in this script they hold the NumPy side.


def _parameter_docs(obj):
    """Map each documented parameter of ``obj`` to ``(type, description)``."""
    parsed = NumpyDocString(pydoc.getdoc(obj))
    return {name: (type_, ''.join(desc))
            for name, type_, desc in parsed['Parameters']}


# Placeholder for a parameter that the docstring does not describe. It must be
# a 2-tuple so that unpacking it always fills both the type and doc columns.
_NO_DOC = ('', '')

pandas_methods = {name: maybe_build_signature(np, name) for name in dir(np)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}

dask_methods = {name: maybe_build_signature(da, name) for name in dir(da)}
# Filter on the value (the signature), not the key: entries without a
# signature would otherwise crash on ``.parameters`` below.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

records = []
matching = []

# Sorted / signature-ordered iteration keeps the CSV output reproducible.
for method in sorted(public):
    a = dask_methods[method]
    b = pandas_methods[method]

    pd_doc = _parameter_docs(getattr(np, method))
    dd_doc = _parameter_docs(getattr(da, method))

    # "extra": accepted by dask only; "missing": accepted by NumPy only.
    extra = [p for p in a.parameters if p not in b.parameters]
    missing = [p for p in b.parameters if p not in a.parameters]

    for parameter in extra:
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, _NO_DOC)))

    for parameter in missing:
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, _NO_DOC)))

    for parameter in (p for p in a.parameters if p in b.parameters):
        dask_doc = dd_doc.get(parameter, _NO_DOC)
        ref_doc = pd_doc.get(parameter, _NO_DOC)
        # Both flags are True when the two libraries DIFFER.
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = dask_doc != ref_doc
        # Value order follows the column labels of ``match`` below.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *ref_doc,
            *dask_doc,
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])

match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])

df.to_csv("array-differ.csv", index=False)
match.to_csv("array-match.csv", index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import dask.dataframe as dd | |
import inspect | |
from numpydoc.docscrape import NumpyDocString | |
import pydoc | |
def maybe_build_signature(obj, name):
    """Return the call signature of ``obj.<name>``, or ``None`` if unavailable.

    Parameters
    ----------
    obj : object
        Namespace (module, class, ...) on which ``name`` is looked up.
    name : str
        Name of the attribute to introspect.

    Returns
    -------
    inspect.Signature or None
        ``None`` when the attribute lookup fails (``AttributeError``) or when
        ``inspect.signature`` rejects the attribute (``TypeError`` for
        non-callables, ``ValueError`` when no signature can be provided).
    """
    try:
        return inspect.signature(getattr(obj, name))
    except (TypeError, ValueError, AttributeError):
        return None
# Compare the public methods shared by pandas.DataFrame and
# dask.dataframe.DataFrame: report the parameters that only one side has
# ("differ.csv") and, for shared parameters, whether signature and
# documentation agree ("match.csv").


def _parameter_docs(obj):
    """Map each documented parameter of ``obj`` to ``(type, description)``."""
    parsed = NumpyDocString(pydoc.getdoc(obj))
    return {name: (type_, ''.join(desc))
            for name, type_, desc in parsed['Parameters']}


# Placeholder for a parameter that the docstring does not describe. It must be
# a 2-tuple so that unpacking it always fills both the type and doc columns.
_NO_DOC = ('', '')

pandas_methods = {name: maybe_build_signature(pd.DataFrame, name)
                  for name in dir(pd.DataFrame)}
pandas_methods = {k: v for k, v in pandas_methods.items() if v is not None}

dask_methods = {name: maybe_build_signature(dd.DataFrame, name)
                for name in dir(dd.DataFrame)}
# Filter on the value (the signature), not the key: entries without a
# signature would otherwise crash on ``.parameters`` below.
dask_methods = {k: v for k, v in dask_methods.items() if v is not None}

common = set(dask_methods) & set(pandas_methods)
public = {x for x in common if not x.startswith('_')}

records = []
matching = []

# Sorted / signature-ordered iteration keeps the CSV output reproducible.
for method in sorted(public):
    a = dask_methods[method]
    b = pandas_methods[method]

    pd_doc = _parameter_docs(getattr(pd.DataFrame, method))
    dd_doc = _parameter_docs(getattr(dd.DataFrame, method))

    # "extra": accepted by dask only; "missing": accepted by pandas only.
    extra = [p for p in a.parameters if p not in b.parameters]
    missing = [p for p in b.parameters if p not in a.parameters]

    for parameter in extra:
        records.append((method, parameter, 'extra', a.parameters[parameter],
                        *dd_doc.get(parameter, _NO_DOC)))

    for parameter in missing:
        records.append((method, parameter, 'missing', b.parameters[parameter],
                        *pd_doc.get(parameter, _NO_DOC)))

    for parameter in (p for p in a.parameters if p in b.parameters):
        dask_doc = dd_doc.get(parameter, _NO_DOC)
        pandas_doc = pd_doc.get(parameter, _NO_DOC)
        # Both flags are True when the two libraries DIFFER.
        check_sig = a.parameters[parameter] != b.parameters[parameter]
        check_doc = dask_doc != pandas_doc
        # Value order follows the column labels of ``match`` below.
        matching.append((
            method, parameter,
            b.parameters[parameter],
            a.parameters[parameter],
            *pandas_doc,
            *dask_doc,
            check_sig,
            check_doc,
        ))

df = pd.DataFrame(records, columns=['method', 'parameter', 'kind', 'value', 'type', 'doc'])

match = pd.DataFrame(matching, columns=['method', 'parameter', 'pandas_value', 'dask_value',
                                        'pandas_type', 'pandas_doc',
                                        'dask_type', 'dask_doc',
                                        'check_sig',
                                        'check_doc'])

df.to_csv("differ.csv", index=False)
match.to_csv("match.csv", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment