Skip to content

Instantly share code, notes, and snippets.

@e-roux
Last active January 7, 2018 14:11
Show Gist options
  • Save e-roux/6183265d103152f38a99d0cfed33558f to your computer and use it in GitHub Desktop.
Save e-roux/6183265d103152f38a99d0cfed33558f to your computer and use it in GitHub Desktop.
Python: PyDataSet
import pandas as pd


class DatasetNotFoundError(KeyError, AssertionError):
    """Raised by ``PyDataSet.get`` when the requested item is not in the index.

    It derives from ``AssertionError`` because the lookup used to be guarded
    by an ``assert`` statement, and from ``KeyError`` because that is what a
    failed ``.loc`` label lookup raises when assertions are disabled (``-O``).
    Existing ``except`` clauses for either type therefore keep working.
    """

    def __str__(self):
        # KeyError.__str__ shows the repr of its argument (extra quotes);
        # report the plain message instead.
        return str(self.args[0]) if self.args else ""


class _LazyCatalogue(object):
    """Class-level attribute that loads the dataset index on first access.

    Reading the CSV inside the class body would trigger a download as soon as
    the module is imported. This descriptor defers the download until the
    attribute is first read and caches the resulting DataFrame afterwards.
    """

    def __init__(self, url):
        self._url = url
        self._frame = None  # populated by the first __get__ call

    def __get__(self, instance, owner=None):
        if self._frame is None:
            frame = pd.read_csv(self._url)
            frame.set_index('Item', drop=True, inplace=True)
            self._frame = frame
        return self._frame


class PyDataSet(object):
    """
    Retrieves R Dataset

    All methods are classmethods operating on ``db``, a DataFrame read from
    the catalogue CSV and indexed by its ``Item`` column. The catalogue is
    fetched lazily (on first use) and then cached. Assigning a DataFrame to
    ``PyDataSet.db`` replaces the catalogue, e.g. for offline use.
    """

    db = _LazyCatalogue(
        "http://vincentarelbundock.github.com/Rdatasets/datasets.csv"
    )

    @classmethod
    def search(cls, name, regex=True):
        """
        Search a dataset

        Parameters
        ----------
        name : str
            Text looked for, case-insensitively, in the ``Title`` column and
            in the ``Item`` index of the catalogue.
        regex : bool, default True
            When True ``name`` is interpreted as a regular expression (the
            historical behaviour). Pass False to match it literally, which
            is required for text such as ``"c++"`` that is not a valid
            pattern.

        Returns
        -------
        pandas.DataFrame
            The catalogue rows whose title or item name contains ``name``.
        """
        catalogue = cls.db
        # na=False: a row with a missing title is explicitly a non-match
        # instead of depending on how NaN propagates through the mask.
        in_title = catalogue.Title.str.contains(
            name, case=False, na=False, regex=regex)
        in_item = catalogue.index.str.contains(
            name, case=False, na=False, regex=regex)
        return catalogue[in_title | in_item]

    @classmethod
    def get(cls, name):
        """
        Download one dataset and return it as a DataFrame.

        Parameters
        ----------
        name : str
            Exact ``Item`` label of the dataset in the catalogue.

        Returns
        -------
        pandas.DataFrame
            The dataset read from the catalogue row's ``csv`` location, with
            its first column used as the index and the index named
            ``'Sample'``.

        Raises
        ------
        DatasetNotFoundError
            If ``name`` is not an ``Item`` label of the catalogue. This is
            both a ``KeyError`` and an ``AssertionError``.
        """
        catalogue = cls.db
        # Explicit check instead of ``assert``: assertions vanish under -O.
        # The lookup below is by index label, so that is what is validated.
        if name not in catalogue.index:
            raise DatasetNotFoundError(f"No dataset named {name}")

        entry = catalogue.loc[name]
        if isinstance(entry, pd.DataFrame):
            # The Item index is not guaranteed unique; a duplicated label
            # makes .loc return several rows. Use the first one and say so.
            import warnings
            warnings.warn(
                f"{len(entry)} catalogue entries are named {name}; "
                "using the first one",
                stacklevel=2,
            )
            entry = entry.iloc[0]

        df = pd.read_csv(entry['csv'])
        df.set_index(df.columns[0], drop=True, inplace=True)
        df.index.name = 'Sample'
        return df

    # NOTE: the name shadows the builtin ``list`` inside the class namespace;
    # it is kept because it is part of the public interface.
    @classmethod
    def list(cls):
        """Return the whole catalogue DataFrame (``cls.db``)."""
        return cls.db
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment