Skip to content

Instantly share code, notes, and snippets.

@chulman444
Last active March 19, 2018 11:06
Show Gist options
  • Save chulman444/ad798e3d839c9ee231932464529b245d to your computer and use it in GitHub Desktop.
Save chulman444/ad798e3d839c9ee231932464529b245d to your computer and use it in GitHub Desktop.
All sklearn imports used in the Udacity "Intro to Machine Learning" lessons. See the [course page](https://classroom.udacity.com/courses/ud120).
{
"train_test_split": {
"raw": [
"outliers/outlier_removal_regression.py:23:from sklearn.cross_validation import train_test_split",
"regression/finance_regression.py:30:from sklearn.cross_validation import train_test_split",
"pca/eigenfaces.py:26:from sklearn.cross_validation import train_test_split",
"final_project/poi_id.py.bak:52:from sklearn.cross_validation import train_test_split",
"final_project/poi_id.py:67: from sklearn.cross_validation import train_test_split",
"validation/validate_poi.py:32:from sklearn.model_selection import train_test_split",
"evaluation/validate_poi.py:32:from sklearn.model_selection import train_test_split",
"evaluation/evaluate_poi_identifier.py:31:from sklearn.model_selection import train_test_split"
]
},
"LinearRegression": {
"raw": [
"outliers/outlier_removal_regression.py:28:from sklearn.linear_model import LinearRegression",
"regression/finance_regression.py:41:from sklearn.linear_model import LinearRegression"
]
},
"TfidfVectorizer": {
"raw": [
"text_learning/vectorize_text.py:88:from sklearn.feature_extraction.text import TfidfVectorizer",
"feature_selection/find_signature.py:23:from sklearn.feature_extraction.text import TfidfVectorizer",
"tools/email_preprocess.py:8:from sklearn.feature_extraction.text import TfidfVectorizer"
]
},
"stop_words": {
"raw": [
"text_learning/vectorize_text.py:89:from sklearn.feature_extraction import stop_words"
]
},
"RandomForestClassifier": {
"raw": [
"choose_your_own/your_algorithm.py:33:from sklearn.ensemble import RandomForestClassifier"
]
},
"AdaBoostClassifier": {
"raw": [
"choose_your_own/your_algorithm.py:34:from sklearn.ensemble import AdaBoostClassifier"
]
},
"KNeighborsClassifier": {
"raw": [
"choose_your_own/your_algorithm.py:35:from sklearn.neighbors import KNeighborsClassifier"
]
},
"cross_validation": {
"raw": [
"feature_selection/find_signature.py:20:from sklearn import cross_validation",
"tools/email_preprocess.py:7:from sklearn import cross_validation"
]
},
"DecisionTreeClassifier": {
"raw": [
"feature_selection/find_signature.py:54:from sklearn.tree import DecisionTreeClassifier",
"decision_tree/dt_author_id.py:27:from sklearn.tree import DecisionTreeClassifier",
"validation/validate_poi.py:31:from sklearn.tree import DecisionTreeClassifier",
"evaluation/validate_poi.py:31:from sklearn.tree import DecisionTreeClassifier",
"evaluation/evaluate_poi_identifier.py:30:from sklearn.tree import DecisionTreeClassifier"
]
},
"KMeans": {
"raw": [
"k_means/k_means_cluster.py:109:from sklearn.cluster import KMeans"
]
},
"SVC": {
"raw": [
"svm/svm_author_id.py:28:from sklearn.svm import SVC",
"pca/eigenfaces.py:32:from sklearn.svm import SVC"
]
},
"fetch_lfw_people": {
"raw": [
"pca/eigenfaces.py:27:from sklearn.datasets import fetch_lfw_people"
]
},
"GridSearchCV": {
"raw": [
"pca/eigenfaces.py:28:from sklearn.grid_search import GridSearchCV"
]
},
"classification_report": {
"raw": [
"pca/eigenfaces.py:29:from sklearn.metrics import classification_report"
]
},
"confusion_matrix": {
"raw": [
"pca/eigenfaces.py:30:from sklearn.metrics import confusion_matrix"
]
},
"RandomizedPCA": {
"raw": [
"pca/eigenfaces.py:31:from sklearn.decomposition import RandomizedPCA"
]
},
"GaussianNB": {
"raw": [
"final_project/poi_id.py.bak:41:from sklearn.naive_bayes import GaussianNB",
"final_project/poi_id.py:56: from sklearn.naive_bayes import GaussianNB",
"naive_bayes/nb_author_id.py:29:from sklearn.naive_bayes import GaussianNB"
]
},
"StratifiedShuffleSplit": {
"raw": [
"final_project/tester.py:15:from sklearn.cross_validation import StratifiedShuffleSplit"
]
},
"VarianceThreshold": {
"raw": [
"final_project/poi_id.py:6:from sklearn.feature_selection import VarianceThreshold"
]
},
"PCA": {
"raw": [
"final_project/poi_id.py:7:from sklearn.decomposition import PCA"
]
},
"SelectPercentile, f_classif": {
"raw": [
"tools/email_preprocess.py:9:from sklearn.feature_selection import SelectPercentile, f_classif"
]
},
"sklearn": {
"raw": [
"tools/startup.py:24: import sklearn"
]
},
"metrics": {
"raw": [
"evaluation/evaluate_poi_identifier.py:32:from sklearn import metrics"
]
}
}
## This script was used to generate the JSON file.
## Run it from the `ud120-projects` directory.
# 'meh' file contains output of `grep -nr "sklearn" --exclude-dir="maildir" --exclude="*.txt" --exclude="*.pkl"`
import json

# File holding the captured grep output (one `path:lineno:code` match per line).
GREP_OUTPUT_PATH = 'meh'
# File the grouped result is written to, as indented JSON.
SUMMARY_PATH = 'mods_in_lesson'
# Everything after the first occurrence of this keyword is the imported name.
IMPORT_KEYWORD = "import "


def collect_imports(lines):
    """Group grep match lines by the name(s) they import.

    For each line, the text after the last ``:`` is treated as the matched
    source code.  Whatever follows the first ``"import "`` in that text
    (surrounding whitespace removed) becomes the grouping key, so
    ``from sklearn.svm import SVC`` and ``import sklearn`` are keyed as
    ``"SVC"`` and ``"sklearn"`` respectively.

    Args:
        lines: iterable of strings, one grep match per item.

    Returns:
        A dict mapping each imported name to ``{'raw': [matching lines]}``,
        with keys in order of first appearance and the original lines kept
        unmodified.
    """
    mods = {}
    for line in lines:
        candidate = line.split(":")[-1]
        # Require the full "import " keyword.  Testing for bare "import"
        # would let text such as "important" through and then slice the
        # candidate at a bogus offset.
        _, keyword, imported = candidate.partition(IMPORT_KEYWORD)
        if not keyword:
            continue
        # Trailing whitespace must not split one import across two keys.
        imported = imported.strip()
        if not imported:
            continue
        mods.setdefault(imported, {'raw': []})['raw'].append(line)
    return mods


def main():
    """Read the grep output, print the grouping, and save it as JSON."""
    with open(GREP_OUTPUT_PATH) as f:
        lines = f.read().split("\n")
    mods = collect_imports(lines)
    print(mods)
    with open(SUMMARY_PATH, 'w') as f:
        json.dump(mods, f, indent=2)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment