Skip to content

Instantly share code, notes, and snippets.

@kiku1705
Created December 29, 2020 09:21
Show Gist options
  • Save kiku1705/21721513d6d6ec0c383fdfbce962df7b to your computer and use it in GitHub Desktop.
Save kiku1705/21721513d6d6ec0c383fdfbce962df7b to your computer and use it in GitHub Desktop.
Assignment
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
# Load the mammographic-mass data. read_csv takes column labels through
# `names` (there is no `feature_names` keyword), and '?' marks a missing value,
# so it is parsed as NaN directly and the columns stay numeric.
# NOTE(review): assumes the data file has no header row -- confirm.
df = pd.read_csv(
    'mammographic_masses.data.txt',
    header=None,
    names=['BI_RADS', 'age', 'shape', 'margin', 'density', 'severity'],
    na_values=['?'],
)
# Check that the data is not biased and that the classes are evenly distributed.
df.describe(include='all')
# Discard every row that has at least one missing attribute.
df.dropna(inplace=True)
# BI_RADS is deliberately left out of the feature matrix.
X = df[['age', 'shape', 'margin', 'density']].values
scaler = MinMaxScaler()
# fit_transform learns each column's min/max and rescales in a single call.
# NOTE(review): the scaler is fitted on the full dataset before any split, so
# held-out rows influence the scaling -- consider fitting inside a Pipeline.
X = scaler.fit_transform(X)
y = df.severity
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.3
)
# Baseline model: a single decision tree, seeded for repeatability.
clf = DecisionTreeClassifier(random_state=1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
# 0.7269076305220884
from sklearn.model_selection import KFold,cross_val_score
# Shuffled 10-fold splitter with a fixed seed; reused by the model
# comparisons further down.
cv = KFold(n_splits=10, shuffle=True, random_state=1)
# Cross-validated accuracy of the decision tree.
scores = cross_val_score(estimator=clf, X=X, y=y, cv=cv, scoring='accuracy')
scores.mean()
from sklearn.ensemble import RandomForestClassifier
# Random forest fitted on the hold-out training split.
rf_clf = RandomForestClassifier(random_state=1)
rf_clf.fit(X_train, y_train)
# Output pasted from an earlier run. It shows random_state=0 and n_jobs=2,
# which does not match the constructor call above.
# RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
# criterion='gini', max_depth=None, max_features='auto',
# max_leaf_nodes=None, max_samples=None,
# min_impurity_decrease=0.0, min_impurity_split=None,
# min_samples_leaf=1, min_samples_split=2,
# min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
# oob_score=False, random_state=0, verbose=0,
# warm_start=False)
# Spot-check the predictions for the first ten held-out rows.
rf_clf.predict(X_test[0:10])
# Cross-validate the forest itself; previously the decision tree `clf` was
# passed here, so the forest was never actually scored.
scores = cross_val_score(rf_clf, X, y, scoring='accuracy', cv=cv)
np.mean(scores)
# 0.7759036144578313
# NOTE(review): the value above was recorded before this fix and may belong to
# the decision tree -- re-run to refresh it.
from sklearn import svm
# Linear-kernel support vector classifier, scored on the shared CV folds.
svm_clf = svm.SVC(C=1.0, kernel='linear')
scores = cross_val_score(estimator=svm_clf, X=X, y=y, cv=cv, scoring='accuracy')
scores.mean()
# 0.7915662650602409
from sklearn.neighbors import KNeighborsClassifier
# 3-nearest-neighbours classifier, scored on the shared CV folds.
knn_clf = KNeighborsClassifier(n_neighbors=3)
scores = cross_val_score(estimator=knn_clf, X=X, y=y, cv=cv, scoring='accuracy')
scores.mean()
# 0.7674698795180723
from sklearn.naive_bayes import MultinomialNB
# Multinomial naive Bayes with default settings, scored on the shared CV folds.
nb_clf = MultinomialNB()
scores = cross_val_score(estimator=nb_clf, X=X, y=y, cv=cv, scoring='accuracy')
scores.mean()
# LogisticRegression was never imported by this script, so bring it into scope
# here before use.
from sklearn.linear_model import LogisticRegression

# Logistic regression with default settings, scored on the shared CV folds.
model = LogisticRegression()
# Score `model`; previously `nb_clf` was passed, which just repeated the
# naive Bayes evaluation.
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv)
np.mean(scores)
5,67,3,5,3,1
4,43,1,1,?,1
5,58,4,5,3,1
4,28,1,1,3,0
5,74,1,5,?,1
4,65,1,?,3,0
4,70,?,?,3,0
5,42,1,?,3,0
5,57,1,5,3,1
5,60,?,5,1,1
5,76,1,4,3,1
3,42,2,1,3,1
4,64,1,?,3,0
4,36,3,1,2,0
4,60,2,1,2,0
4,54,1,1,3,0
3,52,3,4,3,0
4,59,2,1,3,1
4,54,1,1,3,1
4,40,1,?,?,0
?,66,?,?,1,1
5,56,4,3,1,1
4,43,1,?,?,0
5,42,4,4,3,1
4,59,2,4,3,1
5,75,4,5,3,1
2,66,1,1,?,0
5,63,3,?,3,0
5,45,4,5,3,1
5,55,4,4,3,0
4,46,1,5,2,0
5,54,4,4,3,1
5,57,4,4,3,1
4,39,1,1,2,0
4,81,1,1,3,0
4,77,3,?,?,0
4,60,2,1,3,0
5,67,3,4,2,1
4,48,4,5,?,1
4,55,3,4,2,0
4,59,2,1,?,0
4,78,1,1,1,0
4,50,1,1,3,0
4,61,2,1,?,0
5,62,3,5,2,1
5,44,2,4,?,1
5,64,4,5,3,1
4,23,1,1,?,0
2,42,?,?,4,0
5,67,4,5,3,1
4,74,2,1,2,0
5,80,3,5,3,1
4,23,1,1,?,0
4,63,2,1,?,0
4,53,?,5,3,1
4,43,3,4,?,0
4,49,2,1,1,0
5,51,2,4,?,0
4,45,2,1,?,0
5,59,2,?,?,1
5,52,4,3,3,1
5,60,4,3,3,1
4,57,2,5,3,0
3,57,2,1,?,0
5,74,4,4,3,1
4,25,2,1,?,0
4,49,1,1,3,0
5,72,4,3,?,1
4,45,2,1,3,0
4,64,2,1,3,0
4,73,2,1,2,0
5,68,4,3,3,1
5,52,4,5,3,0
5,66,4,4,3,1
5,70,?,4,?,1
4,25,1,1,3,0
5,74,1,1,2,1
4,64,1,1,3,0
5,60,4,3,2,1
5,67,2,4,1,0
4,67,4,5,3,0
5,44,4,4,2,1
3,68,1,1,3,1
4,57,?,4,1,0
5,51,4,?,?,1
4,33,1,?,?,0
5,58,4,4,3,1
5,36,1,?,?,0
4,63,1,1,?,0
5,62,1,5,3,1
4,73,3,4,3,1
4,80,4,4,3,1
4,67,1,1,?,0
5,59,2,1,3,1
5,60,1,?,3,0
5,54,4,4,3,1
4,40,1,1,?,0
4,47,2,1,?,0
5,62,4,4,3,0
4,33,2,1,3,0
5,59,2,?,?,0
4,65,2,?,?,0
4,58,4,4,?,0
4,29,2,?,?,0
4,58,1,1,?,0
4,54,1,1,?,0
4,44,1,1,?,1
3,34,2,1,?,0
4,57,1,1,3,0
5,33,4,4,?,1
4,45,4,4,3,0
5,71,4,4,3,1
5,59,4,4,2,0
4,56,2,1,?,0
4,40,3,4,?,0
4,56,1,1,3,0
4,45,2,1,?,0
4,57,2,1,2,0
5,55,3,4,3,1
5,84,4,5,3,0
5,51,4,4,3,1
4,43,1,1,?,0
4,24,2,1,2,0
4,66,1,1,3,0
5,33,4,4,3,0
4,59,4,3,2,0
4,76,2,3,?,0
4,40,1,1,?,0
4,52,?,4,?,0
5,40,4,5,3,1
5,67,4,4,3,1
5,75,4,3,3,1
5,86,4,4,3,0
4,60,2,?,?,0
5,66,4,4,3,1
5,46,4,5,3,1
4,59,4,4,3,1
5,65,4,4,3,1
4,53,1,1,3,0
5,67,3,5,3,1
5,80,4,5,3,1
4,55,2,1,3,0
4,48,1,1,?,0
4,47,1,1,2,0
4,50,2,1,?,0
5,62,4,5,3,1
5,63,4,4,3,1
4,63,4,?,3,1
4,71,4,4,3,1
4,41,1,1,3,0
5,57,4,4,4,1
5,71,4,4,4,1
4,66,1,1,3,0
4,47,2,4,2,0
3,34,4,4,3,0
4,59,3,4,3,0
5,55,2,?,?,1
4,51,?,?,3,0
4,62,2,1,?,0
4,58,4,?,3,1
5,67,4,4,3,1
4,41,2,1,3,0
4,23,3,1,3,0
4,53,?,4,3,0
4,42,2,1,3,0
5,87,4,5,3,1
4,68,1,1,3,1
4,64,1,1,3,0
5,54,3,5,3,1
5,86,4,5,3,1
4,21,2,1,3,0
4,39,1,1,?,0
4,53,4,4,3,0
4,44,4,4,3,0
4,54,1,1,3,0
5,63,4,5,3,1
4,62,2,1,?,0
4,45,2,1,2,0
5,71,4,5,3,0
5,49,4,4,3,1
4,49,4,4,3,0
5,66,4,4,4,0
4,19,1,1,3,0
4,35,1,1,2,0
4,71,3,3,?,1
5,74,4,5,3,1
5,37,4,4,3,1
4,67,1,?,3,0
5,81,3,4,3,1
5,59,4,4,3,1
4,34,1,1,3,0
5,79,4,3,3,1
5,60,3,1,3,0
4,41,1,1,3,1
4,50,1,1,3,0
5,85,4,4,3,1
4,46,1,1,3,0
5,66,4,4,3,1
4,73,3,1,2,0
4,55,1,1,3,0
4,49,2,1,3,0
3,49,4,4,3,0
4,51,4,5,3,1
2,48,4,4,3,0
4,58,4,5,3,0
5,72,4,5,3,1
4,46,2,3,3,0
4,43,4,3,3,1
?,52,4,4,3,0
4,66,2,1,?,0
4,46,1,1,1,0
4,69,3,1,3,0
2,59,1,1,?,1
5,43,2,1,3,1
5,76,4,5,3,1
4,46,1,1,3,0
4,59,2,4,3,0
4,57,1,1,3,0
5,43,4,5,?,0
3,45,2,1,3,0
3,43,2,1,3,0
4,45,2,1,3,0
5,57,4,5,3,1
5,79,4,4,3,1
5,54,2,1,3,1
4,40,3,4,3,0
5,63,4,4,3,1
2,55,1,?,1,0
4,52,2,1,3,0
4,38,1,1,3,0
3,72,4,3,3,0
5,80,4,3,3,1
5,76,4,3,3,1
4,62,3,1,3,0
5,64,4,5,3,1
5,42,4,5,3,0
3,60,?,3,1,0
4,64,4,5,3,0
4,63,4,4,3,1
4,24,2,1,2,0
5,72,4,4,3,1
4,63,2,1,3,0
4,46,1,1,3,0
3,33,1,1,3,0
5,76,4,4,3,1
4,36,2,3,3,0
4,40,2,1,3,0
5,58,1,5,3,1
4,43,2,1,3,0
3,42,1,1,3,0
4,32,1,1,3,0
5,57,4,4,2,1
4,37,1,1,3,0
4,70,4,4,3,1
5,56,4,2,3,1
3,76,?,3,2,0
5,73,4,4,3,1
5,77,4,5,3,1
5,67,4,4,1,1
5,71,4,3,3,1
5,65,4,4,3,1
4,43,1,1,3,0
4,40,2,1,?,0
4,49,2,1,3,0
5,76,4,2,3,1
4,55,4,4,3,0
5,72,4,5,3,1
3,53,4,3,3,0
5,75,4,4,3,1
5,61,4,5,3,1
5,67,4,4,3,1
5,55,4,2,3,1
5,66,4,4,3,1
2,76,1,1,2,0
4,57,4,4,3,1
5,71,3,1,3,0
5,70,4,5,3,1
4,35,4,2,?,0
5,79,1,?,3,1
4,63,2,1,3,0
5,40,1,4,3,1
4,41,1,1,3,0
4,47,2,1,2,0
4,68,1,1,3,1
4,64,4,3,3,1
4,65,4,4,?,1
4,73,4,3,3,0
4,39,4,3,3,0
5,55,4,5,4,1
5,53,3,4,4,0
5,66,4,4,3,1
4,43,3,1,2,0
5,44,4,5,3,1
4,77,4,4,3,1
4,62,2,4,3,0
5,80,4,4,3,1
4,33,4,4,3,0
4,50,4,5,3,1
4,71,1,?,3,0
5,46,4,4,3,1
5,49,4,5,3,1
4,53,1,1,3,0
3,46,2,1,2,0
4,57,1,1,3,0
4,54,3,1,3,0
4,54,1,?,?,0
2,49,2,1,2,0
4,47,3,1,3,0
4,40,1,1,3,0
4,45,1,1,3,0
4,50,4,5,3,1
5,54,4,4,3,1
4,67,4,1,3,1
4,77,4,4,3,1
4,66,4,3,3,0
4,71,2,?,3,1
4,36,2,3,3,0
4,69,4,4,3,0
4,48,1,1,3,0
4,64,4,4,3,1
4,71,4,2,3,1
5,60,4,3,3,1
4,24,1,1,3,0
5,34,4,5,2,1
4,79,1,1,2,0
4,45,1,1,3,0
4,37,2,1,2,0
4,42,1,1,2,0
4,72,4,4,3,1
5,60,4,5,3,1
5,85,3,5,3,1
4,51,1,1,3,0
5,54,4,5,3,1
5,55,4,3,3,1
4,64,4,4,3,0
5,67,4,5,3,1
5,75,4,3,3,1
5,87,4,4,3,1
4,46,4,4,3,1
4,59,2,1,?,0
55,46,4,3,3,1
5,61,1,1,3,1
4,44,1,4,3,0
4,32,1,1,3,0
4,62,1,1,3,0
5,59,4,5,3,1
4,61,4,1,3,0
5,78,4,4,3,1
5,42,4,5,3,0
4,45,1,2,3,0
5,34,2,1,3,1
5,39,4,3,?,1
4,27,3,1,3,0
4,43,1,1,3,0
5,83,4,4,3,1
4,36,2,1,3,0
4,37,2,1,3,0
4,56,3,1,3,1
5,55,4,4,3,1
5,46,3,?,3,0
4,88,4,4,3,1
5,71,4,4,3,1
4,41,2,1,3,0
5,49,4,4,3,1
3,51,1,1,4,0
4,39,1,3,3,0
4,46,2,1,3,0
5,52,4,4,3,1
5,58,4,4,3,1
4,67,4,5,3,1
5,80,4,4,3,1
3,46,1,?,?,0
3,43,1,?,?,0
4,45,1,1,3,0
5,68,4,4,3,1
4,54,4,4,?,1
4,44,2,3,3,0
5,74,4,3,3,1
5,55,4,5,3,0
4,49,4,4,3,1
4,49,1,1,3,0
5,50,4,3,3,1
5,52,3,5,3,1
4,45,1,1,3,0
4,66,1,1,3,0
4,68,4,4,3,1
4,72,2,1,3,0
5,64,?,?,3,0
2,49,?,3,3,0
3,44,?,4,3,0
5,74,4,4,3,1
5,58,4,4,3,1
4,77,2,3,3,0
4,49,3,1,3,0
4,34,?,?,4,0
5,60,4,3,3,1
5,69,4,3,3,1
4,53,2,1,3,0
3,46,3,4,3,0
5,74,4,4,3,1
4,58,1,1,3,0
5,68,4,4,3,1
5,46,4,3,3,0
5,61,2,4,3,1
5,70,4,3,3,1
5,37,4,4,3,1
3,65,4,5,3,1
4,67,4,4,3,0
5,69,3,4,3,0
5,76,4,4,3,1
4,65,4,3,3,0
5,72,4,2,3,1
4,62,4,2,3,0
5,42,4,4,3,1
5,66,4,3,3,1
5,48,4,4,3,1
4,35,1,1,3,0
5,60,4,4,3,1
5,67,4,2,3,1
5,78,4,4,3,1
4,66,1,1,3,1
4,26,1,1,?,0
4,48,1,1,3,0
4,31,1,1,3,0
5,43,4,3,3,1
5,72,2,4,3,0
5,66,1,1,3,1
4,56,4,4,3,0
5,58,4,5,3,1
5,33,2,4,3,1
4,37,1,1,3,0
5,36,4,3,3,1
4,39,2,3,3,0
4,39,4,4,3,1
5,83,4,4,3,1
4,68,4,5,3,1
5,63,3,4,3,1
5,78,4,4,3,1
4,38,2,3,3,0
5,46,4,3,3,1
5,60,4,4,3,1
5,56,2,3,3,1
4,33,1,1,3,0
4,?,4,5,3,1
4,69,1,5,3,1
5,66,1,4,3,1
4,72,1,3,3,0
4,29,1,1,3,0
5,54,4,5,3,1
5,80,4,4,3,1
5,68,4,3,3,1
4,35,2,1,3,0
4,57,3,?,3,0
5,?,4,4,3,1
4,50,1,1,3,0
4,32,4,3,3,0
0,69,4,5,3,1
4,71,4,5,3,1
5,87,4,5,3,1
3,40,2,?,3,0
4,31,1,1,?,0
4,64,1,1,3,0
5,55,4,5,3,1
4,18,1,1,3,0
3,50,2,1,?,0
4,53,1,1,3,0
5,84,4,5,3,1
5,80,4,3,3,1
4,32,1,1,3,0
5,77,3,4,3,1
4,38,1,1,3,0
5,54,4,5,3,1
4,63,1,1,3,0
4,61,1,1,3,0
4,52,1,1,3,0
4,36,1,1,3,0
4,41,?,?,3,0
4,59,1,1,3,0
5,51,4,4,2,1
4,36,1,1,3,0
5,40,4,3,3,1
4,49,1,1,3,0
4,37,2,3,3,0
4,46,1,1,3,0
4,63,1,1,3,0
4,28,2,1,3,0
4,47,2,1,3,0
4,42,2,1,3,1
5,44,4,5,3,1
4,49,4,4,3,0
5,47,4,5,3,1
5,52,4,5,3,1
4,53,1,1,3,1
5,83,3,3,3,1
4,50,4,4,?,1
5,63,4,4,3,1
4,82,?,5,3,1
4,54,1,1,3,0
4,50,4,4,3,0
5,80,4,5,3,1
5,45,2,4,3,0
5,59,4,4,?,1
4,28,2,1,3,0
4,31,1,1,3,0
4,41,2,1,3,0
4,21,3,1,3,0
5,44,3,4,3,1
5,49,4,4,3,1
5,71,4,5,3,1
5,75,4,5,3,1
4,38,2,1,3,0
4,60,1,3,3,0
5,87,4,5,3,1
4,70,4,4,3,1
5,55,4,5,3,1
3,21,1,1,3,0
4,50,1,1,3,0
5,76,4,5,3,1
4,23,1,1,3,0
3,68,?,?,3,0
4,62,4,?,3,1
5,65,1,?,3,1
5,73,4,5,3,1
4,38,2,3,3,0
2,57,1,1,3,0
5,65,4,5,3,1
5,67,2,4,3,1
5,61,2,4,3,1
5,56,4,4,3,0
5,71,2,4,3,1
4,49,2,2,3,0
4,55,?,?,3,0
4,44,2,1,3,0
0,58,4,4,3,0
4,27,2,1,3,0
5,73,4,5,3,1
4,34,2,1,3,0
5,63,?,4,3,1
4,50,2,1,3,1
4,62,2,1,3,0
3,21,3,1,3,0
4,49,2,?,3,0
4,36,3,1,3,0
4,45,2,1,3,1
5,67,4,5,3,1
4,21,1,1,3,0
4,57,2,1,3,0
5,66,4,5,3,1
4,71,4,4,3,1
5,69,3,4,3,1
6,80,4,5,3,1
3,27,2,1,3,0
4,38,2,1,3,0
4,23,2,1,3,0
5,70,?,5,3,1
4,46,4,3,3,0
4,61,2,3,3,0
5,65,4,5,3,1
4,60,4,3,3,0
5,83,4,5,3,1
5,40,4,4,3,1
2,59,?,4,3,0
4,53,3,4,3,0
4,76,4,4,3,0
5,79,1,4,3,1
5,38,2,4,3,1
4,61,3,4,3,0
4,56,2,1,3,0
4,44,2,1,3,0
4,64,3,4,?,1
4,66,3,3,3,0
4,50,3,3,3,0
4,46,1,1,3,0
4,39,1,1,3,0
4,60,3,?,?,0
5,55,4,5,3,1
4,40,2,1,3,0
4,26,1,1,3,0
5,84,3,2,3,1
4,41,2,2,3,0
4,63,1,1,3,0
2,65,?,1,2,0
4,49,1,1,3,0
4,56,2,2,3,1
5,65,4,4,3,0
4,54,1,1,3,0
4,36,1,1,3,0
5,49,4,4,3,0
4,59,4,4,3,1
5,75,4,4,3,1
5,59,4,2,3,0
5,59,4,4,3,1
4,28,4,4,3,1
5,53,4,5,3,0
5,57,4,4,3,0
5,77,4,3,4,0
5,85,4,3,3,1
4,59,4,4,3,0
5,59,1,5,3,1
4,65,3,3,3,1
4,54,2,1,3,0
5,46,4,5,3,1
4,63,4,4,3,1
4,53,1,1,3,1
4,56,1,1,3,0
5,66,4,4,3,1
5,66,4,5,3,1
4,55,1,1,3,0
4,44,1,1,3,0
5,86,3,4,3,1
5,47,4,5,3,1
5,59,4,5,3,1
5,66,4,5,3,0
5,61,4,3,3,1
3,46,?,5,?,1
4,69,1,1,3,0
5,93,1,5,3,1
4,39,1,3,3,0
5,44,4,5,3,1
4,45,2,2,3,0
4,51,3,4,3,0
4,56,2,4,3,0
4,66,4,4,3,0
5,61,4,5,3,1
4,64,3,3,3,1
5,57,2,4,3,0
5,79,4,4,3,1
4,57,2,1,?,0
4,44,4,1,1,0
4,31,2,1,3,0
4,63,4,4,3,0
4,64,1,1,3,0
5,47,4,5,3,0
5,68,4,5,3,1
4,30,1,1,3,0
5,43,4,5,3,1
4,56,1,1,3,0
4,46,2,1,3,0
4,67,2,1,3,0
5,52,4,5,3,1
4,67,4,4,3,1
4,47,2,1,3,0
5,58,4,5,3,1
4,28,2,1,3,0
4,43,1,1,3,0
4,57,2,4,3,0
5,68,4,5,3,1
4,64,2,4,3,0
4,64,2,4,3,0
5,62,4,4,3,1
4,38,4,1,3,0
5,68,4,4,3,1
4,41,2,1,3,0
4,35,2,1,3,1
4,68,2,1,3,0
5,55,4,4,3,1
5,67,4,4,3,1
4,51,4,3,3,0
2,40,1,1,3,0
5,73,4,4,3,1
4,58,?,4,3,1
4,51,?,4,3,0
3,50,?,?,3,1
5,59,4,3,3,1
6,60,3,5,3,1
4,27,2,1,?,0
5,54,4,3,3,0
4,56,1,1,3,0
5,53,4,5,3,1
4,54,2,4,3,0
5,79,1,4,3,1
5,67,4,3,3,1
5,64,3,3,3,1
4,70,1,2,3,1
5,55,4,3,3,1
5,65,3,3,3,1
5,45,4,2,3,1
4,57,4,4,?,1
5,49,1,1,3,1
4,24,2,1,3,0
4,52,1,1,3,0
4,50,2,1,3,0
4,35,1,1,3,0
5,?,3,3,3,1
5,64,4,3,3,1
5,40,4,1,1,1
5,66,4,4,3,1
4,64,4,4,3,1
5,52,4,3,3,1
5,43,1,4,3,1
4,56,4,4,3,0
4,72,3,?,3,0
6,51,4,4,3,1
4,79,4,4,3,1
4,22,2,1,3,0
4,73,2,1,3,0
4,53,3,4,3,0
4,59,2,1,3,1
4,46,4,4,2,0
5,66,4,4,3,1
4,50,4,3,3,1
4,58,1,1,3,1
4,55,1,1,3,0
4,62,2,4,3,1
4,60,1,1,3,0
5,57,4,3,3,1
4,57,1,1,3,0
6,41,2,1,3,0
4,71,2,1,3,1
4,32,2,1,3,0
4,57,2,1,3,0
4,19,1,1,3,0
4,62,2,4,3,1
5,67,4,5,3,1
4,50,4,5,3,0
4,65,2,3,2,0
4,40,2,4,2,0
6,71,4,4,3,1
6,68,4,3,3,1
4,68,1,1,3,0
4,29,1,1,3,0
4,53,2,1,3,0
5,66,4,4,3,1
4,60,3,?,4,0
5,76,4,4,3,1
4,58,2,1,2,0
5,96,3,4,3,1
5,70,4,4,3,1
4,34,2,1,3,0
4,59,2,1,3,0
4,45,3,1,3,1
5,65,4,4,3,1
4,59,1,1,3,0
4,21,2,1,3,0
3,43,2,1,3,0
4,53,1,1,3,0
4,65,2,1,3,0
4,64,2,4,3,1
4,53,4,4,3,0
4,51,1,1,3,0
4,59,2,4,3,0
4,56,2,1,3,0
4,60,2,1,3,0
4,22,1,1,3,0
4,25,2,1,3,0
6,76,3,?,3,0
5,69,4,4,3,1
4,58,2,1,3,0
5,62,4,3,3,1
4,56,4,4,3,0
4,64,1,1,3,0
4,32,2,1,3,0
5,48,?,4,?,1
5,59,4,4,2,1
4,52,1,1,3,0
4,63,4,4,3,0
5,67,4,4,3,1
5,61,4,4,3,1
5,59,4,5,3,1
5,52,4,3,3,1
4,35,4,4,3,0
5,77,3,3,3,1
5,71,4,3,3,1
5,63,4,3,3,1
4,38,2,1,2,0
5,72,4,3,3,1
4,76,4,3,3,1
4,53,3,3,3,0
4,67,4,5,3,0
5,69,2,4,3,1
4,54,1,1,3,0
2,35,2,1,2,0
5,68,4,3,3,1
4,68,4,4,3,0
4,67,2,4,3,1
3,39,1,1,3,0
4,44,2,1,3,0
4,33,1,1,3,0
4,60,?,4,3,0
4,58,1,1,3,0
4,31,1,1,3,0
3,23,1,1,3,0
5,56,4,5,3,1
4,69,2,1,3,1
6,63,1,1,3,0
4,65,1,1,3,1
4,44,2,1,2,0
4,62,3,3,3,1
4,67,4,4,3,1
4,56,2,1,3,0
4,52,3,4,3,0
4,43,1,1,3,1
4,41,4,3,2,1
4,42,3,4,2,0
3,46,1,1,3,0
5,55,4,4,3,1
5,58,4,4,2,1
5,87,4,4,3,1
4,66,2,1,3,0
0,72,4,3,3,1
5,60,4,3,3,1
5,83,4,4,2,1
4,31,2,1,3,0
4,53,2,1,3,0
4,64,2,3,3,0
5,31,4,4,2,1
5,62,4,4,2,1
4,56,2,1,3,0
5,58,4,4,3,1
4,67,1,4,3,0
5,75,4,5,3,1
5,65,3,4,3,1
5,74,3,2,3,1
4,59,2,1,3,0
4,57,4,4,4,1
4,76,3,2,3,0
4,63,1,4,3,0
4,44,1,1,3,0
4,42,3,1,2,0
4,35,3,?,2,0
5,65,4,3,3,1
4,70,2,1,3,0
4,48,1,1,3,0
4,74,1,1,1,1
6,40,?,3,4,1
4,63,1,1,3,0
5,60,4,4,3,1
5,86,4,3,3,1
4,27,1,1,3,0
4,71,4,5,2,1
5,85,4,4,3,1
4,51,3,3,3,0
6,72,4,3,3,1
5,52,4,4,3,1
4,66,2,1,3,0
5,71,4,5,3,1
4,42,2,1,3,0
4,64,4,4,2,1
4,41,2,2,3,0
4,50,2,1,3,0
4,30,1,1,3,0
4,67,1,1,3,0
5,62,4,4,3,1
4,46,2,1,2,0
4,35,1,1,3,0
4,53,1,1,2,0
4,59,2,1,3,0
4,19,3,1,3,0
5,86,2,1,3,1
4,72,2,1,3,0
4,37,2,1,2,0
4,46,3,1,3,1
4,45,1,1,3,0
4,48,4,5,3,0
4,58,4,4,3,1
4,42,1,1,3,0
4,56,2,4,3,1
4,47,2,1,3,0
4,49,4,4,3,1
5,76,2,5,3,1
5,62,4,5,3,1
5,64,4,4,3,1
5,53,4,3,3,1
4,70,4,2,2,1
5,55,4,4,3,1
4,34,4,4,3,0
5,76,4,4,3,1
4,39,1,1,3,0
2,23,1,1,3,0
4,19,1,1,3,0
5,65,4,5,3,1
4,57,2,1,3,0
5,41,4,4,3,1
4,36,4,5,3,1
4,62,3,3,3,0
4,69,2,1,3,0
4,41,3,1,3,0
3,51,2,4,3,0
5,50,3,2,3,1
4,47,4,4,3,0
4,54,4,5,3,1
5,52,4,4,3,1
4,30,1,1,3,0
3,48,4,4,3,1
5,?,4,4,3,1
4,65,2,4,3,1
4,50,1,1,3,0
5,65,4,5,3,1
5,66,4,3,3,1
6,41,3,3,2,1
5,72,3,2,3,1
4,42,1,1,1,1
4,80,4,4,3,1
0,45,2,4,3,0
4,41,1,1,3,0
4,72,3,3,3,1
4,60,4,5,3,0
5,67,4,3,3,1
4,55,2,1,3,0
4,61,3,4,3,1
4,55,3,4,3,1
4,52,4,4,3,1
4,42,1,1,3,0
5,63,4,4,3,1
4,62,4,5,3,1
4,46,1,1,3,0
4,65,2,1,3,0
4,57,3,3,3,1
4,66,4,5,3,1
4,45,1,1,3,0
4,77,4,5,3,1
4,35,1,1,3,0
4,50,4,5,3,1
4,57,4,4,3,0
4,74,3,1,3,1
4,59,4,5,3,0
4,51,1,1,3,0
4,42,3,4,3,1
4,35,2,4,3,0
4,42,1,1,3,0
4,43,2,1,3,0
4,62,4,4,3,1
4,27,2,1,3,0
5,?,4,3,3,1
4,57,4,4,3,1
4,59,2,1,3,0
5,40,3,2,3,1
4,20,1,1,3,0
5,74,4,3,3,1
4,22,1,1,3,0
4,57,4,3,3,0
4,57,4,3,3,1
4,55,2,1,2,0
4,62,2,1,3,0
4,54,1,1,3,0
4,71,1,1,3,1
4,65,3,3,3,0
4,68,4,4,3,0
4,64,1,1,3,0
4,54,2,4,3,0
4,48,4,4,3,1
4,58,4,3,3,0
5,58,3,4,3,1
4,70,1,1,1,0
5,70,1,4,3,1
4,59,2,1,3,0
4,57,2,4,3,0
4,53,4,5,3,0
4,54,4,4,3,1
4,53,2,1,3,0
0,71,4,4,3,1
5,67,4,5,3,1
4,68,4,4,3,1
4,56,2,4,3,0
4,35,2,1,3,0
4,52,4,4,3,1
4,47,2,1,3,0
4,56,4,5,3,1
4,64,4,5,3,0
5,66,4,5,3,1
4,62,3,3,3,0
1. Title: Mammographic Mass Data
2. Sources:
(a) Original owners of database:
Prof. Dr. Rüdiger Schulz-Wendtland
Institute of Radiology, Gynaecological Radiology, University Erlangen-Nuremberg
Universitätsstraße 21-23
91054 Erlangen, Germany
(b) Donor of database:
Matthias Elter
Fraunhofer Institute for Integrated Circuits (IIS)
Image Processing and Medical Engineering Department (BMT)
Am Wolfsmantel 33
91058 Erlangen, Germany
[email protected]
(49) 9131-7767327
(c) Date received: October 2007
3. Past Usage:
M. Elter, R. Schulz-Wendtland and T. Wittenberg (2007)
The prediction of breast cancer biopsy outcomes using two CAD approaches that both emphasize an intelligible decision process.
Medical Physics 34(11), pp. 4164-4172
4. Relevant Information:
Mammography is the most effective method for breast cancer screening
available today. However, the low positive predictive value of breast
biopsy resulting from mammogram interpretation leads to approximately
70% unnecessary biopsies with benign outcomes. To reduce the high
number of unnecessary breast biopsies, several computer-aided diagnosis
(CAD) systems have been proposed in recent years. These systems
help physicians in their decision to perform a breast biopsy on a suspicious
lesion seen in a mammogram or to perform a short term follow-up
examination instead.
This data set can be used to predict the severity (benign or malignant)
of a mammographic mass lesion from BI-RADS attributes and the patient's age.
It contains a BI-RADS assessment, the patient's age and three BI-RADS attributes
together with the ground truth (the severity field) for 516 benign and
445 malignant masses that have been identified on full field digital mammograms
collected at the Institute of Radiology of the
University Erlangen-Nuremberg between 2003 and 2006.
Each instance has an associated BI-RADS assessment ranging from 1 (definitely benign)
to 5 (highly suggestive of malignancy) assigned in a double-review process by
physicians. Assuming that all cases with BI-RADS assessments greater than or equal to
a given value (varying from 1 to 5) are malignant and the other cases benign,
sensitivities and associated specificities can be calculated. These can be an
indication of how well a CAD system performs compared to the radiologists.
5. Number of Instances: 961
6. Number of Attributes: 6 (1 goal field, 1 non-predictive, 4 predictive attributes)
7. Attribute Information:
1. BI-RADS assessment: 1 to 5 (ordinal)
2. Age: patient's age in years (integer)
3. Shape: mass shape: round=1 oval=2 lobular=3 irregular=4 (nominal)
4. Margin: mass margin: circumscribed=1 microlobulated=2 obscured=3 ill-defined=4 spiculated=5 (nominal)
5. Density: mass density: high=1 iso=2 low=3 fat-containing=4 (ordinal)
6. Severity: benign=0 or malignant=1 (binominal)
8. Missing Attribute Values: Yes
- BI-RADS assessment: 2
- Age: 5
- Shape: 31
- Margin: 48
- Density: 76
- Severity: 0
9. Class Distribution: benign: 516; malignant: 445
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment