This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from sklearn import preprocessing |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample 3x3 matrix shared by the preprocessing examples.
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# later snippets refer to the array by this name.
input = np.array([
    [8, 2, 3],
    [4, 7, 1],
    [9, 2, 6],
])

# Binarization: values above the threshold map to 1, everything else to 0.
data_biner = preprocessing.Binarizer(threshold=5).transform(input)
| output: | |
| [[1 0 0] | |
| [0 1 0] | |
| [1 0 1]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Before mean removal: report per-feature (column-wise) statistics so that the
# mean and the standard deviation are computed along the same axis.
print(
    "Mean = ", input.mean(axis=0), "\n",
    "Std deviation = ", input.std(axis=0),
)
# output (approximately):
#   Mean =  [7.         3.66666667 3.33333333]
#   Std deviation =  [2.1602469  2.3570226  2.05480467]

# After mean removal: preprocessing.scale standardizes each column, so the
# per-feature mean should be ~0 and the per-feature standard deviation ~1.
data_scaled = preprocessing.scale(input)
print(
    "Mean = ", data_scaled.mean(axis=0), "\n",
    "Std deviation = ", data_scaled.std(axis=0),
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Min-max scaling: rescale every column of `input` into the range [0, 1].
data_scaler_minmax = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaler_minmax.fit(input)
data_scaled_minmax = data_scaler_minmax.transform(input)
# Bare expression so that a notebook cell displays the scaled array.
data_scaled_minmax
| output: | |
| array([[ 0.8, 0. , 0.4], | |
| [ 0. , 1. , 0. ], | |
| [ 1. , 0. , 1. ]]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Normalize `input` twice, once with the L1 norm and once with the L2 norm,
# then print both results.
data_normalized_l1, data_normalized_l2 = (
    preprocessing.normalize(input, norm=norm_name)
    for norm_name in ('l1', 'l2')
)
print("\nL1 normalized data:\n", data_normalized_l1)
print("\nL2 normalized data:\n", data_normalized_l2)
| L1 normalized data: | |
| [[ 0.61538462 0.15384615 0.23076923] | |
| [ 0.33333333 0.58333333 0.08333333] | |
| [ 0.52941176 0.11764706 0.35294118]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Category labels to encode (weekday names in Indonesian).
label_kategori = [
    'senin', 'selasa', 'rabu', 'kamis', 'jumat', 'sabtu', 'minggu',
]

# fit() returns the encoder itself, so creation and fitting can be chained.
encoder = preprocessing.LabelEncoder().fit(label_kategori)

# Show each learned class next to its position in encoder.classes_.
print("\nLabel mapping:")
for i, item in enumerate(encoder.classes_):
    print(item, '>', i)
| output: | |
| Label mapping: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# S3 location used by the topic-clustering example.
# Replace the placeholder with the name of your own S3 bucket; it is kept as a
# string literal so that the snippet remains valid Python until then.
bucket = '<your-bucket>'
# Key prefix under which this example's objects are stored in the bucket.
prefix = 'topic-kmeans'
| import warnings | |
| warnings.simplefilter("ignore") | |
| import os | |
| import boto3 | |
| import sagemaker | |
| import numpy as np |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Subset of categories to load from the training set.
# To run the analysis on every category instead, set `categories = None`.
categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Turn the raw documents into a sparse TF-IDF matrix and report how long the
# extraction took. `time`, `TfidfVectorizer` and `dataset` are expected to be
# defined earlier in the file.
print("Extracting features from the training dataset using a sparse vectorizer")
t0 = time()
vectorizer = TfidfVectorizer(
    stop_words='english',
    use_idf=True,
    max_features=10000,
    max_df=0.5,
    min_df=2,
)
X = vectorizer.fit_transform(dataset.data)
print("done in %fs" % (time() - t0,))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clustering configuration: number of clusters and whether to use the
# mini-batch variant of k-means instead of the full algorithm.
minibatch = False
true_k = 4

# Only the selected estimator is constructed; both use k-means++ seeding
# with a single initialization.
km = (
    MiniBatchKMeans(
        n_clusters=true_k,
        init='k-means++',
        n_init=1,
        init_size=1000,
        batch_size=1000,
        verbose=False,
    )
    if minibatch
    else KMeans(
        n_clusters=true_k,
        init='k-means++',
        max_iter=100,
        n_init=1,
        verbose=False,
    )
)
OlderNewer