from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import numpy as np
import os
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from PIL import Image
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.cluster import DBSCAN
# Create the base pre-trained model; include_top=False drops the
# classification head so predict() returns convolutional feature maps
base_model = ResNet50(weights='imagenet', include_top=False)

# Lists for data collection
all_features = []
files_column = []
files_onlyname_column = []
# Make sure the thumbnail directory exists before the loop writes to it
os.makedirs("./data/images/resized_leaflet", exist_ok=True)

# Read each image and extract ResNet50 features
for root, dirs, files in os.walk('./data/images/images/images/'):
    for name in files:
        if name.endswith(".jpg"):
            filename = "{}/{}".format(root, name)
            # Load image and resize to the network's expected input size
            img = image.load_img(filename, target_size=(224, 224))
            img_array = image.img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            # Preprocess input (ImageNet mean subtraction / channel ordering)
            preprocessed = preprocess_input(img_array)
            # Extract features and flatten the output (shape is (1, 7, 7, 2048))
            features = base_model.predict(preprocessed, batch_size=1)
            flatten_features = features.flatten()
            all_features.append(flatten_features)
            files_column.append(filename)
            files_onlyname_column.append(name)
            # Save a 64x64 thumbnail for the scatter plot
            # (Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10)
            image_resize = Image.open(filename)
            image_resize.thumbnail((64, 64), Image.LANCZOS)
            image_resize.save("./data/images/resized_leaflet/{}".format(name), "JPEG")
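# A note on speed: calling predict() once per image is slow. A minimal sketch
# of batched extraction, assuming the preprocessed arrays were collected into
# a hypothetical list (preprocessed_list) and all fit in memory; the batch
# size of 32 is an arbitrary choice:
# batch = np.vstack(preprocessed_list)  # (N, 224, 224, 3)
# all_features = base_model.predict(batch, batch_size=32).reshape(len(preprocessed_list), -1)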
# Convert all the features into a NumPy array
all_features = np.asarray(all_features)
# pickle.dump(all_features, open('features.pkl', 'wb'))
# all_features = pickle.load(open('features.pkl', 'rb'))

# Apply PCA to reduce the 100,352-dimensional vectors to 2048 components
# (n_components cannot exceed the number of images)
embedded_features = PCA(n_components=2048).fit_transform(all_features)
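# Optional sanity check (not part of the original script): fit PCA separately
# to see how much variance the 2048 components actually retain.
# pca = PCA(n_components=2048)
# embedded_features = pca.fit_transform(all_features)
# print("Variance retained: {:.2%}".format(pca.explained_variance_ratio_.sum()))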
# Apply t-SNE to project the PCA output down to 2D
embedded_features = TSNE(n_components=2, verbose=3, perplexity=25).fit_transform(embedded_features)

# Find clusters in the 2D embedding using DBSCAN
dbscan = DBSCAN(eps=1.5, min_samples=15).fit(embedded_features)
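# eps=1.5 is dataset-specific. A common heuristic for choosing it (sketched
# here as an optional check, not part of the original script) is to plot the
# sorted distance to each point's min_samples-th neighbour and look for the
# "elbow":
# from sklearn.neighbors import NearestNeighbors
# distances, _ = NearestNeighbors(n_neighbors=15).fit(embedded_features).kneighbors(embedded_features)
# plt.plot(np.sort(distances[:, -1])); plt.ylabel("15-NN distance"); plt.show()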
# One colour per cluster label (reused cyclically if there are more clusters)
colors = ["black", "brown", "chocolate", "gold", "forestgreen", "teal",
          "dodgerblue", "navy", "darkviolet", "deeppink"]

# Create a data frame and plot the results using matplotlib
df = pd.DataFrame()
df["files"] = files_column
df["files_names"] = files_onlyname_column
df["x"] = embedded_features[:, 0]
df["y"] = embedded_features[:, 1]
df["labels"] = dbscan.labels_
df.to_csv("output.csv", index=False)

df = df.sort_values(by=["labels"])
# Spread the points out so the 64px thumbnails don't overlap too much
x = df["x"].values * 200
y = df["y"].values * 200
fig, ax = plt.subplots(figsize=(150, 150))
ax.scatter(x, y)

def getImage(path, alpha=1):
    return OffsetImage(plt.imread(path), alpha=alpha)

# Draw each thumbnail at its t-SNE position; noise points (label -1) are
# faded out, clustered points get a frame coloured by cluster label.
# The directory matches the one the thumbnails were saved to above.
for x0, y0, path, label in zip(x, y, df["files_names"].values, df["labels"].values):
    if label == -1:
        ab = AnnotationBbox(getImage("{}/{}".format("./data/images/resized_leaflet", path), 0.1), (x0, y0),
                            frameon=True, pad=0)
    else:
        ab = AnnotationBbox(getImage("{}/{}".format("./data/images/resized_leaflet", path)), (x0, y0),
                            frameon=True, bboxprops=dict(edgecolor=colors[label % len(colors)], lw=10), pad=0)
    ax.add_artist(ab)
plt.savefig('foo.png')

# Output clustered rows (noise excluded) as JSON
df[df["labels"] != -1].to_json("output.json", orient="records")
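# Quick optional inspection of the result: how many images landed in each
# cluster, with -1 meaning DBSCAN noise.
# print(df["labels"].value_counts().sort_index())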