This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!cp flower_photos/daisy/100080576_f52e8ee070_n.jpg flower_photos/sample/ | |
!cp flower_photos/daisy/10140303196_b88d3d6cec.jpg flower_photos/sample/ | |
!cp flower_photos/tulips/100930342_92e8746431_n.jpg flower_photos/sample/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import IPython.display as dp | |
# collect all .png files in ssample dir | |
fs = !ls flower_photos/sample/*.jpg | |
# create list of image objects | |
images = [] | |
for ea in fs: | |
images.append(dp.Image(filename=ea, format='png')) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sparkdl import readImages

# Read images using Spark
image_df = readImages("flower_photos/sample/")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.image import ImageSchema
from pyspark.sql.functions import lit
from sparkdl.image import imageIO

# Load both classes and attach the label column: tulips are 1, daisies are 0.
tulips_df = ImageSchema.readImages("flower_photos/tulips").withColumn("label", lit(1))
daisy_df = imageIO.readImagesWithCustomFn(
    "flower_photos/daisy", decode_f=imageIO.PIL_decode
).withColumn("label", lit(0))

# Split with weights 0.1 (train) / 0.05 (test) / 0.85 (discarded).
# To train on more images use larger weights, e.g. 0.6 / 0.4; a two-way split
# returns only two DataFrames, so drop the `_` target in that case.
tulips_train, tulips_test, _ = tulips_df.randomSplit([0.1, 0.05, 0.85])
daisy_train, daisy_test, _ = daisy_df.randomSplit([0.1, 0.05, 0.85])

# `union` is the non-deprecated spelling of `unionAll`; both keep duplicates.
train_df = tulips_train.union(daisy_train)
test_df = tulips_test.union(daisy_test)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
from sparkdl import DeepImageFeaturizer

# Two-stage pipeline: InceptionV3 featurizer on the "image" column feeding a
# logistic regression on the resulting "features" column.
featurizer = DeepImageFeaturizer(
    inputCol="image",
    outputCol="features",
    modelName="InceptionV3",
)
lr = LogisticRegression(
    maxIter=10,
    regParam=0.05,
    elasticNetParam=0.3,
    labelCol="label",
)
p = Pipeline(stages=[featurizer, lr])

# Fit the whole pipeline on the training split.
p_model = p.fit(train_df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Score the held-out split with the fitted pipeline and report accuracy.
tested_df = p_model.transform(test_df)
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
print("Test set accuracy = " + str(evaluator.evaluate(tested_df.select("prediction", "label"))))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import expr
# NOTE: the wildcard import below is what brings `udf` into scope for this cell.
from pyspark.sql.functions import *
from pyspark.sql.types import *
def _p1(v): | |
return float(v.array[1])y | |
# Wrap _p1 as a Spark UDF returning a double, then add its result for the
# "probability" column as a new column "p".
take_one = udf(_p1, DoubleType())
df = tested_df.withColumn("p", take_one(tested_df.probability))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sparkdl import DeepImagePredictor

# Read images using Spark
image_df = ImageSchema.readImages("flower_photos/sample/")

# Apply the stock InceptionV3 model, keeping the 10 top decoded predictions
# per image in the "predicted_labels" column.
predictor = DeepImagePredictor(
    inputCol="image",
    outputCol="predicted_labels",
    modelName="InceptionV3",
    decodePredictions=True,
    topK=10,
)
predictions_df = predictor.transform(image_df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score the sample images with the fitted pipeline.
df = p_model.transform(image_df)

# take_one yields element 1 of the probability vector; its complement is
# reported as "p_daisy".
# 100930342_92e8746431_n.jpg not a daisy
df.select("image.origin", (1 - take_one(df.probability)).alias("p_daisy")).show(truncate=False)

# Sample output:
# +---------------------------------------------------+--------------------+
# |origin                                             |p_daisy             |
# +---------------------------------------------------+--------------------+
# |.../100930342_92e8746431_n.jpg                     |0.016760347798379538|
# |.../10140303196_b88d3d6cec.jpg                     |0.9704259547739851  |
# |.../100080576_f52e8ee070_n.jpg                     |0.9705190124824862  |
# +---------------------------------------------------+--------------------+
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.applications import InceptionV3

# Build InceptionV3 with ImageNet weights and persist the full model as HDF5.
model = InceptionV3(weights="imagenet")
model.save('model-full.h5')  # saves to the local filesystem