// summary : Photos check coherency - When you have ~80000 photos/videos tooling is mandatory
// keywords : scala, photos, memories, zio, djl, machine-learning, elasticsearch
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 096b979c-6bf1-4261-a585-1d4d67effe81
// created-on : 2022-03-11T07:25:52+01:00
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala "3.4.2"
//> using dep "dev.zio::zio:2.0.13"
//> using dep "dev.zio::zio-streams:2.0.13"
//> using dep "dev.zio::zio-json:0.5.0"
//> using dep "com.drewnoakes:metadata-extractor:2.18.0"
//> using dep "com.fasterxml.uuid:java-uuid-generator:4.1.0"
//> using dep "com.sksamuel.elastic4s::elastic4s-effect-zio:8.7.0"
//> using dep "com.sksamuel.elastic4s::elastic4s-client-esjava:8.7.0"
//> using dep "com.sksamuel.elastic4s::elastic4s-json-zio:8.7.0"
//----------------------
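// Runtime configuration is read from environment variables:
//   PHOTOS_ELASTIC_URL / CEM_ELASTIC_URL                   : elasticsearch endpoint (default http://127.0.0.1:9200)
//   PHOTOS_ELASTIC_URL_TRUST_SSL                           : "true" to trust self-signed certificates
//   PHOTOS_ELASTIC_USERNAME / CEM_ELASTIC_USERNAME         : credentials (anonymous client when absent)
//   PHOTOS_ELASTIC_PASSWORD / CEM_ELASTIC_PASSWORD
//   PHOTOS_SEARCH_ROOTS                                    : comma/semicolon separated directories (required only
//                                                            by the commented-out local scan in run)
//   PHOTOS_SEARCH_INCLUDE_MASK / PHOTOS_SEARCH_IGNORE_MASK : optional path filtering regexes
// Hypothetical invocation (the file name is illustrative):
//   PHOTOS_SEARCH_ROOTS=/data/photos scala-cli photos-check-coherency.sc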
import zio.*
import zio.json.*
import zio.stream.*
import zio.stream.ZPipeline.{splitLines, utf8Decode}
import com.fasterxml.uuid.Generators
import com.drew.imaging.ImageMetadataReader
import com.drew.metadata.exif.{ExifDirectoryBase, ExifIFD0Directory, ExifSubIFDDirectory}
import java.io.{File, IOException}
import java.nio.charset.Charset
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{Files, Path, Paths}
import java.time.{Instant, OffsetDateTime, ZoneId, ZoneOffset, ZonedDateTime}
import java.util.UUID
import scala.util.matching.Regex
import scala.util.{Either, Failure, Left, Properties, Right, Success, Try}
import java.time.format.DateTimeFormatter.ISO_DATE_TIME
import java.time.temporal.ChronoField
import java.util.concurrent.TimeUnit
import scala.jdk.CollectionConverters.*
import scala.Console.{BLUE, BOLD, CYAN, GREEN, MAGENTA, RED, RESET, UNDERLINED, YELLOW}
// =====================================================================================================================
object HashOps {
  def sha1(that: String): String =
    import java.math.BigInteger
    import java.security.MessageDigest
    val content      = if (that == null) "" else that    // TODO - probably debatable, migrate to an effect
    val md           = MessageDigest.getInstance("SHA-1") // TODO - can fail => potential side effect at the boundary!
    val digest       = md.digest(content.getBytes)
    val bigInt       = new BigInteger(1, digest)
    val hashedString = bigInt.toString(16) // note: BigInteger drops leading zeros, so the result may be shorter than 40 chars
    hashedString

  def fileDigest(path: Path, algo: String = "SHA-256"): String =
    import java.math.BigInteger
    import java.security.{MessageDigest, DigestInputStream}
    import java.io.FileInputStream
    val buffer = new Array[Byte](8192)
    val md     = MessageDigest.getInstance(algo) // defaults to SHA-256
    val dis    = new DigestInputStream(new FileInputStream(path.toFile), md)
    try { while (dis.read(buffer) != -1) {} }
    finally { dis.close() }
    md.digest.map("%02x".format(_)).mkString
}
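// Example (hypothetical REPL session; the first value is the well-known SHA-1 of "hello"):
//   HashOps.sha1("hello")                   // "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
//   HashOps.fileDigest(Path.of("img.jpg"))  // SHA-256 of the file content, as lowercase hex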
// =====================================================================================================================
object ElasticOps {
  import com.sksamuel.elastic4s.zio.instances.*
  import com.sksamuel.elastic4s.ziojson.*
  import com.sksamuel.elastic4s.{ElasticClient, ElasticProperties, Index, Response}
  import com.sksamuel.elastic4s.ElasticDsl.*
  import com.sksamuel.elastic4s.http.JavaClient
  import com.sksamuel.elastic4s.requests.mappings.*
  import com.sksamuel.elastic4s.requests.bulk.BulkResponse
  import com.sksamuel.elastic4s.requests.searches.SearchResponse
  import org.elasticsearch.client.RestClientBuilder.{HttpClientConfigCallback, RequestConfigCallback}
  import org.apache.http.auth.{AuthScope, UsernamePasswordCredentials}
  import org.apache.http.client.config.RequestConfig
  import org.apache.http.impl.client.BasicCredentialsProvider
  import org.apache.http.impl.nio.client.HttpAsyncClientBuilder
  import scala.concurrent.duration.FiniteDuration
  import java.time.temporal.ChronoField
  import java.util.concurrent.TimeUnit
  import scala.util.Properties.{envOrNone, envOrElse}
  val elasticUrl      = envOrNone("PHOTOS_ELASTIC_URL").orElse(envOrNone("CEM_ELASTIC_URL")).getOrElse("http://127.0.0.1:9200")
  val elasticUrlTrust = envOrNone("PHOTOS_ELASTIC_URL_TRUST_SSL").getOrElse("false").trim.toLowerCase
  val elasticUsername = envOrNone("PHOTOS_ELASTIC_USERNAME").orElse(envOrNone("CEM_ELASTIC_USERNAME"))
  val elasticPassword = envOrNone("PHOTOS_ELASTIC_PASSWORD").orElse(envOrNone("CEM_ELASTIC_PASSWORD"))

  private val client = { // TODO rewrite to be fully effect based
    if (elasticPassword.isEmpty || elasticUsername.isEmpty) ElasticClient(JavaClient(ElasticProperties(elasticUrl)))
    else {
      lazy val provider = {
        val basicProvider = new BasicCredentialsProvider
        val credentials   = new UsernamePasswordCredentials(elasticUsername.get, elasticPassword.get)
        basicProvider.setCredentials(AuthScope.ANY, credentials)
        basicProvider
      }
      import org.apache.http.ssl.SSLContexts
      import org.apache.http.conn.ssl.TrustSelfSignedStrategy
      val sslContext = elasticUrlTrust match {
        case "true" => SSLContexts.custom().loadTrustMaterial(TrustSelfSignedStrategy()).build()
        case _      => SSLContexts.createDefault()
      }
      val customElasticClient = ElasticClient(
        JavaClient(
          ElasticProperties(elasticUrl),
          (requestConfigBuilder: RequestConfig.Builder) => requestConfigBuilder,
          (httpClientBuilder: HttpAsyncClientBuilder) => httpClientBuilder.setDefaultCredentialsProvider(provider).setSSLContext(sslContext)
        )
      )
      customElasticClient
    }
  }

  private val scrollKeepAlive = FiniteDuration(30, "seconds")
  private val timeout         = 20.seconds
  private val retrySchedule   = Schedule.exponential(100.millis, 2).jittered && Schedule.recurs(5)
  val upsertGrouping          = 50
  val searchPageSize          = 500
  // ------------------------------------------------------
  private def indexNameFromTimestamp(indexPrefix: String, timestamp: OffsetDateTime): String = {
    val year  = timestamp.get(ChronoField.YEAR)
    val month = timestamp.get(ChronoField.MONTH_OF_YEAR)
    s"$indexPrefix-$year-$month" // one index per month; note the month is not zero-padded
  }
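  // For example, a photo timestamped 2022-03-11 goes to index "photos-2022-3" when the prefix is
  // "photos" (the pattern "photos-*" used in run below matches all of them). Padding the month
  // now would split new documents away from the already existing indices.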
  // ------------------------------------------------------
  private def streamFromScroll(scrollId: String) = {
    ZStream.paginateChunkZIO(scrollId) { currentScrollId =>
      for {
        response <- client.execute(searchScroll(currentScrollId).keepAlive(scrollKeepAlive))
        nextScrollId = response.result.scrollId
        results      = Chunk.fromArray(response.result.hits.hits.map(_.sourceAsString))
        _ <- ZIO.log(s"Got ${results.size} more documents")
      } yield results -> (if (results.nonEmpty) nextScrollId else None)
    }
  }

  def fetchAll[T](indexName: String)(implicit decoder: JsonDecoder[T]) = {
    fetchAllEithers[T](indexName).absolve.mapError(err => Exception(err.toString))
  }

  def fetchAllEithers[T](indexName: String)(implicit decoder: JsonDecoder[T]) = {
    val result = for {
      response <- client.execute(search(Index(indexName)).size(searchPageSize).scroll(scrollKeepAlive))
      scrollId <- ZIO.fromOption(response.result.scrollId)
      firstResults = Chunk.fromArray(response.result.hits.hits.map(_.sourceAsString))
      // _ <- ZIO.log(s"Got ${firstResults.size} first documents")
      nextResultsStream = streamFromScroll(scrollId)
    } yield ZStream.fromChunk(firstResults) ++ nextResultsStream
    ZStream.unwrap(result).map(_.fromJson[T])
  }
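  // Both fetch variants page through the scroll API, so the whole ~80000-document corpus never
  // has to fit in one response. Intended use (as in Photos.run below):
  //   ElasticOps.fetchAllEithers[Photo]("photos-*")  // ZStream of Either[String, Photo]
  //   ElasticOps.fetchAll[Photo]("photos-*")         // same stream, but a decoding failure fails it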
  // ------------------------------------------------------
  def upsert[T](indexPrefix: String, documents: Chunk[T])(timestampExtractor: T => OffsetDateTime, idExtractor: T => String)(implicit encoder: JsonEncoder[T]) = {
    val responseEffect = client.execute {
      bulk {
        for { document <- documents } yield {
          val indexName = indexNameFromTimestamp(indexPrefix, timestampExtractor(document))
          val id        = idExtractor(document)
          indexInto(indexName).id(id).doc(document)
        }
      }
    }
    val upsertEffect = for {
      response <- responseEffect
      failures = response.result.failures.flatMap(_.error).map(_.toString)
      _ <- ZIO.log(s"${if (response.isSuccess) "Upserted" else "Failed to upsert"} ${documents.size} into elasticsearch")
      _ <- ZIO.cond(response.isSuccess, (), failures.mkString("\n"))
    } yield ()
    upsertEffect.timeout(timeout).retry(retrySchedule)
  }
}
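// Hypothetical write-side usage (this check tool only reads; an indexing tool built on the same
// object could bulk-upsert photos in groups of `upsertGrouping`):
//   ElasticOps.upsert("photos", photosChunk)(_.timestamp, _.uuid.toString)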
// =====================================================================================================================
object Photos extends ZIOAppDefault {
  val generatorPUUID = Generators.nameBasedGenerator()

  /* Attempt to generate a unique photo identifier: a name-based UUID computed from the file name
     plus the shoot timestamp when available. The camera and fileHash parameters are accepted but
     currently unused. */
  def makePUUID(camera: Option[String], shootDateTime: Option[Instant], filePath: Path, fileHash: String): UUID = {
    generatorPUUID.generate(filePath.getFileName().toString + shootDateTime.map(_.toString).getOrElse(""))
  }
  case class GeoPoint(lat: Double, lon: Double)
  object GeoPoint:
    implicit val decoder: JsonDecoder[GeoPoint] = DeriveJsonDecoder.gen
    implicit val encoder: JsonEncoder[GeoPoint] = DeriveJsonEncoder.gen

  case class Photo(
    uuid: UUID,
    timestamp: OffsetDateTime,
    filePath: Path,
    fileSize: Long,
    fileHash: String,
    fileLastUpdated: OffsetDateTime,
    category: Option[String],
    shootDateTime: Option[OffsetDateTime],
    camera: Option[String],
    tags: Map[String, String],
    keywords: List[String],        // Extracted from category
    classifications: List[String], // Extracted from AI DJL
    detectedObjects: List[String], // Extracted from AI DJL
    place: Option[GeoPoint]
  )

  object Photo {
    implicit val pathEncoder: JsonEncoder[Path] = JsonEncoder[String].contramap(p => p.toString)
    implicit val pathDecoder: JsonDecoder[Path] = JsonDecoder[String].map(p => Path.of(p))
    implicit val decoder: JsonDecoder[Photo]    = DeriveJsonDecoder.gen
    implicit val encoder: JsonEncoder[Photo]    = DeriveJsonEncoder.gen

    def makeTagKey(tag: com.drew.metadata.Tag): String = {
      val prefix = tag.getDirectoryName().trim.replaceAll("""\s+""", "")
      val name   = tag.getTagName().trim.replaceAll("""\s+""", "")
      val key    = s"$prefix$name"
      key.head.toLower + key.tail
    }
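    // For example, metadata-extractor's "GPS" directory tag "GPS Latitude" becomes the key
    // "gPSGPSLatitude" (directory name + tag name, whitespace stripped, first letter lowered),
    // which is exactly the shape computeGeoPoint looks up below.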
    def tagsToMap(tags: List[com.drew.metadata.Tag]): Map[String, String] = {
      tags
        .filterNot(_.getDescription == null)
        .map(tag => makeTagKey(tag) -> tag.getDescription)
        .toMap
    }

    def now = OffsetDateTime.now() // TODO : migrate to ZIO Clock.now

    def checkTimestampValid(ts: OffsetDateTime) = ts.get(ChronoField.YEAR) >= 2000 && ts.isBefore(now)

    def computeTimestamp(mayBeShootDateTime: Option[OffsetDateTime], fileLastUpdated: OffsetDateTime): OffsetDateTime =
      mayBeShootDateTime match
        case Some(shootDateTime) if checkTimestampValid(shootDateTime) => shootDateTime
        case _                                                         => fileLastUpdated
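    // So a photo whose EXIF shoot time is clearly bogus (a camera reset to 1970, or a date in the
    // future) is timestamped with its file modification time instead, e.g.:
    //   computeTimestamp(Some(OffsetDateTime.parse("1970-01-01T00:00Z")), lastUpdated) == lastUpdated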
    def makePhoto(
      uuid: UUID,
      filePath: Path,
      fileSize: Long,
      fileHash: String,
      fileLastUpdated: Instant,
      category: Option[String],
      shootDateTime: Option[Instant],
      camera: Option[String],
      metaDataTags: List[com.drew.metadata.Tag],
      keywords: List[String],        // Extracted from category
      classifications: List[String], // Extracted from AI DJL
      detectedObjects: List[String]  // Extracted from AI DJL
    ): Photo = {
      val shootOffsetDateTime           = shootDateTime.map(_.atOffset(ZoneOffset.UTC))
      val fileLastUpdatedOffsetDateTime = fileLastUpdated.atOffset(ZoneOffset.UTC)
      val tags                          = tagsToMap(metaDataTags)
      Photo(
        uuid = uuid,
        timestamp = computeTimestamp(shootOffsetDateTime, fileLastUpdatedOffsetDateTime),
        filePath = filePath,
        fileSize = fileSize,
        fileHash = fileHash,
        fileLastUpdated = fileLastUpdatedOffsetDateTime,
        category = category,
        shootDateTime = shootOffsetDateTime,
        camera = camera,
        tags = tags,
        keywords = keywords,
        classifications = classifications,
        detectedObjects = detectedObjects,
        place = computeGeoPoint(tags)
      )
    }
  }
  case class PhotoFileIssue(filepath: Path, throwable: Throwable)
  case class PhotoFileContentIssue(filepath: Path, throwable: Throwable)
  case class PhotoMetadataIssue(filepath: Path, throwable: Throwable)
  case class PhotoUUIDIssue(filepath: Path, throwable: Throwable)
  case class PhotoAIIssue(filepath: Path, throwable: Throwable)
  type PhotoIssue = PhotoFileContentIssue | PhotoMetadataIssue | PhotoUUIDIssue | PhotoFileIssue | PhotoAIIssue

  def categoryFromFilepath(filePath: Path, searchPath: Path): Option[String] =
    Option(filePath.getParent)
      .map(parent => searchPath.relativize(parent))
      .map(_.toString)

  def camelTokenize(that: String): Array[String] = that.split("(?=[A-Z][^A-Z])|(?:(?<=[^A-Z])(?=[A-Z]+))")
  def camelToKebabCase(that: String): String     = camelTokenize(that).map(_.toLowerCase).mkString("-")

  // French stop words filtered out of the extracted category keywords
  val excludes = Set("et", "par", "le", "la", "de", "du", "au", "aux", "pour", "à", "a", "les", "des", "avec", "dans", "sur", "d")

  def extractKeywords(input: Option[String]): List[String] =
    input match {
      case None => Nil
      case Some(category) =>
        category
          .split("[- /]+")
          .toList
          .filter(_.trim.size > 0)
          .filterNot(_.matches("^[0-9]+$"))
          .filterNot(_.contains("'"))
          .flatMap(key => camelToKebabCase(key).split("-"))
          .map(_.toLowerCase)
          .filter(_.size > 1)
          .filterNot(key => excludes.contains(key))
    }
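  // Tracing a hypothetical category "2021/VacancesAuSki":
  //   path split  -> List("2021", "VacancesAuSki")    // "2021" dropped as purely numeric
  //   camel split -> List("vacances", "au", "ski")    // "au" dropped as a stop word
  //   result      == List("vacances", "ski")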
  /*
    tags.gPSGPSLatitude     : 45° 19' 19,29"
    tags.gPSGPSLatitudeRef  : N
    tags.gPSGPSLongitude    : 6° 32' 39,47"
    tags.gPSGPSLongitudeRef : E
   */
  val dmsRE = """[-+]?(\d+)[°]\s*(\d+)['′]\s*(\d+(?:[.,]\d+)?)(?:(?:")|(?:'')|(?:′′)|(?:″))""".r

  def convert(d: Double, m: Double, s: Double): Double = d + m / 60d + s / 3600d

  def degreesMinuteSecondsToDecimalDegrees(
    dms: String,
    ref: String
  ): Try[Double] = Try {
    val dd = dms.trim match {
      case dmsRE(d, m, s) => convert(d.toDouble, m.toDouble, s.replaceAll(",", ".").toDouble)
    }
    if ("NE".contains(ref.trim.toUpperCase.head)) dd else -dd
  }

  def computeGeoPoint(photoTags: Map[String, String]): Option[GeoPoint] =
    // Degrees Minutes Seconds to Decimal Degrees
    for {
      latitude     <- photoTags.get("gPSGPSLatitude")
      latitudeRef  <- photoTags.get("gPSGPSLatitudeRef")
      longitude    <- photoTags.get("gPSGPSLongitude")
      longitudeRef <- photoTags.get("gPSGPSLongitudeRef")
      lat <- degreesMinuteSecondsToDecimalDegrees(latitude, latitudeRef).toOption   // TODO enhance error processing
      lon <- degreesMinuteSecondsToDecimalDegrees(longitude, longitudeRef).toOption // TODO enhance error processing
    } yield GeoPoint(lat, lon)
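  // With the sample tags quoted above: 45° 19' 19,29" N -> 45 + 19/60 + 19.29/3600 ≈ 45.32203 and
  // 6° 32' 39,47" E -> ≈ 6.54430, i.e. GeoPoint(45.32203, 6.54430); a S or W reference negates
  // the value.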
  def makePhoto(searchPath: Path, filePath: Path) =
    for
      metadataEither <- ZIO
        .attemptBlockingIO(ImageMetadataReader.readMetadata(filePath.toFile))
        .tapError(th => ZIO.logWarning(s"readMetadata issue with $filePath : ${th.getMessage}"))
        .either
      exifSubIFD    = metadataEither.toOption.flatMap(metaData => Option(metaData.getFirstDirectoryOfType(classOf[ExifSubIFDDirectory])))
      exifIFD0      = metadataEither.toOption.flatMap(metaData => Option(metaData.getFirstDirectoryOfType(classOf[ExifIFD0Directory])))
      shootDateTime = exifIFD0.flatMap(dir => Option(dir.getDate(ExifDirectoryBase.TAG_DATETIME))).map(_.toInstant)
      camera        = exifIFD0.flatMap(dir => Option(dir.getString(ExifDirectoryBase.TAG_MODEL)))
      fileSize        <- ZIO.attemptBlockingIO(filePath.toFile.length()).mapError(th => PhotoFileIssue(filePath, th))
      fileLastUpdated <- ZIO.attemptBlockingIO(filePath.toFile.lastModified()).mapAttempt(Instant.ofEpochMilli).mapError(th => PhotoFileIssue(filePath, th))
      fileHash        <- ZIO.attemptBlockingIO(HashOps.fileDigest(filePath)).mapError(th => PhotoFileContentIssue(filePath, th))
      category        = categoryFromFilepath(filePath, searchPath)
      metaDirectories = metadataEither.map(_.getDirectories.asScala).getOrElse(Nil)
      metaDataTags    = metaDirectories.flatMap(dir => dir.getTags.asScala).toList
      puuid <- ZIO.attemptBlockingIO(makePUUID(camera, shootDateTime, filePath, fileHash)).mapError(th => PhotoUUIDIssue(filePath, th))
      classifications <- ZIO.succeed(List.empty[String]) // AI (DJL) enrichment is not run by this coherency-check tool
      detectedObjects <- ZIO.succeed(List.empty[String])
      keywords = extractKeywords(category)
      // _ <- Console.printLine(
      //   s"$filePath - $RED${camera.getOrElse("")}$RESET - $YELLOW${keywords.mkString(",")}$RESET - $GREEN${classifications.mkString(",")}$RESET - $BLUE${detectedObjects
      //     .mkString(",")}$RESET - $RED$shootDateTime$RESET - $fileLastUpdated"
      // )
      photo = Photo.makePhoto(
        uuid = puuid,
        filePath = filePath,
        fileSize = fileSize,
        fileHash = fileHash,
        fileLastUpdated = fileLastUpdated,
        category = category,
        shootDateTime = shootDateTime,
        camera = camera,
        metaDataTags = metaDataTags,
        keywords = keywords,
        classifications = classifications,
        detectedObjects = detectedObjects
      )
      // _ <- ZIO.logInfo(s"processed $photo")
    yield photo
  // -------------------------------------------------------------------------------------------------------------------
  def searchPredicate(includeMaskRegex: Option[Regex], ignoreMaskRegex: Option[Regex])(path: Path, attrs: BasicFileAttributes): Boolean = {
    attrs.isRegularFile &&
    (ignoreMaskRegex.isEmpty || ignoreMaskRegex.get.findFirstIn(path.toString).isEmpty) &&
    (includeMaskRegex.isEmpty || includeMaskRegex.get.findFirstIn(path.toString).isDefined)
  }
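  // A path is kept when it is a regular file, does not match the ignore mask (if set) and matches
  // the include mask (if set). Hypothetical masks:
  //   PHOTOS_SEARCH_INCLUDE_MASK='(?i)\.(jpg|jpeg|png|mp4)$'
  //   PHOTOS_SEARCH_IGNORE_MASK='/\.thumbnails/'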
  def findFromSearchRoot(
    searchRoot: Path,
    includeMaskRegex: Option[Regex],
    ignoreMaskRegex: Option[Regex]
  ) = {
    val result = for {
      searchPath <- ZIO.attempt(searchRoot)
      javaStream = Files.find(searchPath, 10, searchPredicate(includeMaskRegex, ignoreMaskRegex)) // walks at most 10 directory levels deep
      pathStream = ZStream.fromJavaStream(javaStream).map(path => searchRoot -> path)
    } yield pathStream
    ZStream.unwrap(result)
  }

  def fetch() = {
    val result = for {
      // _ <- ZIO.logInfo("photos inventory")
      searchRoots <- System
        .env("PHOTOS_SEARCH_ROOTS")
        .someOrFail("nowhere to search")
        .map(_.split("[,;]").toList.map(_.trim))
      includeMask      <- System.env("PHOTOS_SEARCH_INCLUDE_MASK")
      includeMaskRegex <- ZIO.attempt(includeMask.map(_.r))
      ignoreMask       <- System.env("PHOTOS_SEARCH_IGNORE_MASK")
      ignoreMaskRegex  <- ZIO.attempt(ignoreMask.map(_.r))
      searchRootsStreams = Chunk.fromIterable(searchRoots).map(searchRoot => findFromSearchRoot(Path.of(searchRoot), includeMaskRegex, ignoreMaskRegex))
      zCandidatesStream  = ZStream.concatAll(searchRootsStreams)
    } yield zCandidatesStream
    ZStream.unwrap(result)
  }
  def isInvalidDateTime(photo: Photo): Boolean =
    val shootDateTime = photo.shootDateTime
    val lastUpdated   = photo.fileLastUpdated
    val now           = OffsetDateTime.now()
    def checkInvalid(ts: OffsetDateTime) = ts.get(ChronoField.YEAR) < 2000 || ts.isAfter(now)
    shootDateTime.exists(checkInvalid) || checkInvalid(lastUpdated)

  def commonChecks(photosEithers: Chunk[Either[Object, Photo]]) = {
    val photos                  = photosEithers.collect { case Right(photo) => photo }
    val issues                  = photosEithers.collect { case Left(issue) => issue }
    val duplicates              = photos.groupBy(_.fileHash).filter((hash, photoForHash) => photoForHash.size > 1)
    val missingCategoryKeywords = photos.filter(_.keywords.size == 0)
    val allCategoryKeywords     = photos.flatMap(_.keywords).toSet
    val allClassifications      = photos.flatMap(_.classifications).toSet
    val allDetectedObjects      = photos.flatMap(_.detectedObjects).toSet
    val invalidDates            = photos.filter(photo => isInvalidDateTime(photo))
    val noShootDateTimes        = photos.filter(photo => photo.shootDateTime.isEmpty)
    val photoWithPlaces         = photos.filter(_.place.isDefined)
    val oldestPhotoWithPlace    = photoWithPlaces.minByOption(_.timestamp) // minByOption: the chunk may be empty
    for {
      _ <- Console.printLine(s"${UNDERLINED}FOUND ISSUES :$RESET")
      _ <- ZIO.foreach(issues)(issue => Console.printLine(s"$RED$issue$RESET"))
      _ <- Console.printLine(s"${UNDERLINED}FOUND DUPLICATED PHOTOS :$RESET")
      _ <- ZIO.foreach(duplicates.toList)((hash, hashDuplicates) => Console.printLine(s"""${YELLOW}Duplicated photos$RESET : ${hashDuplicates.map(_.filePath.toString).mkString(", ")}"""))
      _ <- Console.printLine(s"${UNDERLINED}FOUND PHOTOS WITHOUT CATEGORY KEYWORDS$RESET")
      _ <- ZIO.foreach(missingCategoryKeywords)(photo => Console.printLine(s"${CYAN}No keywords : ${photo.filePath}$RESET"))
      _ <- Console.printLine(s"${UNDERLINED}FOUND PHOTOS WITH INVALID DATES$RESET")
      _ <- ZIO.foreach(invalidDates)(photo => Console.printLine(s"${YELLOW}Invalid date : ${photo.filePath} - ${photo.shootDateTime} - ${photo.fileLastUpdated}$RESET"))
      _ <- Console.printLine(s"${UNDERLINED}SUMMARY$RESET")
      _ <- Console.printLine(s"${YELLOW}Found ${duplicates.size} duplicated photos$RESET")
      _ <- Console.printLine(s"${RED}Encountered ${issues.size} processing issues$RESET")
      _ <- Console.printLine(s"${RED}Found ${invalidDates.size} photos with invalid dates$RESET")
      _ <- Console.printLine(s"${CYAN}Found ${missingCategoryKeywords.size} photos without category keywords$RESET")
      _ <- Console.printLine(s"${GREEN}Found ${allCategoryKeywords.size} distinct category keywords$RESET")
      _ <- Console.printLine(s"${GREEN}Found ${allClassifications.size} distinct photo classes$RESET")
      _ <- Console.printLine(s"${GREEN}Found ${allDetectedObjects.size} distinct detected object types$RESET")
      _ <- Console.printLine(s"${YELLOW}Found ${noShootDateTimes.size} photos without shoot date time$RESET")
      _ <- Console.printLine(s"${BLUE}Found ${photoWithPlaces.size} photos with GPS information$RESET")
      _ <- ZIO.foreach(oldestPhotoWithPlace)(photo => Console.printLine(s"${BLUE}Oldest photo with GPS information ${photo.timestamp}$RESET"))
    } yield ()
  }
  def localChecks(photosEithers: Chunk[Either[Object, Photo]]) = for {
    _ <- Console.printLine(s"${YELLOW}Local photos checks$RESET")
    _ <- commonChecks(photosEithers)
  } yield ()

  def remoteChecks(photosEithers: Chunk[Either[Object, Photo]]) = for {
    _ <- Console.printLine(s"${YELLOW}Remote photos checks$RESET")
    _ <- commonChecks(photosEithers)
  } yield ()

  def run = for {
    started            <- Clock.instant
    remotePhotoEithers <- ElasticOps.fetchAllEithers[Photo]("photos-*").runCollect
    // localPhotosEithers <- fetch() // without detectedObject & classifications
    //   .mapZIOParUnordered(4)((searchPath, path) => makePhoto(searchPath, path).either)
    //   .runCollect
    // _ <- localChecks(localPhotosEithers)
    _ <- remoteChecks(remotePhotoEithers)
    // localPhotos = localPhotosEithers.collect { case Right(photo) => photo }
    remotePhotos = remotePhotoEithers.collect { case Right(photo) => photo }
    // _ <- Console.printLine(s"${GREEN}LOCAL PHOTOS COUNT ${localPhotos.size}$RESET")
    _ <- Console.printLine(s"${GREEN}REMOTE PHOTOS COUNT ${remotePhotos.size}$RESET")
    finished <- Clock.instant
    duration = finished.getEpochSecond - started.getEpochSecond
    _ <- Console.printLine(s"${YELLOW}Run operations done in $duration seconds$RESET")
  } yield ()
}

Photos.main(Array.empty)