Created
March 23, 2025 15:00
-
-
Save Mahoney/857eadbef4c8763dfc26d3ac03ac2369 to your computer and use it in GitHub Desktop.
Script to list files that are repeated (same name and same content) within a directory tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env kotlin | |
| import java.io.File | |
| import java.nio.file.Path | |
| import java.security.MessageDigest | |
| import java.text.NumberFormat | |
| import kotlin.io.encoding.Base64 | |
| import kotlin.io.encoding.ExperimentalEncodingApi | |
| import kotlin.io.path.fileSize | |
| import kotlin.io.path.relativeTo | |
// MD5 is used only to recognise identical content. One instance is shared by all files:
// digest() resets it, so it is ready for the next file as soon as a hash has been taken.
val md: MessageDigest = MessageDigest.getInstance("MD5")

// Arguments: [0] directory to scan (defaults to the current one),
//            [1] optional substring a file's path must contain for the file to be considered.
val directory = args.getOrNull(0) ?: "."
val filter = args.getOrNull(1)

val base = File(directory)
// Fail fast with a readable message: a path that does not exist would otherwise produce an
// empty report, and a bare file name has no parent path to build the example path from.
require(base.isDirectory) { "Not a directory: $directory" }

// Every regular file below the base directory that passes the optional path filter.
val filesWithRepetitions = base
    .walkTopDown()
    .filter { it.isFile }
    .filter { filter == null || it.path.contains(filter) }

// One entry per (file name, content hash) pair that occurs more than once with a non-zero
// size, largest potential saving first.
val repetitionSummary = filesWithRepetitions
    .map { file ->
        // Feed the file to the digest block by block instead of loading it whole, so memory
        // use stays flat and files too large for a single byte array can still be hashed.
        file.forEachBlock { buffer, bytesRead -> md.update(buffer, 0, bytesRead) }
        val md5 = md.digest().base64()
        val size = file.toPath().fileSize()
        FileDetails(file.name, file.toPath(), size, md5)
    }
    // Files count as repetitions of each other only when both name and hash match.
    .groupBy { it.fileName to it.md5Sum }
    .map { (_, files) ->
        val file = files.first()
        FileSummary(
            fileName = file.fileName,
            size = file.size,
            md5Sum = file.md5Sum,
            repetitions = files.size,
            pathExample = file.path.parent.relativeTo(base.toPath()),
        )
    }
    .filter { it.saving > 0 }
    .sortedByDescending { it.saving }

val totalSize = repetitionSummary.sumOf { it.totalSize }
val totalSaving = repetitionSummary.sumOf { it.saving }

println(
    renderMarkdownTable(
        header = listOf(
            "File name",
            "Example path",
            "File size (b)",
            "Repetitions",
            "Potential saving (b)",
        ),
        rows = repetitionSummary.map {
            listOf(
                it.fileName,
                it.pathExample,
                it.size,
                it.repetitions,
                it.saving,
            )
        } + listOf(
            // Closing row: the nulls leave the non-summable columns blank.
            listOf(
                "Total",
                null,
                totalSize,
                null,
                totalSaving,
            ),
        ),
    ),
)
/**
 * Encodes this byte array as a Base64 string using the standard (default) Base64 scheme.
 */
@OptIn(ExperimentalEncodingApi::class)
fun ByteArray.base64(): String {
    return Base64.Default.encode(this)
}
/**
 * Summary of one group of files that share a name and a content hash.
 *
 * @property fileName name shared by every file in the group.
 * @property size size of a single copy.
 * @property md5Sum content hash shared by every file in the group.
 * @property repetitions how many copies are in the group.
 * @property pathExample a path at which one of the copies was found.
 */
data class FileSummary(
    val fileName: String,
    val size: Long,
    val md5Sum: String,
    val repetitions: Int,
    val pathExample: Path,
) {
    /** Space taken by all copies together. */
    val totalSize: Long = size * repetitions

    /** Space that would be freed if only one copy were kept. */
    val saving: Long = totalSize - size
}
/**
 * What is recorded about a single file before files are grouped into repetitions.
 *
 * @property fileName the file's name.
 * @property path the file's path.
 * @property size the file's size.
 * @property md5Sum the file's content hash, as text.
 */
data class FileDetails(
    val fileName: String,
    val path: Path,
    val size: Long,
    val md5Sum: String,
)
/**
 * Renders [header] and [rows] as a Markdown table whose columns are padded so that they also
 * line up when read as plain text.
 *
 * A column is numeric when the table has at least one row and every cell in that column is
 * either `null` or a [Number]. Numeric columns are formatted with the default [NumberFormat]
 * and right-aligned (`---:` in the separator row); all other columns use `toString()` and are
 * left-aligned. `null` cells are rendered empty. A literal `|` in a header or text cell is
 * escaped as `\|` so that it is not read as a column delimiter.
 *
 * @param header the column titles; its size defines the number of columns.
 * @param rows the table body; may be empty, in which case only header and separator are rendered.
 * @return the table lines joined with `\n`, without a trailing newline.
 * @throws IllegalArgumentException if a row does not have exactly `header.size` cells.
 */
fun renderMarkdownTable(
    header: List<String>,
    rows: List<List<Any?>>
): String {
    rows.forEachIndexed { rowIndex, row ->
        require(row.size == header.size) {
            "Row $rowIndex has ${row.size} cells but the header has ${header.size} columns"
        }
    }

    fun String.escapePipes(): String = replace("|", "\\|")

    val columnsAreNumeric: List<Boolean> = List(header.size) { index ->
        rows.isNotEmpty() && rows.all { row -> row[index] == null || row[index] is Number }
    }
    val separators: List<String> = columnsAreNumeric.map { isNumeric -> if (isNumeric) "---:" else "---" }

    // One formatter for the whole table instead of a new one per cell.
    val numberFormat = NumberFormat.getInstance()
    val formattedHeader: List<String> = header.map { it.escapePipes() }
    val formattedRows: List<List<String>> = rows.map { row ->
        row.mapIndexed { index, cell ->
            when {
                cell == null -> ""
                columnsAreNumeric[index] -> numberFormat.format(cell)
                else -> cell.toString().escapePipes()
            }
        }
    }

    // The separator cell cannot be shortened, so it sets the minimum width of its column;
    // otherwise narrow columns would be misaligned against the separator row.
    val widths: List<Int> = List(header.size) { index ->
        maxOf(
            separators[index].length,
            formattedHeader[index].length,
            formattedRows.maxOfOrNull { row -> row[index].length } ?: 0,
        )
    }

    val headerLine: List<String> = formattedHeader.mapIndexed { index, title ->
        title.padEnd(widths[index])
    }
    val separatorLine: List<String> = separators.mapIndexed { index, marker ->
        marker.padStart(widths[index], '-')
    }
    val rowLines: List<List<String>> = formattedRows.map { row ->
        row.mapIndexed { index, cell ->
            if (columnsAreNumeric[index]) cell.padStart(widths[index]) else cell.padEnd(widths[index])
        }
    }

    return (listOf(headerLine, separatorLine) + rowLines).joinToString("\n") { line ->
        line.joinToString(prefix = "| ", separator = " | ", postfix = " |")
    }
}
/**
 * Formats the receiver with the general-purpose [NumberFormat] of the default locale.
 *
 * The receiver is handed to the formatter as-is, so it needs to be a value the formatter accepts.
 */
fun Any.formatAsNumber(): String {
    val formatter = NumberFormat.getInstance()
    return formatter.format(this)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment