Skip to content

Instantly share code, notes, and snippets.

@Mahoney
Created March 23, 2025 15:00
Show Gist options
  • Select an option

  • Save Mahoney/857eadbef4c8763dfc26d3ac03ac2369 to your computer and use it in GitHub Desktop.

Select an option

Save Mahoney/857eadbef4c8763dfc26d3ac03ac2369 to your computer and use it in GitHub Desktop.
Script to view repetitions in a directory
#! /usr/bin/env kotlin
import java.io.File
import java.nio.file.Path
import java.security.MessageDigest
import java.text.NumberFormat
import kotlin.io.encoding.Base64
import kotlin.io.encoding.ExperimentalEncodingApi
import kotlin.io.path.fileSize
import kotlin.io.path.relativeTo
// Digest instance reused for every file; MD5 serves only as a content fingerprint here.
val md: MessageDigest = MessageDigest.getInstance("MD5")

// First CLI argument: the directory to scan (defaults to the current directory).
val directory = args.getOrElse(0) { "." }

// Optional second CLI argument: only paths containing this substring are considered.
val filter = args.elementAtOrNull(1)

val base = File(directory)

// Lazily walks the tree under `base`, keeping regular files whose path matches the optional filter.
// Despite the name, this is the full candidate set; duplicates are only identified further down.
val filesWithRepetitions = base
    .walkTopDown()
    .filter { candidate ->
        candidate.isFile && (filter == null || candidate.path.contains(filter))
    }
// Fingerprints every candidate file, groups copies that share both file name and MD5 hash, and keeps
// only the groups that would free space if reduced to a single copy, largest saving first.
val repetitionSummary = filesWithRepetitions
    .map { file ->
        // Stream the content through the digest block by block instead of loading the whole file
        // into memory, so very large files cannot exhaust the heap.
        md.reset()
        file.forEachBlock { buffer, bytesRead -> md.update(buffer, 0, bytesRead) }
        val md5 = md.digest().base64()

        val path = file.toPath()
        FileDetails(file.name, path, path.fileSize(), md5)
    }
    // Base64 never contains a space, so "<name> <hash>" is an unambiguous grouping key.
    .groupBy { "${it.fileName} ${it.md5Sum}" }
    .map { (_, files) ->
        // Every member of a group has the same name and hash, so the first one stands in for all.
        val file = files.first()
        FileSummary(
            fileName = file.fileName,
            size = file.size,
            md5Sum = file.md5Sum,
            repetitions = files.size,
            // Directory of one representative copy, shown relative to the scanned root.
            pathExample = file.path.parent.relativeTo(base.toPath()),
        )
    }
    // A file that occurs only once has a saving of zero and is not a repetition.
    .filter { it.saving > 0 }
    .sortedByDescending { it.saving }
// Grand totals across all reported groups: bytes occupied by every copy, and bytes reclaimable.
val totalSize = repetitionSummary.sumOf { it.totalSize }
val totalSaving = repetitionSummary.sumOf { it.saving }
// Prints the report as a Markdown table: one row per duplicated file, then a grand-total row.
run {
    val columnTitles = listOf(
        "File name",
        "Example path",
        "File size (b)",
        "Repetitions",
        "Potential saving (b)",
    )

    val detailRows: List<List<Any?>> = repetitionSummary.map { summary ->
        listOf(
            summary.fileName,
            summary.pathExample,
            summary.size,
            summary.repetitions,
            summary.saving,
        )
    }

    // Null cells render as blanks; they sit in columns that have no meaningful total.
    val totalsRow: List<Any?> = listOf("Total", null, totalSize, null, totalSaving)

    println(
        renderMarkdownTable(
            header = columnTitles,
            rows = detailRows.plusElement(totalsRow),
        ),
    )
}
/**
 * Encodes this byte array as a Base64 string using the default [Base64] encoder.
 *
 * @return the Base64 text representation of the receiver's bytes.
 */
@OptIn(ExperimentalEncodingApi::class)
fun ByteArray.base64(): String = Base64.Default.encode(this)
/**
 * Summary of one group of files that share the same name and content hash.
 *
 * @property fileName name shared by every copy in the group.
 * @property size size in bytes of a single copy.
 * @property md5Sum content hash shared by every copy.
 * @property repetitions number of copies found.
 * @property pathExample location of one representative copy.
 */
data class FileSummary(
    val fileName: String,
    val size: Long,
    val md5Sum: String,
    val repetitions: Int,
    val pathExample: Path,
) {
    /** Bytes occupied by all copies together. */
    val totalSize: Long = size * repetitions

    /** Bytes that would be reclaimed by keeping just one copy. */
    val saving: Long = totalSize - size
}
/**
 * Facts recorded about a single file.
 *
 * @property fileName the file's name.
 * @property path the file's path.
 * @property size the file's size in bytes.
 * @property md5Sum hash of the file's content.
 */
data class FileDetails(
    val fileName: String,
    val path: Path,
    val size: Long,
    val md5Sum: String,
)
/**
 * Renders [header] and [rows] as a column-aligned Markdown table.
 *
 * A column is treated as numeric when there is at least one row and every cell in that column is
 * either `null` or a [Number]. Numeric columns are right-aligned (`---:` separator) and their values
 * are formatted with [formatAsNumber]; all other columns are left-aligned and rendered with
 * `toString()`. `null` cells are rendered as empty strings.
 *
 * Each column is at least as wide as its separator marker, so narrow columns stay aligned.
 *
 * @param header column titles; determines the number of columns.
 * @param rows table body; may be empty, in which case only the header and separator are rendered.
 * @return the table as lines joined with `\n`, without a trailing newline.
 * @throws IllegalArgumentException if any row does not have exactly `header.size` cells.
 */
fun renderMarkdownTable(
    header: List<String>,
    rows: List<List<Any?>>
): String {
    // Fail fast with a clear message instead of an index error somewhere in the layout code.
    rows.forEachIndexed { rowIndex, row ->
        require(row.size == header.size) {
            "Row $rowIndex has ${row.size} cells but the header has ${header.size} columns"
        }
    }

    // With no rows there is nothing to suggest a column is numeric, so fall back to text alignment.
    val columnsAreNumeric: List<Boolean> = List(header.size) { column ->
        rows.isNotEmpty() && rows.all { row ->
            val cell = row[column]
            cell == null || cell is Number
        }
    }

    val formattedRows: List<List<String>> = rows.map { row ->
        row.mapIndexed { column, cell ->
            when {
                cell == null -> ""
                columnsAreNumeric[column] -> cell.formatAsNumber()
                else -> cell.toString()
            }
        }
    }

    // The trailing ':' asks Markdown renderers to right-align the column.
    val separatorMarkers: List<String> = columnsAreNumeric.map { isNumeric ->
        if (isNumeric) "---:" else "---"
    }

    val columnWidths: List<Int> = List(header.size) { column ->
        maxOf(
            header[column].length,
            separatorMarkers[column].length,
            formattedRows.maxOfOrNull { row -> row[column].length } ?: 0,
        )
    }

    val headerLine: List<String> = header.mapIndexed { column, title ->
        title.padEnd(columnWidths[column])
    }

    val horizontalLine: List<String> = separatorMarkers.mapIndexed { column, marker ->
        marker.padStart(columnWidths[column], '-')
    }

    val rowLines: List<List<String>> = formattedRows.map { row ->
        row.mapIndexed { column, cell ->
            val width = columnWidths[column]
            if (columnsAreNumeric[column]) cell.padStart(width) else cell.padEnd(width)
        }
    }

    return (listOf(headerLine, horizontalLine) + rowLines).joinToString("\n") { line ->
        line.joinToString(prefix = "| ", separator = " | ", postfix = " |")
    }
}

/**
 * Formats the receiver with the default-locale [NumberFormat].
 *
 * The receiver is expected to be a [Number]; `NumberFormat.format(Object)` rejects other types
 * with an [IllegalArgumentException].
 */
fun Any.formatAsNumber(): String = NumberFormat.getInstance().format(this)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment