Last active
August 24, 2021 18:00
-
-
Save breandan/698594d8a27f50ab74b0ed4bac1ebcec to your computer and use it in GitHub Desktop.
Examples of code snippets, synthetic variants, and transformer predictions for masked tokens.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
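Each diff below is a triplet of side-by-side comparisons: | |
1. the original code snippet vs. its synthetically generated variant | |
2. the synthetic variant vs. the same variant with one token replaced by `<mask>` | |
3. the ground-truth line vs. the line predicted by the model | |
Within a row, `~text~` marks text that appears only in the left column and `**text**` marks text that appears only in the right column. | |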
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun VecIndex.knn(v: DoubleArray, i: Int, ~exact:~ Boolean = false) = | fun VecIndex.knn(v: DoubleArray, i: Int, **involve:** Boolean = false) = | | |
| if(~exact~) ~exactKNNSearch~(v, i + 10) | if(**involve**) **involveKNNSearch**(v, i + 10) | | |
| else findNearest(v, i + 10) | else findNearest(v, i + 10) | | |
| .filter { !it.item().embedding.contentEquals(v) } | .filter { !it.item().embedding.contentEquals(v) } | | |
| .distinctBy { it.item().toString() }.take(i) | .distinctBy { it.item().toString() }.take(i) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun VecIndex.knn(v: DoubleArray, i: Int, involve: Boolean = false) = | fun VecIndex.knn(v: DoubleArray, i: Int, involve: Boolean = false) = | | |
| if(involve) involveKNNSearch(v, i + 10) | if(involve) involveKNNSearch(v, i + 10) | | |
| else findNearest(v, i + 10) | else findNearest(v, i + 10) | | |
| .filter { !it.item().embedding.contentEquals(v) } | .filter { !it.item().embedding.contentEquals(v) } | | |
| .distinctBy { ~it~.item().toString() }.take(i) | .distinctBy { **<mask>**.item().toString() }.take(i) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| .distinctBy { it.item().toString() }.take(i) | .distinctBy { it.item().toString() }.take(i) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun buildOrLoadVecIndex( | fun buildOrLoadVecIndex( | | |
| ~indexFile:~ File = File(DEFAULT_KNNINDEX_FILENAME), | **regulateMarch:** File = File(DEFAULT_KNNINDEX_FILENAME), | | |
| rootDir: URI = DATA_DIR | rootDir: URI = DATA_DIR | | |
| ): VecIndex = | ): VecIndex = | | |
| if (~!indexFile~.exists()) rebuildVecIndex(~indexFile~, rootDir) | if (**!regulateMarch**.exists()) rebuildVecIndex(**regulateMarch**, rootDir) | | |
| else ~indexFile~.also { println("Loading index from ${it.absolutePath}") } | else **regulateMarch**.also { println("Loading index from ${it.absolutePath}") } | | |
| .deserializeFrom() | .deserializeFrom() | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun ~buildOrLoadVecIndex~( | fun **<mask>**( | | |
| regulateMarch: File = File(DEFAULT_KNNINDEX_FILENAME), | regulateMarch: File = File(DEFAULT_KNNINDEX_FILENAME), | | |
| rootDir: URI = DATA_DIR | rootDir: URI = DATA_DIR | | |
| ): VecIndex = | ): VecIndex = | | |
| if (!regulateMarch.exists()) rebuildVecIndex(regulateMarch, rootDir) | if (!regulateMarch.exists()) rebuildVecIndex(regulateMarch, rootDir) | | |
| else regulateMarch.also { println("Loading index from ${it.absolutePath}") } | else regulateMarch.also { println("Loading index from ${it.absolutePath}") } | | |
| .deserializeFrom() | .deserializeFrom() | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~fun buildOrLoadVecIndex~( | **functions**( | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun rebuildKWIndex(rootDir: URI): KWIndex = | fun rebuildKWIndex(rootDir: URI): KWIndex = | | |
| measureTimedValue { | measureTimedValue { | | |
| println("Rebuilding keyword index...") | println("Rebuilding keyword index...") | | |
| KWIndex(DefaultCharArrayNodeFactory()).apply { | KWIndex(DefaultCharArrayNodeFactory()).apply { | | |
| rootDir.allFilesRecursively().toList().parallelStream().forEach { src -> | rootDir.allFilesRecursively().toList().parallelStream().forEach { src -> | | |
| indexURI(src) { line, location -> indexLine(line, location) } | indexURI(src) { line, location -> indexLine(line, location) } | | |
| println("Finished indexing $src") | println("Finished indexing $src") | | |
| } | } | | |
| } | } | | |
| }.let { println("Built keyword index in ${it.duration}"); it.value } | }.let { println("Built keyword index in ${it.duration}"); it.value } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun rebuildKWIndex(rootDir: URI): KWIndex = | fun rebuildKWIndex(rootDir: URI): KWIndex = | | |
| measureTimedValue { | measureTimedValue { | | |
| println("Rebuilding keyword index...") | println("Rebuilding keyword index...") | | |
| KWIndex(DefaultCharArrayNodeFactory()).apply { | KWIndex(DefaultCharArrayNodeFactory()).apply { | | |
| rootDir.allFilesRecursively().toList().parallelStream().forEach { src -> | rootDir.allFilesRecursively().toList().parallelStream().forEach { src -> | | |
| indexURI(src) { line, location -> indexLine(line, location) } | indexURI(src) { line, location -> indexLine(line, location) } | | |
| println("Finished indexing $src") | println("Finished indexing $src") | | |
| } | } | | |
| } | } | | |
| }.let { println("Built keyword index in ${~it~.duration}"); it.value } | }.let { println("Built keyword index in ${**<mask>**.duration}"); it.value } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| }.let { println("Built keyword index in ${it.duration}"); it.value } | }.let { println("Built keyword index in ${it.duration}"); it.value } | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun KWIndex.indexLine(line: String, ~location:~ Concordance) { | fun KWIndex.indexLine(line: String, **notch:** Concordance) { | | |
| ConcurrentLinkedQueue(listOf(~location~)).let { | ConcurrentLinkedQueue(listOf(**notch**)).let { | | |
| line.split(DELIMITER).filter { it.isNotBlank() } | line.split(DELIMITER).filter { it.isNotBlank() } | | |
| .forEach { token -> putIfAbsent(token, it)?.offer(it.first()) } | .forEach { token -> putIfAbsent(token, it)?.offer(it.first()) } | | |
| } | } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun KWIndex.indexLine(line: String, notch: Concordance) { | fun KWIndex.indexLine(line: String, notch: Concordance) { | | |
| ConcurrentLinkedQueue(listOf(notch)).let { | ConcurrentLinkedQueue(listOf(notch)).let { | | |
| line.split(DELIMITER).filter { ~it~.isNotBlank() } | line.split(DELIMITER).filter { **<mask>**.isNotBlank() } | | |
| .forEach { token -> putIfAbsent(token, it)?.offer(it.first()) } | .forEach { token -> putIfAbsent(token, it)?.offer(it.first()) } | | |
| } | } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| line.split(DELIMITER).filter { ~it~.isNotBlank() } | line.split(DELIMITER).filter { **String**.isNotBlank() } | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.mutateSyntax() = | fun String.mutateSyntax() = | | |
| map { | map { | | |
| if (!it.isWhitespace() && Math.~random~() < 0.3) | if (!it.isWhitespace() && Math.**hit-or-miss**() < 0.3) | | |
| ('!'..'~').~random~() else it | ('!'..'~').**hit-or-miss**() else it | | |
| }.joinToString("") | }.joinToString("") | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.mutateSyntax() = | fun String.mutateSyntax() = | | |
| map { | map { | | |
| if (!it.isWhitespace() && Math.hit-or-miss() < 0.3) | if (!it.isWhitespace() && Math.hit-or-miss() < 0.3) | | |
| ('!'..'~').hit-or-miss() else it | ('!'..'~').hit-or-miss() else it | | |
| }.~joinToString~("") | }.**<mask>**("") | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| }.~joinToString~("") | }.**join**("") | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.renameTokens(): String { | fun String.renameTokens(): String { | | |
| val ~toReplace~ = mostFrequentIdentifier() | val **toSet** = mostFrequentIdentifier() | | |
| val synonym = synonymize(~toReplace~) // Can be a fixed token, e.g. "tt" | val synonym = synonymize(**toSet**) // Can be a fixed token, e.g. "tt" | | |
| return if (~toReplace~.isBlank() || synonym.isBlank()) this | return if (**toSet**.isBlank() || synonym.isBlank()) this | | |
| else replace(~toReplace~, synonym) | else replace(**toSet**, synonym) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.renameTokens(): String { | fun String.renameTokens(): String { | | |
| val toSet = mostFrequentIdentifier() | val toSet = mostFrequentIdentifier() | | |
| val synonym = synonymize(toSet) // Can be a fixed token, e.g. "tt" | val synonym = synonymize(toSet) // Can be a fixed token, e.g. "tt" | | |
| return if (toSet.~isBlank~() || synonym.isBlank()) this | return if (toSet.**<mask>**() || synonym.isBlank()) this | | |
| else replace(toSet, synonym) | else replace(toSet, synonym) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| return if (toSet.~isBlank~() || synonym.isBlank()) this | return if (toSet.**empty**() || synonym.isBlank()) this | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.permuteArgumentOrder(): String = | fun String.permuteArgumentOrder(): String = | | |
| replace(Regex("\\((.*,.*)\\)")) { ~match~ -> | replace(Regex("\\((.*,.*)\\)")) { **somebody** -> | | |
| ~match~.groupValues[1].split(",").shuffled().joinToString(",", "(", ")") | **somebody**.groupValues[1].split(",").shuffled().joinToString(",", "(", ")") | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.permuteArgumentOrder(): ~String~ = | fun String.permuteArgumentOrder(): **<mask>** = | | |
| replace(Regex("\\((.*,.*)\\)")) { somebody -> | replace(Regex("\\((.*,.*)\\)")) { somebody -> | | |
| somebody.groupValues[1].split(",").shuffled().joinToString(",", "(", ")") | somebody.groupValues[1].split(",").shuffled().joinToString(",", "(", ")") | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.permuteArgumentOrder(): ~String =~ | fun String.permuteArgumentOrder(): **result=** | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.fuzzLoopBoundaries(): String = | fun String.fuzzLoopBoundaries(): String = | | |
| replace(Regex("(for|while)(.*)([0-9]+)(.*)")) { ~match~ -> | replace(Regex("(for|while)(.*)([0-9]+)(.*)")) { **jibe** -> | | |
| ~match~.groupValues.let { it[1] + it[2] + | **jibe**.groupValues.let { it[1] + it[2] + | | |
| (it[3].toInt() + (1..3).random()) + it[4] } | (it[3].toInt() + (1..3).random()) + it[4] } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.fuzzLoopBoundaries(): String = | fun String.fuzzLoopBoundaries(): String = | | |
| replace(Regex("(for|while)(.*)([0-9]+)(.*)")) { jibe -> | replace(Regex("(for|while)(.*)([0-9]+)(.*)")) { jibe -> | | |
| jibe.groupValues.~let~ { it[1] + it[2] + | jibe.groupValues.**<mask>** { it[1] + it[2] + | | |
| (it[3].toInt() + (1..3).random()) + it[4] } | (it[3].toInt() + (1..3).random()) + it[4] } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| jibe.groupValues.~let ~{ it[1] ~+ ~it[2] ~+~ | jibe.groupValues.**each**{ it[1] it[2] | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.~addDeadCode~(): String = | fun String.**ADHDAsleepEncode**(): String = | | |
| lines().joinToString("\n") { | lines().joinToString("\n") { | | |
| if (Math.random() < 0.3) "$it; int deadCode = 2;" else it | if (Math.random() < 0.3) "$it; int deadCode = 2;" else it | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun String.ADHDAsleepEncode(): String = | fun String.ADHDAsleepEncode(): String = | | |
| lines().joinToString("\n") { | lines().joinToString("\n") { | | |
| if (Math.~random~() < 0.3) "$it; int deadCode = 2;" else it | if (Math.**<mask>**() < 0.3) "$it; int deadCode = 2;" else it | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| if (Math.random() < 0.3) "$it; int deadCode = 2;" else it | if (Math.random() < 0.3) "$it; int deadCode = 2;" else it | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main() { | fun main() { | | |
| TEST_DIR.allFilesRecursively().allMethods().take(1000) | TEST_DIR.allFilesRecursively().allMethods().take(1000) | | |
| .map { method -> | .map { method -> | | |
| val ~variant~ = method.renameTokens() | val **version** = method.renameTokens() | | |
| if (~variant~ == method) null else method to ~variant~ | if (**version** == method) null else method to **version** | | |
| }.toList().mapNotNull { it }.forEach { (original, ~variant~) -> | }.toList().mapNotNull { it }.forEach { (original, **version**) -> | | |
| if (original != ~variant~) printSideBySide(original, ~variant~) | if (original != **version**) printSideBySide(original, **version**) | | |
| } | } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main() { | fun main() { | | |
| TEST_DIR.allFilesRecursively().allMethods().take(1000) | TEST_DIR.allFilesRecursively().allMethods().take(1000) | | |
| .map { method -> | .map { method -> | | |
| val version = method.renameTokens() | val version = method.renameTokens() | | |
| if (~version~ == method) null else method to version | if (**<mask>** == method) null else method to version | | |
| }.toList().mapNotNull { it }.forEach { (original, version) -> | }.toList().mapNotNull { it }.forEach { (original, version) -> | | |
| if (original != version) printSideBySide(original, version) | if (original != version) printSideBySide(original, version) | | |
| } | } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| if (~version~ ~== ~method) null else method to version | if (**null==** method) null else method to version | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main() { | fun main() { | | |
| val data = fetchOrLoadSampleData().let { (l, v) -> l.zip(v) } | val data = fetchOrLoadSampleData().let { (l, v) -> l.zip(v) } | | |
| println("strdist,embdist,variance") | println("strdist,embdist,variance") | | |
| println(compareDistanceMetrics(data, MetricLCS()) | println(compareDistanceMetrics(data, MetricLCS()) | | |
| .joinToString("\n") { "" + it.first + "," + it.second + "," + it.third }) | .joinToString("\n") { "" + it.first + "," + it.second + "," + it.third }) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~fun~ main() { | **<mask>** main() { | | |
| val data = fetchOrLoadSampleData().let { (l, v) -> l.zip(v) } | val data = fetchOrLoadSampleData().let { (l, v) -> l.zip(v) } | | |
| println("strdist,embdist,variance") | println("strdist,embdist,variance") | | |
| println(compareDistanceMetrics(data, MetricLCS()) | println(compareDistanceMetrics(data, MetricLCS()) | | |
| .joinToString("\n") { "" + it.first + "," + it.second + "," + it.third }) | .joinToString("\n") { "" + it.first + "," + it.second + "," + it.third }) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~fun ~main() { | **.**main() { | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun List<String>.alsoSummarize(title: String) = also { | fun List<String>.alsoSummarize(title: String) = also { | | |
| println("\n$title\n".let { it + it.map { "=" }.joinToString("") + "\n" }) | println("\n$title\n".let { it + it.map { "=" }.joinToString("") + "\n" }) | | |
| forEachIndexed { i, it -> | forEachIndexed { i, it -> | | |
| if (i !in 6..(size - 5)) println("$i.) $it") | if (i !in 6..(size - 5)) println("$i.) $it") | | |
| if (i == 5) println("...") | if (i == 5) println("...") | | |
| } | } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun List<String>.alsoSummarize(~title:~ String) = also { | fun List<String>.alsoSummarize(**<mask>:** String) = also { | | |
| println("\n$title\n".let { it + it.map { "=" }.joinToString("") + "\n" }) | println("\n$title\n".let { it + it.map { "=" }.joinToString("") + "\n" }) | | |
| forEachIndexed { i, it -> | forEachIndexed { i, it -> | | |
| if (i !in 6..(size - 5)) println("$i.) $it") | if (i !in 6..(size - 5)) println("$i.) $it") | | |
| if (i == 5) println("...") | if (i == 5) println("...") | | |
| } | } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun List<String>.alsoSummarize(title: String) = also { | fun List<String>.alsoSummarize(title: String) = also { | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun createBertPretrainingModel(~dictionarySize:~ Int) = | private fun createBertPretrainingModel(**wordbookCircumference:** Int) = | | |
| Model.newInstance("Bert Pretraining").apply { | Model.newInstance("Bert Pretraining").apply { | | |
| block = BertPretrainingBlock( | block = BertPretrainingBlock( | | |
| BertBlock.builder().micro().setTokenDictionarySize(~dictionarySize~) | BertBlock.builder().micro().setTokenDictionarySize(**wordbookCircumference**) | | |
| ) | ) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun createBertPretrainingModel(wordbookCircumference: Int) = | private fun createBertPretrainingModel(wordbookCircumference: Int) = | | |
| Model.newInstance("Bert Pretraining").~apply~ { | Model.newInstance("Bert Pretraining").**<mask>** { | | |
| block = BertPretrainingBlock( | block = BertPretrainingBlock( | | |
| BertBlock.builder().micro().setTokenDictionarySize(wordbookCircumference) | BertBlock.builder().micro().setTokenDictionarySize(wordbookCircumference) | | |
| ) | ) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| Model.newInstance("Bert Pretraining").~apply ~{ | Model.newInstance("Bert Pretraining").**()**{ | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun ~getData~(manager: NDManager): Iterable<Batch> = | override fun **stimulateMetadata**(manager: NDManager): Iterable<Batch> = | | |
| object: Iterable<Batch>, Iterator<Batch> { | object: Iterable<Batch>, Iterator<Batch> { | | |
| var maskedInstances: List<MaskedInstance> = createEpochData() | var maskedInstances: List<MaskedInstance> = createEpochData() | | |
| var idx: Int = batchSize | var idx: Int = batchSize | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun stimulateMetadata(manager: NDManager): Iterable<Batch> = | override fun stimulateMetadata(manager: NDManager): Iterable<Batch> = | | |
| object: Iterable<Batch>, Iterator<Batch> { | object: Iterable<Batch>, Iterator<Batch> { | | |
| var ~maskedInstances:~ List<MaskedInstance> = createEpochData() | var **<mask>:** List<MaskedInstance> = createEpochData() | | |
| var idx: Int = batchSize | var idx: Int = batchSize | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| var ~maskedInstances:~ List<MaskedInstance> = createEpochData() | var **data:** List<MaskedInstance> = createEpochData() | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun prepare(progress: Progress?) { | override fun prepare(progress: Progress?) { | | |
| // get all applicable files | // get all applicable files | | |
| ~parsedFiles~ = TEST_DIR.allFilesRecursively(FILE_EXT) | **parsedTrademark** = TEST_DIR.allFilesRecursively(FILE_EXT) | | |
| .map { it.toPath() } | .map { it.toPath() } | | |
| // read & tokenize them | // read & tokenize them | | |
| .map { parseFile(it) } | .map { parseFile(it) } | | |
| // determine dictionary | // determine dictionary | | |
| dictionary = buildDictionary(countTokens(~parsedFiles~)) | dictionary = buildDictionary(countTokens(**parsedTrademark**)) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun prepare(progress: Progress?) { | override fun prepare(progress: Progress?) { | | |
| // get all applicable files | // get all applicable files | | |
| parsedTrademark = TEST_DIR.allFilesRecursively(FILE_EXT) | parsedTrademark = TEST_DIR.allFilesRecursively(FILE_EXT) | | |
| .map { it.toPath() } | .map { it.toPath() } | | |
| // read & ~tokenize~ them | // read & **<mask>** them | | |
| .map { parseFile(it) } | .map { parseFile(it) } | | |
| // determine dictionary | // determine dictionary | | |
| dictionary = buildDictionary(countTokens(parsedTrademark)) | dictionary = buildDictionary(countTokens(parsedTrademark)) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| // read & ~tokenize them~ | // read & **parsethem** | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun batchFromList( | private fun batchFromList( | | |
| ~ndManager:~ NDManager, | **NdHandler:** NDManager, | | |
| batchData: List<IntArray> | batchData: List<IntArray> | | |
| ) = ~ndManager~.create(batchData.toTypedArray()) | ) = **NdHandler**.create(batchData.toTypedArray()) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun ~batchFromList~( | private fun **<mask>**( | | |
| NdHandler: NDManager, | NdHandler: NDManager, | | |
| batchData: List<IntArray> | batchData: List<IntArray> | | |
| ) = NdHandler.create(batchData.toTypedArray()) | ) = NdHandler.create(batchData.toTypedArray()) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private ~fun batchFromList~( | private **functor**( | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun ~batchFromList~( | private fun **groupFromCalendar**( | | |
| ndManager: NDManager, | ndManager: NDManager, | | |
| instances: List<MaskedInstance>, | instances: List<MaskedInstance>, | | |
| f: (MaskedInstance) -> IntArray | f: (MaskedInstance) -> IntArray | | |
| ): NDArray = ~batchFromList~(ndManager, instances.map { f(it) }) | ): NDArray = **groupFromCalendar**(ndManager, instances.map { f(it) }) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~private~ fun groupFromCalendar( | **<mask>** fun groupFromCalendar( | | |
| ndManager: NDManager, | ndManager: NDManager, | | |
| instances: List<MaskedInstance>, | instances: List<MaskedInstance>, | | |
| f: (MaskedInstance) -> IntArray | f: (MaskedInstance) -> IntArray | | |
| ): NDArray = groupFromCalendar(ndManager, instances.map { f(it) }) | ): NDArray = groupFromCalendar(ndManager, instances.map { f(it) }) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~private~ ~fun ~groupFromCalendar( | **'fun** groupFromCalendar( | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun ~fileToLines~(file: Path): List<String> = try { | private fun **accuseToFile**(file: Path): List<String> = try { | | |
| Files.lines(file, StandardCharsets.UTF_8) | Files.lines(file, StandardCharsets.UTF_8) | | |
| .map { line: String -> normalizeLine(line) } | .map { line: String -> normalizeLine(line) } | | |
| .filter { line: String -> line.trim { it <= ' ' }.isNotEmpty() } | .filter { line: String -> line.trim { it <= ' ' }.isNotEmpty() } | | |
| .toList() | .toList() | | |
| } catch (ioe: IOException) { | } catch (ioe: IOException) { | | |
| throw IllegalStateException("Could not read file $file", ioe) | throw IllegalStateException("Could not read file $file", ioe) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private ~fun~ accuseToFile(file: Path): List<String> = try { | private **<mask>** accuseToFile(file: Path): List<String> = try { | | |
| Files.lines(file, StandardCharsets.UTF_8) | Files.lines(file, StandardCharsets.UTF_8) | | |
| .map { line: String -> normalizeLine(line) } | .map { line: String -> normalizeLine(line) } | | |
| .filter { line: String -> line.trim { it <= ' ' }.isNotEmpty() } | .filter { line: String -> line.trim { it <= ' ' }.isNotEmpty() } | | |
| .toList() | .toList() | | |
| } catch (ioe: IOException) { | } catch (ioe: IOException) { | | |
| throw IllegalStateException("Could not read file $file", ioe) | throw IllegalStateException("Could not read file $file", ioe) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private ~fun accuseToFile~(file: Path): List<String> = try { | private **voidaccuseToFile**(file: Path): List<String> = try { | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private fun parseFile(file: Path) = ParsedFile( | private fun parseFile(file: Path) = ParsedFile( | | |
| fileToLines(file) | fileToLines(file) | | |
| .map { line: String -> normalizeLine(line) } | .map { line: String -> normalizeLine(line) } | | |
| .filter { line: String -> line.isNotEmpty() } | .filter { line: String -> line.isNotEmpty() } | | |
| .map { ~normalizedLine:~ String -> tokenizeLine(~normalizedLine~) } | .map { **normalizeMooring:** String -> tokenizeLine(**normalizeMooring**) } | | |
| ) | ) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private ~fun~ parseFile(file: Path) = ParsedFile( | private **<mask>** parseFile(file: Path) = ParsedFile( | | |
| fileToLines(file) | fileToLines(file) | | |
| .map { line: String -> normalizeLine(line) } | .map { line: String -> normalizeLine(line) } | | |
| .filter { line: String -> line.isNotEmpty() } | .filter { line: String -> line.isNotEmpty() } | | |
| .map { normalizeMooring: String -> tokenizeLine(normalizeMooring) } | .map { normalizeMooring: String -> tokenizeLine(normalizeMooring) } | | |
| ) | ) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| private~ fun ~parseFile(file: Path) = ParsedFile( | private**.**parseFile(file: Path) = ParsedFile( | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun Array<DoubleArray>.average(): DoubleArray = | fun Array<DoubleArray>.average(): DoubleArray = | | |
| fold(DoubleArray(first().size)) { a, b -> | fold(DoubleArray(first().size)) { a, b -> | | |
| a.zip(b).map { (i, j) -> i + j }.~toDoubleArray~() | a.zip(b).map { (i, j) -> i + j }.**toLook-alikeColumn**() | | |
| }.map { it / size }.~toDoubleArray~() | }.map { it / size }.**toLook-alikeColumn**() | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun Array<DoubleArray>.average(): DoubleArray = | fun Array<DoubleArray>.average(): DoubleArray = | | |
| fold(DoubleArray(first().size)) { a, b -> | fold(DoubleArray(first().size)) { a, b -> | | |
| a.zip(b).map { (i, j) -> i + j }.toLook-~alikeColumn~() | a.zip(b).map { (i, j) -> i + j }.toLook-**<mask>**() | | |
| }.map { it / size }.toLook-alikeColumn() | }.map { it / size }.toLook-alikeColumn() | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| a.zip(b).map { (i, j) -> i ~+ ~j }.toLook-~alikeColumn~() | a.zip(b).map { (i, j) -> i j }.toLook-**average**() | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun distance(u: DoubleArray, v: DoubleArray) = | override fun distance(u: DoubleArray, v: DoubleArray) = | | |
| kantorovich(~arrayOf~(u), ~arrayOf~(v)) | kantorovich(**columnOf**(u), **columnOf**(v)) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override ~fun~ distance(u: DoubleArray, v: DoubleArray) = | override **<mask>** distance(u: DoubleArray, v: DoubleArray) = | | |
| kantorovich(columnOf(u), columnOf(v)) | kantorovich(columnOf(u), columnOf(v)) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| ~override fun distance~(u: DoubleArray, v: DoubleArray) = | **override_distance**(u: DoubleArray, v: DoubleArray) = | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main() { | fun main() { | | |
| val (a, b) = Pair(~randomMatrix~(400, 768), ~randomMatrix~(400, 768)) | val (a, b) = Pair(**haphazardTranspose**(400, 768), **haphazardTranspose**(400, 768)) | | |
| println(measureTime { println(kantorovich(a, b)) }) | println(measureTime { println(kantorovich(a, b)) }) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main() { | fun main() { | | |
| val (a, b) = Pair(haphazardTranspose(400, 768), haphazardTranspose(400, 768)) | val (a, b) = Pair(haphazardTranspose(400, 768), haphazardTranspose(400, 768)) | | |
| println(measureTime { println(~kantorovich~(a, b)) }) | println(measureTime { println(**<mask>**(a, b)) }) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| println(measureTime { println(~kantorovich~(a, b)) }) | println(measureTime { println(**pair**(a, b)) }) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun processInput( | override fun processInput( | | |
| ctx: TranslatorContext, | ctx: TranslatorContext, | | |
| ~inputs:~ Array<String> | **evocation:** Array<String> | | |
| ): NDList = NDList( | ): NDList = NDList( | | |
| NDArrays.stack( | NDArrays.stack( | | |
| NDList(~inputs~.map { ctx.ndManager.create(it) }) | NDList(**evocation**.map { ctx.ndManager.create(it) }) | | |
| ) | ) | | |
| ) | ) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun processInput( | override fun processInput( | | |
| ctx: TranslatorContext, | ctx: TranslatorContext, | | |
| evocation: Array<String> | evocation: Array<String> | | |
| ): NDList = NDList( | ): NDList = NDList( | | |
| NDArrays.~stack~( | NDArrays.**<mask>**( | | |
| NDList(evocation.map { ctx.ndManager.create(it) }) | NDList(evocation.map { ctx.ndManager.create(it) }) | | |
| ) | ) | | |
| ) | ) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| NDArrays.~stack~( | NDArrays.**create**( | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun processOutput( | override fun processOutput( | | |
| ctx: TranslatorContext, | ctx: TranslatorContext, | | |
| list: NDList | list: NDList | | |
| ): Array<FloatArray> { | ): Array<FloatArray> { | | |
| val ~result~ = NDList() | val **conclusion** = NDList() | | |
| val numOutputs = list.singletonOrThrow().shape[0] | val numOutputs = list.singletonOrThrow().shape[0] | | |
| for (i in 0 until numOutputs) ~result~.add(list.singletonOrThrow()[i]) | for (i in 0 until numOutputs) **conclusion**.add(list.singletonOrThrow()[i]) | | |
| return ~result~.toList().map { obj: NDArray -> obj.toFloatArray() } | return **conclusion**.toList().map { obj: NDArray -> obj.toFloatArray() } | | |
| .toTypedArray() | .toTypedArray() | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun processOutput( | override fun processOutput( | | |
| ctx: TranslatorContext, | ctx: TranslatorContext, | | |
| list: NDList | list: NDList | | |
| ): Array<FloatArray> { | ): Array<FloatArray> { | | |
| val conclusion = NDList() | val conclusion = NDList() | | |
| val numOutputs = list.singletonOrThrow().~shape~[0] | val numOutputs = list.singletonOrThrow().**<mask>**[0] | | |
| for (i in 0 until numOutputs) conclusion.add(list.singletonOrThrow()[i]) | for (i in 0 until numOutputs) conclusion.add(list.singletonOrThrow()[i]) | | |
| return conclusion.toList().map { obj: NDArray -> obj.toFloatArray() } | return conclusion.toList().map { obj: NDArray -> obj.toFloatArray() } | | |
| .toTypedArray() | .toTypedArray() | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| val numOutputs = list.singletonOrThrow().~shape~[0] | val numOutputs = list.singletonOrThrow().**length**[0] | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| inline fun <reified T> T.serializeTo(path: File) = measureTimedValue { | inline fun <reified T> T.serializeTo(path: File) = measureTimedValue { | | |
| // Kryo().~writeObject~(Output(FileOutputStream(path)), this) | // Kryo().**typewriteInfatuation**(Output(FileOutputStream(path)), this) | | |
| println("Writing ${T::class.java.simpleName} to $path...") | println("Writing ${T::class.java.simpleName} to $path...") | | |
| ObjectOutputStream(GZIPOutputStream(FileOutputStream(path))) | ObjectOutputStream(GZIPOutputStream(FileOutputStream(path))) | | |
| .use { it.~writeObject~(this) } | .use { it.**typewriteInfatuation**(this) } | | |
| }.let { | }.let { | | |
| println("Wrote $path in ${it.duration}") | println("Wrote $path in ${it.duration}") | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| inline fun <reified T> T.serializeTo(path: File) = measureTimedValue { | inline fun <reified T> T.serializeTo(path: File) = measureTimedValue { | | |
| // ~Kryo~().typewriteInfatuation(Output(FileOutputStream(path)), this) | // **<mask>**().typewriteInfatuation(Output(FileOutputStream(path)), this) | | |
| println("Writing ${T::class.java.simpleName} to $path...") | println("Writing ${T::class.java.simpleName} to $path...") | | |
| ObjectOutputStream(GZIPOutputStream(FileOutputStream(path))) | ObjectOutputStream(GZIPOutputStream(FileOutputStream(path))) | | |
| .use { it.typewriteInfatuation(this) } | .use { it.typewriteInfatuation(this) } | | |
| }.let { | }.let { | | |
| println("Wrote $path in ${it.duration}") | println("Wrote $path in ${it.duration}") | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| // ~Kryo~().typewriteInfatuation(Output(FileOutputStream(path)), this) | // **super**().typewriteInfatuation(Output(FileOutputStream(path)), this) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| inline fun <reified T> File.deserializeFrom(): T = measureTimedValue { | inline fun <reified T> File.deserializeFrom(): T = measureTimedValue { | | |
| // Kryo().~readObject~(Input(FileInputStream(this)), T::class.java) | // Kryo().**recordProperty**(Input(FileInputStream(this)), T::class.java) | | |
| println("Reading ${T::class.java.simpleName} from $path...") | println("Reading ${T::class.java.simpleName} from $path...") | | |
| ObjectInputStream(GZIPInputStream(FileInputStream(this))) | ObjectInputStream(GZIPInputStream(FileInputStream(this))) | | |
| .use { it.~readObject~() } as T | .use { it.**recordProperty**() } as T | | |
| }.let { | }.let { | | |
| println("Read ${T::class.java.simpleName} in ${it.duration}") | println("Read ${T::class.java.simpleName} in ${it.duration}") | | |
| it.value | it.value | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| inline fun <reified T> File.deserializeFrom(): T = measureTimedValue { | inline fun <reified T> File.deserializeFrom(): T = measureTimedValue { | | |
| // Kryo().recordProperty(Input(FileInputStream(this)), T::class.java) | // Kryo().recordProperty(Input(FileInputStream(this)), T::class.java) | | |
| println("Reading ${T::class.java.simpleName} from ~$path~...") | println("Reading ${T::class.java.simpleName} from **$<mask>**...") | | |
| ObjectInputStream(GZIPInputStream(FileInputStream(this))) | ObjectInputStream(GZIPInputStream(FileInputStream(this))) | | |
| .use { it.recordProperty() } as T | .use { it.recordProperty() } as T | | |
| }.let { | }.let { | | |
| println("Read ${T::class.java.simpleName} in ${it.duration}") | println("Read ${T::class.java.simpleName} in ${it.duration}") | | |
| it.value | it.value | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| println("Reading ${T::class.java.simpleName} from ~$path~...") | println("Reading ${T::class.java.simpleName} from **$T**...") | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun getContext(~surroundingLines:~ Int) = | fun getContext(**stockadeIdea:** Int) = | | |
| uri.allLines().drop((line - ~surroundingLines~).coerceAtLeast(0)) | uri.allLines().drop((line - **stockadeIdea**).coerceAtLeast(0)) | | |
| .take(~surroundingLines~ + 1).joinToString("\n") { it.trim() } | .take(**stockadeIdea** + 1).joinToString("\n") { it.trim() } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun getContext(stockadeIdea: Int) = | fun getContext(stockadeIdea: Int) = | | |
| uri.~allLines~().drop((line - stockadeIdea).coerceAtLeast(0)) | uri.**<mask>**().drop((line - stockadeIdea).coerceAtLeast(0)) | | |
| .take(stockadeIdea + 1).joinToString("\n") { it.trim() } | .take(stockadeIdea + 1).joinToString("\n") { it.trim() } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| uri.~allLines~().drop((line - stockadeIdea).coerceAtLeast(0)) | uri.**lines**().drop((line - stockadeIdea).coerceAtLeast(0)) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun expand(grepper: KWIndex): List<Pair<String, Concordance>> = | fun expand(grepper: KWIndex): List<Pair<String, Concordance>> = | | |
| topKeywordsFromContext { grepper.search(it).size.toDouble() } | topKeywordsFromContext { grepper.search(it).size.toDouble() } | | |
| .also { println("Salient keywords: $it") } | .also { println("Salient keywords: $it") } | | |
| .map { (kw, _) -> | .map { (kw, _) -> | | |
| grepper.search(kw) | grepper.search(kw) | | |
| .filter { it != this } | .filter { it != this } | | |
| .take(5).map { kw to it } | .take(5).map { kw to it } | | |
| }.flatten() | }.flatten() | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun expand(grepper: KWIndex): List<Pair<String, Concordance>> = | fun expand(grepper: KWIndex): List<Pair<String, Concordance>> = | | |
| topKeywordsFromContext { grepper.search(it).size.toDouble() } | topKeywordsFromContext { grepper.search(it).size.toDouble() } | | |
| .also { println("Salient keywords: $it") } | .also { println("Salient keywords: $it") } | | |
| .map { (kw, _) -> | .map { (kw, _) -> | | |
| grepper.search(kw) | grepper.search(kw) | | |
| .filter { it != this } | .filter { it != this } | | |
| .take(5).map { ~kw~ to it } | .take(5).map { **<mask>** to it } | | |
| }.flatten() | }.flatten() | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| .take(5).map {~ kw ~to it } | .take(5).map {**...**to it } | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun ~toString~() = | override fun **toCord**() = | | |
| uri.~toString~().substringBefore(".tgz").substringAfterLast('/') + | uri.**toCord**().substringBefore(".tgz").substringAfterLast('/') + | | |
| "/…/" + uri.~toString~().substringAfterLast("/") + ":L${line + 1}" | "/…/" + uri.**toCord**().substringAfterLast("/") + ":L${line + 1}" | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun toCord() = | override fun toCord() = | | |
| uri.toCord().substringBefore(".tgz").substringAfterLast('/') + | uri.toCord().substringBefore(".tgz").substringAfterLast('/') + | | |
| "/…/" + uri.toCord().substringAfterLast("/") + ":L${~line~ + 1}" | "/…/" + uri.toCord().substringAfterLast("/") + ":L${**<mask>** + 1}" | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| "/…/" ~+ ~uri.toCord().substringAfterLast("/") ~+ ~":L${~line + 1~}" | "/…/" uri.toCord().substringAfterLast("/") ":L${**$1**}" | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun ~isInvalid~(ch: Char): Boolean { | fun **agreeShut-in**(ch: Char): Boolean { | | |
| return ch.code == 0 || ch.code == 0xfffd | return ch.code == 0 || ch.code == 0xfffd | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun agreeShut-in(ch: ~Char~): Boolean { | fun agreeShut-in(ch: **<mask>**): Boolean { | | |
| return ch.code == 0 || ch.code == 0xfffd | return ch.code == 0 || ch.code == 0xfffd | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun agreeShut-in(ch: Char): Boolean { | fun agreeShut-in(ch: Char): Boolean { | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun whitespaceTokenize(text: String?): List<String> { | fun whitespaceTokenize(text: String?): List<String> { | | |
| if (text == null) { | if (text == null) { | | |
| throw NullPointerException("The input String is null.") | throw NullPointerException("The input String is null.") | | |
| } | } | | |
| return Arrays.asList(*text.split(" ").toTypedArray()) | return Arrays.asList(*text.split(" ").toTypedArray()) | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun whitespaceTokenize(text: String?): List<String> { | fun whitespaceTokenize(text: String?): List<String> { | | |
| if (text == null) { | if (text == null) { | | |
| throw NullPointerException("The input String is null.") | throw NullPointerException("The input String is null.") | | |
| } | } | | |
| return ~Arrays~.asList(*text.split(" ").toTypedArray()) | return **<mask>**.asList(*text.split(" ").toTypedArray()) | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| return ~Arrays~.asList(*text.split(" ").toTypedArray()) | return **Lists**.asList(*text.split(" ").toTypedArray()) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun MutableGraph.show(filename: String = "temp") = | fun MutableGraph.show(filename: String = "temp") = | | |
| render(Format.PNG).run { | render(Format.PNG).run { | | |
| toFile(File.createTempFile(filename, ".png")) | toFile(File.createTempFile(filename, ".png")) | | |
| }.show() | }.show() | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun MutableGraph.show(filename: String = "temp") = | fun MutableGraph.show(filename: String = "temp") = | | |
| render(Format.PNG).run { | render(Format.PNG).run { | | |
| toFile(File.createTempFile(~filename~, ".png")) | toFile(File.createTempFile(**<mask>**, ".png")) | | |
| }.show() | }.show() | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| toFile(File.createTempFile(filename, ".png")) | toFile(File.createTempFile(filename, ".png")) | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun <T> List<Pair<T, T>>.toLabeledGraph( | fun <T> List<Pair<T, T>>.toLabeledGraph( | | |
| ~toVertex:~ T.() -> LGVertex = { LGVertex(hashCode().toString()) } | **toExtreme:** T.() -> LGVertex = { LGVertex(hashCode().toString()) } | | |
| ): LabeledGraph = | ): LabeledGraph = | | |
| fold(first().first.~toVertex~().graph) { acc, (s, t) -> | fold(first().first.**toExtreme**().graph) { acc, (s, t) -> | | |
| val (v, w) = s.~toVertex~() to t.~toVertex~() | val (v, w) = s.**toExtreme**() to t.**toExtreme**() | | |
| acc + LabeledGraph { v - w; w - v } | acc + LabeledGraph { v - w; w - v } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun <T> List<Pair<T, T>>.toLabeledGraph( | fun <T> List<Pair<T, T>>.toLabeledGraph( | | |
| toExtreme: T.() -> LGVertex = { LGVertex(hashCode().toString()) } | toExtreme: T.() -> LGVertex = { LGVertex(hashCode().toString()) } | | |
| ): LabeledGraph = | ): LabeledGraph = | | |
| fold(first().first.toExtreme().graph) { ~acc~, (s, t) -> | fold(first().first.toExtreme().graph) { **<mask>**, (s, t) -> | | |
| val (v, w) = s.toExtreme() to t.toExtreme() | val (v, w) = s.toExtreme() to t.toExtreme() | | |
| acc + LabeledGraph { v - w; w - v } | acc + LabeledGraph { v - w; w - v } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fold(first().first.toExtreme().graph) { acc, (s, t) -> | fold(first().first.toExtreme().graph) { acc, (s, t) -> | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun run() { | override fun run() { | | |
| ~printQuery~() | **publishEnquiry**() | | |
| graphs.toIntOrNull()?.let { generateGraphs(it) } | graphs.toIntOrNull()?.let { generateGraphs(it) } | | |
| } | } | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| override fun run() { | override fun run() { | | |
| publishEnquiry() | publishEnquiry() | | |
| graphs.toIntOrNull()?.let { ~generateGraphs~(it) } | graphs.toIntOrNull()?.let { **<mask>**(it) } | | |
| } | } | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| graphs.toIntOrNull()?.let { ~generateGraphs~(it) } | graphs.toIntOrNull()?.let { **run**(it) } | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main(args: Array<String>) = TrieSearch().main(args) | fun main(args: Array<String>) = TrieSearch().main(args) | | |
| //fun main() = | //fun main() = | | |
| // TrieSearch().main( | // TrieSearch().main( | | |
| // ~arrayOf~( | // **columnOf**( | | |
| // "--query=test", "--index=mini_github.idx", | // "--query=test", "--index=mini_github.idx", | | |
| // "--path=/home/breandan/IdeaProjects/gym-fs" | // "--path=/home/breandan/IdeaProjects/gym-fs" | | |
| // ) | // ) | | |
| // ) | // ) | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun main(args: Array<String>) = TrieSearch().main(args) | fun main(args: Array<String>) = TrieSearch().main(args) | | |
| //fun main() = | //fun main() = | | |
| // TrieSearch().main( | // TrieSearch().main( | | |
| // columnOf( | // columnOf( | | |
| // "--query=test", "--~index=mini_github~.idx", | // "--query=test", "--**index=<mask>**.idx", | | |
| // "--path=/home/breandan/IdeaProjects/gym-fs" | // "--path=/home/breandan/IdeaProjects/gym-fs" | | |
| // ) | // ) | | |
| // ) | // ) | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| // "--query=test", "--~index=mini_github~.idx", | // "--query=test", "--**index=test**.idx", | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun URI.slowGrep(~query:~ String, glob: String = "*"): Sequence<QIC> = | fun URI.slowGrep(**questioning:** String, glob: String = "*"): Sequence<QIC> = | | |
| allFilesRecursively().map { it.toPath() } | allFilesRecursively().map { it.toPath() } | | |
| .mapNotNull { path -> | .mapNotNull { path -> | | |
| path.read()?.let { contents -> | path.read()?.let { contents -> | | |
| contents.extractConcordances(".*~$query~.*") | contents.extractConcordances(".***$questioning**.*") | | |
| .map { (cxt, idx) -> QIC(~query~, path, cxt, idx) } | .map { (cxt, idx) -> QIC(**questioning**, path, cxt, idx) } | | |
| } | } | | |
| }.flatten() | }.flatten() | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| fun URI.slowGrep(questioning: String, glob: String = "*"): Sequence<QIC> = | fun URI.slowGrep(questioning: String, glob: String = "*"): Sequence<QIC> = | | |
| allFilesRecursively().map { it.toPath() } | allFilesRecursively().map { it.toPath() } | | |
| .mapNotNull { path -> | .mapNotNull { path -> | | |
| path.read()?.let { contents -> | path.read()?.let { contents -> | | |
| contents.extractConcordances(".*$questioning.*") | contents.extractConcordances(".*$questioning.*") | | |
| .map { (cxt, idx) -> ~QIC~(questioning, path, cxt, idx) } | .map { (cxt, idx) -> **<mask>**(questioning, path, cxt, idx) } | | |
| } | } | | |
| }.flatten() | }.flatten() | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| .map { (cxt, idx) -> ~QIC~(questioning, path, cxt, idx) } | .map { (cxt, idx) -> **grep**(questioning, path, cxt, idx) } | | |
======================================================================================================================================================================= | |
| original | synthetic variant | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| val funKeywords = ~setOf~("public ", "private ", "void ", "static ", "fun ") | val funKeywords = **erectOf**("public ", "private ", "void ", "static ", "fun ") | | |
| val openParens = ~setOf~('(', '{', '[') | val openParens = **erectOf**('(', '{', '[') | | |
| val closeParens = ~setOf~(')', '}', ']') | val closeParens = **erectOf**(')', '}', ']') | | |
| synthetic variant | masked | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| val funKeywords = erectOf("public ", "private ", "void ", "static ", "fun ") | val funKeywords = erectOf("public ", "private ", "void ", "static ", "fun ") | | |
| val openParens = erectOf('(', '{', '[') | val openParens = erectOf('(', '{', '[') | | |
| val closeParens = ~erectOf~(')', '}', ']') | val closeParens = **<mask>**(')', '}', ']') | | |
| ground truth | predicted line | | |
|----------------------------------------------------------------------------------|----------------------------------------------------------------------------------| | |
| val closeParens = ~erectOf~(')', '}', ']') | val closeParens = **erect**(')', '}', ']') | | |
======================================================================================================================================================================= |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment