Last active
April 19, 2023 14:57
-
-
Save katorly/59030fb08df83af3aaa5a05fceab8946 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) 2023 Katorly (https://github.com/katorly)
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */
object textutils {
    // Put the article you want to scan here; it is what the zero-argument count() analyses.
    private val ARTICLE = """
        你的文章文字
        放在这里
        啊啊啊啊啊
    """.trimIndent()

    // Characters that are never counted and never form part of a two-character word.
    // NOTE(review): several entries repeat (" ", ",", "!", "?", ":", ";"); the second
    // occurrences may have been meant as their full-width forms - confirm against the original.
    private val SKIP: Set<String> = setOf(
        "", " ", ",", ".", "!", "?", "\'", "\"", "(", ")", "\\", "/", ":", ";", "-", " ",
        "。", ",", "!", "?", "‘", "’", "“", "”", ":", ";", "《", "》",
        "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    )

    /**
     * Count how many times a word containing one character
     * or two characters appears in the built-in sample article.
     *
     * Only words that appear more than once are printed, one per line as `word: count`.
     */
    fun count() = count(ARTICLE)

    /**
     * Same as the zero-argument [count], but scans the given [text] instead of the
     * built-in sample article.
     *
     * @param text the article to scan
     */
    fun count(text: String) {
        for ((word, occurrences) in countWords(text)) {
            if (occurrences > 1) println("$word: $occurrences")
        }
    }

    /**
     * Tallies every one-character and two-character word in [text] without printing anything.
     *
     * A two-character word is formed only from two characters that sit directly next to
     * each other on the same line: a skipped character or a line break between them
     * prevents the pair from being counted.
     *
     * @param text the article to scan; `\n`, `\r\n` and `\r` all count as line breaks
     * @param skip characters to ignore entirely
     * @return every word that occurred at least once, mapped to its number of occurrences
     */
    fun countWords(text: String, skip: Set<String> = SKIP): Map<String, Int> {
        val tally = HashMap<String, Int>()
        for (line in text.lines()) {
            // Reset at every line start so a pair never spans a line break.
            var previous: String? = null
            for (symbol in splitIntoSymbols(line)) {
                if (symbol in skip) {
                    // A skipped character separates its neighbours, so forget the left one.
                    previous = null
                    continue
                }
                tally[symbol] = (tally[symbol] ?: 0) + 1
                if (previous != null) {
                    val pair = previous + symbol
                    tally[pair] = (tally[pair] ?: 0) + 1
                }
                previous = symbol
            }
        }
        return tally
    }

    /**
     * Splits [line] into one string per character, keeping a high surrogate followed by a
     * low surrogate together so that a character outside the Basic Multilingual Plane is
     * returned as a single two-`Char` string instead of two broken halves.
     */
    private fun splitIntoSymbols(line: String): List<String> {
        val symbols = ArrayList<String>(line.length)
        var index = 0
        while (index < line.length) {
            val isPair = line[index].isHighSurrogate() &&
                index + 1 < line.length &&
                line[index + 1].isLowSurrogate()
            val width = if (isPair) 2 else 1
            symbols.add(line.substring(index, index + width))
            index += width
        }
        return symbols
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example Output: