Created
September 12, 2016 04:50
-
-
Save rahulsom/6060dbea5905754696e2396a87e11360 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is my solution to https://www.youtube.com/watch?v=LYKn0yUTIU4 | |
Languages are hard. General assumptions: | |
1. What qualifies as a letter is hard to define in some languages, such as most Indic languages.
My definition of a letter is whatever takes up one character in UTF encoding.
2. The same applies to Chinese. In Chinese there are symbols for every letter, and each of them is a Unicode symbol.
That makes Chinese almost similar to the decimal number system.
Q1. What's the smallest number to form an English sequence of 7?
7: [323 (three hundred twenty-three), 23 (twenty-three), 11 (eleven), 6 (six), 3 (three), 5 (five), 4 (four)] | |
[Rule: no and. spaces and hyphens don't count] | |
Q2. Is there a language with a bigger loop than French?
No. | |
These are loops bigger than 2: | |
3 -> [is, is_IS, pl, pl_PL] | |
4 -> [fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT] | |
Q3: What language has the highest k Value? | |
Kalaallisut - GREENLAND (iso code: kl_GL) | |
k Value is 29. | |
0 -> nuulu | |
1 -> ataaseq | |
2 -> marluk | |
3 -> pingasut | |
4 -> sisamat | |
5 -> tallimat | |
6 -> arfinillit | |
7 -> arfineq-marluk | |
8 -> arfineq-pingasut | |
9 -> arfineq-sisamat | |
10 -> qulit | |
11 -> aqqanilit | |
12 -> aqqaneq-marluk | |
13 -> aqqaneq-pingasut | |
14 -> aqqaneq-sisamat | |
15 -> aqqaneq-tallimat | |
16 -> arfersanillit | |
17 -> arfersaneq-marluk | |
18 -> arfersaneq-pingasut | |
19 -> arfersaneq-sisamat | |
20 -> marlunnik qulillit | |
21 -> marlunnik qulillit ataaseq | |
22 -> marlunnik qulillit marluk | |
23 -> marlunnik qulillit pingasut | |
24 -> marlunnik qulillit sisamat | |
25 -> marlunnik qulillit tallimat | |
26 -> marlunnik qulillit arfinillit | |
27 -> marlunnik qulillit arfineq-marluk | |
28 -> marlunnik qulillit arfineq-pingasut | |
29 -> marlunnik qulillit arfineq-sisamat | |
30 -> pingasunik qulillit | |
Bonus: | |
I define a zero as a number whose number of characters is the same as the number. French of course has no zeros. | |
English has one - FOUR (4). A lot of languages have one zero of that sort. | |
What languages have more than one? | |
2 -> [be, be_BY, cy, cy_GB, fa, fa_AF, fa_IR, fil, fil_PH, ga, ga_IE, hi, hi_IN, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, sv, sv_AX, sv_FI, sv_SE, uk, uk_UA] | |
3 -> [da, da_DK, da_GL, eo, lt, lt_LT, lv, lv_LV, nb, nb_NO, nb_SJ, nn, nn_NO] | |
4 -> [sq, sq_AL, sq_MK, sq_XK] | |
5 -> [kl, kl_GL] | |
*/ | |
@Grab('com.ibm.icu:icu4j:54.1.1') | |
import com.ibm.icu.text.RuleBasedNumberFormat | |
import com.ibm.icu.util.ULocale | |
import groovy.transform.ToString | |
/**
 * Per-locale summary of how numbers relate to the letter counts of their spelled-out forms.
 */
@ToString
class WordLengthResult {

    /** Locale (language plus optional region) whose spell-out rules produced this result. */
    ULocale locale

    /** Value above which every number is larger than the number of letters needed to spell it. */
    int kValue

    /**
     * Numbers whose spelled-out letter count equals the number itself.
     * An empty array does not rule out loops; loops can exist without any such number.
     */
    int[] zeros

    /**
     * Indexed by sequence length: entry {@code n} is the earliest sequence found that has exactly
     * {@code n} elements.
     */
    List<List<Integer>> earliestSequenceOfLength

    /** The first non-empty entry of {@link #earliestSequenceOfLength}. */
    List<Integer> shortestLoop
}
/**
 * Builds the chain obtained by repeatedly replacing a number with the letter count of its
 * spelled-out form, stopping just before a value that is already in the chain would repeat.
 *
 * @param numberFormat formatter used to spell the numbers out
 * @param number starting value; always the first element of the result
 * @return the chain of distinct values, beginning with {@code number}
 */
List<Integer> sequence(RuleBasedNumberFormat numberFormat, int number) {
    List<Integer> chain = [number]
    int current = toText(numberFormat, number).length()
    while (!chain.contains(current)) {
        chain << current
        current = toText(numberFormat, current).length()
    }
    chain
}
/**
 * Finds the k value: the value above which every number is larger than the letter count of its
 * spelled-out form. The search runs downward from 100 and stops at the first number that is not
 * larger than its own normalized text length.
 *
 * @param numberFormat formatter used to spell the numbers out
 * @return the k value for the given number format, or 1 when no number in 1..100 qualifies
 */
int getKValue(RuleBasedNumberFormat numberFormat) {
    // 100..1 is a descending range, so find yields the largest qualifying value (or null).
    def largest = (100..1).find { candidate ->
        toText(numberFormat, candidate).length() >= candidate
    }
    largest ?: 1
}
/**
 * Spells out number {@code i} with {@code numberFormat} and strips the characters that must not
 * be counted as letters: spaces, hyphens and soft hyphens.
 *
 * @param numberFormat formatter used to spell the number out
 * @param i the number to convert
 * @return the spelled-out number without spaces, hyphens or soft hyphens
 */
String toText(RuleBasedNumberFormat numberFormat, int i) {
    String spelled = numberFormat.format(new BigDecimal(i))
    // ICU spell-out rules for some locales (German, for example) join compound number words with
    // U+00AD soft hyphens. They are invisible break hints, not letters, so drop them together
    // with regular hyphens and spaces. Literal replace avoids needless regex compilation.
    spelled.replace(' ', '').replace('-', '').replace('\u00AD', '')
}
/**
 * Locales are used instead of bare languages because spell-out rules vary by region: French in
 * Belgium and Switzerland differs from French in France (70 and 90 are good examples), and other
 * languages may have similar regional differences.
 */
def locales = ULocale.availableLocales
def results = locales.collect { locale ->
    def numberFormat = new RuleBasedNumberFormat(locale, RuleBasedNumberFormat.SPELLOUT)
    def sequences = (1..1000).collect { sequence(numberFormat, it) }
    // Entry n is the first sequence with exactly n elements, or null when there is none.
    def earliestSequenceOfLength = (0..10).collect { len -> sequences.find { it.size() == len } }
    // find yields null when every entry is missing; dropWhile { !it }.head() would throw
    // NoSuchElementException in that case, although the report below already tolerates null.
    def shortestLoop = earliestSequenceOfLength.find { it }
    new WordLengthResult(
            locale: locale,
            kValue: getKValue(numberFormat),
            zeros: (1..100).findAll { toText(numberFormat, it).length() == it },
            earliestSequenceOfLength: earliestSequenceOfLength,
            shortestLoop: shortestLoop
    )
}
/**
 * Prints a section heading: two blank lines, the message, then a line of '=' characters
 * produced by replacing every character of the message.
 *
 * @param message heading text to print
 */
private void title(String message) {
    def underline = message.replaceAll('.', '=')
    ['', '', message, underline].each { line ->
        println line
    }
}
// Prints a heading, then one "key -> [locales]" line per group, ordered by ascending key.
def printDistribution = { String heading, Closure keyOf ->
    title(heading)
    def grouped = results.groupBy(keyOf)
    def ordered = grouped.sort { a, b -> a.key <=> b.key }
    ordered.each { key, group ->
        println "$key -> $group.locale"
    }
}

printDistribution("Distribution of kValues across languages") { it.kValue }
printDistribution("Distribution of number of zeros") { it.zeros.length }
// Results without a shortest loop are grouped under 0.
printDistribution("Distribution of shortestLoops") { it.shortestLoop?.size() ?: 0 }

title "Sequences of lengths"
results.each { result ->
    println ''
    println result.locale
    def speller = new RuleBasedNumberFormat(result.locale, RuleBasedNumberFormat.SPELLOUT)
    // One line per sequence length: each member is shown as "number (spelled-out form)".
    result.earliestSequenceOfLength.eachWithIndex { members, length ->
        def described = members.collect { "$it (${speller.format(it)})" }
        println "$length: ${described}"
    }
}

// Spell out 0..30 in Kalaallisut (kl_GL), the locale reported with the highest k value.
def greenlandic = ULocale.availableLocales.find { it.toString() == 'kl_GL' }
def klFormat = new RuleBasedNumberFormat(greenlandic, RuleBasedNumberFormat.SPELLOUT)
for (n in 0..30) {
    println "$n -> ${klFormat.format(n)}"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment