Last active
February 10, 2020 16:26
-
-
Save bholota/ff9a54edf2dbe99e88008f45a18d9d87 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Computes the Jaccard similarity of two arrays: the number of distinct values
 * they share divided by the number of distinct values present in either.
 *
 * Both arrays are converted to sets first, so duplicates do not affect the result.
 *
 * @param first the first array of values
 * @param second the second array of values
 * @return a value between 0 and 1; when both arrays are empty the result is 1.0,
 *   because two empty sets are treated as identical
 */
fun jaccardSimilarity(first: Array<Int>, second: Array<Int>): Float {
    val s1 = first.toSet()
    val s2 = second.toSet()
    val unionSize = s1.union(s2).size
    // An empty union would make the division below 0 / 0f, which is NaN.
    if (unionSize == 0) return 1f
    return s1.intersect(s2).size / unionSize.toFloat()
}
/**
 * Hands out consecutive integer codes. Each call to [encode] continues from the
 * point where the previous call stopped.
 */
class Encoder {
    /** The next unused code; advanced by every [encode] call. */
    var base = 0

    /**
     * Allocates one code per element of [categories], starting at the current [base].
     *
     * Only the number of elements is used; the element values themselves are not read.
     * Afterwards [base] has grown by the number of codes handed out.
     *
     * @return the sorted set of newly allocated codes
     */
    fun encode(categories: Set<String>) =
        (base until base + categories.size).toSortedSet().also { base += it.size }
}
/**
 * A record pairing a category name with a shop name.
 *
 * @property category the name of the item's category
 * @property shopName the name of the shop associated with the item
 */
data class Item(
    val category: String,
    val shopName: String
)
// Sample items used by the similarity report.
val A = Item("car", "ShopA")
val B = Item("toy", "ShopB")
val C = Item("toy", "ShopC")
val allItems = listOf(A, B, C)

// One encoder serves both vocabularies: it keeps counting between calls, so the
// shop codes start after the last category code and the two ranges do not overlap.
val encoder = Encoder()
val categories = allItems.map { it.category }.toSortedSet()
val allShops = allItems.map { it.shopName }.toSortedSet()
val encodedCategories = encoder.encode(categories)
val encodedShops = encoder.encode(allShops)

// Pair each name with the code at the same sorted position. zip walks both sorted
// sets once, whereas an elementAt(index) lookup per entry would re-scan the set
// from the start each time.
val categoriesMap = categories.zip(encodedCategories).toMap()
val shopsMap = allShops.zip(encodedShops).toMap()
/**
 * Encodes this item as a two-element array: the code of its category followed by
 * the code of its shop, looked up in `categoriesMap` and `shopsMap`.
 *
 * @return `[categoryCode, shopCode]`
 * @throws IllegalStateException if the category or the shop name has no code in the lookup maps
 */
fun Item.encode(): Array<Int> = arrayOf(
    checkNotNull(categoriesMap[category]) { "No code registered for category '$category'" },
    checkNotNull(shopsMap[shopName]) { "No code registered for shop '$shopName'" }
)
// Print the Jaccard similarity of each listed pair of encoded items, one line per pair.
listOf(
    "A -> B" to (A to B),
    "A -> C" to (A to C),
    "B -> C" to (B to C),
    "C -> B" to (C to B)
).forEach { (label, pair) ->
    val (left, right) = pair
    println("$label ${jaccardSimilarity(left.encode(), right.encode())} ")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment