Last active
January 13, 2023 21:20
-
-
Save danielocampo2/8ddd4ce20e9dbb277dee469d6349084a to your computer and use it in GitHub Desktop.
multiDistinctBy function for Kotlin: Like stdlib distinctBy but for multiple fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the elements of this iterable that are distinct with respect to the
 * combination of values produced by all [selectors], like the stdlib
 * `distinctBy` but for several fields at once.
 *
 * Two elements are duplicates only when every selector yields an equal value
 * (compared with `equals`, so `null` equals `null`) at the same selector
 * position. The first occurrence of each combination is kept and the original
 * iteration order is preserved.
 *
 * @param selectors functions extracting the fields that form the composite key;
 *   at least one must be supplied.
 * @return a new list containing the first element seen for each distinct key.
 * @throws IllegalArgumentException if no selector is given.
 */
fun <T, K> Iterable<T>.multiDistinctBy(vararg selectors: (T) -> K): List<T> {
    require(selectors.isNotEmpty()) { "At least one selector is required." }
    // The key is the ordered list of selected values, compared structurally.
    // Summing the individual hash codes (the previous approach) merged distinct
    // tuples whose hashes add up to the same Int, e.g. ("a", 2) and ("b", 1).
    val seenKeys = HashSet<List<K>>()
    // `filter` on an Iterable is eager and sequential, so the stateful predicate
    // sees elements in iteration order and keeps the first of each key.
    return filter { element ->
        seenKeys.add(selectors.map { selector -> selector(element) })
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.junit.Assert.assertEquals | |
import org.junit.Test | |
/**
 * Unit tests for `multiDistinctBy`, covering string, nullable and numeric
 * selector fields as well as the case where nothing is filtered out.
 */
class MultiDistinctByTest {

    /** Simple fixture type with a non-null, a nullable and a numeric field. */
    data class Item(val code: String, val name: String?, val value: Int)

    /** Items sharing both `code` and `name` collapse to their first occurrence. */
    @Test
    fun multiDistinctBy_byTwoStringSelectors_shouldReturnOnlyDistinctValues() {
        val input = listOf(
            Item("1", "Item 1", 34),
            Item("1", "Item 1", 36),
            Item("2", "Item 1", 38),
            Item("2", "Item 2", 40),
            Item("2", "Item 3", 42),
            Item("2", "", 44),
            Item("2", "", 46),
        )
        val expected = listOf(
            Item("1", "Item 1", 34),
            Item("2", "Item 1", 38),
            Item("2", "Item 2", 40),
            Item("2", "Item 3", 42),
            Item("2", "", 44),
        )

        val actual = input.multiDistinctBy({ it.code }, { it.name })

        assertEquals(expected, actual)
    }

    /** A null selector value is a valid key component and must be kept. */
    @Test
    fun multiDistinctBy_withNullValue_shouldIncludeNullValue() {
        val input = listOf(
            Item("1", "Item 1", 34),
            Item("1", "Item 1", 36),
            Item("2", null, 38),
            Item("2", "Item 2", 40),
        )
        val expected = listOf(
            Item("1", "Item 1", 34),
            Item("2", null, 38),
            Item("2", "Item 2", 40),
        )

        val actual = input.multiDistinctBy({ it.code }, { it.name })

        assertEquals(expected, actual)
    }

    /** Two items with the same code and a null name count as duplicates. */
    @Test
    fun multiDistinctBy_withTwoNullValuesInSelectorField_shouldReturnOnlyOneNullValue() {
        val input = listOf(
            Item("1", "Item 1", 34),
            Item("1", "Item 1", 36),
            Item("2", null, 38),
            Item("2", null, 40),
        )
        val expected = listOf(
            Item("1", "Item 1", 34),
            Item("2", null, 38),
        )

        val actual = input.multiDistinctBy({ it.code }, { it.name })

        assertEquals(expected, actual)
    }

    /** Selectors of different types (String and Int) can be combined. */
    @Test
    fun multiDistinctBy_withStringAndNumeric_shouldReturnDistinctByStringAndNumericFields() {
        val input = listOf(
            Item("1", "Item 1", 34),
            Item("1", "Item 1", 34),
            Item("2", null, 38),
            Item("2", null, 40),
        )
        val expected = listOf(
            Item("1", "Item 1", 34),
            Item("2", null, 38),
            Item("2", null, 40),
        )

        val actual = input.multiDistinctBy({ it.code }, { it.value })

        assertEquals(expected, actual)
    }

    /** When every item differs in at least one selected field, nothing is removed. */
    @Test
    fun multiDistinctBy_allFieldsAreDifferent_shouldReturnOriginalList() {
        val input = listOf(
            Item("1", "Item 1", 34),
            Item("1", "Item 2", 34),
            Item("2", null, 38),
            Item("2", null, 40),
        )

        val actual = input.multiDistinctBy({ it.code }, { it.name }, { it.value })

        assertEquals(input, actual)
    }
}
Hi @Intex32, thanks, this was a blast from the past, so long I haven't seen this gist.
I will take a look and see if there is any way this can be fixed, thanks for the suggestion.
@danielocampo2 a "blast from the past" xD very nice
Don't feel obligated to fix this, however. I am now using distinctBy { listOf(...) },
which appears to be working for the time being.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey!
I accidentally came across this code snippet while trying to find a solution for distinct by multiple fields.
This function behaves incorrectly in a special case I found.
Given the following code
console output:
This is because you're summing the individual hashes and the hash of "name" increases whereas the hash of "size" decreases.
I came across this issue with real data... So it's not just an edge case.
Btw, this function signature would be safer:
I don't think there is an easy general solution to this.