Skip to content

Instantly share code, notes, and snippets.

@mashingan
Last active October 15, 2017 17:43
Show Gist options
  • Save mashingan/af5d5311223794b6cc33efe2c795bbae to your computer and use it in GitHub Desktop.
Save mashingan/af5d5311223794b6cc33efe2c795bbae to your computer and use it in GitHub Desktop.
Rough dbscans implementation with dataset3ID.csv as the data
ID X1 X2
10338001 -4.673253857 -0.612095787
10338002 -4.938803684 -0.469263745
10338003 -5.460488163 0.917422708
10338004 -5.363523692 -0.616513985
10338005 0.919264591 0.112761882
10338006 -1.309394745 -0.095029584
10338007 -4.033708861 -0.816182819
10338008 -1.588305433 -0.43439881
10338009 -1.118833043 1.356567282
10338010 1.301334823 0.119296088
10338011 -0.749044471 -0.117767044
10338012 0.081844478 -0.872226751
10338013 -3.187629033 -0.28804087
10338014 -3.250464167 -0.511683153
10338015 0.577343589 0.107093975
10338016 -1.738480192 -0.908106661
10338017 -2.704037912 1.275525478
10338018 -1.805249046 1.1794503
10338019 -2.711092844 -0.241043451
10338020 -3.892569204 1.336605933
10338021 -1.323878529 1.41719468
10338022 -4.036438051 -0.662524386
10338023 -1.682829818 1.189579042
10338024 -3.414899757 1.124766604
10338025 -4.472057108 -0.430698931
10338026 -1.084389931 1.17381792
10338027 -2.966603023 0.944200085
10338028 -1.229893201 -0.217455931
10338029 -3.188018557 -0.384909454
10338030 -4.272001238 0.929292089
10338031 0.997914003 1.375628272
10338032 -0.661261035 1.238555004
10338033 -3.832920443 -0.398466443
10338034 -2.505191526 -0.409618043
10338035 -6.127593947 -0.70729368
10338036 -1.336261052 -0.481463648
10338037 -4.341857176 0.923772787
10338038 -1.84683794 -0.244694715
10338039 0.966703369 -0.357295093
10338040 -1.98376636 -0.26990208
10338041 -1.104535249 1.325734693
10338042 -4.092269563 1.47862012
10338043 -2.527177684 2.530546676
10338044 -3.456123701 0.90568175
10338045 -5.930660757 -0.827990798
10338046 -4.274449598 0.808510213
10338047 -1.089275476 1.347003905
10338048 -1.949106548 1.198809192
10338049 0.692898129 1.403489439
10338050 -4.998808649 -0.517797025
10338051 -4.468349401 0.902788355
10338052 -2.216377147 1.182680383
10338053 -1.914397993 1.173263682
10338054 -5.562928111 1.004662615
10338055 -4.200505522 1.416080679
10338056 -1.958814077 1.323083169
10338057 0.015462601 -0.024601567
10338058 -6.277415822 1.005835472
10338059 -3.277584319 1.12659657
10338060 -0.849617878 -0.057893972
10338061 -2.938963241 -0.885793216
10338062 -2.952096378 -0.674990997
10338064 -2.209782421 -0.25994657
10338065 4.718185991 0.428444048
10338066 -2.340908566 -0.768267456
10338067 -2.83862556 -0.706676695
10338068 -1.128998142 -0.096021994
10338069 -1.308596853 -0.679909559
10338070 2.166133968 0.075734167
10338071 1.721545935 0.230092596
10338072 -1.124734382 -0.242915381
10338073 -0.733822347 -0.043961033
10338074 -2.148211154 -0.417546389
10338075 1.940407215 0.008990026
10338076 -2.215137877 -0.731849669
10338077 -2.431121827 -0.474657813
10338078 0.580237295 0.030285433
10338079 2.969461363 0.120736612
10338080 0.543534702 0.095034988
10338081 -2.339729882 -0.436826191
10338082 -4.754517584 -0.579674866
10338083 1.462041368 1.546708696
10338084 -0.743471956 -0.130565393
10338085 3.337893091 0.036832146
10338086 4.529059756 -0.090537145
10338087 2.901743066 -0.057271692
10338088 -2.674564443 -0.397267681
10338089 1.047282655 -0.057400092
10338090 0.712925071 -0.592372867
10338091 1.59355852 -0.401059061
10338092 0.338705305 -0.419523202
10338093 -1.236515816 -0.982981583
10338094 -2.107871143 -0.536686412
10338095 -0.46234494 -0.079915201
10338096 4.064404236 0.438646444
10338097 0.362244431 -0.058895017
10338098 0.121704836 -0.147094448
10338099 -2.107871143 -0.536686412
10338100 5.325816422 0.161737481
10338101 0.821746021 -0.197580217
10338102 1.719194707 0.136977558
10338103 -2.682923849 -0.295796261
10338104 -0.248696752 -0.364373425
10338105 1.819705559 -0.226262828
10338106 8.026229584 0.826844044
10338107 -0.018705272 -0.124130502
10338108 3.792662483 0.069860592
10338109 -1.463888639 -0.791186003
10338110 3.297954688 -1.838920233
10338111 -2.267691155 -0.826091037
10338112 3.558678676 0.133617295
10338113 4.229844203 0.010175905
10338114 4.352831093 0.213780607
10338115 3.332617644 -0.203102665
10338116 2.146060617 -1.403173685
10338117 -1.480258721 -0.640928477
10338118 0.248865631 -0.487692186
10338119 -1.751330098 1.772446679
10338120 2.233113681 0.290045956
10338121 -2.684160652 -0.289694986
10338122 -1.29214 1.370623423
10338123 3.237062465 0.387090922
10338124 1.807964676 -0.031096871
10338125 7.233493043 0.770846476
10338126 -0.181835752 -0.101079884
10338127 -0.921571334 -0.085634656
10338128 0.356440949 -0.11907673
10338129 -0.356939201 -0.469423128
10338130 -0.267330755 -0.125879997
10338131 1.495451929 0.194536932
10338132 -0.251542482 -0.107708457
10338133 2.908377452 -0.837623077
10338134 -2.002771527 -0.231965386
10338135 -3.999515994 -0.461444999
10338136 5.423955063 0.273228568
10338137 1.377893326 -0.692725188
10338138 -0.927352596 -0.218956613
10338139 -3.25042141 1.024100473
10338140 0.739807456 -0.28807236
10338141 1.627472152 -0.50849716
10338142 2.209300937 -0.058964069
10338143 3.738133936 0.17048225
10338144 6.461870121 -0.323736254
10338145 5.540470563 -0.287618767
10338146 -0.442924359 -0.001393775
10338147 2.940056474 -0.158733891
10338148 7.255095841 -0.403101908
10338149 -1.456996354 -0.120282148
10338150 -2.435280292 -0.318679331
10338151 -2.328697407 -0.390520029
10338152 3.681491235 0.195710724
10338153 4.370918419 0.504841485
10338154 -5.301476208 -0.582261903
10338155 -2.6790097 -0.429417658
10338156 -2.211967662 -1.024686835
10338157 5.767159049 -0.981487227
10338158 0.281563158 0.077314055
10338159 -0.608555141 -0.104334681
10338160 2.369739229 0.217622931
10338161 1.699461041 0.201585859
10338162 -0.086371444 -0.773945412
10338163 0.097123514 -0.326667262
10338164 -1.419495528 -0.944207299
10338165 4.681823252 -0.018848156
10338166 -0.821441688 -0.556129961
10338167 2.719125281 2.125813891
10338168 0.419057607 -0.172518081
10338169 -1.625530822 1.162653563
10338170 2.163807844 -0.004231459
10338171 -1.721528833 -0.284169918
10338172 0.190323729 -0.151429704
10338173 0.981417226 0.042870636
10338174 0.373498012 -0.528844745
10338175 -3.150184153 -0.348571887
10338176 -1.55775678 1.312115711
10338177 -3.431738262 -0.318980993
10338178 3.153866072 0.180697185
10338179 -0.537722988 -0.00450727
10338180 0.870707227 0.136054029
10338181 0.574962585 -1.518465142
10338182 4.67681085 0.315591785
10338183 3.825576954 0.075815643
10338184 0.836243477 -0.034548518
10338185 2.124620206 1.725415451
10338186 1.310710119 -0.049988503
10338187 1.465418671 0.15029257
10338188 3.289349643 -0.388874637
10338189 4.332658596 -0.791495103
10338190 1.384957365 0.169220641
10338191 1.03383778 0.158980451
10338192 6.385652082 -0.032534714
10338193 5.927107147 1.874628116
10338194 2.573151523 0.157826035
10338195 0.964216953 -0.348644384
10338196 1.144010648 -0.436689511
10338197 -0.701191976 -0.61358707
10338198 0.793532626 -0.498952765
10338199 2.325807791 0.048466466
10338200 1.905462644 -0.384255624
10338201 4.571045871 0.389670724
10338202 1.270246709 -1.415538805
10338203 2.082688048 -1.603882099
10338204 0.687408744 -0.493692882
10338205 5.510380101 -0.593849878
10338206 -0.38762176 -0.323439712
10338207 -0.46698768 -0.410294181
10338208 1.304400765 1.793220178
10338209 0.933342198 -0.091684219
10338210 1.812822114 -0.50059073
10338211 2.220427859 -0.592171392
10338212 0.34551195 -0.969439201
10338213 -0.633572103 -0.607220617
10338214 3.774071706 -0.239377167
10338215 3.606542191 -0.479660004
10338216 -0.08474595 -0.253646139
10338217 2.472457908 -0.185375619
10338218 -0.211436433 0.006538591
10338219 -1.273556188 -0.421546453
10338220 0.158810307 -0.504647026
10338221 3.048250335 -0.910777276
10338222 -2.389804403 -1.664758072
10338223 -2.03096539 -0.449364539
10338224 -3.246466761 -0.619898075
10338225 0.668450568 -0.084419827
10338226 -0.053503479 -0.153763395
10338227 -0.085185496 -0.014069339
10338228 1.79020949 0.220985627
10338229 -1.361235648 1.278922471
10338230 0.139723368 -0.172690944
10338231 4.524249215 0.386105714
10338232 4.940918154 1.790356551
10338233 -1.872536368 -0.31652033
10338234 6.838989512 0.296204283
10338235 6.691992142 0.256475119
10338236 3.119105612 -0.486212033
10338237 1.648834656 0.01950405
10338238 0.908779431 -0.118811745
10338239 0.416394037 1.565364439
10338240 -0.229636463 0.031129219
10338241 6.097869966 0.482600654
10338242 3.814076191 -0.549552843
10338243 -1.365491873 -0.254508012
10338244 -3.419075636 -0.664374091
10338245 -0.370693624 -0.308799189
10338246 -6.496673191 0.562379388
10338247 -1.343676224 -1.77065301
10338248 -2.669260588 -0.748890818
10338249 -1.128998142 -0.096021994
10338250 -2.142927921 -0.486269927
10338251 0.078420778 0.009870991
10338252 6.078353978 -0.002954292
10338253 1.554591151 0.18062911
10338254 -1.790417467 -0.173535957
10338255 1.893617356 0.1514817
10338256 2.760706974 0.224388536
10338257 -3.780698143 0.837892731
10338258 2.464235047 0.084275951
10338259 -5.105045501 1.030683496
10338260 -4.435960737 0.798940734
10338261 -4.605020971 -0.478626146
10338262 0.78957493 -0.505209961
10338263 0.939841454 0.055613818
10338264 -1.248512037 -0.096154661
import parsecsv
from os import fileExists
from math import pow, sqrt
from strutils import parseInt, parseFloat, strip
#from algorithm import sort
from sequtils import filter, deduplicate, apply
type
Coord = object
#id, cluster: int
id: int
x, y: float
DistanceTest[V] = proc(a: V, b: V): bool
const
epsilon = 1.0
minpt = 37
proc getCoords(filename: string): seq[Coord] =
result = newSeq[Coord]()
var csv: CsvParser
csv.open filename
csv.readHeaderRow
defer: csv.close()
var
x, y: float
id: int
while csv.readRow:
for entry in csv.headers.items:
let rowval = csv.rowEntry(entry).strip
case entry
of "ID": id = try: rowval.parseInt except: -1
of "X1": x = try: rowval.parseFloat except: 100.0
of "X2": y = try: rowval.parseFloat except: 100.0
result.add Coord(id: id, x: x, y: y)
proc `==`(a, b: Coord): bool = a.id == b.id and a.x == b.x and a.y == b.y
#[
proc cmp(a, b: Coord): int =
if a.id == b.id: 0
elif a.id < b.id: -1
elif a.id > b.id: 1
elif a.x > b.x: 1
elif a.x == b.x:
if a.y > b.y: 1
else: -1
else: -1
]#
proc euclideanDist(p1, p2: Coord): float =
sqrt((p1.x - p2.x).pow(2) + (p1.y - p2.y).pow(2))
proc neighbourTest(post, node: Coord): bool =
post.euclideanDist(node) < epsilon
# Define implementation of how to verify the cluster
# In this case, cluster is deemed okay when it's member more than `minpt`
proc consideredOkay[V](cluster: seq[V]): bool = cluster.len > minpt
proc union[T](a: var seq[T], b: seq[T]) =
for belm in b:
if belm notin a:
a.add belm
proc `+=`[T](a: var seq[T], b: seq[T]) =
a.union b
proc clustering[V](input: seq[V], test: DistanceTest[V]): seq[seq[V]] =
result = newSeq[seq[V]]()
var visited = newSeq[V]()
proc getNeighbours(node: V): seq[V] = input.filter(proc(a: V): bool =
a notin visited and a.test(node))
for node in input:
if node notin visited:
visited.add node
var cluster = newSeq[V]()
for neighbour in node.getNeighbours:
if neighbour notin visited:
visited.add neighbour
cluster += neighbour.getNeighbours.deduplicate
if cluster.consideredOkay:
result.add cluster
echo "visited length: ", visited.len
result = result.deduplicate
proc main =
let
fname = "dataset3ID.csv"
coords = if fileExists(fname):
getCoords fname
else:
@[]
if coords.len == 0:
quit "No " & fname & " available"
echo "coords data ", coords.len
let clusters = coords.clustering neighbourTest
echo "clusters are ", clusters.len
var sumelm = 0
for index, cluster in clusters.pairs:
echo "cluster ", index, " has element ", cluster.len
sumelm += cluster.len
echo "total elements ", sumelm
for index, cluster in clusters.pairs:
echo "elm ", index, " with length ", cluster.len
echo cluster
when isMainModule:
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment