Last active
October 15, 2017 17:43
-
-
Save mashingan/af5d5311223794b6cc33efe2c795bbae to your computer and use it in GitHub Desktop.
Rough dbscans implementation with dataset3ID.csv as the data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ID | X1 | X2 | |
---|---|---|---|
10338001 | -4.673253857 | -0.612095787 | |
10338002 | -4.938803684 | -0.469263745 | |
10338003 | -5.460488163 | 0.917422708 | |
10338004 | -5.363523692 | -0.616513985 | |
10338005 | 0.919264591 | 0.112761882 | |
10338006 | -1.309394745 | -0.095029584 | |
10338007 | -4.033708861 | -0.816182819 | |
10338008 | -1.588305433 | -0.43439881 | |
10338009 | -1.118833043 | 1.356567282 | |
10338010 | 1.301334823 | 0.119296088 | |
10338011 | -0.749044471 | -0.117767044 | |
10338012 | 0.081844478 | -0.872226751 | |
10338013 | -3.187629033 | -0.28804087 | |
10338014 | -3.250464167 | -0.511683153 | |
10338015 | 0.577343589 | 0.107093975 | |
10338016 | -1.738480192 | -0.908106661 | |
10338017 | -2.704037912 | 1.275525478 | |
10338018 | -1.805249046 | 1.1794503 | |
10338019 | -2.711092844 | -0.241043451 | |
10338020 | -3.892569204 | 1.336605933 | |
10338021 | -1.323878529 | 1.41719468 | |
10338022 | -4.036438051 | -0.662524386 | |
10338023 | -1.682829818 | 1.189579042 | |
10338024 | -3.414899757 | 1.124766604 | |
10338025 | -4.472057108 | -0.430698931 | |
10338026 | -1.084389931 | 1.17381792 | |
10338027 | -2.966603023 | 0.944200085 | |
10338028 | -1.229893201 | -0.217455931 | |
10338029 | -3.188018557 | -0.384909454 | |
10338030 | -4.272001238 | 0.929292089 | |
10338031 | 0.997914003 | 1.375628272 | |
10338032 | -0.661261035 | 1.238555004 | |
10338033 | -3.832920443 | -0.398466443 | |
10338034 | -2.505191526 | -0.409618043 | |
10338035 | -6.127593947 | -0.70729368 | |
10338036 | -1.336261052 | -0.481463648 | |
10338037 | -4.341857176 | 0.923772787 | |
10338038 | -1.84683794 | -0.244694715 | |
10338039 | 0.966703369 | -0.357295093 | |
10338040 | -1.98376636 | -0.26990208 | |
10338041 | -1.104535249 | 1.325734693 | |
10338042 | -4.092269563 | 1.47862012 | |
10338043 | -2.527177684 | 2.530546676 | |
10338044 | -3.456123701 | 0.90568175 | |
10338045 | -5.930660757 | -0.827990798 | |
10338046 | -4.274449598 | 0.808510213 | |
10338047 | -1.089275476 | 1.347003905 | |
10338048 | -1.949106548 | 1.198809192 | |
10338049 | 0.692898129 | 1.403489439 | |
10338050 | -4.998808649 | -0.517797025 | |
10338051 | -4.468349401 | 0.902788355 | |
10338052 | -2.216377147 | 1.182680383 | |
10338053 | -1.914397993 | 1.173263682 | |
10338054 | -5.562928111 | 1.004662615 | |
10338055 | -4.200505522 | 1.416080679 | |
10338056 | -1.958814077 | 1.323083169 | |
10338057 | 0.015462601 | -0.024601567 | |
10338058 | -6.277415822 | 1.005835472 | |
10338059 | -3.277584319 | 1.12659657 | |
10338060 | -0.849617878 | -0.057893972 | |
10338061 | -2.938963241 | -0.885793216 | |
10338062 | -2.952096378 | -0.674990997 | |
10338064 | -2.209782421 | -0.25994657 | |
10338065 | 4.718185991 | 0.428444048 | |
10338066 | -2.340908566 | -0.768267456 | |
10338067 | -2.83862556 | -0.706676695 | |
10338068 | -1.128998142 | -0.096021994 | |
10338069 | -1.308596853 | -0.679909559 | |
10338070 | 2.166133968 | 0.075734167 | |
10338071 | 1.721545935 | 0.230092596 | |
10338072 | -1.124734382 | -0.242915381 | |
10338073 | -0.733822347 | -0.043961033 | |
10338074 | -2.148211154 | -0.417546389 | |
10338075 | 1.940407215 | 0.008990026 | |
10338076 | -2.215137877 | -0.731849669 | |
10338077 | -2.431121827 | -0.474657813 | |
10338078 | 0.580237295 | 0.030285433 | |
10338079 | 2.969461363 | 0.120736612 | |
10338080 | 0.543534702 | 0.095034988 | |
10338081 | -2.339729882 | -0.436826191 | |
10338082 | -4.754517584 | -0.579674866 | |
10338083 | 1.462041368 | 1.546708696 | |
10338084 | -0.743471956 | -0.130565393 | |
10338085 | 3.337893091 | 0.036832146 | |
10338086 | 4.529059756 | -0.090537145 | |
10338087 | 2.901743066 | -0.057271692 | |
10338088 | -2.674564443 | -0.397267681 | |
10338089 | 1.047282655 | -0.057400092 | |
10338090 | 0.712925071 | -0.592372867 | |
10338091 | 1.59355852 | -0.401059061 | |
10338092 | 0.338705305 | -0.419523202 | |
10338093 | -1.236515816 | -0.982981583 | |
10338094 | -2.107871143 | -0.536686412 | |
10338095 | -0.46234494 | -0.079915201 | |
10338096 | 4.064404236 | 0.438646444 | |
10338097 | 0.362244431 | -0.058895017 | |
10338098 | 0.121704836 | -0.147094448 | |
10338099 | -2.107871143 | -0.536686412 | |
10338100 | 5.325816422 | 0.161737481 | |
10338101 | 0.821746021 | -0.197580217 | |
10338102 | 1.719194707 | 0.136977558 | |
10338103 | -2.682923849 | -0.295796261 | |
10338104 | -0.248696752 | -0.364373425 | |
10338105 | 1.819705559 | -0.226262828 | |
10338106 | 8.026229584 | 0.826844044 | |
10338107 | -0.018705272 | -0.124130502 | |
10338108 | 3.792662483 | 0.069860592 | |
10338109 | -1.463888639 | -0.791186003 | |
10338110 | 3.297954688 | -1.838920233 | |
10338111 | -2.267691155 | -0.826091037 | |
10338112 | 3.558678676 | 0.133617295 | |
10338113 | 4.229844203 | 0.010175905 | |
10338114 | 4.352831093 | 0.213780607 | |
10338115 | 3.332617644 | -0.203102665 | |
10338116 | 2.146060617 | -1.403173685 | |
10338117 | -1.480258721 | -0.640928477 | |
10338118 | 0.248865631 | -0.487692186 | |
10338119 | -1.751330098 | 1.772446679 | |
10338120 | 2.233113681 | 0.290045956 | |
10338121 | -2.684160652 | -0.289694986 | |
10338122 | -1.29214 | 1.370623423 | |
10338123 | 3.237062465 | 0.387090922 | |
10338124 | 1.807964676 | -0.031096871 | |
10338125 | 7.233493043 | 0.770846476 | |
10338126 | -0.181835752 | -0.101079884 | |
10338127 | -0.921571334 | -0.085634656 | |
10338128 | 0.356440949 | -0.11907673 | |
10338129 | -0.356939201 | -0.469423128 | |
10338130 | -0.267330755 | -0.125879997 | |
10338131 | 1.495451929 | 0.194536932 | |
10338132 | -0.251542482 | -0.107708457 | |
10338133 | 2.908377452 | -0.837623077 | |
10338134 | -2.002771527 | -0.231965386 | |
10338135 | -3.999515994 | -0.461444999 | |
10338136 | 5.423955063 | 0.273228568 | |
10338137 | 1.377893326 | -0.692725188 | |
10338138 | -0.927352596 | -0.218956613 | |
10338139 | -3.25042141 | 1.024100473 | |
10338140 | 0.739807456 | -0.28807236 | |
10338141 | 1.627472152 | -0.50849716 | |
10338142 | 2.209300937 | -0.058964069 | |
10338143 | 3.738133936 | 0.17048225 | |
10338144 | 6.461870121 | -0.323736254 | |
10338145 | 5.540470563 | -0.287618767 | |
10338146 | -0.442924359 | -0.001393775 | |
10338147 | 2.940056474 | -0.158733891 | |
10338148 | 7.255095841 | -0.403101908 | |
10338149 | -1.456996354 | -0.120282148 | |
10338150 | -2.435280292 | -0.318679331 | |
10338151 | -2.328697407 | -0.390520029 | |
10338152 | 3.681491235 | 0.195710724 | |
10338153 | 4.370918419 | 0.504841485 | |
10338154 | -5.301476208 | -0.582261903 | |
10338155 | -2.6790097 | -0.429417658 | |
10338156 | -2.211967662 | -1.024686835 | |
10338157 | 5.767159049 | -0.981487227 | |
10338158 | 0.281563158 | 0.077314055 | |
10338159 | -0.608555141 | -0.104334681 | |
10338160 | 2.369739229 | 0.217622931 | |
10338161 | 1.699461041 | 0.201585859 | |
10338162 | -0.086371444 | -0.773945412 | |
10338163 | 0.097123514 | -0.326667262 | |
10338164 | -1.419495528 | -0.944207299 | |
10338165 | 4.681823252 | -0.018848156 | |
10338166 | -0.821441688 | -0.556129961 | |
10338167 | 2.719125281 | 2.125813891 | |
10338168 | 0.419057607 | -0.172518081 | |
10338169 | -1.625530822 | 1.162653563 | |
10338170 | 2.163807844 | -0.004231459 | |
10338171 | -1.721528833 | -0.284169918 | |
10338172 | 0.190323729 | -0.151429704 | |
10338173 | 0.981417226 | 0.042870636 | |
10338174 | 0.373498012 | -0.528844745 | |
10338175 | -3.150184153 | -0.348571887 | |
10338176 | -1.55775678 | 1.312115711 | |
10338177 | -3.431738262 | -0.318980993 | |
10338178 | 3.153866072 | 0.180697185 | |
10338179 | -0.537722988 | -0.00450727 | |
10338180 | 0.870707227 | 0.136054029 | |
10338181 | 0.574962585 | -1.518465142 | |
10338182 | 4.67681085 | 0.315591785 | |
10338183 | 3.825576954 | 0.075815643 | |
10338184 | 0.836243477 | -0.034548518 | |
10338185 | 2.124620206 | 1.725415451 | |
10338186 | 1.310710119 | -0.049988503 | |
10338187 | 1.465418671 | 0.15029257 | |
10338188 | 3.289349643 | -0.388874637 | |
10338189 | 4.332658596 | -0.791495103 | |
10338190 | 1.384957365 | 0.169220641 | |
10338191 | 1.03383778 | 0.158980451 | |
10338192 | 6.385652082 | -0.032534714 | |
10338193 | 5.927107147 | 1.874628116 | |
10338194 | 2.573151523 | 0.157826035 | |
10338195 | 0.964216953 | -0.348644384 | |
10338196 | 1.144010648 | -0.436689511 | |
10338197 | -0.701191976 | -0.61358707 | |
10338198 | 0.793532626 | -0.498952765 | |
10338199 | 2.325807791 | 0.048466466 | |
10338200 | 1.905462644 | -0.384255624 | |
10338201 | 4.571045871 | 0.389670724 | |
10338202 | 1.270246709 | -1.415538805 | |
10338203 | 2.082688048 | -1.603882099 | |
10338204 | 0.687408744 | -0.493692882 | |
10338205 | 5.510380101 | -0.593849878 | |
10338206 | -0.38762176 | -0.323439712 | |
10338207 | -0.46698768 | -0.410294181 | |
10338208 | 1.304400765 | 1.793220178 | |
10338209 | 0.933342198 | -0.091684219 | |
10338210 | 1.812822114 | -0.50059073 | |
10338211 | 2.220427859 | -0.592171392 | |
10338212 | 0.34551195 | -0.969439201 | |
10338213 | -0.633572103 | -0.607220617 | |
10338214 | 3.774071706 | -0.239377167 | |
10338215 | 3.606542191 | -0.479660004 | |
10338216 | -0.08474595 | -0.253646139 | |
10338217 | 2.472457908 | -0.185375619 | |
10338218 | -0.211436433 | 0.006538591 | |
10338219 | -1.273556188 | -0.421546453 | |
10338220 | 0.158810307 | -0.504647026 | |
10338221 | 3.048250335 | -0.910777276 | |
10338222 | -2.389804403 | -1.664758072 | |
10338223 | -2.03096539 | -0.449364539 | |
10338224 | -3.246466761 | -0.619898075 | |
10338225 | 0.668450568 | -0.084419827 | |
10338226 | -0.053503479 | -0.153763395 | |
10338227 | -0.085185496 | -0.014069339 | |
10338228 | 1.79020949 | 0.220985627 | |
10338229 | -1.361235648 | 1.278922471 | |
10338230 | 0.139723368 | -0.172690944 | |
10338231 | 4.524249215 | 0.386105714 | |
10338232 | 4.940918154 | 1.790356551 | |
10338233 | -1.872536368 | -0.31652033 | |
10338234 | 6.838989512 | 0.296204283 | |
10338235 | 6.691992142 | 0.256475119 | |
10338236 | 3.119105612 | -0.486212033 | |
10338237 | 1.648834656 | 0.01950405 | |
10338238 | 0.908779431 | -0.118811745 | |
10338239 | 0.416394037 | 1.565364439 | |
10338240 | -0.229636463 | 0.031129219 | |
10338241 | 6.097869966 | 0.482600654 | |
10338242 | 3.814076191 | -0.549552843 | |
10338243 | -1.365491873 | -0.254508012 | |
10338244 | -3.419075636 | -0.664374091 | |
10338245 | -0.370693624 | -0.308799189 | |
10338246 | -6.496673191 | 0.562379388 | |
10338247 | -1.343676224 | -1.77065301 | |
10338248 | -2.669260588 | -0.748890818 | |
10338249 | -1.128998142 | -0.096021994 | |
10338250 | -2.142927921 | -0.486269927 | |
10338251 | 0.078420778 | 0.009870991 | |
10338252 | 6.078353978 | -0.002954292 | |
10338253 | 1.554591151 | 0.18062911 | |
10338254 | -1.790417467 | -0.173535957 | |
10338255 | 1.893617356 | 0.1514817 | |
10338256 | 2.760706974 | 0.224388536 | |
10338257 | -3.780698143 | 0.837892731 | |
10338258 | 2.464235047 | 0.084275951 | |
10338259 | -5.105045501 | 1.030683496 | |
10338260 | -4.435960737 | 0.798940734 | |
10338261 | -4.605020971 | -0.478626146 | |
10338262 | 0.78957493 | -0.505209961 | |
10338263 | 0.939841454 | 0.055613818 | |
10338264 | -1.248512037 | -0.096154661 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import parsecsv | |
from os import fileExists | |
from math import pow, sqrt | |
from strutils import parseInt, parseFloat, strip | |
#from algorithm import sort | |
from sequtils import filter, deduplicate, apply | |
type | |
Coord = object | |
#id, cluster: int | |
id: int | |
x, y: float | |
DistanceTest[V] = proc(a: V, b: V): bool | |
const | |
epsilon = 1.0 | |
minpt = 37 | |
proc getCoords(filename: string): seq[Coord] = | |
result = newSeq[Coord]() | |
var csv: CsvParser | |
csv.open filename | |
csv.readHeaderRow | |
defer: csv.close() | |
var | |
x, y: float | |
id: int | |
while csv.readRow: | |
for entry in csv.headers.items: | |
let rowval = csv.rowEntry(entry).strip | |
case entry | |
of "ID": id = try: rowval.parseInt except: -1 | |
of "X1": x = try: rowval.parseFloat except: 100.0 | |
of "X2": y = try: rowval.parseFloat except: 100.0 | |
result.add Coord(id: id, x: x, y: y) | |
proc `==`(a, b: Coord): bool = a.id == b.id and a.x == b.x and a.y == b.y | |
#[ | |
proc cmp(a, b: Coord): int = | |
if a.id == b.id: 0 | |
elif a.id < b.id: -1 | |
elif a.id > b.id: 1 | |
elif a.x > b.x: 1 | |
elif a.x == b.x: | |
if a.y > b.y: 1 | |
else: -1 | |
else: -1 | |
]# | |
proc euclideanDist(p1, p2: Coord): float = | |
sqrt((p1.x - p2.x).pow(2) + (p1.y - p2.y).pow(2)) | |
proc neighbourTest(post, node: Coord): bool = | |
post.euclideanDist(node) < epsilon | |
# Define implementation of how to verify the cluster | |
# In this case, cluster is deemed okay when it's member more than `minpt` | |
proc consideredOkay[V](cluster: seq[V]): bool = cluster.len > minpt | |
proc union[T](a: var seq[T], b: seq[T]) = | |
for belm in b: | |
if belm notin a: | |
a.add belm | |
proc `+=`[T](a: var seq[T], b: seq[T]) = | |
a.union b | |
proc clustering[V](input: seq[V], test: DistanceTest[V]): seq[seq[V]] = | |
result = newSeq[seq[V]]() | |
var visited = newSeq[V]() | |
proc getNeighbours(node: V): seq[V] = input.filter(proc(a: V): bool = | |
a notin visited and a.test(node)) | |
for node in input: | |
if node notin visited: | |
visited.add node | |
var cluster = newSeq[V]() | |
for neighbour in node.getNeighbours: | |
if neighbour notin visited: | |
visited.add neighbour | |
cluster += neighbour.getNeighbours.deduplicate | |
if cluster.consideredOkay: | |
result.add cluster | |
echo "visited length: ", visited.len | |
result = result.deduplicate | |
proc main = | |
let | |
fname = "dataset3ID.csv" | |
coords = if fileExists(fname): | |
getCoords fname | |
else: | |
@[] | |
if coords.len == 0: | |
quit "No " & fname & " available" | |
echo "coords data ", coords.len | |
let clusters = coords.clustering neighbourTest | |
echo "clusters are ", clusters.len | |
var sumelm = 0 | |
for index, cluster in clusters.pairs: | |
echo "cluster ", index, " has element ", cluster.len | |
sumelm += cluster.len | |
echo "total elements ", sumelm | |
for index, cluster in clusters.pairs: | |
echo "elm ", index, " with length ", cluster.len | |
echo cluster | |
when isMainModule: | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment