Skip to content

Instantly share code, notes, and snippets.

@tautologico
Created June 17, 2013 19:28
Show Gist options
  • Save tautologico/5799575 to your computer and use it in GitHub Desktop.
Save tautologico/5799575 to your computer and use it in GitHub Desktop.
Classificação do conjunto Iris usando knn em Julia.
#
# iris.jl
# Classification of the Iris data set using k-nearest neighbors (knn)
#
# Andrei Formiga, 2013-06-17
#
using DataFrames
# Paths of the training and test data sets loaded by `test_knn`.
# Declared `const` so that the functions reading these globals are not slowed
# down by untyped global lookups.
const train_file = "iris.train.csv"
const test_file = "iris.test.csv"

# Class labels that `classify_knn` votes over and may return.
const classes = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
"""
    distance(data, i, seplen, sepwid, petlen, petwid)

Return the squared Euclidean distance between row `i` of `data` and the point
described by the four measurements.

The columns `"seplen"`, `"sepwid"`, `"petlen"` and `"petwid"` of `data` are each
compared with the argument of the same name. No square root is taken, so the result
is the sum of the squared differences.
"""
function distance(data::AbstractDataFrame, i::Int,
                  seplen::Float64, sepwid::Float64,
                  petlen::Float64, petwid::Float64)
    targets = (("seplen", seplen), ("sepwid", sepwid),
               ("petlen", petlen), ("petwid", petwid))
    total = 0.0
    for (column, value) in targets
        total += (data[i, column] - value) ^ 2
    end
    return total
end
"""
    insert_neighbor(neighbors, dists, d, n)

Insert candidate `n`, at distance `d`, into a fixed-size list of nearest neighbors.

`neighbors[i]` is the item whose distance is `dists[i]`. The candidate is placed
before the first of the leading `length(neighbors)` entries of `dists` that is
strictly greater than `d`, and the last entry of each vector is then dropped so that
both keep their length. This keeps `dists` in ascending order provided it already
was. If no entry is greater than `d`, nothing changes.

Both vectors are updated in place and are also returned as `(neighbors, dists)`.
"""
function insert_neighbor(neighbors, dists, d::Float64, n::Int)
    l = length(neighbors)
    index = findfirst(i -> d < dists[i], 1:l)
    if index !== nothing
        insert!(neighbors, index, n)
        insert!(dists, index, d)
        # Drop the entry pushed past the end; without this the caller's vectors
        # would grow by one element on every successful insertion.
        pop!(neighbors)
        pop!(dists)
    end
    return (neighbors, dists)
end
"""
    classify_knn(k, data, seplen, sepwid, petlen, petwid)

Classify a sample by majority vote among its `k` nearest rows of `data`.

Every row of `data` is compared with the sample through `distance`, and the row
indices of the `k` closest ones are collected with `insert_neighbor`. Each collected
row votes for the entry of the global `classes` equal to its `"class"` value; rows
whose value is not listed cast no vote. The label with the most votes is returned,
and ties go to the label that comes first in `classes`.

Throws an `ArgumentError` if `k` is smaller than 1, and an error if no vote could be
cast (for example when `data` has no rows).
"""
function classify_knn(k, data, seplen::Float64, sepwid::Float64,
                      petlen::Float64, petwid::Float64)
    k >= 1 || throw(ArgumentError("k must be at least 1, got $k"))

    # Every slot starts empty: row index 0 at infinite distance.
    neighbors = zeros(Int, k)
    dists = fill(Inf, k)
    for i in 1:nrow(data)
        d = distance(data, i, seplen, sepwid, petlen, petwid)
        # Rebind both vectors so the two lists always stay paired.
        neighbors, dists = insert_neighbor(neighbors, dists, d, i)
    end

    votes = zeros(Int, length(classes))
    for i in neighbors
        # Slots are still 0 when fewer than `k` rows were inserted; they cast no vote.
        i == 0 && continue
        c = findfirst(==(data[i, "class"]), classes)
        c === nothing || (votes[c] += 1)
    end

    # `findmax` reports the first maximum, so ties go to the class listed first.
    top, winner = findmax(votes)
    top > 0 || error("no neighbor has a class listed in `classes`")
    return classes[winner]
end
"""
    test_knn(k)

Evaluate the knn classifier on the test set and print its accuracy.

Loads `train_file` and `test_file` with `read_table`, classifies every row of the
test set with `classify_knn` using `k` neighbors from the training set, and prints
the number and percentage of rows whose predicted class equals the row's `"class"`
value.
"""
function test_knn(k::Int)
    # NOTE(review): `read_table` is not defined in this file; presumably it is
    # provided by DataFrames — confirm it exists in the installed version.
    iris_train = read_table(train_file)
    iris_test = read_table(test_file)

    # Count the test rows whose prediction matches the recorded class.
    hits = count(1:nrow(iris_test)) do row
        predicted = classify_knn(k, iris_train,
                                 iris_test[row, "seplen"], iris_test[row, "sepwid"],
                                 iris_test[row, "petlen"], iris_test[row, "petwid"])
        predicted == iris_test[row, "class"]
    end

    percent = hits / nrow(iris_test) * 100.0
    println("Classificação por knn com k = $k")
    println("Acertos: $hits / $(nrow(iris_test)) ($percent %)")
end
# Run the evaluation with k = 3 unless Julia is running interactively.
isinteractive() || test_knn(3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment