Last active
August 29, 2015 14:04
-
-
Save film42/ea4af23daee52b266ee4 to your computer and use it in GitHub Desktop.
(WIP) Starting to do a K Nearest Neighbor example. Repo with latest here: https://github.com/film42/knn-property-categorization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// knn.h | |
// k_nearest_neighbor | |
// | |
// http://burakkanber.com/blog/machine-learning-in-js-k-nearest-neighbor-part-1/ | |
// | |
// Created by Garrett Thornburg on 8/1/14. | |
// Copyright (c) 2014 gt. All rights reserved. | |
// | |
#ifndef k_nearest_neighbor_knn_h | |
#define k_nearest_neighbor_knn_h | |
#include <vector> | |
#include <cmath> | |
#include <algorithm> | |
#include <map> | |
#include <limits> | |
namespace knn { | |
enum class PropertyType { | |
House, | |
Apartment, | |
Flat, | |
Unknown | |
}; | |
struct Property { | |
explicit Property(double _rooms, double _area, PropertyType _type = PropertyType::Unknown) | |
: rooms(_rooms), area(_area), type(_type) {} | |
void measure_distance( double rooms_range, double area_range ) { | |
for( auto& neighbor : neighbors ) { | |
double delta_rooms = ( neighbor.rooms - this->rooms ) / rooms_range; | |
double delta_area = ( neighbor.area - this->area ) / area_range; | |
double delta_rooms_squared = delta_rooms * delta_rooms; | |
double delta_area_squared = delta_area * delta_area; | |
neighbor.distance = std::sqrt( delta_rooms_squared + delta_area_squared ); | |
} | |
} | |
PropertyType guess_type( double k, bool self_assign = false ) { | |
std::map< PropertyType, int > totals; | |
for( auto it = neighbors.begin(); it != ( neighbors.begin() + k ); ++it ) { | |
auto& neighbor = *it; | |
// Init type in totals | |
if( totals.count( neighbor.type ) <= 0 ) { | |
totals.insert( std::pair< PropertyType, double >(neighbor.type, 0) ); | |
} | |
// Increment | |
++totals[ neighbor.type ]; | |
} | |
// Calculate | |
auto max_type = PropertyType::Unknown; | |
int max_count = 0; | |
for( auto pair : totals ) { | |
if( pair.second > max_count ) { | |
max_type = pair.first; | |
max_count = pair.second; | |
} | |
} | |
if( self_assign ) { | |
this->type = max_type; | |
} | |
return max_type; | |
} | |
double rooms; | |
double area; | |
PropertyType type; | |
std::vector< Property > neighbors; | |
double distance; | |
}; | |
class PropertyList { | |
public: | |
PropertyList( std::vector< Property > properties ) | |
: m_properties(properties) {} | |
std::vector< Property > get_data() { return m_properties; } | |
PropertyType guess_type( Property p, int k = 1 ) { | |
// Clear out work done | |
p.neighbors.clear(); | |
// Add neighbors | |
for( auto property_2 : m_properties ) { | |
p.neighbors.push_back( property_2 ); | |
} | |
// Measure the distance | |
double rooms_range = get_max_rooms() - get_min_rooms(); | |
double area_range = get_max_area() - get_min_area(); | |
p.measure_distance(rooms_range, area_range); | |
// Sort by the distance | |
std::sort(p.neighbors.begin(), p.neighbors.end(), [](Property& p1, Property& p2) { | |
return p1.distance < p2.distance; | |
}); | |
// Guess the type | |
return p.guess_type( k ); | |
} | |
private: | |
std::vector< Property > m_properties; | |
int get_max_rooms() { | |
double max_rooms = 0; | |
for( auto p : m_properties ) { | |
if( p.rooms > max_rooms ) { | |
max_rooms = p.rooms; | |
} | |
} | |
return max_rooms; | |
} | |
int get_max_area() { | |
double max_area = 0; | |
for( auto p : m_properties ) { | |
if( p.area > max_area ) { | |
max_area = p.area; | |
} | |
} | |
return max_area; | |
} | |
int get_min_rooms() { | |
double min_rooms = std::numeric_limits<double>().max(); | |
for( auto p : m_properties ) { | |
if( min_rooms > p.rooms ) { | |
min_rooms = p.rooms; | |
} | |
} | |
return min_rooms; | |
} | |
int get_min_area() { | |
double min_area = std::numeric_limits<double>().max(); | |
for( auto p : m_properties ) { | |
if( min_area > p.area ) { | |
min_area = p.area; | |
} | |
} | |
return min_area; | |
} | |
void _normalize() { | |
double max_rooms = get_max_rooms(); | |
double max_area = get_max_area(); | |
for( auto &p : m_properties ) { | |
p.rooms = p.rooms / max_rooms; | |
p.area = p.area / max_area; | |
} | |
} | |
}; | |
} | |
namespace knn { | |
// | |
// Example Data | |
// | |
const static std::vector< Property > properties = { | |
Property( 1, 350, PropertyType::Apartment ), | |
Property( 2, 300, PropertyType::Apartment ), | |
Property( 3, 300, PropertyType::Apartment ), | |
Property( 4, 250, PropertyType::Apartment ), | |
Property( 4, 500, PropertyType::Apartment ), | |
Property( 4, 400, PropertyType::Apartment ), | |
Property( 5, 450, PropertyType::Apartment ), | |
Property( 7, 850, PropertyType::House ), | |
Property( 7, 900, PropertyType::House ), | |
Property( 7, 1200, PropertyType::House ), | |
Property( 8, 1500, PropertyType::House ), | |
Property( 9, 1300, PropertyType::House ), | |
Property( 8, 1240, PropertyType::House ), | |
Property( 10, 1700, PropertyType::House ), | |
Property( 9, 1000, PropertyType::House ), | |
Property( 1, 800, PropertyType::Flat ), | |
Property( 3, 900, PropertyType::Flat ), | |
Property( 2, 700, PropertyType::Flat ), | |
Property( 1, 900, PropertyType::Flat ), | |
Property( 2, 1150, PropertyType::Flat ), | |
Property( 1, 1000, PropertyType::Flat ), | |
Property( 2, 1200, PropertyType::Flat ), | |
Property( 1, 1300, PropertyType::Flat ) | |
}; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// main.cpp | |
// k_nearest_neighbor | |
// | |
// Created by Garrett Thornburg on 8/1/14. | |
// Copyright (c) 2014 gt. All rights reserved. | |
// | |
#include <iostream> | |
#include "knn.h" | |
std::string to_string( knn::PropertyType type ) { | |
switch (type) { | |
case knn::PropertyType::House: | |
return "House"; | |
case knn::PropertyType::Apartment: | |
return "Apartment"; | |
case knn::PropertyType::Flat: | |
return "Flat"; | |
default: | |
return "Not Found"; | |
} | |
} | |
int main(int argc, const char * argv[]) { | |
knn::PropertyList property_list( knn::properties ); | |
auto type1 = property_list.guess_type( knn::Property( 4, 700 ), 3 ); | |
auto type2 = property_list.guess_type( knn::Property( 1, 1700 ), 3 ); | |
auto type3 = property_list.guess_type( knn::Property( 5, 2700 ), 3 ); | |
auto type4 = property_list.guess_type( knn::Property( 3, 300 ), 3 ); | |
auto type5 = property_list.guess_type( knn::Property( 2, 700 ), 3 ); | |
std::cout << to_string( type1 ) << std::endl; | |
std::cout << to_string( type2 ) << std::endl; | |
std::cout << to_string( type3 ) << std::endl; | |
std::cout << to_string( type4 ) << std::endl; | |
std::cout << to_string( type5 ) << std::endl; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Apartment | |
Flat | |
House | |
Apartment | |
Flat |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment