@PirosB3
Created December 1, 2014 10:35
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
#[derive(Clone)]
struct DataEntry {
    features: Vec<f64>,
    target: f64,
}
struct LogisticRegression {
    dataset: Vec<DataEntry>,
    weights: Vec<f64>,
}
/// Dot product of a feature vector with the weights. weights[0] is the
/// bias term, so data_entry[i] pairs with weights[i + 1].
pub fn vector_vector_mul(data_entry: &[f64], weights: &[f64]) -> f64 {
    let mut total: f64 = weights[0];
    for (feature, weight) in data_entry.iter().zip(&weights[1..]) {
        total += feature * weight;
    }
    total
}
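// Sanity check on the bias handling: with weights [0.5, 1.0, -1.0] and
// features [2.0, 3.0], the result is 0.5 + 2.0 - 3.0 = -0.5. The numbers
// are illustrative only.
#[test]
fn vector_vector_mul_applies_bias_then_weights() {
    let total = vector_vector_mul(&[2.0, 3.0], &[0.5, 1.0, -1.0]);
    assert!((total - (-0.5)).abs() < 1e-12);
}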
/// Logistic function 1 / (1 + e^-x), mapping any real x into (0, 1).
pub fn sigmoid(x: f64) -> f64 {
    1.0 / (1.0 + (-x).exp())
}
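// Sanity check on the activation: sigmoid(0) sits exactly on the 0.5
// decision boundary and saturates towards 0 and 1 at the extremes.
#[test]
fn sigmoid_behaves_like_a_logistic_curve() {
    assert!((sigmoid(0.0) - 0.5).abs() < 1e-12);
    assert!(sigmoid(20.0) > 0.99);
    assert!(sigmoid(-20.0) < 0.01);
}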
impl LogisticRegression {
    pub fn from_data_entries(dataset: &[DataEntry]) -> LogisticRegression {
        // One weight per feature, plus a leading weight for the bias term,
        // all initialised to 1.0.
        let len = dataset[0].features.len() + 1;
        LogisticRegression {
            dataset: dataset.to_vec(),
            weights: vec![1.0; len],
        }
    }
    pub fn train(&mut self, epochs: usize) {
        for _ in 0..epochs {
            // Mean cross-entropy cost over the dataset, printed each epoch
            // so training progress can be watched.
            let cost_of_theta: f64 = self.dataset.iter().map(|ds| {
                let reality = sigmoid(vector_vector_mul(ds.feature_vector(), &self.weights));
                -ds.target * reality.ln() - (1.0 - ds.target) * (1.0 - reality).ln()
            }).sum::<f64>() / self.dataset.len() as f64;
            println!("{}", cost_of_theta);

            // Hypothesis h(x) for every example under the current weights.
            let costs: Vec<f64> = self.dataset.iter().map(|ds| {
                sigmoid(vector_vector_mul(ds.feature_vector(), &self.weights))
            }).collect();

            // Batch gradient for each weight: the mean of (h(x) - y) * x_i,
            // where x_0 is the implicit constant 1 feeding the bias weight.
            let weight_gradients: Vec<f64> = (0..self.weights.len()).map(|i| {
                let total: f64 = self.dataset.iter().zip(&costs).map(|(ds, hypothesis)| {
                    let x_of_i = if i == 0 {
                        1.0
                    } else {
                        ds.feature_vector()[i - 1]
                    };
                    (*hypothesis - ds.target) * x_of_i
                }).sum();
                total / self.dataset.len() as f64
            }).collect();

            // Gradient-descent step with a fixed learning rate of 0.001.
            for (weight, gradient) in self.weights.iter_mut().zip(&weight_gradients) {
                *weight -= 0.001 * *gradient;
            }
        }
    }
}
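// Hedged usage sketch: a trained model can score a new example by pushing it
// through the same hypothesis, thresholding the result at 0.5 to pick a
// class. The `predict` name is illustrative, not part of the gist's API.
impl LogisticRegression {
    pub fn predict(&self, features: &[f64]) -> f64 {
        sigmoid(vector_vector_mul(features, &self.weights))
    }
}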
impl DataEntry {
    pub fn new(splitted_data: &[&str], target: f64) -> DataEntry {
        // Parse every remaining column as an f64 feature, silently skipping
        // any value that fails to parse.
        let features: Vec<f64> = splitted_data.iter()
            .filter_map(|el| el.parse().ok())
            .collect();
        DataEntry { features, target }
    }

    pub fn feature_vector(&self) -> &[f64] {
        &self.features
    }
}
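// Parsing check for DataEntry::new; the sample row follows the iris.data
// column layout, with illustrative values.
#[test]
fn data_entry_parses_numeric_columns() {
    let columns = vec!["5.1", "3.5", "1.4", "0.2"];
    let entry = DataEntry::new(&columns, 0.0);
    assert_eq!(entry.feature_vector(), &[5.1, 3.5, 1.4, 0.2][..]);
    assert_eq!(entry.target, 0.0);
}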
fn main() {
    let file = BufReader::new(File::open("iris.data").unwrap());
    let mut dataset: Vec<DataEntry> = Vec::new();
    let mut counter: f64 = 0.0;
    let mut target_map: HashMap<String, f64> = HashMap::new();
    for line in file.lines() {
        let data = line.unwrap();
        let trimmed_data = data.trim();
        let mut splitted_data: Vec<&str> = trimmed_data.split(',').collect();
        // The last comma-separated column is the class label; the rest are features.
        let class_type = splitted_data.pop().unwrap().to_string();
        // Assign each class the next numeric target the first time it appears.
        let class_target = *target_map.entry(class_type.clone()).or_insert_with(|| {
            let target = counter;
            counter += 1.0;
            target
        });
        // Keep only two of the three iris classes, so the training targets are 0 and 1.
        if class_type == "Iris-setosa" || class_type == "Iris-versicolor" {
            dataset.push(DataEntry::new(&splitted_data, class_target));
        }
    }
    let mut log_regr = LogisticRegression::from_data_entries(&dataset);
    // Effectively trains indefinitely, printing the cost once per epoch.
    log_regr.train(10_000_000_000);
}