Last active
September 1, 2024 10:15
-
-
Save dacr/8c3d5e76223cf6ac67fdcdbcf0472ff5 to your computer and use it in GitHub Desktop.
Playing with smile and california housing dataset / published by https://github.com/dacr/code-examples-manager #a532819f-03fb-429b-90b2-02babad00863/f2245ae45258e1b0f870e0008e4128981166b83f
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : Playing with smile and california housing dataset | |
// keywords : smile, machine-learning, cal_housing, ai, @testable | |
// publish : gist | |
// authors : David Crosson | |
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2) | |
// id : a532819f-03fb-429b-90b2-02babad00863 | |
// created-on : 2021-03-05T09:23:01Z | |
// managed-by : https://github.com/dacr/code-examples-manager | |
// execution : scala ammonite script (http://ammonite.io/) - run as follows 'amm scriptname.sc'
// run-with : scala-cli $file | |
// --------------------- | |
//> using scala "3.4.2" | |
//> using dep "com.github.pathikrit::better-files:3.9.2" | |
//> using dep "com.github.haifengl::smile-scala:3.1.1" | |
//> using dep "org.bytedeco:javacpp-platform:1.5.10" | |
//> using dep "org.bytedeco:javacpp:1.5.10,classifier=linux-x86_64" | |
//> using dep "org.bytedeco:arpack-ng:3.9.1-1.5.10,classifier=linux-x86_64" | |
//> using dep "org.bytedeco:openblas:0.3.26-1.5.10,classifier=linux-x86_64" | |
//> using dep "org.slf4j:slf4j-nop:2.0.13" | |
//> using dep "com.lihaoyi::requests:0.9.0" | |
// --------------------- | |
import better.files.* | |
import scala.language.postfixOps | |
import smile.* | |
import smile.util.* | |
import smile.math.* | |
import smile.math.MathEx.* | |
import smile.math.distance.* | |
import smile.data.formula.* | |
import smile.regression.OLS | |
//import smile.plot.vega.* // FOR VEGA RENDERING | |
import smile.plot.swing.* // FOR SWING RENDERING | |
import smile.plot.show | |
// ===================================================================== | |
// Normalize the input data the same way scikit-learn's (Python) california housing loader does
/* | |
datasets : | |
* http://lib.stat.cmu.edu/datasets/ | |
used dataset : | |
* original data : https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz | |
+ with some adaptations : | |
- https://github.com/scikit-learn/scikit-learn/blob/95d4f0841/sklearn/datasets/_california_housing.py#L51 | |
- CaliforniaHousing/cal_housing.data | |
* https://github.com/crhodes2/Get-Rich/blob/master/tensorflow/Lib/site-packages/sklearn/datasets/descr/california_housing.rst | |
*/ | |
// Local copy of the raw dataset: downloaded once, then reused on subsequent runs.
val infile=file"cal_housing.data"
if (infile.notExists) {
  val url = "https://gist.githubusercontent.com/dacr/4dd9b6cf55154559684f96aeeed33f64/raw/68989aefc9c483601439aeb4e8a0c49b8e2de0db/cal_housing.data"
  // Fetch the complete payload BEFORE creating the file. If the request fails
  // (requests throws on errors by default) nothing is written, so the next run
  // retries the download instead of silently reusing an empty file.
  val payload = requests.get(url).bytes
  infile.writeByteArray(payload)
}
// CSV derived from the raw dataset: a header row followed by one feature row per record.
val outfile=file"cal_housing.csv"
if (!outfile.exists) {
  val featureNames = List("Price", "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", "Latitude", "Longitude")

  // Converts one raw comma-separated record into a CSV row, in `featureNames` order.
  // Only the first 9 columns are read; extra columns are ignored.
  // The column names below are inferred from the feature each one feeds
  // (`households` is presumably the household count used as divisor for the averages).
  // Throws IllegalArgumentException when the record has fewer than 9 columns.
  def toCsvRow(line: String): String =
    line.split(",").take(9).map(_.toDouble) match {
      case Array(longitude, latitude, houseAge, rooms, bedrooms, population, households, medInc, price) =>
        List(
          price / 100000d, // price expressed in units of 100,000
          medInc,
          houseAge,
          rooms / households,
          bedrooms / households,
          population,
          population / households,
          latitude,
          longitude
        ).mkString(",")
      case _ =>
        throw new IllegalArgumentException(s"Expected at least 9 comma-separated values but got: $line")
    }

  val rows =
    infile
      .lineIterator
      .filter(_.trim.nonEmpty) // tolerate blank lines (e.g. a trailing newline at end of file)
      .map(toCsvRow)

  // Build the whole content first and write it in a single operation:
  // - avoids reopening the file twice per record as repeated `append` calls do
  // - a failure while parsing leaves no truncated CSV behind for later runs to pick up
  val content = (Iterator(featureNames.mkString(",")) ++ rows).mkString("", "\n", "\n")
  outfile.overwrite(content)
}
// ===================================================================== | |
// SMILE PART - using linear regression | |
// Load the generated CSV into a Smile data frame.
val houses = read.csv("cal_housing.csv")

// Formula with "Price" as the response; no explicit predictor terms are listed.
val priceFormula = "Price".~()

// Fit an ordinary least squares model and print its summary.
val model = OLS.fit(priceFormula, houses)
println(model)

// Sanity check: run the fitted model on the first row of the data set.
val tuple = houses(0)
val predicted = model.predict(tuple)
println("predicted : " + predicted)
// TODO - to be continued...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment