This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.mllib.clustering.BisectingKMeans | |
import org.apache.spark.mllib.linalg.Vectors | |
import org.apache.spark.mllib.linalg.Vector | |
//std_features col is of type vector | |
scaledFeatures.select($"std_features").printSchema() | |
val tempFeatureRdd = scaledFeatures.select($"std_features").rdd | |
import scala.reflect.runtime.universe._ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import * | |
from pyspark.sql import Row | |
from pyspark.sql.types import IntegerType | |
# Create the Spark session | |
spark = SparkSession.builder \ | |
.master("local") \ | |
.config("spark.sql.autoBroadcastJoinThreshold", -1) \ | |
.config("spark.executor.memory", "500mb") \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.ml.Pipeline | |
import org.apache.spark.ml.PipelineStage | |
import org.apache.spark.ml.Transformer | |
import org.apache.spark.ml.classification.LogisticRegression | |
import org.apache.spark.ml.feature.LabeledPoint | |
import org.apache.spark.ml.linalg.DenseVector | |
import org.apache.spark.ml.linalg.Vectors | |
import org.apache.spark.ml.param.ParamMap | |
import org.apache.spark.sql.Dataset | |
import org.apache.spark.sql.Row |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.diorsding.spark.ml; | |
import java.util.Arrays; | |
import java.util.List; | |
import org.apache.spark.SparkConf; | |
import org.apache.spark.SparkContext; | |
import org.apache.spark.ml.Pipeline; | |
import org.apache.spark.ml.PipelineModel; | |
import org.apache.spark.ml.PipelineStage; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays; | |
import java.util.List; | |
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.P; | |
import org.apache.spark.SparkConf; | |
import org.apache.spark.api.java.JavaSparkContext; | |
import org.apache.spark.api.java.function.MapFunction; | |
import org.apache.spark.ml.Pipeline; | |
import org.apache.spark.ml.PipelineModel; | |
import org.apache.spark.ml.PipelineStage; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.functions._ | |
import org.apache.spark.sql.SparkSession | |
object DataFrameWithFileNameApp extends App { | |
val spark: SparkSession = | |
SparkSession | |
.builder() | |
.appName("DataFrameApp") | |
.config("spark.master", "local[*]") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root logger option | |
log4j.rootLogger=INFO, stdout | |
# Redirect log messages to console | |
log4j.appender.stdout=org.apache.log4j.ConsoleAppender | |
log4j.appender.stdout.Target=System.out | |
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout | |
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %t %c:%L - %m%n | |
log4j.com.ncr.eda=INFO, stdout |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
:paste | |
import org.apache.spark.sql.types._ | |
import com.databricks.spark.xml._ | |
import org.apache.spark.sql.functions._ | |
// For implicit conversions like converting RDDs to DataFrames | |
import spark.implicits._ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Custom history configuration | |
# Run script using: | |
# chmod u+x better_history.sh | |
# sudo su | |
# ./better_history.sh | |
echo ">>> Starting" | |
echo ">>> Loading configuration into /etc/bash.bashrc" | |
echo "HISTTIMEFORMAT='%F %T '" >> /etc/bash.bashrc | |
echo 'HISTFILESIZE=-1' >> /etc/bash.bashrc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[alias] | |
co = checkout | |
cob = checkout -b | |
coo = !git fetch && git checkout | |
br = branch | |
brd = branch -d | |
brD = branch -D | |
merged = branch --merged | |
dmerged = "git branch --merged | grep -v '\\*' | xargs -n 1 git branch -d" | |
st = status |
NewerOlder