Kun 1ambda

🦁

in the jungle

high-functioning developer trained by unsupervised compilers

1ambda / ds-optional.scala

Created December 20, 2021 11:47

	/**
	  * A single user record with typed fields.
	  *
	  * The fields declared as `Option[Int]` (`income`, `kidhome`, `teenhome`,
	  * `recency`) may be absent; all other fields are always required.
	  * NOTE(review): presumably mirrors columns of the marketing campaign
	  * dataset used elsewhere in these snippets — confirm against the loader.
	  *
	  * @param userId     integer identifier of the user
	  * @param yearBirth  year of birth
	  * @param education  education value, kept as a plain string
	  * @param income     income, if present
	  * @param kidhome    `kidhome` value, if present
	  * @param teenhome   `teenhome` value, if present
	  * @param dtCustomer customer date, kept as an unparsed string
	  * @param recency    `recency` value, if present
	  */
	case class MarketingUserRefined(
	  userId: Int,
	  yearBirth: Int,
	  education: String,
	  income: Option[Int],
	  kidhome: Option[Int],
	  teenhome: Option[Int],
	  dtCustomer: String,
	  recency: Option[Int]
	)

	val dsUserRefined = dfSelected

1ambda / ds-optional-map.scala

Created December 20, 2021 11:47

	// Build dsUserUpdated by mapping over dsUserFiltered: each element is
	// copied with its yearBirth increased by 3; all other fields are unchanged.
	val dsUserUpdated = dsUserFiltered.map { user =>
	  val shiftedYearBirth = user.yearBirth + 3
	  user.copy(yearBirth = shiftedYearBirth)
	}

1ambda / ds-practice.scala

Created December 20, 2021 11:48

TargetUser(userId, income, hasKidhome, hasTeenhome, recency)

1ambda / df-transformation.python

Created December 20, 2021 12:09

	# This is a transformation; it is not executed yet.
	# NOTE(review): with inferSchema enabled, Spark may need an extra pass over
	# the file up front to determine column types — confirm against the Spark docs.
	# Reads a tab-separated CSV with a header row from the working directory.
	df = spark.read.load("./marketing_campaign.csv",
	format="csv", sep="\t", inferSchema="true", header="true")

	# Transformation 입니다. 아직 실행되지 않습니다.
	dfSelected = df.select(
	col("ID").alias("id"),
	col("Year_Birth").alias("year_birth"),
	col("Education").alias("education"),
	col("Kidhome").alias("count_kid"),

1ambda / df-transformation.py

Created December 20, 2021 12:11

	# This is a transformation; it is not executed yet.
	# NOTE(review): with inferSchema enabled, Spark may need an extra pass over
	# the file up front to determine column types — confirm against the Spark docs.
	# Reads a tab-separated CSV with a header row from the working directory.
	df = spark.read.load("./marketing_campaign.csv",
	format="csv", sep="\t", inferSchema="true", header="true")

	# Transformation 입니다. 아직 실행되지 않습니다.
	dfSelected = df.select(
	col("ID").alias("id"),
	col("Year_Birth").alias("year_birth"),
	col("Education").alias("education"),
	col("Kidhome").alias("count_kid"),

1ambda / df-transformation-result.py

Created December 20, 2021 12:11

	# dfSelected.explain("formatted") 의 실행 결과
	(1) Scan csv
	Output [7]: [ID#16, Year_Birth#17, Education#18, Kidhome#21, Teenhome#22, Dt_Customer#23, Recency#24]
	Batched: false
	Location: InMemoryFileIndex [file:/home/jovyan/private-notebook/spark-tutorial/marketing_campaign.csv]
	ReadSchema: struct<ID:int,Year_Birth:int,Education:string,Kidhome:int,Teenhome:int,Dt_Customer:string,Recency:int>

	(2) Project [codegen id : 1]
	Output [7]: [ID#16 AS id#190, Year_Birth#17 AS year_birth#191, Education#18 AS education#192, Kidhome#21 AS count_kid#193, Teenhome#22 AS count_teen#194, Dt_Customer#23 AS date_customer#195, Recency#24 AS days_last_login#196]
	Input [7]: [ID#16, Year_Birth#17, Education#18, Kidhome#21, Teenhome#22, Dt_Customer#23, Recency#24]

1ambda / df-plan.py

Created December 20, 2021 12:13

add_months(cast(gettimestamp(Dt_Customer#23, d-M-yyyy, Some(Asia/Seoul), false) as date), 72) AS date_joined#227

1ambda / df-rdd.py

Created December 20, 2021 12:14

	# `dfSelected.rdd.id` 출력 결과
	<bound method RDD.id of MapPartitionsRDD[15] at javaToPython at NativeMethodAccessorImpl.java:0>

	# `dfConverted.rdd.id` 출력 결과
	<bound method RDD.id of MapPartitionsRDD[42] at javaToPython at NativeMethodAccessorImpl.java:0>

1ambda / df-explainpy

Created December 20, 2021 12:15

	# dfConverted.explain("extended")

	== Parsed Logical Plan ==
	'Project [id#236, year_birth#237, education#238, count_kid#239, count_teen#240, date_customer#241, days_last_login#242, add_months(to_date('date_customer, Some(d-M-yyyy)), 72) AS date_joined#257]
	+- Project [ID#16 AS id#236, Year_Birth#17 AS year_birth#237, Education#18 AS education#238, Kidhome#21 AS count_kid#239, Teenhome#22 AS count_teen#240, Dt_Customer#23 AS date_customer#241, Recency#24 AS days_last_login#242]
	+- Relation[ID#16,Year_Birth#17,Education#18,Marital_Status#19,Income#20,Kidhome#21,Teenhome#22,Dt_Customer#23,Recency#24,MntWines#25,MntFruits#26,MntMeatProducts#27,MntFishProducts#28,MntSweetProducts#29,MntGoldProds#30,NumDealsPurchases#31,NumWebPurchases#32,NumCatalogPurchases#33,NumStorePurchases#34,NumWebVisitsMonth#35,AcceptedCmp3#36,AcceptedCmp4#37,AcceptedCmp5#38,AcceptedCmp1#39,... 5 more fields] csv

	== Analyzed Logical Plan ==
	id: int, year_birth: int, education: string, count_kid: int, count_teen: int, date_customer: string

1ambda / df-explain.py

Created December 20, 2021 12:15

	# dfConverted.explain("extended")

	== Parsed Logical Plan ==
	'Project [id#236, year_birth#237, education#238, count_kid#239, count_teen#240, date_customer#241, days_last_login#242, add_months(to_date('date_customer, Some(d-M-yyyy)), 72) AS date_joined#257]
	+- Project [ID#16 AS id#236, Year_Birth#17 AS year_birth#237, Education#18 AS education#238, Kidhome#21 AS count_kid#239, Teenhome#22 AS count_teen#240, Dt_Customer#23 AS date_customer#241, Recency#24 AS days_last_login#242]
	+- Relation[ID#16,Year_Birth#17,Education#18,Marital_Status#19,Income#20,Kidhome#21,Teenhome#22,Dt_Customer#23,Recency#24,MntWines#25,MntFruits#26,MntMeatProducts#27,MntFishProducts#28,MntSweetProducts#29,MntGoldProds#30,NumDealsPurchases#31,NumWebPurchases#32,NumCatalogPurchases#33,NumStorePurchases#34,NumWebVisitsMonth#35,AcceptedCmp3#36,AcceptedCmp4#37,AcceptedCmp5#38,AcceptedCmp1#39,... 5 more fields] csv

	== Analyzed Logical Plan ==
	id: int, year_birth: int, education: string, count_kid: int, count_teen: int, date_customer: string