This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Ctrl + Alt + O ::: optimize imports
// Ctrl + Shift + L ::: scalafmt format
// F4 ::: open project settings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def process(df: DataFrame): DataFrame = { | |
val encoder = RowEncoder(df.schema) // provide the Catalyst codegen info about the datatypes of the data to avoid reflection | |
df.map(row => { | |
val rowIn = row.toArray | |
var rowOut = rowIn | |
// ... do here some kind of rowOut modifications | |
Row.fromSeq(rowOut) | |
})(encoder) // much faster than simple rdd iteration , because avoids the reflection overhead |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DoubleInterpolationTechniqueForUDFcallInDataFrame { | |
val colsNum = 100 | |
// this is the func for the udf ... | |
def getHeight(index: Int, freqs: Seq[Integer]): Option[Double] = { | |
(0 to colsNum - 1) foreach (n => { | |
// some logic | |
}) | |
Option(freqs(index)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Schema with three columns: id (integer), name (string) and age (integer).
// Only the name and data type of each field are specified here.
val structType = new StructType(
  Array(
    StructField("id", IntegerType),
    StructField("name", StringType),
    StructField("age", IntegerType)
  )
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- How to change a column's data type (here: to varchar(30)).
-- Replace table_name and column_name with the real identifiers.
ALTER TABLE table_name
    ALTER COLUMN column_name TYPE varchar(30);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# s3cmd cheat sheet. Expects $aws_profile (name of the s3cfg file under
# ~/.aws/s3cmd/) and $bucket (bucket name) to be set in the shell.
# Variable expansions are quoted so that values containing spaces or glob
# characters reach s3cmd as a single argument; the leading ~ stays unquoted
# so the shell still expands it.

# How to list the files in the bucket recursively
# (output goes through a reverse numeric sort and is paged with less).
s3cmd ls -r -c ~/.aws/s3cmd/"${aws_profile}.s3cfg" "s3://${bucket}" | sort -nr | less

# How to upload a file or directory to the bucket.
# NOTE(review): --acl-public requests a public-read ACL on the uploaded objects;
# drop it when the data must stay private.
s3cmd -r -c ~/.aws/s3cmd/"${aws_profile}.s3cfg" put --acl-public --guess-mime-type /path/to/local/file "s3://${bucket}/path/to/remote/obj-file-or-dir"

# How to download a file or directory object from the remote S3 bucket.
s3cmd -r -c ~/.aws/s3cmd/"${aws_profile}.s3cfg" get "s3://${bucket}/path/to/remote/obj-file" /path/to/local/file-or-dir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For every interpreter setting whose name contains "spark", walk its properties
# (recursing through .children), print each as  name: "..."; value: "..."  and
# keep only the lines that mention "shuffle".
# jq takes the input file as an operand, so no separate `cat` process is needed.
jq -r '.interpreterSettings[] | objects | select(.name | contains("spark")) | .properties[] | recurse (.children) | "name: \"\(.name)\"; value: \"\(.value)\""' interpreter-s3-project_665.json | grep shuffle
# chk: https://programminghistorian.org/en/lessons/json-and-jq#filter-select
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
ROW_NUMBER () OVER (ORDER BY pgc.relname , a.attnum) as rowid , | |
pgc.relname as table_name , | |
a.attnum as attr, | |
a.attname as name, | |
format_type(a.atttypid, a.atttypmod) as typ, | |
a.attnotnull as notnull, | |
com.description as comment, | |
coalesce(i.indisprimary,false) as primary_key, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.corp.dept.spark.dfutils | |
import org.apache.spark.sql.Row | |
object RowEnhancements { | |
implicit class RowExtender(row: Row) { | |
def isNullAtCol(cellName: String): Boolean = { | |
val index: Int = row.fieldIndex(cellName) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Current local date-time rendered with ISO_LOCAL_DATE_TIME, with the 'T' that
// separates date and time replaced by a space.
// format(...) already returns a String, and a plain character replace avoids
// compiling a regular expression for a single literal character.
val ts: String =
  java.time.LocalDateTime.now().format(java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME).replace('T', ' ')