This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from pyspark.sql.types import *

# Create a single-row Pandas DataFrame with sample person data.
pd_person = pd.DataFrame(
    {
        'PERSONID': '0',
        'LASTNAME': 'Doe',
        'FIRSTNAME': 'John',
        'ADDRESS': 'Museumplein',
        'CITY': 'Amsterdam',
    },
    index=[0],
)

# Explicit PySpark schema for the Spark DataFrame built from pd_person.
# NOTE(review): PERSONID holds the string '0' in the Pandas frame but is
# declared DecimalType here — Spark must coerce on conversion; confirm the
# intended type before relying on it.
p_schema = StructType(
    [
        StructField('ADDRESS', StringType(), True),
        StructField('CITY', StringType(), True),
        StructField('FIRSTNAME', StringType(), True),
        StructField('LASTNAME', StringType(), True),
        StructField('PERSONID', DecimalType(), True),
    ]
)

# Create Spark DataFrame from Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.time.Instant; | |
import java.time.ZoneId; | |
import java.time.ZonedDateTime; | |
import java.time.format.DateTimeFormatter; | |
/** | |
* Timezone Conversion Utils between EpochSec Timestamps | |
* and other date formats | |
*/ | |
public class TimeZoneUtils { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.types.{StringType, StructField, StructType} | |
import org.apache.spark.sql.{Row, SparkSession} | |
import org.apache.spark.sql.catalyst.encoders.RowEncoder | |
object TrySparkExcel extends App { | |
//Create Spark Session | |
val ss = SparkSession | |
.builder() | |
.master("local[*]") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Simple base class of the animal hierarchy.
  *
  * Concrete subclasses describe their anatomy by implementing the
  * three abstract members below.
  */
abstract class Animal {
  /** Whether this animal has a nose. */
  def hasNose: Boolean

  /** Whether this animal has legs. */
  def hasLegs: Boolean

  /** How many legs this animal has. */
  def numLegs: Int
}
/** Example Traits **/ | |
trait Mammal { | |
def isMammal : Unit = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.catalyst.encoders.RowEncoder | |
import org.apache.spark.sql.types.{StringType, StructField, StructType} | |
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} | |
/** | |
* Spark Excel Loading Utils to Transform the DataFrame into DateFrame | |
* that can be saved regular rows and columns in Hive | |
*/ | |
object SparkExcelLoadingUtils { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.time.LocalDate | |
val digitDate = """(\d\d)-(\d\d)""".r | |
val letterDate = """(\d\d)-([a-zA-Z]{3})""".r | |
def toStrHiveDateFrmt(str: String): String = str match { | |
case "" => "0001-01-01" | |
case digitDate(day, month) => { | |
val currentYear = LocalDate.now().getYear | |
val tmpDate = LocalDate.of(currentYear, Integer.valueOf(month), Integer.valueOf(day)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
val endDateTimeOffset = endDateTime.getOffset.toString | |
//Create OffsetDateTime in chosen timezone with correct Offset | |
val startDateTime = OffsetDateTime.now(ZoneId.of(endDateTimeOffset)) | |
val timeDiff = ChronoUnit.HOURS.between(startDateTime, endDateTime) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def xgb_precision(proba_y: np.ndarray, dataset: xgb.DMatrix) -> Tuple[str, float]:
    '''Binary-classification precision at a fixed 0.5 threshold.

    Shaped as a custom eval metric for xgboost: returns a
    (metric_name, value) pair.

    proba_y: predicted probabilities for the positive class
             (assumed 1-D, one probability per row — TODO confirm with caller)
    dataset: xgboost DMatrix holding the true labels
    '''
    y_true = dataset.get_label()
    # Vectorized thresholding; np.vectorize would run a per-element
    # Python loop for the same result.
    y_pred = (proba_y > 0.5).astype(int)
    return 'clf_precision', precision_score(y_true, y_pred)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure matplotlib's global default figure size via rcParams so every
# subsequent plot uses a 12x5 canvas.
from pylab import rcParams

rcParams['figure.figsize'] = (12, 5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import statsmodels.api as sm

# Load the statsmodels macrodata sample dataset as a DataFrame.
macro = sm.datasets.macrodata
df = macro.load_pandas().data

# Replace the default integer index with quarterly dates covering the
# dataset's full range.
quarters = sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3')
df.index = pd.Index(quarters)

# Show the dataset's descriptive note.
print(macro.NOTE)
OlderNewer