Skip to content

Instantly share code, notes, and snippets.

View 64lines's full-sized avatar

Julian Alexander Murillo 64lines

  • Huge Inc.
  • Medellin - Colombia
View GitHub Profile
@64lines
64lines / lower.py
Last active February 19, 2019 20:35
from pyspark.sql.functions import *
# Overwrite 'columname' with its lower-cased value, then re-alias the
# resulting frame as 'fact_df'.
fact_df = fact_df.withColumn('columname', lower(col('columname'))).alias('fact_df')
from pyspark.sql.functions import *
# Keep every existing column ('fact_df.*') and additionally expose
# 'columnname' under the name 'columnalias'.
# NOTE(review): 'fact_df.*' presumably relies on the frame already being
# aliased as 'fact_df' before this statement runs -- confirm.
fact_df = fact_df.select([
'fact_df.*',
col('columnname').alias('columnalias'),
]).alias('fact_df')
from pyspark.sql.functions import *
# Derive 'month', 'day' and 'year' columns from 'datecolumn' in a single
# chained expression; the frame is re-aliased as 'fact_df' after each step.
fact_df = (
    fact_df
    .withColumn('month', month('datecolumn')).alias('fact_df')
    .withColumn('day', dayofmonth('datecolumn')).alias('fact_df')
    .withColumn('year', year('datecolumn')).alias('fact_df')
)
from pyspark.sql.functions import *
# Overwrite 'columname' with "<columname> <another_columname>" (joined by a
# single space), then re-alias the frame as 'fact_df'.
# NOTE(review): Spark's concat presumably yields NULL when any input is
# NULL -- confirm whether concat_ws is the intended behavior.
fact_df = fact_df.withColumn('columname', concat(col('columname'), lit(' '), col('another_columname'))).alias('fact_df')
# Stack the rows of other_df and another_df into one frame aliased 'fact_df'.
# NOTE(review): DataFrame.union pairs columns by position, not by name --
# confirm both frames share the same column order (else see unionByName).
fact_df = other_df.union(another_df).alias('fact_df')
from pyspark.sql.functions import *
# Example 1: keep only the rows where 'colname' is not null.
fact_df = fact_df.filter(col('colname').isNotNull()).alias('fact_df')
# Example 2: keep only the rows where 'colname' is null.
fact_df = fact_df.filter(col('colname').isNull()).alias('fact_df')
# Example 3: overwrite 'colname' with 'colname2' where 'colname' is not
# null, and with 'colname3' otherwise.
fact_df = fact_df.withColumn('colname', when(col('colname').isNotNull(), col('colname2')).otherwise(col('colname3'))).alias('fact_df')
@64lines
64lines / trim.py
Last active February 19, 2019 20:02
from pyspark.sql.functions import *
# Example 1: strip leading/trailing whitespace from 'colname' in place.
fact_df = fact_df.withColumn('colname', trim(col('colname'))).alias('fact_df')
# Example 2: left-join on the trimmed key columns. Each side is trimmed
# separately before the comparison (the original wrapped the whole `==`
# expression in the first trim() and left a parenthesis unclosed).
# NOTE(review): the qualified names assume the frames were aliased as
# 'fact_df' and 'dimension_df' beforehand -- confirm.
fact_df = fact_df.join(
    dimension_df,
    trim(col('fact_df.colname')) == trim(col('dimension_df.another_colname')),
    'left',
).alias('fact_df')
alter table {0} rename to {1};
from redshift_utils import Messages
from redshift_utils import ScriptReader
from redshift_utils import RedshiftDataManager
from settings import SCRIPT_PATH
from settings import DB_CONNECTION
def lambda_handler(event, context):
table_name = event.get('table_name')
new_table_name = event.get('new_table_name')
import psycopg2
# Functions for reading scripts
class ScriptReader(object):
    """Helpers for loading script files from disk."""

    @staticmethod
    def get_script(path):
        """Return the full text content of the file at *path*.

        The file is opened in text mode with the platform default encoding
        and is closed deterministically before returning, instead of relying
        on garbage collection to release the handle.

        Raises whatever ``open`` raises (e.g. ``FileNotFoundError``) when the
        file cannot be read.
        """
        with open(path, 'r') as script_file:
            return script_file.read()
# Utils for messages