Skip to content

Instantly share code, notes, and snippets.

@justinTM
justinTM / example.py
Created March 2, 2022 03:57
Apache Spark: parallelize an RDD to pipe JSON files through jq on multiple cores
import os
from pyspark.sql import SparkSession
# Path constant; presumably the directory holding the input JSON files
# (it is not used in the visible part of this snippet -- TODO confirm).
DIR_JSONS = '/tmp/in/jsons'
# Create the Spark session, or reuse an already-existing one (getOrCreate),
# registered under the application name 'APP_NAME'.
# NOTE(review): the original comment says this runs on a multi-core cluster;
# no master/core configuration is set here, so that depends on how the job
# is launched -- confirm.
SPARK = SparkSession.builder.appName('APP_NAME').getOrCreate()
# Keep a handle to the session's SparkContext.
sc = SPARK.sparkContext
# execute a shell command using python
def os_shell_jq(filepath):
@justinTM
justinTM / write_previous_deployment_yml.py
Created April 6, 2022 20:29
Downloads a GitLab pipeline job artifact based on branch name, status, and job name criteria.
import gitlab
import os
import subprocess
from gitlab.v4.objects.jobs import ProjectJob
from gitlab.v4.objects.pipelines import ProjectPipeline
def pprint_job(job: ProjectJob):
    """Return a short, human-readable summary of a GitLab job.

    The summary has the form
    ``ProjectJob<name=<name>, finished_at=<timestamp>>`` where the timestamp
    is the first 16 characters of the job's ``finished_at`` attribute.

    Args:
        job: Object whose ``attributes`` mapping is read for the ``name``
            and ``finished_at`` keys.

    Returns:
        The formatted summary string. A missing ``name`` or a missing/None
        ``finished_at`` is rendered as ``None``.
    """
    attrs = job.attributes
    finished_at = attrs.get('finished_at')
    # ``finished_at`` may be absent or None (presumably for a job that has
    # not finished yet); slicing None would raise TypeError, so only
    # truncate when a value is actually present.
    if finished_at is not None:
        finished_at = finished_at[0:16]
    return f"ProjectJob<name={attrs.get('name', None)}, finished_at={finished_at}>"