Skip to content

Instantly share code, notes, and snippets.

@barograf
barograf / job_5.py
Last active November 4, 2022 09:40
# Configure an S3 sink rooted at dest_path whose output is partitioned by the
# "year" and "month" columns. Catalog updating is switched on with the
# UPDATE_IN_DATABASE behaviour.
# NOTE(review): presumably this keeps the Data Catalog table in sync with the
# newly written partitions — confirm against the Glue getSink documentation.
sink = glueContext.getSink(
    path=f"s3a://{dest_path}",
    connection_type="s3",
    partitionKeys=["year", "month"],
    enableUpdateCatalog=True,
    updateBehavior="UPDATE_IN_DATABASE",
)
# Select the "glueparquet" output format for everything written via the sink.
sink.setFormat("glueparquet")
@barograf
barograf / job_4.py
Last active November 4, 2022 09:40
# Convert the DynamicFrame to a Spark DataFrame so column functions can be used.
source_df = source_dynf.toDF()

# Derive the partition columns from the created_at column, one at a time.
partitioned_df = source_df.withColumn(
    "year",
    functions.year(functions.col("created_at")),
)
partitioned_df = partitioned_df.withColumn(
    "month",
    functions.month(functions.col("created_at")),
)

# Wrap the result back into a DynamicFrame whose name carries the table name.
partitioned_dynf = DynamicFrame.fromDF(
    partitioned_df,
    glueContext,
    f"partitioned_dynf_{tbl_name}",
)
@barograf
barograf / job_3.py
Last active November 4, 2022 09:40
# Load the current catalog table as a DynamicFrame. The transformation_ctx is
# unique per table, and the bookmark options key incremental reads on the
# updated_at column in ascending order.
source_dynf = glueContext.create_dynamic_frame.from_catalog(
    database=args["CATALOG"],
    table_name=table["Name"],
    transformation_ctx=f"source_dynf_{tbl_name}",
    additional_options={
        "jobBookmarkKeys": ["updated_at"],
        # The documented option name is "jobBookmarkKeysSortOrder"; the
        # previous spelling ("jobBookmarksKeysSortOrder") is not a documented
        # option, so the requested order was likely never applied.
        "jobBookmarkKeysSortOrder": "asc",
    },
)
# Walk every table registered in the Glue catalog database named by
# args["CATALOG"], using a paginator so multi-page listings are covered.
# NOTE(review): indentation of the loop bodies is missing in this excerpt and
# the inner loop appears to continue past the last line shown here.
client = boto3.client("glue", region_name=args["REGION"])
paginator = client.get_paginator("get_tables")
page_iterator = paginator.paginate(DatabaseName=args["CATALOG"])
for page in page_iterator:
for table in page["TableList"]:
# Skip tables rejected by table_valid (defined outside this excerpt).
if not table_valid(table, args):
continue
# The storage location is split on "." into exactly three parts; any other
# number of parts makes this unpacking raise ValueError.
[db, schema, tbl_name] = table["StorageDescriptor"]["Location"].split(".")
# various imports
# Resolve the named job parameters from the command line into `args`.
args = getResolvedOptions(
    sys.argv,
    [
        "JOB_NAME",
        "REGION",
        "CRAWLER",
        "CATALOG",
        "DEST",
        "DEST_TABLE_PREFIX",
    ],
)

# Create the Spark and Glue contexts, then initialise the job under the
# resolved JOB_NAME, passing all resolved arguments along.
sc = SparkContext()
glueContext = GlueContext(sc)
job = Job(glueContext)
job.init(args["JOB_NAME"], args)
import pulumi
import pulumi_aws as aws
aws.glue.Job(
"rdsToS3GlueJob",
name=job_name,
role_arn=role.arn,
command=aws.glue.JobCommandArgs(
script_location=f"s3://{script_bucket}/{rds_to_s3_job_file_name}",
python_version="3",
(() => {
const dd = $$;
const d = $;
const iframe = d('frame[name="page_content"]').contentWindow.document;
const courses = dd('.coursediv', iframe);
const filterElements = (e) => {
courses.forEach(course => {
if (course.innerText.includes(e.target.value)) {
@barograf
barograf / producer.ts
Created September 27, 2019 11:17
Producer service
import * as awilix from "awilix";
import * as dotenv from "dotenv";
import DynamoDbStorage from "../shared/storage/dynamoDb";
import TranslationsStorage from "../shared/translations/translations";
import createContainer from "./container";
import GoogleSheets from "./google/sheets";
import ITransformer from "./transformer/transformer";
dotenv.config();
@barograf
barograf / api.ts
Created September 27, 2019 11:15
API service
import * as awilix from "awilix";
import * as dotenv from "dotenv";
import * as serverless from "serverless-http";
import DynamoDbStorage from "../shared/storage/dynamoDb";
import createContainer from "./container";
import Server from "./server/server";
dotenv.config();
const container = createContainer({
@barograf
barograf / serverless.yml
Created September 27, 2019 11:13
Translations service config
service: serverless-translations
plugins:
- serverless-plugin-typescript
provider:
name: aws
runtime: nodejs8.10
stage: dev
region: us-east-1