Skip to content

Instantly share code, notes, and snippets.

#arithmetic.py
from py_workers.worker import celery_app
@celery_app.task
def add(x, y):
    """Celery task: return the sum of ``x`` and ``y``.

    Registered on the app imported from py_workers.worker; invoke with
    ``add.delay(x, y)`` to run it asynchronously on a worker.
    """
    return x + y
// arithmetic.js
// Basic binary arithmetic helpers.
function add(x, y) {
  return x + y
}

function mul(x, y) {
  return x * y
}

export { add, mul }
@Senhaji-Rhazi-Hamza
Senhaji-Rhazi-Hamza / setup_server.py
Last active September 29, 2024 21:31
pyinfra deploy example
import os
from pyinfra import host
from pyinfra.operations import server, files
from pyinfra import local
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def get_condition_on_df_col(df, column, lambda_func):
    """Return a boolean Series: the truth value of ``lambda_func`` for every
    cell of ``df[column]``.

    ``lambda_func`` is any single-argument predicate applied element-wise.
    """
    # Pass the predicate directly; wrapping it in another lambda
    # (``lambda row: lambda_func(row)``) added a call layer for no benefit.
    return df[column].apply(lambda_func)
def get_condition_on_at_least_one_col(df, lambda_func, columns=None):
    """Return a boolean Series that is True wherever ``lambda_func`` holds
    for at least one of ``columns`` (all columns of ``df`` when omitted).

    Plays the role of the existential operator across columns.
    """
    # ``None`` (or an empty list, for backward compatibility with the old
    # ``columns=[]`` default) means "check every column". Using ``None`` as
    # the default avoids the shared-mutable-default-argument pitfall.
    if not columns:
        columns = df.columns
    return or_(*[get_condition_on_df_col(df, column, lambda_func) for column in columns])
# Define logical operators
from functools import reduce
def and_(*conditions):
    """n-ary logical "and".

    The classical "and" is binary; this variant takes any number of
    conditions so callers write ``and_(c1, c2, c3)`` rather than nesting
    ``and_(and_(c1, c2), c3)``. Applied over every column it plays the
    role of the universal operator (∀).
    """
    def conjoin(accumulated, condition):
        return accumulated & condition

    return reduce(conjoin, conditions)
def or_(*conditions):
### Function to generate Fake dataframe
def gen_fake_data(fake_config, fake):
data = []
for i in range(fake_config['nrows']):
fake_types = fake_config['fake_types']
data.append(
{
el.get('column_name', None) or el['fake_type']:
getattr(fake,el['fake_type'])() if (el.get('kwargs') is None) else getattr(fake,el['fake_type'])(**el.get('kwargs'))
for el in fake_types
jobs:
- pysparkJob:
args:
- --job=load_loans
- --job-args=gcs_input_path=gs://dfwt-example/data/ibrd-statement-of-loans-historical-data.csv
- --job-args=gcs_output_path=gs://dfwt-example/data/ibrd-summary-large-python
mainPythonFileUri: gs://dfwt-example/dist/main.py
pythonFileUris:
- gs://dfwt-example/dist/jobs.zip
stepId: ibrd-large-pyspark
from bq_iterate import BqQueryRowIterator, batchify_iterator

# NOTE(review): placeholder query — substitute real project/dataset/table ids.
query = "select * from <project_id>.<dataset_id>.<table_id>"
# batch_size: rows fetched per BigQuery read — choose a value that fits in memory.
row_iterator = BqQueryRowIterator(query=query, batch_size=2000000)
# batch_slice: rows yielded per processing batch — choose a slice that fits in memory.
batches = batchify_iterator(row_iterator, batch_slice=50000)
data = []
for batch in batches:
    # do your batch processing here; accumulating gives the loop a real body
    # (the original loop contained only a comment, which is a syntax error).
    data.extend(batch)
@Senhaji-Rhazi-Hamza
Senhaji-Rhazi-Hamza / large.py
Last active March 20, 2021 22:27
Code to send fake data to BigQuery
import pandas as pd
from faker import Faker
fake = Faker()
# function that generate a list of fake data dictionaries
def gen_fake_data(fake_config, fake):
data = []
for i in range(fake_config['nrows']):
fake_types = fake_config['fake_types']