Skip to content

Instantly share code, notes, and snippets.

#arithmetic.py
from py_workers.worker import celery_app
@celery_app.task
def add(x, y):
    """Celery task: return the sum of ``x`` and ``y``.

    Registered on the app imported from py_workers.worker; invoke with
    ``add.delay(x, y)`` to run it asynchronously on a worker.
    """
    return x + y
// arithmetic.js
// Basic binary arithmetic helpers.
function add(x, y) {
  return x + y
}

function mul(x, y) {
  return x * y
}

export { add, mul }
@Senhaji-Rhazi-Hamza
Senhaji-Rhazi-Hamza / setup_server.py
Last active September 29, 2024 21:31
pyinfra deploy example
import os
from pyinfra import host
from pyinfra.operations import server, files
from pyinfra import local
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def get_condition_on_df_col(df, column, lambda_func):
    """Return a boolean Series: the truth value of ``lambda_func`` for every
    cell of ``df[column]``.

    ``lambda_func`` is any single-argument predicate applied element-wise.
    """
    # Pass the predicate directly; wrapping it in another lambda
    # (``lambda row: lambda_func(row)``) added a call layer for no benefit.
    return df[column].apply(lambda_func)
def get_condition_on_at_least_one_col(df, lambda_func, columns=None):
    """Return a boolean Series that is True wherever ``lambda_func`` holds
    for at least one of ``columns`` (all columns of ``df`` when omitted).

    Plays the role of the existential operator across columns.
    """
    # ``None`` (or an empty list, for backward compatibility with the old
    # ``columns=[]`` default) means "check every column". Using ``None`` as
    # the default avoids the shared-mutable-default-argument pitfall.
    if not columns:
        columns = df.columns
    return or_(*[get_condition_on_df_col(df, column, lambda_func) for column in columns])
# Define logical operators
from functools import reduce
def and_(*conditions):
    """n-ary logical "and".

    The classical "and" is binary; this variant takes any number of
    conditions so callers write ``and_(c1, c2, c3)`` rather than nesting
    ``and_(and_(c1, c2), c3)``. Applied over every column it plays the
    role of the universal operator (∀).
    """
    def conjoin(accumulated, condition):
        return accumulated & condition

    return reduce(conjoin, conditions)
def or_(*conditions):
### Function to generate Fake dataframe
def gen_fake_data(fake_config, fake):
data = []
for i in range(fake_config['nrows']):
fake_types = fake_config['fake_types']
data.append(
{
el.get('column_name', None) or el['fake_type']:
getattr(fake,el['fake_type'])() if (el.get('kwargs') is None) else getattr(fake,el['fake_type'])(**el.get('kwargs'))
for el in fake_types
jobs:
- pysparkJob:
args:
- --job=load_loans
- --job-args=gcs_input_path=gs://dfwt-example/data/ibrd-statement-of-loans-historical-data.csv
- --job-args=gcs_output_path=gs://dfwt-example/data/ibrd-summary-large-python
mainPythonFileUri: gs://dfwt-example/dist/main.py
pythonFileUris:
- gs://dfwt-example/dist/jobs.zip
stepId: ibrd-large-pyspark
from bq_iterate import BqQueryRowIterator, batchify_iterator

# NOTE(review): placeholder query — substitute real project/dataset/table ids.
query = "select * from <project_id>.<dataset_id>.<table_id>"
# batch_size: rows fetched per BigQuery read — choose a value that fits in memory.
row_iterator = BqQueryRowIterator(query=query, batch_size=2000000)
# batch_slice: rows yielded per processing batch — choose a slice that fits in memory.
batches = batchify_iterator(row_iterator, batch_slice=50000)
data = []
for batch in batches:
    # do your batch processing here; accumulating gives the loop a real body
    # (the original loop contained only a comment, which is a syntax error).
    data.extend(batch)
@Senhaji-Rhazi-Hamza
Senhaji-Rhazi-Hamza / large.py
Last active March 20, 2021 22:27
Code to send fake data to BigQuery
import pandas as pd
from faker import Faker
fake = Faker()
# function that generate a list of fake data dictionaries
def gen_fake_data(fake_config, fake):
data = []
for i in range(fake_config['nrows']):
fake_types = fake_config['fake_types']