This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #arithmetic.py | |
| from py_workers.worker import celery_app | |
@celery_app.task
def add(x, y):
    """Return ``x + y``.

    The function is decorated with ``celery_app.task``.
    """
    total = x + y
    return total
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // arithmetic.js | |
/** Return `x + y`. */
const add = (x, y) => {
  return x + y
}

/** Return `x * y`. */
const mul = (x, y) => {
  return x * y
}

export { add, mul }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from pyinfra import host | |
| from pyinfra.operations import server, files | |
| from pyinfra import local | |
# Absolute path of the directory one level above the directory holding this file.
ROOT_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_condition_on_df_col(df, column, lambda_func):
    """Evaluate a condition on every cell of one column.

    Args:
        df: Object indexed by ``column`` whose result exposes ``.apply``
            (presumably a pandas DataFrame -- confirm against callers).
        column: Label of the column to test.
        lambda_func: Callable invoked with each cell value; expected to
            return a truth value.

    Returns:
        The result of ``df[column].apply(lambda_func)``: one entry per cell
        holding the condition's outcome for that cell.
    """
    # Hand the callable to ``apply`` directly; wrapping it in a second lambda
    # only adds an extra Python call per cell.
    return df[column].apply(lambda_func)
def get_condition_on_at_least_one_col(df, lambda_func, columns=None):
    """Test whether a condition holds in at least one of the given columns.

    Args:
        df: Object exposing ``.columns`` and column indexing (presumably a
            pandas DataFrame -- confirm against callers).
        lambda_func: Callable applied to every cell of each selected column.
        columns: Iterable of column labels to inspect. ``None`` or an empty
            iterable selects every column in ``df.columns``.

    Returns:
        Whatever ``or_`` returns for the per-column results of
        ``get_condition_on_df_col``.
    """
    # ``None`` replaces the former mutable ``[]`` default. Materialising to a
    # list lets emptiness be checked safely for any iterable; comparing with
    # ``== []`` is element-wise on a pandas Index / numpy array and is never
    # true for an empty tuple.
    selected = [] if columns is None else list(columns)
    if not selected:
        selected = df.columns
    return or_(
        *(get_condition_on_df_col(df, column, lambda_func) for column in selected)
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Define logical operators | |
| from functools import reduce | |
def and_(*conditions):
    """Combine any number of conditions with the ``&`` operator.

    The classical logical "and" is binary; this one is n-ary to ease
    manipulation: write ``and_(c1, c2, c3)`` instead of
    ``and_(and_(c1, c2), c3)``. Applied to one condition per column it plays
    the role of the universal quantifier ∀(c).

    Args:
        *conditions: Operands supporting ``&`` with each other (e.g. booleans;
            presumably boolean pandas Series in this project).

    Returns:
        The left-to-right ``&`` of all conditions. A single condition is
        returned unchanged. With no conditions the result is ``True``, the
        identity of "and" (∀ over an empty set holds).
    """
    import operator  # function-scope import keeps this block self-contained

    # ``reduce`` without an initial value raises TypeError on an empty
    # sequence, so the vacuous case is answered explicitly. No initial value
    # is passed to ``reduce`` so non-boolean operands are left untouched.
    if not conditions:
        return True
    return reduce(operator.and_, conditions)
| def or_(*conditions): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### Function to generate Fake dataframe | |
| def gen_fake_data(fake_config, fake): | |
| data = [] | |
| for i in range(fake_config['nrows']): | |
| fake_types = fake_config['fake_types'] | |
| data.append( | |
| { | |
| el.get('column_name', None) or el['fake_type']: | |
| getattr(fake,el['fake_type'])() if (el.get('kwargs') is None) else getattr(fake,el['fake_type'])(**el.get('kwargs')) | |
| for el in fake_types |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| jobs: | |
| - pysparkJob: | |
| args: | |
| - --job=load_loans | |
| - --job-args=gcs_input_path=gs://dfwt-example/data/ibrd-statement-of-loans-historical-data.csv | |
| - --job-args=gcs_output_path=gs://dfwt-example/data/ibrd-summary-large-python | |
| mainPythonFileUri: gs://dfwt-example/dist/main.py | |
| pythonFileUris: | |
| - gs://dfwt-example/dist/jobs.zip | |
| stepId: ibrd-large-pyspark |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bq_iterate import BqQueryRowIterator, batchify_iterator | |
| query = "select * from <project_id>.<dataset_id>.<table_id>" | |
| row_itrator = BqQueryRowIterator(query=query, batch_size=2000000) # choose a batch_size that will fit into your memory | |
| batches = batchify_iterator(row_itrator, batch_slice=50000) # choose a batch_slice that will fit into your memory | |
| data = [] | |
| for batch in batches: | |
| # do your batch processing here |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from faker import Faker | |
| fake = Faker() | |
| # function that generates a list of fake data dictionaries | |
| def gen_fake_data(fake_config, fake): | |
| data = [] | |
| for i in range(fake_config['nrows']): | |
| fake_types = fake_config['fake_types'] |
NewerOlder