Description
Long text, code snippet, image, etc.
Long text, code snippet, image, etc.
BEWARE OF USING IT WITH CACHED FUNCTIONS!
Behavior is unpredictable when used with cached functions. Disable any caching (such as functools.lru_cache) that prevents the functions from actually executing under certain conditions.
THIS IS FOR ACADEMIC/LEARNING PURPOSES!
Do not rely blindly on its results; it has not been fully tested.
| import pandas as pd | |
| def json_normalize_and_concat(df: pd.DataFrame, col: str) -> pd.DataFrame: | |
| index_name = df.reset_index().columns[0] | |
| df_json_norm = pd.json_normalize(df[col], max_level=0).reset_index(drop=True) | |
| df_ret = pd.concat([ | |
| df.reset_index().drop([c for c in df_json_norm.columns if c in df.columns], axis=1), | |
| df_json_norm | |
| ], axis=1).set_index(index_name).drop(col, axis=1) |
| from thefuzz import fuzz | |
| from scipy.optimize import linear_sum_assignment | |
| def df_merge_fuzzy_optimal_with_cost( | |
| df_left: pd.DataFrame, | |
| df_right: pd.DataFrame, | |
| left_on: str, | |
| right_on: str, | |
| fn_cost: Optional[Callable] = lambda x, y: 100 - fuzz.ratio(x, y) | |
| ) -> pd.DataFrame: |
| import git | |
| import os | |
| from pathlib import Path | |
def get_root() -> Path:
    """Return the working directory of the git repository enclosing the cwd.

    The repository is opened starting from the current working directory
    with ``search_parent_directories=True``, so the call also works from a
    sub-directory of the repository.
    """
    repo = git.Repo(os.getcwd(), search_parent_directories=True)
    return Path(repo.working_dir)
| def read_avro(bytes_buff_or_file): | |
| import avro.schema | |
| from avro.datafile import DataFileReader, DataFileWriter | |
| from avro.io import DatumReader, DatumWriter | |
| from io import BytesIO | |
| import json | |
| if type(bytes_buff_or_file) == bytes: | |
| from tempfile import TemporaryFile | |
| # Has to be a file yes or yes because of how avro lib is made | |
| with TemporaryFile() as tmp: |
| from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions | |
| from tensorflow.keras.preprocessing import image as tf_image_preproc | |
from tensorflow.keras.models import Model

# Full ResNet50 network initialised with the 'imagenet' weights.
model = ResNet50(weights='imagenet')

# Second model sharing the same input but emitting the output of the
# 'avg_pool' layer instead of the network's final output.
avg_pool_output = model.get_layer('avg_pool').output
resnet50_without_softmax = Model(inputs=model.input, outputs=avg_pool_output)