Skip to content

Instantly share code, notes, and snippets.

@ozturkoktay
Last active February 26, 2025 16:46
Show Gist options
  • Save ozturkoktay/afad85d3e3b3e6a8844600580e7a1b7d to your computer and use it in GitHub Desktop.
Save ozturkoktay/afad85d3e3b3e6a8844600580e7a1b7d to your computer and use it in GitHub Desktop.
These are helper functions for Python projects.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import os
import pickle
import codecs
import numpy as np
from functools import reduce
def mean_absolute_percentage_error(val1: float, val2: float) -> float:
    """
    Return the symmetric relative difference between two values.

    Despite its name this is not the textbook MAPE over a series: it compares
    exactly two numbers and divides their absolute difference by the mean of
    their magnitudes, i.e. ``2 * |val1 - val2| / (|val1| + |val2|)``.

    Args:
        val1: First value.
        val2: Second value.

    Returns:
        A float between 0.0 (identical values) and 2.0 (values of opposite
        sign, or exactly one of them zero). Returns 0.0 when both values
        are zero.
    """
    # Use magnitudes in the denominator: a plain ``val1 + val2`` cancels out
    # for opposite-sign inputs (reporting "no error" for 1 vs -1) and yields a
    # negative result when both inputs are negative.
    scale = abs(val1) + abs(val2)
    if scale == 0:
        return 0.0
    return (abs(val1 - val2) / scale) * 2.0
def rescale_range(values: list) -> dict:
    """
    Rescale values linearly so the minimum maps to 0 and the maximum to 1.

    Args:
        values: Any one-dimensional iterable of hashable numbers (list,
            tuple, generator, 1-D NumPy array, ...).

    Returns:
        A dict mapping each original value to its rescaled value. Duplicate
        inputs collapse into a single key. An empty input gives an empty
        dict; when all values are equal every value maps to 0.0.
    """
    # Materialise once: truth-testing a NumPy array is ambiguous/raises, and a
    # generator is always truthy and can only be consumed a single time.
    items = list(values)
    if not items:
        return {}
    min_val, max_val = np.min(items), np.max(items)
    if min_val == max_val:
        # Zero-width range: avoid dividing by zero.
        return {v: 0.0 for v in items}
    span = max_val - min_val
    return {v: (v - min_val) / span for v in items}
def check_file_exists(filename: str) -> str:
    """
    Validate that ``filename`` points at an existing regular file.

    Args:
        filename: Path to check.

    Returns:
        The same ``filename``, unchanged, so the call can be used inline.

    Raises:
        FileNotFoundError: If the path does not exist or is not a regular
            file (for example, a directory).
    """
    if os.path.isfile(filename):
        return filename
    raise FileNotFoundError(f"Invalid filename: {filename}")
def read_text_file(filename: str):
    """
    Yield the non-blank lines of a UTF-8 text file, one at a time.

    Every line is stripped of all leading and trailing whitespace (which
    covers the newline and carriage return); lines that are empty after
    stripping are skipped.

    Args:
        filename: Path of the file to read.

    Yields:
        Each non-empty, stripped line as a ``str``.

    Raises:
        FileNotFoundError: If ``filename`` is not an existing file. Because
            this is a generator, the check runs when iteration starts.
    """
    filename = check_file_exists(filename)
    # Built-in open() ends a line only at \n, \r or \r\n. codecs.open() also
    # breaks lines at characters such as U+2028, U+0085 and the FS/GS/RS
    # controls, which would split a single record into several "lines".
    with open(filename, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text:
                yield text
def read_hash_file(filename: str):
    """
    Yield key/value pairs from a UTF-8 file of tab-separated lines.

    Each line is stripped of surrounding whitespace and split on tabs. Only
    lines that produce exactly two fields are yielded; anything else (blank
    lines, lines without a tab, lines with three or more fields) is silently
    skipped. Because stripping also removes leading/trailing tabs, a line
    whose key or value is empty is skipped as well.

    Args:
        filename: Path of the file to read.

    Yields:
        A two-element list ``[key, value]`` per accepted line.

    Raises:
        FileNotFoundError: If ``filename`` is not an existing file. Because
            this is a generator, the check runs when iteration starts.
    """
    filename = check_file_exists(filename)
    # Built-in open() ends a line only at \n, \r or \r\n. codecs.open() also
    # breaks lines at characters such as U+2028, U+0085 and the FS/GS/RS
    # controls, which would cut a record in the middle of its value.
    with open(filename, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            parts = raw_line.strip().split("\t")
            if len(parts) == 2:
                yield parts
def load_data(source: str, path: str, config=None):
    """
    Load the file at ``path`` into the structure selected by ``source``.

    Supported ``source`` values:
        'list'        -- list of the lines produced by ``read_text_file``.
        'key-json'    -- dict; each line is ``key<TAB>json`` and the JSON text
                         is parsed into the value.
        'commalist'   -- dict mapping every comma-separated item of a line to
                         the list of all items on that line.
        'dict'        -- dict built from the pairs produced by
                         ``read_hash_file``.
        'json'        -- the parsed content of a JSON file.
        'bloomfilter' -- a ``pybloom_live.BloomFilter`` containing every line.

    Args:
        source: One of the source type names above.
        path: Path of the file to load.
        config: Optional mapping, only read for 'bloomfilter'. Recognised
            keys are 'capacity' (default 1000) and 'error_rate'
            (default 0.01).

    Returns:
        The loaded data; its type depends on ``source``.

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
        ValueError: If ``source`` is not a supported type.
        IndexError: If a 'key-json' line contains no tab.
    """
    path = check_file_exists(path)
    if source == 'list':
        return list(read_text_file(path))
    if source == 'key-json':
        keyed = {}
        for line in read_text_file(path):
            # Split at the first tab only, so a tab inside the JSON payload
            # does not truncate it. Index access keeps the IndexError for a
            # line that has no tab at all.
            fields = line.split("\t", 1)
            keyed[fields[0]] = json.loads(fields[1])
        return keyed
    if source == 'commalist':
        groups = {}
        for line in read_text_file(path):
            items = line.split(",")
            for item in items:
                # Each key gets its own list so callers can mutate one entry
                # without affecting its siblings.
                groups[item] = list(items)
        return groups
    if source == 'dict':
        return dict(read_hash_file(path))
    if source == 'json':
        with open(path, 'r', encoding='utf-8') as file:
            return json.load(file)
    if source == 'bloomfilter':
        from pybloom_live import BloomFilter
        # ``config`` defaults to None; fall back to an empty mapping so the
        # documented defaults apply instead of failing on ``None.get``.
        settings = config if config is not None else {}
        bloom = BloomFilter(
            capacity=settings.get('capacity', 1000),
            error_rate=settings.get('error_rate', 0.01),
        )
        for line in read_text_file(path):
            bloom.add(line)
        return bloom
    raise ValueError(f"Unsupported source type: {source}")
def write_pickle(data: object, filename: str):
    """
    Serialize ``data`` to ``filename`` with pickle.

    The file is opened in binary write mode (replacing any existing content)
    and the highest pickle protocol available to the running interpreter is
    used.

    Args:
        data: Any picklable object.
        filename: Destination path.
    """
    with open(filename, mode='wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
def read_pickle(filename: str) -> object:
    """
    Load and return the object stored in a pickle file.

    Args:
        filename: Path of the pickle file, opened in binary read mode.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on files from a
        trusted origin.
    """
    with open(filename, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload
def write_json(data: object, filename: str):
    """
    Serialize ``data`` as JSON into ``filename``.

    The file is written as UTF-8 (replacing any existing content), indented
    by four spaces, and non-ASCII characters are emitted as-is rather than
    as ``\\uXXXX`` escapes.

    Args:
        data: Any JSON-serializable object.
        filename: Destination path.
    """
    with open(filename, mode='w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=4, ensure_ascii=False)
def read_json(filename: str) -> object:
    """
    Parse a UTF-8 encoded JSON file and return its content.

    Args:
        filename: Path of the JSON file.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        document = handle.read()
    return json.loads(document)
def write_text_file(filename: str, data: list, mode='w'):
    """
    Write each item of ``data`` to ``filename`` as its own UTF-8 line.

    Every item is formatted with ``str.format`` semantics (f-string) and
    followed by a single ``\\n``.

    Args:
        filename: Destination path.
        data: Iterable of items to write, one per line.
        mode: File mode passed to ``codecs.open``; 'w' overwrites (default),
            'a' appends.
    """
    with codecs.open(filename, mode, encoding="utf-8") as handle:
        # Build the whole payload and hand it over in a single write.
        handle.write("".join(f"{entry}\n" for entry in data))
def append_text_file(filename: str, data: list):
    """
    Append each item of ``data`` to ``filename`` as its own line.

    Thin wrapper that delegates to ``write_text_file`` in append mode.

    Args:
        filename: Destination path.
        data: Iterable of items to append, one per line.
    """
    write_text_file(filename, data, 'a')
# Regex character class that matches any single character which is NOT an
# ASCII letter, one of the listed Turkish / circumflexed letters, an ASCII
# digit, or a space.
REG_ALL_NOTCHARS = r"[^a-zşıüğçöâîûA-ZŞİÜĞÇÖÂÎÛ0-9 ]"
def replace_circumflex(text: str) -> str:
    """
    Strip the circumflex from Â, Î, Û, â, î and û.

    The substitutions are: Â→A, Î→I, Û→U, â→a, î→ı, û→u. All other
    characters are returned unchanged.

    NOTE(review): lower-case î is replaced by dotless ı rather than by i;
    this mirrors Turkish lower-casing of I but should be confirmed as the
    intended result.

    Args:
        text: Input string.

    Returns:
        A new string with the six circumflexed letters replaced.
    """
    circumflex_table = str.maketrans("ÂÎÛâîû", "AIUaıu")
    return text.translate(circumflex_table)
def to_lower(text: str) -> str:
    """
    Lower-case ``text`` using Turkish casing rules for I and İ.

    Upper-case Turkish and circumflexed letters are translated explicitly
    first, then ``str.lower`` handles everything else.

    Args:
        text: Input string.

    Returns:
        The lower-cased string, with ``I`` → ``ı`` and ``İ`` → ``i``.
    """
    # Plain str.lower() would turn "I" into "i" and "İ" into "i" followed by
    # a combining dot, so those letters must be mapped before calling it.
    turkish_table = str.maketrans("ŞIÜÇÖĞİÂÎÛ", "şıüçöğiâîû")
    return text.translate(turkish_table).lower()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment