ozturkoktay · February 26, 2025 16:46
diff --git a/utilities.py b/utilities.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import json
 import os
 import pickle
 import codecs
 import numpy as np
 from functools import reduce


 def mean_absolute_percentage_error(val1: float, val2: float) -> float:
    """
    Return the symmetric relative difference between two values.

    Despite the historical name, this is not a MAPE over a series: it
    compares exactly two numbers as
    ``2 * |val1 - val2| / (|val1| + |val2|)``, which lies in ``[0, 2]``.

    Args:
        val1: First value.
        val2: Second value.

    Returns:
        0.0 when both values are zero, otherwise the ratio described above.
    """
    # Magnitudes in the denominator keep the result non-negative and avoid
    # reporting 0.0 for opposite values such as (-1, 1), whose plain sum is 0.
    scale = abs(val1) + abs(val2)
    if scale == 0:
        return 0.0
    return (abs(val1 - val2) / scale) * 2.0


 def rescale_range(values: list) -> dict:
    """
    Min-max normalise a list of numbers into the range [0, 1].

    The result maps each input value to its rescaled value, so duplicate
    inputs collapse into a single key. An empty input gives an empty dict;
    when every value is equal, each one maps to 0.0.
    """
    if not values:
        return {}

    lowest = np.min(values)
    highest = np.max(values)
    if lowest == highest:
        return dict.fromkeys(values, 0.0)

    span = highest - lowest
    scaled = {}
    for value in values:
        scaled[value] = (value - lowest) / span
    return scaled


 def check_file_exists(filename: str) -> str:
    """
    Validate that ``filename`` points at an existing regular file.

    Returns the path unchanged on success so the call can be used inline;
    raises FileNotFoundError for anything os.path.isfile rejects.
    """
    if os.path.isfile(filename):
        return filename
    raise FileNotFoundError(f"Invalid filename: {filename}")


 def read_text_file(filename: str):
    """
    Lazily yield the non-empty lines of a UTF-8 text file.

    Every line has all leading and trailing whitespace removed (not only the
    line terminator); lines that are empty after stripping are skipped.

    Raises:
        FileNotFoundError: from check_file_exists, once the generator is first
            advanced, when ``filename`` is not an existing file.
    """
    filename = check_file_exists(filename)
    # Built-in open() breaks lines only on \n, \r and \r\n. codecs.open()
    # iterates with str.splitlines(), which also splits on characters such as
    # U+2028 or U+0085 and would cut a single record in two.
    with open(filename, encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if line:
                yield line


 def read_hash_file(filename: str):
    """
    Lazily yield ``[key, value]`` pairs from a tab-separated UTF-8 file.

    Each line is stripped of surrounding whitespace and split on tabs. Only
    lines that produce exactly two fields are yielded; every other line is
    skipped without an error.

    Raises:
        FileNotFoundError: from check_file_exists, once the generator is first
            advanced, when ``filename`` is not an existing file.
    """
    filename = check_file_exists(filename)
    # Built-in open() breaks lines only on \n, \r and \r\n. codecs.open()
    # iterates with str.splitlines(), which also splits on characters such as
    # U+2028 or U+0085 and would cut a key/value record in two.
    with open(filename, encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split("\t")
            if len(parts) == 2:
                yield parts


 def load_data(source: str, path: str, config=None):
    """
    Load a file into the in-memory structure selected by ``source``.

    Supported ``source`` values:
        'list'        -> list of stripped, non-empty lines.
        'key-json'    -> dict of key -> decoded JSON, read from lines shaped
                         like "key<TAB>json".
        'commalist'   -> dict mapping every comma-separated item to the list
                         of all items on its line.
        'dict'        -> dict built from two-field tab-separated lines.
        'json'        -> the decoded JSON document.
        'bloomfilter' -> pybloom_live.BloomFilter with every line added.

    Args:
        source: One of the type names above.
        path: File to read; it must already exist.
        config: Optional mapping read only for 'bloomfilter'; recognised keys
            are 'capacity' (default 1000) and 'error_rate' (default 0.01).

    Raises:
        FileNotFoundError: ``path`` is not an existing file.
        ValueError: ``source`` is not one of the supported names.
    """
    path = check_file_exists(path)

    if source == 'list':
        return list(read_text_file(path))

    if source == 'key-json':
        # Split each line once rather than once per field.
        records = (line.split("\t") for line in read_text_file(path))
        return {fields[0]: json.loads(fields[1]) for fields in records}

    if source == 'commalist':
        groups = {}
        for line in read_text_file(path):
            items = line.split(",")
            for item in items:
                # Copy so every key owns an independent list.
                groups[item] = list(items)
        return groups

    if source == 'dict':
        return dict(read_hash_file(path))

    if source == 'json':
        with open(path, 'r', encoding='utf-8') as file:
            return json.load(file)

    if source == 'bloomfilter':
        from pybloom_live import BloomFilter
        # config defaults to None; fall back to an empty mapping so the
        # defaults below apply instead of raising AttributeError.
        options = config or {}
        bloom = BloomFilter(capacity=options.get('capacity', 1000), error_rate=options.get('error_rate', 0.01))
        for line in read_text_file(path):
            bloom.add(line)
        return bloom

    raise ValueError(f"Unsupported source type: {source}")


 def write_pickle(data: object, filename: str):
    """
    Serialise ``data`` into ``filename`` using pickle's highest protocol.

    The target file is opened in binary write mode, so existing content is
    replaced.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink, protocol=pickle.HIGHEST_PROTOCOL)


 def read_pickle(filename: str) -> object:
    """
    Load and return the object stored in a pickle file.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(filename, 'rb') as source:
        restored = pickle.load(source)
    return restored


 def write_json(data: object, filename: str):
    """
    Dump ``data`` to ``filename`` as UTF-8 JSON.

    Output is indented by four spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    dump_options = {"ensure_ascii": False, "indent": 4}
    with open(filename, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)


 def read_json(filename: str) -> object:
    """
    Parse a UTF-8 encoded JSON file and return the decoded document.
    """
    with open(filename, 'r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)


 def write_text_file(filename: str, data: list, mode='w'):
    """
    Write every item of ``data`` on its own line of a UTF-8 text file.

    Each item is formatted with an f-string and followed by a newline.
    ``mode`` is handed to codecs.open, so 'w' replaces the file and 'a'
    appends to it.
    """
    with codecs.open(filename, mode, "utf-8") as sink:
        sink.write("".join(f"{entry}\n" for entry in data))


 def append_text_file(filename: str, data: list):
    """
    Add the items of ``data`` to the end of a text file, one per line.

    Thin wrapper that delegates to write_text_file in append mode.
    """
    write_text_file(filename, data, 'a')


 # Regex pattern (uncompiled string) matching any single character that is NOT
 # an ASCII letter, one of the listed Turkish or circumflexed letters, an
 # ASCII digit, or a plain space.
 REG_ALL_NOTCHARS = r"[^a-zşıüğçöâîûA-ZŞİÜĞÇÖÂÎÛ0-9 ]"


 def replace_circumflex(text: str) -> str:
    """
    Strip the circumflex from Â, Î, Û, â, î and û.

    Characters outside that set are returned untouched.
    NOTE(review): "î" becomes dotless "ı" and "Î" becomes "I" - confirm
    this is the intended base letter.
    """
    base_letters = str.maketrans(
        {"Â": "A", "Î": "I", "Û": "U", "â": "a", "î": "ı", "û": "u"}
    )
    return text.translate(base_letters)


 def to_lower(text: str) -> str:
    """
    Lowercase ``text`` using Turkish casing rules.

    The listed capitals are mapped explicitly first (notably "I" to dotless
    "ı" and "İ" to "i"); str.lower() then handles every remaining character.
    """
    turkish_lower = str.maketrans(
        {"Ş": "ş", "I": "ı", "Ü": "ü", "Ç": "ç", "Ö": "ö", "Ğ": "ğ", "İ": "i", "Â": "â", "Î": "î", "Û": "û"}
    )
    return text.translate(turkish_lower).lower()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import json
	import os
	import pickle
	import codecs
	import numpy as np
	from functools import reduce


	def mean_absolute_percentage_error(val1: float, val2: float) -> float:
	    """
	    Return the symmetric relative difference between two values.

	    Despite the historical name, this is not a MAPE over a series: it
	    compares exactly two numbers as
	    ``2 * |val1 - val2| / (|val1| + |val2|)``, which lies in ``[0, 2]``.

	    Args:
	        val1: First value.
	        val2: Second value.

	    Returns:
	        0.0 when both values are zero, otherwise the ratio described above.
	    """
	    # Magnitudes in the denominator keep the result non-negative and avoid
	    # reporting 0.0 for opposite values such as (-1, 1), whose plain sum is 0.
	    scale = abs(val1) + abs(val2)
	    if scale == 0:
	        return 0.0
	    return (abs(val1 - val2) / scale) * 2.0


	def rescale_range(values: list) -> dict:
	    """
	    Min-max normalise a list of numbers into the range [0, 1].

	    The result maps each input value to its rescaled value, so duplicate
	    inputs collapse into a single key. An empty input gives an empty dict;
	    when every value is equal, each one maps to 0.0.
	    """
	    if not values:
	        return {}

	    lowest = np.min(values)
	    highest = np.max(values)
	    if lowest == highest:
	        return dict.fromkeys(values, 0.0)

	    span = highest - lowest
	    scaled = {}
	    for value in values:
	        scaled[value] = (value - lowest) / span
	    return scaled


	def check_file_exists(filename: str) -> str:
	    """
	    Validate that ``filename`` points at an existing regular file.

	    Returns the path unchanged on success so the call can be used inline;
	    raises FileNotFoundError for anything os.path.isfile rejects.
	    """
	    if os.path.isfile(filename):
	        return filename
	    raise FileNotFoundError(f"Invalid filename: {filename}")


	def read_text_file(filename: str):
	    """
	    Lazily yield the non-empty lines of a UTF-8 text file.

	    Every line has all leading and trailing whitespace removed (not only the
	    line terminator); lines that are empty after stripping are skipped.

	    Raises:
	        FileNotFoundError: from check_file_exists, once the generator is first
	            advanced, when ``filename`` is not an existing file.
	    """
	    filename = check_file_exists(filename)
	    # Built-in open() breaks lines only on \n, \r and \r\n. codecs.open()
	    # iterates with str.splitlines(), which also splits on characters such as
	    # U+2028 or U+0085 and would cut a single record in two.
	    with open(filename, encoding='utf-8') as file:
	        for line in file:
	            line = line.strip()
	            if line:
	                yield line


	def read_hash_file(filename: str):
	    """
	    Lazily yield ``[key, value]`` pairs from a tab-separated UTF-8 file.

	    Each line is stripped of surrounding whitespace and split on tabs. Only
	    lines that produce exactly two fields are yielded; every other line is
	    skipped without an error.

	    Raises:
	        FileNotFoundError: from check_file_exists, once the generator is first
	            advanced, when ``filename`` is not an existing file.
	    """
	    filename = check_file_exists(filename)
	    # Built-in open() breaks lines only on \n, \r and \r\n. codecs.open()
	    # iterates with str.splitlines(), which also splits on characters such as
	    # U+2028 or U+0085 and would cut a key/value record in two.
	    with open(filename, encoding='utf-8') as file:
	        for line in file:
	            parts = line.strip().split("\t")
	            if len(parts) == 2:
	                yield parts


	def load_data(source: str, path: str, config=None):
	    """
	    Load a file into the in-memory structure selected by ``source``.

	    Supported ``source`` values:
	        'list'        -> list of stripped, non-empty lines.
	        'key-json'    -> dict of key -> decoded JSON, read from lines shaped
	                         like "key<TAB>json".
	        'commalist'   -> dict mapping every comma-separated item to the list
	                         of all items on its line.
	        'dict'        -> dict built from two-field tab-separated lines.
	        'json'        -> the decoded JSON document.
	        'bloomfilter' -> pybloom_live.BloomFilter with every line added.

	    Args:
	        source: One of the type names above.
	        path: File to read; it must already exist.
	        config: Optional mapping read only for 'bloomfilter'; recognised keys
	            are 'capacity' (default 1000) and 'error_rate' (default 0.01).

	    Raises:
	        FileNotFoundError: ``path`` is not an existing file.
	        ValueError: ``source`` is not one of the supported names.
	    """
	    path = check_file_exists(path)

	    if source == 'list':
	        return list(read_text_file(path))

	    if source == 'key-json':
	        # Split each line once rather than once per field.
	        records = (line.split("\t") for line in read_text_file(path))
	        return {fields[0]: json.loads(fields[1]) for fields in records}

	    if source == 'commalist':
	        groups = {}
	        for line in read_text_file(path):
	            items = line.split(",")
	            for item in items:
	                # Copy so every key owns an independent list.
	                groups[item] = list(items)
	        return groups

	    if source == 'dict':
	        return dict(read_hash_file(path))

	    if source == 'json':
	        with open(path, 'r', encoding='utf-8') as file:
	            return json.load(file)

	    if source == 'bloomfilter':
	        from pybloom_live import BloomFilter
	        # config defaults to None; fall back to an empty mapping so the
	        # defaults below apply instead of raising AttributeError.
	        options = config or {}
	        bloom = BloomFilter(capacity=options.get('capacity', 1000), error_rate=options.get('error_rate', 0.01))
	        for line in read_text_file(path):
	            bloom.add(line)
	        return bloom

	    raise ValueError(f"Unsupported source type: {source}")


	def write_pickle(data: object, filename: str):
	    """
	    Serialise ``data`` into ``filename`` using pickle's highest protocol.

	    The target file is opened in binary write mode, so existing content is
	    replaced.
	    """
	    with open(filename, 'wb') as sink:
	        pickle.dump(data, sink, protocol=pickle.HIGHEST_PROTOCOL)


	def read_pickle(filename: str) -> object:
	    """
	    Load and return the object stored in a pickle file.

	    NOTE: unpickling can execute arbitrary code, so only use this on files
	    from a trusted source.
	    """
	    with open(filename, 'rb') as source:
	        restored = pickle.load(source)
	    return restored


	def write_json(data: object, filename: str):
	    """
	    Dump ``data`` to ``filename`` as UTF-8 JSON.

	    Output is indented by four spaces and non-ASCII characters are written
	    as-is rather than escaped.
	    """
	    dump_options = {"ensure_ascii": False, "indent": 4}
	    with open(filename, 'w', encoding='utf-8') as sink:
	        json.dump(data, sink, **dump_options)


	def read_json(filename: str) -> object:
	    """
	    Parse a UTF-8 encoded JSON file and return the decoded document.
	    """
	    with open(filename, 'r', encoding='utf-8') as source:
	        raw_text = source.read()
	    return json.loads(raw_text)


	def write_text_file(filename: str, data: list, mode='w'):
	    """
	    Write every item of ``data`` on its own line of a UTF-8 text file.

	    Each item is formatted with an f-string and followed by a newline.
	    ``mode`` is handed to codecs.open, so 'w' replaces the file and 'a'
	    appends to it.
	    """
	    with codecs.open(filename, mode, "utf-8") as sink:
	        sink.write("".join(f"{entry}\n" for entry in data))


	def append_text_file(filename: str, data: list):
	    """
	    Add the items of ``data`` to the end of a text file, one per line.

	    Thin wrapper that delegates to write_text_file in append mode.
	    """
	    write_text_file(filename, data, mode='a')


	# Regex pattern (uncompiled string) matching any single character that is NOT
	# an ASCII letter, one of the listed Turkish or circumflexed letters, an
	# ASCII digit, or a plain space.
	REG_ALL_NOTCHARS = r"[^a-zşıüğçöâîûA-ZŞİÜĞÇÖÂÎÛ0-9 ]"


	def replace_circumflex(text: str) -> str:
	    """
	    Strip the circumflex from Â, Î, Û, â, î and û.

	    Characters outside that set are returned untouched.
	    NOTE(review): "î" becomes dotless "ı" and "Î" becomes "I" - confirm
	    this is the intended base letter.
	    """
	    # One translate() pass replaces the chain of str.replace calls; no
	    # replacement is itself a key, so the result is the same.
	    base_letters = str.maketrans(
	        {"Â": "A", "Î": "I", "Û": "U", "â": "a", "î": "ı", "û": "u"}
	    )
	    return text.translate(base_letters)


	def to_lower(text: str) -> str:
	    """
	    Lowercase ``text`` using Turkish casing rules.

	    The listed capitals are mapped explicitly first (notably "I" to dotless
	    "ı" and "İ" to "i"); str.lower() then handles every remaining character.
	    """
	    # One translate() pass replaces the chain of str.replace calls; no
	    # replacement is itself a key, so the result is the same.
	    turkish_lower = str.maketrans(
	        {"Ş": "ş", "I": "ı", "Ü": "ü", "Ç": "ç", "Ö": "ö", "Ğ": "ğ", "İ": "i", "Â": "â", "Î": "î", "Û": "û"}
	    )
	    return text.translate(turkish_lower).lower()