This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # conda activate bert-github | |
| #%% | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import glob | |
| import pathlib | |
| from tqdm import tqdm | |
| from bs4 import BeautifulSoup | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Splits the values and expands them in multiple numbered columns | |
| temp_df = df[column].str.split("|", expand=True).fillna('') | |
| # One-Hot encodes all the values for each column | |
| temp_df = pd.get_dummies(temp_df).astype('uint8') | |
| # Removes the "N_" prefix from each column name to expose duplicates | |
| temp_df = remove_prefixes(temp_df) | |
| # Merges the duplicate columns | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #! /usr/bin/env python3 | |
| # Usage: | |
| # $ python3 this.py > output.csv | |
| # $ nkf --overwrite --oc=UTF-8-BOM output.csv | |
| import glob | |
| import hashlib | |
| import os | |
| import sys | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| import hashlib | |
| def hash_dataframe(df): | |
| """ | |
| Generate a hash for a DataFrame using the SHA-256 algorithm. | |
| This function creates a hash for each row of the DataFrame using pandas' `hash_pandas_object` | |
| and then hashes the resulting array of row hashes using `hashlib.sha256`. | |
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| import json | |
| def readJson(filename,) -> dict: | |
| """ | |
| Reads a json file and returns a dictionary | |
| """ | |
| file = open(filename, 'r') | |
| data = file.read() | |
| file.close() | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #2022-07-25 Argparse Parse from Command Line | |
| "C:\<Entwicklung>\WORK_JUPYTER\root\tools\argparse_template.py" | |
| """ template code for argparse """ | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| import pyodbc | |
| class DataImporter: | |
| """ | |
| This Python class takes in a pandas DataFrame from a CSV file and a SQL Server connection string, as | |
| well as a name for the SQL table where the data will be imported. The class compares the columns of | |
| the DataFrame to those in the SQL table and ensures that at least 50% of the fields match. If there | |
| are missing fields, the class adds them to the SQL table using the same naming convention as existing | |
| fields. All new fields are created as varchar fields by default. Once the SQL table has all the fields | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| @pd.api.extensions.register_series_accessor("nullsafe") | |
| class NullSafeSeriesAccessor: | |
| """ | |
| Null-safe comparison accessor for Pandas Series. | |
| This is equivalent to a null-safe equal operator in SQL (<=>) where | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # This example is adapted from one of the codebases I contribute to. | |
| # Unfortunately, I can't share the original code, so I tried to reproduce the | |
| # scenario as faithfully as I could, as an example that I can share publicly. | |
| # | |
| # I'm sure the example could be simplified, but I thought it would be better to | |
| # show it as it is, and leave any conclusions to whomever is going to read it. | |
| # | |
| from typing import Union, TypeVar, Callable | |
| from typing_extensions import TypeAlias, Literal, overload | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | """Unit conversion""" | |
| from dataclasses import dataclass | |
| from functools import total_ordering | |
| from enum import Enum | |
| from typing import Callable, NamedTuple | |
| class UnitDesc(NamedTuple): | |
| """Unit description""" | |
| scale: float |