Skip to content

Instantly share code, notes, and snippets.

@bernhardkaindl
Created September 22, 2024 23:17
Show Gist options
  • Save bernhardkaindl/cf6e3cf4146b83231e15829000437b1e to your computer and use it in GitHub Desktop.
Save bernhardkaindl/cf6e3cf4146b83231e15829000437b1e to your computer and use it in GitHub Desktop.
Skipt um neue Lehrlinge vom 1. LJ per Plugin "Import and export users and customers" zu importieren
#!/usr/bin/env python3
# Installl Dependencies:
# sudo apt install -y python3-pandas python3-ipython python3-unidecode
infile = "Lehrlingsdatenbank.xlsx"
from datetime import datetime
from IPython.display import display
import locale
import pandas as pd
import unidecode
locale.setlocale(locale.LC_ALL, "de_DE")
def remove_accents(a):
return unidecode.unidecode(a)
cols = {
"Vorname": str,
"Nachname": str,
"LJ": str,
"Mail": str,
"Eintritt": datetime,
}
df = pd.read_excel(
infile,
sheet_name="Lehrlingsdatenbank",
header=3,
usecols="B:D,I:J,E:F,H,K",
parse_dates=["Eintritt"],
dtype=cols,
)
print(df.info())
df = df.loc[df["LJ"].isin(["1", "1+2", "1+2+3", "1+3"])]
#print(df)
#df = df.loc[df["LJ"].isin(["1", "1+2", "1+2+3", "1+3"])]
# apply strip() method to all strings in DataFrame
# FIXME: Behalten!!!
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
# Leerzeichen in gruppe entfernen
df["Gruppe"] = df["Gruppe"].str.replace(" ", "") # translate(mytable)
transliterate_umlauts = str.maketrans(
{
"Ä": "Ae",
"Ö": "Oe",
"Ü": "Ue",
"ä": "ae",
"ö": "oe",
"ü": "ue",
"ß": "ss",
"'": "",
# "-": " ",
}
)
# Beispiel: Doppelte User einen anderen Nachnament geben:
def fixup_duplicate_names(row):
if row["Vorname"] == "Bernhard" and row["Nachname"] == "Mustermann" and row["LJ"] == "1":
# print("Jana")
return "Mustermann 1.LJ"
return row["Nachname"]
df["Nachname"] = df.apply(lambda row: fixup_duplicate_names(row), axis="columns")
df["Mail"] = df.apply(
lambda row: "1lj." + row["Mail"]
if row["Nachname"] == "Huber 1.LJ"
else row["Mail"],
axis="columns",
)
def fixup_mail_col(row):
mail = row["Mail"]
if not isinstance(mail, str):
mail = remove_accents(
row["Vorname"].split(" ")[0].translate(transliterate_umlauts)
+ "."
+ row["Nachname"].translate(transliterate_umlauts)
+ "@powerakademie.at"
)
return mail.lower()
df["Mail"] = df.apply(lambda row: fixup_mail_col(row), axis="columns")
def create_username(row):
user = row["Vorname"].split(" ")[0].split("-")[0].translate(transliterate_umlauts)
user += " " + row["Nachname"].translate(transliterate_umlauts)
return remove_accents(user)
df["user_login"] = df.apply(lambda row: create_username(row), axis="columns")
# Only after the mail address have been set for those which have no mail!!!!!:
# Remove User which have NaN (not a number - empty values, sollte nur in der Gruppe
# vorkommen, z.B. Lazarovic Oggi - wurde ausgeschlossen aus der Powerakademie)
# df = df.dropna()
def gruppe_an_nachnamen_anhaengen(row):
if "1" in row["LJ"]:
year = "24"
else:
year = "22" if "2" in row["LJ"] or "3" in row["LJ"] else "23"
return f'{row["Nachname"]} ({row["Gruppe"]},{year})'
df["Nachname"] = df.apply(
lambda row: gruppe_an_nachnamen_anhaengen(row), axis="columns"
)
def display_name(row):
# FIXME: hack to emulate strange only one happened bug in Excel xlsm formulas
# if row["Vorname"].endswith(" "):
# return row["Vorname"] + " " + row["Nachname"] # + " (" + row["Gruppe"] + ")"
return row["Vorname"] + " " + row["Nachname"] # + " (" + row["Gruppe"] + ")"
df["display_name"] = df.apply(lambda row: display_name(row), axis="columns")
pd.set_option("display.max_rows", 200)
df = df.rename(
columns={"Vorname": "first_name", "Nachname": "last_name", "Mail": "user_email"}
)
df = df.sort_values(["last_name", "first_name", "user_email"])
df["password"] = "start2024"
bundeslaender = {
# "BL": "Burgenland",
"BGLD": "Burgenland",
"NÖ": "Niederösterreich",
"OÖ": "Oberösterreich",
"SBG": "Salzburg",
"STMK": "Steiermark",
"T": "Tirol",
"V": "Vorarlberg",
"W": "Wien",
"K": "Kärnten",
}
# df["Eintritt"] = pd.to_datetime(df["Eintritt"], format="mixed")
df["Eintritt"] = pd.to_datetime(df["Eintritt"])
# print(row.keys())
# Bundesland: {bundeslaender[row["BL"]]}
#{row["LJ"]}. Lehrjahr
# Eintritt: {row["Eintritt"].strftime("%e. %b %Y")}
df["description"] = df.apply(
lambda row: f"""
Markt: {row["Markt"]}
NR: {row["NR"]}""",
axis="columns",
)
# df["first_name"] = df["first_name"].str.strip()
# df["user_login"] = df["user_login"].str.strip()
# encoding="cp1252"
encoding="utf-8"
print(
df.to_csv(
infile.replace("Lehrlingsdatenbank ", "WordpressPower-UserImport")
+ "-" + encoding + ".csv",
index=None,
encoding=encoding,
columns=[
"user_login",
"user_email",
"first_name",
"last_name",
"display_name",
"password",
"description",
],
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment