Created
September 22, 2024 23:17
-
-
Save bernhardkaindl/cf6e3cf4146b83231e15829000437b1e to your computer and use it in GitHub Desktop.
Skipt um neue Lehrlinge vom 1. LJ per Plugin "Import and export users and customers" zu importieren
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Installl Dependencies: | |
# sudo apt install -y python3-pandas python3-ipython python3-unidecode | |
infile = "Lehrlingsdatenbank.xlsx" | |
from datetime import datetime | |
from IPython.display import display | |
import locale | |
import pandas as pd | |
import unidecode | |
locale.setlocale(locale.LC_ALL, "de_DE") | |
def remove_accents(a): | |
return unidecode.unidecode(a) | |
cols = { | |
"Vorname": str, | |
"Nachname": str, | |
"LJ": str, | |
"Mail": str, | |
"Eintritt": datetime, | |
} | |
df = pd.read_excel( | |
infile, | |
sheet_name="Lehrlingsdatenbank", | |
header=3, | |
usecols="B:D,I:J,E:F,H,K", | |
parse_dates=["Eintritt"], | |
dtype=cols, | |
) | |
print(df.info()) | |
df = df.loc[df["LJ"].isin(["1", "1+2", "1+2+3", "1+3"])] | |
#print(df) | |
#df = df.loc[df["LJ"].isin(["1", "1+2", "1+2+3", "1+3"])] | |
# apply strip() method to all strings in DataFrame | |
# FIXME: Behalten!!! | |
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) | |
# Leerzeichen in gruppe entfernen | |
df["Gruppe"] = df["Gruppe"].str.replace(" ", "") # translate(mytable) | |
transliterate_umlauts = str.maketrans( | |
{ | |
"Ä": "Ae", | |
"Ö": "Oe", | |
"Ü": "Ue", | |
"ä": "ae", | |
"ö": "oe", | |
"ü": "ue", | |
"ß": "ss", | |
"'": "", | |
# "-": " ", | |
} | |
) | |
# Beispiel: Doppelte User einen anderen Nachnament geben: | |
def fixup_duplicate_names(row): | |
if row["Vorname"] == "Bernhard" and row["Nachname"] == "Mustermann" and row["LJ"] == "1": | |
# print("Jana") | |
return "Mustermann 1.LJ" | |
return row["Nachname"] | |
df["Nachname"] = df.apply(lambda row: fixup_duplicate_names(row), axis="columns") | |
df["Mail"] = df.apply( | |
lambda row: "1lj." + row["Mail"] | |
if row["Nachname"] == "Huber 1.LJ" | |
else row["Mail"], | |
axis="columns", | |
) | |
def fixup_mail_col(row): | |
mail = row["Mail"] | |
if not isinstance(mail, str): | |
mail = remove_accents( | |
row["Vorname"].split(" ")[0].translate(transliterate_umlauts) | |
+ "." | |
+ row["Nachname"].translate(transliterate_umlauts) | |
+ "@powerakademie.at" | |
) | |
return mail.lower() | |
df["Mail"] = df.apply(lambda row: fixup_mail_col(row), axis="columns") | |
def create_username(row): | |
user = row["Vorname"].split(" ")[0].split("-")[0].translate(transliterate_umlauts) | |
user += " " + row["Nachname"].translate(transliterate_umlauts) | |
return remove_accents(user) | |
df["user_login"] = df.apply(lambda row: create_username(row), axis="columns") | |
# Only after the mail address have been set for those which have no mail!!!!!: | |
# Remove User which have NaN (not a number - empty values, sollte nur in der Gruppe | |
# vorkommen, z.B. Lazarovic Oggi - wurde ausgeschlossen aus der Powerakademie) | |
# df = df.dropna() | |
def gruppe_an_nachnamen_anhaengen(row): | |
if "1" in row["LJ"]: | |
year = "24" | |
else: | |
year = "22" if "2" in row["LJ"] or "3" in row["LJ"] else "23" | |
return f'{row["Nachname"]} ({row["Gruppe"]},{year})' | |
df["Nachname"] = df.apply( | |
lambda row: gruppe_an_nachnamen_anhaengen(row), axis="columns" | |
) | |
def display_name(row): | |
# FIXME: hack to emulate strange only one happened bug in Excel xlsm formulas | |
# if row["Vorname"].endswith(" "): | |
# return row["Vorname"] + " " + row["Nachname"] # + " (" + row["Gruppe"] + ")" | |
return row["Vorname"] + " " + row["Nachname"] # + " (" + row["Gruppe"] + ")" | |
df["display_name"] = df.apply(lambda row: display_name(row), axis="columns") | |
pd.set_option("display.max_rows", 200) | |
df = df.rename( | |
columns={"Vorname": "first_name", "Nachname": "last_name", "Mail": "user_email"} | |
) | |
df = df.sort_values(["last_name", "first_name", "user_email"]) | |
df["password"] = "start2024" | |
bundeslaender = { | |
# "BL": "Burgenland", | |
"BGLD": "Burgenland", | |
"NÖ": "Niederösterreich", | |
"OÖ": "Oberösterreich", | |
"SBG": "Salzburg", | |
"STMK": "Steiermark", | |
"T": "Tirol", | |
"V": "Vorarlberg", | |
"W": "Wien", | |
"K": "Kärnten", | |
} | |
# df["Eintritt"] = pd.to_datetime(df["Eintritt"], format="mixed") | |
df["Eintritt"] = pd.to_datetime(df["Eintritt"]) | |
# print(row.keys()) | |
# Bundesland: {bundeslaender[row["BL"]]} | |
#{row["LJ"]}. Lehrjahr | |
# Eintritt: {row["Eintritt"].strftime("%e. %b %Y")} | |
df["description"] = df.apply( | |
lambda row: f""" | |
Markt: {row["Markt"]} | |
NR: {row["NR"]}""", | |
axis="columns", | |
) | |
# df["first_name"] = df["first_name"].str.strip() | |
# df["user_login"] = df["user_login"].str.strip() | |
# encoding="cp1252" | |
encoding="utf-8" | |
print( | |
df.to_csv( | |
infile.replace("Lehrlingsdatenbank ", "WordpressPower-UserImport") | |
+ "-" + encoding + ".csv", | |
index=None, | |
encoding=encoding, | |
columns=[ | |
"user_login", | |
"user_email", | |
"first_name", | |
"last_name", | |
"display_name", | |
"password", | |
"description", | |
], | |
) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment