shsdev · November 19, 2024 08:40
diff --git a/simple_pseudonymization.py b/simple_pseudonymization.py
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-

 # Requires pandas and numpy
 #     pip3 install pandas numpy
 # and the Faker package:
 #     pip3 install Faker

 from faker import Faker
 import numpy as np
 import pandas as pd

 # Example dataframe: repeated names, one missing value (NaN) and one empty string.
 df = pd.DataFrame({'name': {0: 'foo', 1: 'bar', 2: 'foo', 3: 'foobar', 4: 'foobar', 5: np.nan, 6: ''}})

 print(df)
 #      name
 # 0     foo
 # 1     bar
 # 2     foo
 # 3  foobar
 # 4  foobar
 # 5     NaN
 # 6

 # Pseudonymization of the name column using Faker.
 fake = Faker()

 # Treat empty strings as missing values so they are never given a pseudonym.
 df.replace('', np.nan, inplace=True)

 # Name replacement dictionary: one pseudonym per distinct original name.
 # - dropna() removes every kind of missing value; an identity test against
 #   np.nan would only catch the np.nan singleton itself (not None or other
 #   NaN objects) and would hand those a pseudonym.
 # - fake.unique asks Faker not to return the same generated name twice, so two
 #   different original names are not merged into a single pseudonym.
 name_replacements = {
     name: fake.unique.name().lower().replace(" ", "")
     for name in df['name'].dropna().unique()
 }

 # Apply the replacement as a single dictionary lookup per row. Every
 # non-missing name is a key of name_replacements; missing values stay NaN.
 df['name'] = df['name'].map(name_replacements)

 print(df)
 # Example output (pseudonyms are random and differ on every run):
 #            name
 # 0     traceymata
 # 1   susansmithmd
 # 2     traceymata
 # 3  charlesrogers
 # 4  charlesrogers
 # 5            NaN
 # 6            NaN
	#!/usr/bin/python3
	# -*- coding: utf-8 -*-

	# Requires pandas and numpy
	#     pip3 install pandas numpy
	# and the Faker package:
	#     pip3 install Faker

	from faker import Faker
	import numpy as np
	import pandas as pd

	# Example dataframe: repeated names, one missing value (NaN) and one empty string.
	df = pd.DataFrame({'name': {0: 'foo', 1: 'bar', 2: 'foo', 3: 'foobar', 4: 'foobar', 5: np.nan, 6: ''}})

	print(df)
	#      name
	# 0     foo
	# 1     bar
	# 2     foo
	# 3  foobar
	# 4  foobar
	# 5     NaN
	# 6

	# Pseudonymization of the name column using Faker.
	fake = Faker()

	# Treat empty strings as missing values so they are never given a pseudonym.
	df.replace('', np.nan, inplace=True)

	# Name replacement dictionary: one pseudonym per distinct original name.
	# - dropna() removes every kind of missing value; an identity test against
	#   np.nan would only catch the np.nan singleton itself (not None or other
	#   NaN objects) and would hand those a pseudonym.
	# - fake.unique asks Faker not to return the same generated name twice, so two
	#   different original names are not merged into a single pseudonym.
	name_replacements = {
	    name: fake.unique.name().lower().replace(" ", "")
	    for name in df['name'].dropna().unique()
	}

	# Apply the replacement as a single dictionary lookup per row. Every
	# non-missing name is a key of name_replacements; missing values stay NaN.
	df['name'] = df['name'].map(name_replacements)

	print(df)
	# Example output (pseudonyms are random and differ on every run):
	#            name
	# 0     traceymata
	# 1   susansmithmd
	# 2     traceymata
	# 3  charlesrogers
	# 4  charlesrogers
	# 5            NaN
	# 6            NaN