Skip to content

Instantly share code, notes, and snippets.

@kstrauss
Last active April 26, 2023 01:57
Show Gist options
  • Save kstrauss/bac26d6458603b634479cc990c12fb97 to your computer and use it in GitHub Desktop.
Save kstrauss/bac26d6458603b634479cc990c12fb97 to your computer and use it in GitHub Desktop.
Simple samples of functions to depersonalize data
/// <summary>
/// Script entry point: loads the first-name, surname and test-name lists,
/// dumps a few random integers, then dumps a replacement name for each of
/// the last ten test names.
/// </summary>
/// <remarks>
/// Returns <see cref="Task"/> rather than <c>void</c> so that exceptions thrown
/// while reading the files are observed by the host instead of being lost.
/// </remarks>
async Task Main()
{
    // Start all three reads up front so they can overlap.
    // baby first names https://github.com/hadley/data-baby-names
    var firstNamesTask = ReadFileAsync(@"C:\temp\depersonal\bfnames.csv");
    // surnames https://github.com/fivethirtyeight/data/blob/master/most-common-name/surnames.csv
    var lastNamesTask = ReadFileAsync(@"C:\temp\depersonal\surnamesClean.csv");
    var testNamesTask = ReadFileAsync(@"c:\temp\depersonal\nbaPlayers.csv");

    // Small demo of raw random output.
    const int sampleCount = 3;
    var samples = new int[sampleCount];
    for (int i = 0; i < sampleCount; i++)
    {
        samples[i] = Random.Shared.Next();
    }
    samples.Dump();

    // Await the lists once; the projection below can then be synchronous and
    // yields concrete rows instead of a sequence of tasks.
    await Task.WhenAll(firstNamesTask, lastNamesTask, testNamesTask);
    var firstNames = await firstNamesTask;
    var lastNames = await lastNamesTask;
    var testNames = await testNamesTask;

    testNames.TakeLast(10)
        .Select(original =>
        {
            var (first, last) = GenerateName(original, firstNames, lastNames);
            return new { Original = original, DeFirst = first, DeLast = last };
        })
        .ToList()
        .Dump();
}
// You can define other methods, fields, classes and namespaces here
/// <summary>
/// Picks a replacement first and last name from the supplied lists.
/// </summary>
/// <param name="original">The original value being replaced; only used to derive the seed when <paramref name="deterministic"/> is true.</param>
/// <param name="fnames">Candidate first names; must contain at least one entry.</param>
/// <param name="lnames">Candidate last names; must contain at least one entry.</param>
/// <param name="deterministic">
/// When true, the random generator is seeded from the MD5 hash of <paramref name="original"/>,
/// so the same original and the same lists give the same picks. When false, <see cref="Random.Shared"/> is used.
/// </param>
/// <returns>A tuple of (first name, last name).</returns>
/// <exception cref="ArgumentNullException"><paramref name="fnames"/> or <paramref name="lnames"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="fnames"/> or <paramref name="lnames"/> is empty.</exception>
public static (string,string) GenerateName(string original, List<string> fnames, List<string> lnames, bool deterministic = false)
{
    ArgumentNullException.ThrowIfNull(fnames);
    ArgumentNullException.ThrowIfNull(lnames);
    if (fnames.Count == 0)
    {
        throw new ArgumentException("At least one first name is required.", nameof(fnames));
    }
    if (lnames.Count == 0)
    {
        throw new ArgumentException("At least one last name is required.", nameof(lnames));
    }

    // The seed is the first four bytes of the MD5 digest of the original value.
    Random random = deterministic
        ? new Random(BitConverter.ToInt32(CalculateMD5Hash(original)))
        : Random.Shared;

    // Draw order matters for the deterministic case: first name, then last name.
    string firstName = fnames[random.Next(0, fnames.Count)];
    string lastName = lnames[random.Next(0, lnames.Count)];
    return (firstName, lastName);
}
/// <summary>
/// Reads a text file asynchronously and returns its lines in order.
/// </summary>
/// <param name="fname">Path of the file to read.</param>
/// <returns>A list with one entry per line; empty when the file has no content.</returns>
async Task<List<string>> ReadFileAsync(string fname)
{
    var lines = new List<string>();
    using var reader = new StreamReader(fname);

    // ReadLineAsync returns null at end of stream. Looping on that avoids
    // StreamReader.EndOfStream, which may block synchronously, and keeps
    // null entries out of the list.
    string? line;
    while ((line = await reader.ReadLineAsync()) is not null)
    {
        lines.Add(line);
    }

    return lines;
}
/// <summary>
/// Computes the MD5 digest of the UTF-8 encoding of <paramref name="input"/>.
/// </summary>
/// <param name="input">Text to hash.</param>
/// <returns>The 16-byte MD5 digest.</returns>
public static byte[] CalculateMD5Hash(string input)
{
    byte[] utf8 = Encoding.UTF8.GetBytes(input);
    // One-shot static hashing; no hash-algorithm instance to create or dispose.
    return MD5.HashData(utf8);
}
/// <summary>
/// Reads a CSV file and returns its rows as a list of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Record type each row is read as.</typeparam>
/// <param name="fname">Path of the CSV file.</param>
/// <returns>All records read from the file.</returns>
List<T> ReadCSV<T>(string fname)
{
    using var textReader = new StreamReader(fname);
    using var csvReader = new CsvReader(textReader, CultureInfo.InvariantCulture);

    // Copy the records into a list before the readers are disposed on return.
    var rows = new List<T>(csvReader.GetRecords<T>());
    return rows;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment