Skip to content

Instantly share code, notes, and snippets.

@syaffers
Created May 15, 2019 07:02
Show Gist options
  • Save syaffers/d271f7d29d3b01bab2d04fa7ff01cac2 to your computer and use it in GitHub Desktop.
Save syaffers/d271f7d29d3b01bab2d04fa7ff01cac2 to your computer and use it in GitHub Desktop.
The first iteration of the TES names dataset
import os
from torch.utils.data import Dataset
class TESNamesDataset(Dataset):
    """Dataset of (race, gender, name) samples read from a directory tree.

    The expected layout is ``data_root/<race>/<gender>``, where each
    ``<gender>`` entry is a text file holding one name per line. The race
    and gender labels are taken verbatim from the directory and file names.
    """

    def __init__(self, data_root):
        """Load every name under *data_root* into memory.

        Args:
            data_root: Path to the directory that contains one sub-directory
                per race.
        """
        super().__init__()
        self.samples = []

        # os.listdir() returns entries in arbitrary order; sort so that a
        # given index always maps to the same sample on every machine.
        for race in sorted(os.listdir(data_root)):
            race_folder = os.path.join(data_root, race)
            # Ignore stray files (README, .DS_Store, ...) next to the race
            # folders instead of crashing on os.listdir() of a non-directory.
            if not os.path.isdir(race_folder):
                continue

            for gender in sorted(os.listdir(race_folder)):
                gender_filepath = os.path.join(race_folder, gender)
                # Only regular files can hold names; skip nested folders.
                if not os.path.isfile(gender_filepath):
                    continue

                # Explicit encoding so decoding does not depend on the
                # platform's locale default.
                with open(gender_filepath, 'r', encoding='utf-8') as gender_file:
                    for name in gender_file.read().splitlines():
                        self.samples.append((race, gender, name))

    def __len__(self):
        """Return the total number of loaded samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the ``(race, gender, name)`` tuple stored at *idx*."""
        return self.samples[idx]
if __name__ == '__main__':
    # Quick manual check: load the names from a local copy of the data,
    # then print the sample count and one arbitrary sample.
    tes_names = TESNamesDataset('/home/syafiq/Data/tes-names/')
    sample_count = len(tes_names)
    print(sample_count)
    picked_sample = tes_names[420]
    print(picked_sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment