Skip to content

Instantly share code, notes, and snippets.

@steniowagner
Created March 28, 2018 16:09
Show Gist options
  • Save steniowagner/3579bc5567e86972dc6de934d7c685a2 to your computer and use it in GitHub Desktop.
Save steniowagner/3579bc5567e86972dc6de934d7c685a2 to your computer and use it in GitHub Desktop.
import datetime

import pandas

# Load the records, skipping the first row of the file and assigning explicit
# column names (Portuguese: nome=name, genero=gender,
# data_nascimento=birth date, cidade=city, estado=state; 'doencax' is
# presumably the disease column -- confirm against the CSV).
dataset = pandas.read_csv(
    'epidemias.csv',
    skiprows=1,
    header=None,
    names=['nome', 'genero', 'data_nascimento', 'cidade', 'estado', 'doencax'],
)

# Distinct cities/states, used to turn a name into a numeric position.
# They are kept in sorted order so the position of a given name is the same on
# every run; a plain set of strings iterates in a different order per process
# because of hash randomization. key=str keeps sorting safe if a column holds
# missing values mixed with text.
cities = sorted(set(dataset['cidade']), key=str)
states = sorted(set(dataset['estado']), key=str)
def get_gender_value(raw_gender):
    """Encode a gender cell as an integer.

    Args:
        raw_gender: pandas Series holding the gender; only its first element
            is read (callers pass the 'genero' column of a one-row frame).

    Returns:
        0 when the value is 'Female', 1 for anything else.

    Raises:
        IndexError: if the Series is empty.
    """
    # Read the cell itself instead of parsing the text rendering of the
    # Series, which depends on pandas' display padding.
    gender = str(raw_gender.iloc[0]).strip()
    return 0 if gender == 'Female' else 1
def get_city_value(raw_city):
    """Encode a city cell as its position among the known cities.

    Args:
        raw_city: pandas Series holding the city; only its first element is
            read (callers pass the 'cidade' column of a one-row frame).

    Returns:
        The index of the city within ``list(cities)``, where ``cities`` is
        the module-level collection of distinct city names.

    Raises:
        IndexError: if the Series is empty.
        ValueError: if the city is not present in ``cities``.
    """
    # Use the cell value directly: it is exactly what `cities` was built
    # from, so no assumption about to_string() padding is needed.
    city = raw_city.iloc[0]
    return list(cities).index(city)
def get_state_value(raw_state):
    """Encode a state cell as its position among the known states.

    Args:
        raw_state: pandas Series holding the state; only its first element is
            read (callers pass the 'estado' column of a one-row frame).

    Returns:
        The index of the state within ``list(states)``, where ``states`` is
        the module-level collection of distinct state names.

    Raises:
        IndexError: if the Series is empty.
        ValueError: if the state is not present in ``states``.
    """
    # Taking the last whitespace-separated token of the rendered Series
    # truncated multi-word names, which then failed the lookup below. Reading
    # the cell keeps the full name.
    state = raw_state.iloc[0]
    return list(states).index(state)
def get_days_from_date(raw_birth_date):
    """Return the number of days between a birth date and today.

    Args:
        raw_birth_date: pandas Series whose first element is a date written
            as 'month/day/year'.

    Returns:
        Whole days elapsed from the birth date to the current local date
        (0 for today, negative for a date in the future).

    Raises:
        IndexError: if the Series is empty.
        ValueError: if the text is not three '/'-separated integers forming a
            valid date.
    """
    birth_date_text = str(raw_birth_date.iloc[0]).strip()
    month, day, year = birth_date_text.split('/')
    birth_date = datetime.date(int(year), int(month), int(day))
    # Use timedelta.days rather than parsing str(timedelta): a zero-day
    # difference renders as '0:00:00', which has no 'N days' prefix to parse.
    return (datetime.date.today() - birth_date).days
def get_register_value(register):
    """Collapse one record into a single integer score.

    Args:
        register: a one-row frame with the 'genero', 'estado', 'cidade' and
            'data_nascimento' columns.

    Returns:
        The sum of the gender, state, city and birth-date encodings produced
        by the corresponding helper functions.
    """
    components = (
        get_gender_value(register['genero']),
        get_state_value(register['estado']),
        get_city_value(register['cidade']),
        get_days_from_date(register['data_nascimento']),
    )
    return sum(components)
def define_groups(dataset, k):
    """Split the records of *dataset* into consecutive groups of *k* rows.

    When the frame has a 'register_value' column the rows are first ordered
    by it, so each group holds records with neighbouring scores regardless of
    the order of the frame that was passed in.

    Args:
        dataset: pandas DataFrame with one record per row.
        k: desired number of records per group; must be at least 1.

    Returns:
        A list of groups. Each group is a list of one-row DataFrames. Every
        group has exactly *k* members except possibly the last one, which
        holds the remainder. An empty frame yields an empty list.

    Raises:
        ValueError: if *k* is smaller than 1.
    """
    if k < 1:
        raise ValueError('k must be a positive integer')

    if 'register_value' in dataset.columns:
        # Stable sort: re-sorting an already ordered frame leaves it as is.
        dataset = dataset.sort_values('register_value', kind='mergesort')

    # One-row frames (iloc[[i]]) keep the column labels for later lookups.
    rows = [dataset.iloc[[i]] for i in range(len(dataset))]
    return [rows[start:start + k] for start in range(0, len(rows), k)]
def anonymize_birth_date(group, k):
    """Print the birth-date column of the first record in *group*.

    No anonymization is performed here; the function only writes the text
    rendering of the first member's 'data_nascimento' column to stdout.

    Args:
        group: iterable of records, each indexable by 'data_nascimento'.
        k: not used by this function.
    """
    first_record = list(group)[0]
    birth_date = first_record['data_nascimento']
    print(birth_date.to_string())
def anonymize_gender(group, k):
    """Mask the gender of a group whose members do not all share one gender.

    At most the first *k* members of *group* are considered. If they hold
    more than one distinct gender, every entry is replaced by '*'; otherwise
    the entries are left as the members' original 'genero' columns. Progress
    is printed to stdout.

    Args:
        group: sequence of one-row frames with a 'genero' column.
        k: maximum number of members to look at. A group with fewer than *k*
            members is processed as is instead of raising.

    Returns:
        A list with one entry per considered member: '*' when the group was
        masked, otherwise that member's 'genero' Series.
    """
    members = list(group)[:k]
    group_copy = [member['genero'] for member in members]

    # Compare the actual cell values rather than their text rendering.
    genders = {column.iloc[0] for column in group_copy}

    if len(genders) > 1:
        for i, member in enumerate(members):
            print('---------------------------')
            print(member['genero'])
            print('---------------------------')
            group_copy[i] = '*'

    print(group_copy)
    return group_copy
def anonymize_state(group, k):
    # Unfinished placeholder: neither `group` nor `k` is read, nothing is
    # returned, and no state is anonymized.
    # TODO: implement, presumably along the lines of anonymize_gender.
    group_copy = []
    # Bare reference to the module-level `states`; the value is evaluated and
    # discarded, so this line has no effect beyond the name lookup.
    states
def analyse(dataset, k):
    """Group the records of *dataset* and anonymize the gender of each group.

    Args:
        dataset: pandas DataFrame handed to ``define_groups``.
        k: requested group size.
    """
    groups = define_groups(dataset, k)
    # Walk the groups that were actually produced instead of assuming there
    # are 1000 of them, and never ask for more members than a group holds.
    for group in groups:
        members = list(group)
        if not members:
            continue
        anonymize_gender(members, min(k, len(members)))
# Score every record; the row count comes from the data rather than a fixed
# 1000, so files of any length are handled.
register_value = [
    get_register_value(dataset.iloc[[i]]) for i in range(len(dataset))
]

# Work on a copy so that masking the names does not also overwrite the
# originally loaded frame.
new_dataset = dataset.copy()
new_dataset['nome'] = '*'
new_dataset['register_value'] = register_value

# Order by score so that neighbouring rows are similar records, then analyse
# that ordered frame with groups of 3.
new_dataset = new_dataset.sort_values(['register_value'])
analyse(new_dataset, 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment