Created
January 29, 2019 02:53
-
-
Save olivx/eebde8a3229b9456bee429b78a648869 to your computer and use it in GitHub Desktop.
state city pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Not referenced by the rest of the visible script; presumably the job under
# analysis -- confirm before removing.
job = 'b5ff7064-d3cb-4886-964d-418baa1f341c'

# Candidates whose profile country contains "bra" (case-insensitive).
candidates = Candidate.objects.filter(
    candidate__userprofile__country__icontains='bra'
)

# Every lookup table below is derived from the states of the 'pt-br' country.
_pt_br_states = State.objects.filter(country__language_code='pt-br')
list_expected = _pt_br_states.values_list('shortname', flat=True)
states_name = _pt_br_states.values_list('name', flat=True)

# Lookup keys pulled into variables to cut down on repetition.
_state_key = 'candidate__userprofile__state'
candidate_pk = 'candidate__pk'

# Upper-cased state name -> upper-cased abbreviation, e.g. {'SAO PAULO': 'SP'}.
states_dict = {
    full_name.upper(): abbreviation.upper()
    for full_name, abbreviation in _pt_br_states.values_list('name', 'shortname')
}

# State names converted to upper case for comparison with normalised input.
qs_states_name_noralized = [full_name.upper() for full_name in states_name]
def data_normalize(data):
    """Normalise a state (UF) value so equivalent spellings count together.

    The text is NFKD-decomposed, every non-ASCII character (such as the
    combining accents produced by the decomposition) is dropped, surrounding
    whitespace is trimmed and the result is upper-cased.

    Returns ``None`` when ``data`` is ``None``.
    """
    if data is None:
        return None
    decomposed = normalize('NFKD', data)
    ascii_only = decomposed.encode('ASCII', 'ignore').decode('ASCII')
    return ascii_only.strip().upper()
from collections import Counter | |
from unicodedata import normalize | |
# One dict per candidate holding the candidate pk and the state on the profile.
list_values = candidates.values('candidate__userprofile__state', 'candidate__pk')

counter = Counter()           # values that match a full state name
counter_other = Counter()     # everything else, including missing (None) values
counter_expected = Counter()  # values that already match a state abbreviation

# Data for a later update, shaped like
# {'SAO PAULO': {'SP': ['CANDIDATE_PK_1', 'CANDIDATE_PK_2']}}.
update_user_states = {}

# Rows after their state value has been normalised.
list_values_normalized = []

# Normalise the state of every row (the row dict is changed in place).
for row in list_values:
    row[_state_key] = data_normalize(row[_state_key])
    list_values_normalized.append(row)

# Classify each normalised row and count it in the matching counter.
for entry in list_values_normalized:
    state_value = entry[_state_key]
    pk = entry[candidate_pk]

    # Missing values go straight to the "other" bucket.
    if state_value is None:
        counter_other[state_value] += 1
        continue

    if state_value in qs_states_name_noralized:
        # A full state name: remember which candidates must get the abbreviation.
        abbreviation = states_dict[state_value]
        pk_by_abbreviation = update_user_states.setdefault(
            state_value, {abbreviation: []}
        )
        pk_by_abbreviation[abbreviation].append(pk)
        counter[state_value] += 1
    elif state_value.upper().strip() in list_expected:
        # Already stored the way the system expects (the abbreviation).
        counter_expected[state_value] += 1
    else:
        # Anything else, which may contain arbitrary text.
        counter_other[state_value] += 1

# Rank every counter from the most to the least frequent value.
order_counter = counter.most_common()
order_counter_other = counter_other.most_common()
order_counter_expected = counter_expected.most_common()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from unicodedata import normalize | |
# Pipe-separated state/city file; the 'name' column is upper-cased on load.
df = pd.read_csv('est_city.csv', sep='|', index_col=False)
df['name'] = df['name'].str.upper()

# Upper-cased state name -> State instance, for the states of the 'pt-br' country.
states_name = State.objects.filter(country__language_code='pt-br')
dict_s = {state.name.upper(): state for state in states_name}
def remover_acentos(txt, codif='utf-8'):
    """Return ``txt`` upper-cased with accents and other non-ASCII characters removed.

    Args:
        txt: Text to clean. Either ``str`` or ``bytes``; bytes are decoded
            with ``codif`` first.
        codif: Encoding used only when ``txt`` is ``bytes``.

    Returns:
        The cleaned text as ``str``, so it can be compared with (and used as
        a key alongside) other text values.
    """
    # Only bytes need decoding: text strings have no ``decode`` method on
    # Python 3, so decoding unconditionally fails for them.
    if isinstance(txt, bytes):
        txt = txt.decode(codif)
    # NFKD splits accented characters into base letter + combining mark; the
    # ASCII round-trip then discards the marks and any other non-ASCII character.
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII').upper()
def _build_city(row):
    """Build an unsaved City from one CSV row tuple (state at [1], city at [3])."""
    state_key = remover_acentos(row[1])
    city_name = remover_acentos(row[3])
    # Raises KeyError when the cleaned state name is not a key of dict_s.
    return City(name=city_name, state=dict_s[state_key])


# One City instance per CSV row, each linked to its State.
list_estate_city = [_build_city(row) for row in df.itertuples()]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment