Skip to content

Instantly share code, notes, and snippets.

@AsaK
Last active July 21, 2017 18:47
Show Gist options
  • Save AsaK/2675429d4b779dcd3a768ea36b2c7213 to your computer and use it in GitHub Desktop.
Save AsaK/2675429d4b779dcd3a768ea36b2c7213 to your computer and use it in GitHub Desktop.
Script para verificar 'winners' duplicados no MongoDB e inseri-los numa tabela no DynamoDB
import sys
import pymongo
import boto3
import pandas as pd
import time
import datetime
# MongoDB client. The connection string is an empty placeholder in this file.
connection = pymongo.MongoClient('')
# Database handle used by the query helpers below (`winner` and `minute` collections).
db = connection.module_database
# AWS session. The access key and secret are empty placeholders in this file.
# NOTE(review): presumably real values are supplied before running — confirm,
# and prefer loading them from the environment over hard-coding them here.
session = boto3.Session(aws_access_key_id='', aws_secret_access_key='')
# DynamoDB service resource used by insert_log(); the region is also left blank.
BD = session.resource('dynamodb', region_name='')
def insert_log(wrong_winner, bid_winner=None, bid_id=None, item_order=None):
    """Write one record to the DynamoDB table 'DuplicatedWinner'.

    Every argument is converted with ``str()`` before being stored, so an
    omitted optional argument is written as the text ``'None'``. A
    ``creation_at`` attribute holding the current local (naive) timestamp is
    added to each record.

    The write is best-effort: any ordinary error raised while talking to
    DynamoDB is reported on stdout and the function returns normally, so a
    single failed insert does not abort the caller's loop.

    Args:
        wrong_winner: Value stored under ``wrongWinner``.
        bid_winner: Value stored under ``bidWinner``.
        bid_id: Value stored under ``bidID``.
        item_order: Value stored under ``itemOrder``.

    Returns:
        None.
    """
    try:
        table = BD.Table('DuplicatedWinner')
        table.put_item(Item={
            'wrongWinner': str(wrong_winner),
            'bidWinner': str(bid_winner),
            'bidID': str(bid_id),
            'itemOrder': str(item_order),
            'creation_at': str(datetime.datetime.now()),
        })
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the script, and include
        # the underlying error so a failed insert can be diagnosed.
        print('Erro ao inserir no DynamoDB: {0}'.format(exc))
def get_duplcated_winners():
    """Return the groups of `winner` documents that occur more than once.

    Runs an aggregation on ``db.winner`` that groups documents by provider
    hash, requested-item order, bid trading and bid unit code, counts each
    group, and keeps only the groups whose count is greater than 1.

    Returns:
        A pandas DataFrame built from the aggregation results; each result
        document carries the group key under ``_id`` and the group size
        under ``count``.
    """
    group_key = {
        "winner_data.hash_provider": "$winner_data.hash_provider",
        "requested_item.order": "$requested_item.order",
        "bid.trading": "$bid.trading",
        "bid.unit.code": "$bid.unit.code",
        # NOTE(review): this is a constant string inside the group key, not a
        # filter on the portal — confirm whether a $match was intended.
        "bid.portal": "COMPRASNET",
    }
    count_per_group = {"$group": {"_id": group_key, "count": {"$sum": 1}}}
    repeated_only = {"$match": {"count": {"$gt": 1}}}

    cursor = db.winner.aggregate(
        [count_per_group, repeated_only],
        allowDiskUse=True,
    )
    return pd.DataFrame(list(cursor))
def get_winners(row):
    """Fetch every `winner` document that matches a duplicate-group key.

    Args:
        row: Mapping that provides the keys 'requested_item.order',
            'bid.unit.code', 'bid.trading' and 'winner_data.hash_provider'.

    Returns:
        A pandas DataFrame with one row per matching document from
        ``db.winner`` (empty when nothing matches).
    """
    winners = db.winner
    criteria = {
        "requested_item.order": row['requested_item.order'],
        "bid.unit.code": row['bid.unit.code'],
        "bid.trading": row['bid.trading'],
        "winner_data.hash_provider": row['winner_data.hash_provider'],
    }
    documents = list(winners.find(criteria))
    return pd.DataFrame(documents)
def get_bid(row):
    """Fetch the `minute` documents that match a duplicate-group key.

    Args:
        row: Mapping that provides the keys 'requested_item.order',
            'bid.unit.code' and 'bid.trading'.

    Returns:
        A pandas DataFrame with one row per matching document from
        ``db.minute`` (empty when nothing matches).
    """
    minutes = db.minute
    criteria = {
        "requested_items.order": row['requested_item.order'],
        "unit.code": row['bid.unit.code'],
        "trading": row['bid.trading'],
    }
    documents = list(minutes.find(criteria))
    return pd.DataFrame(documents)
def main():
    """Check every duplicated winner group against its bid and log mismatches.

    For each group returned by get_duplcated_winners(), the matching winner
    and bid documents are loaded. When there is more than one winner and
    exactly one bid, each winner row is compared with the winner recorded on
    the bid's requested item; rows that differ, or rows whose bid item has no
    'winner' entry, are sent to insert_log(). Progress and the total elapsed
    time are printed to stdout.
    """
    print('Script de correçao de duplicados iniciado.')
    started = time.time()

    for group in get_duplcated_winners().itertuples():
        # Position 1 is the first DataFrame column; it is indexed below with
        # the group-key names, so it is presumably the aggregation `_id`.
        key = group[1]
        winners = get_winners(key)
        bids = get_bid(key)

        if len(winners) <= 1 or len(bids) != 1:
            print("Winner nao e duplicado, verifique sua consulta ao MongoDB")
            continue

        order = key['requested_item.order']
        for winner in winners.itertuples():
            # First column of the winner row — presumably the document `_id`;
            # this depends on DataFrame column order, confirm.
            candidate = winner[1]
            # Assumes `order` is a 1-based position into the bid's
            # requested_items list — TODO confirm.
            bid_item = bids.requested_items[0][order - 1]

            if 'winner' not in bid_item:
                print('Nao existe Winner associado no BID. ({0})'.format(candidate))
                insert_log(candidate)
                continue

            bid_winner = bid_item['winner'][0]
            if bid_winner != candidate:
                # Send the duplicate's data to DynamoDB.
                insert_log(candidate, bid_winner, bids.iloc[0]['_id'], order)
                print('Winner Duplicado ({0}) encontrado, Logado no Dynamo.'.format(candidate))

    elapsed = time.time() - started
    print('Tempo de execuçao: {0}'.format(elapsed))
    print('Script executado com sucesso')
# Run the check only when this file is executed as a script; whatever main()
# returns is handed to sys.exit() as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment