Last active
April 28, 2017 16:07
-
-
Save cristianmiranda/b17e82d4b3ec8c9684251e73b480d4dc to your computer and use it in GitHub Desktop.
Custom Indicators matcher
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| import psycopg2 | |
| from difflib import SequenceMatcher | |
| from tabulate import tabulate | |
| MIN_SIMILARITY_RATIO = 0.8 | |
| # LOCAL | |
| PSQL_HOST = 'localhost' | |
| PSQL_PORT = '38036' | |
| PSQL_USERNAME = 'magick' | |
| PSQL_PASSWORD = 'magick' | |
| PSQL_DBNAME = 'magick' | |
| # Export to CSV -> https://docs.google.com/spreadsheets/d/1otjo4D8-_2twMQ8ppsJgx201sPPlcdPgDn6ghWRoDsI | |
| CSV_PATH = '/Users/cristianmiranda/Desktop/custom_indicators.csv' | |
| with open(CSV_PATH, 'rb') as f: | |
| reader = csv.reader(f) | |
| indicators = list(reader) | |
| conn = psycopg2.connect( | |
| "dbname='{0}' user='{1}' host='{2}' port='{3}' password='{4}'" | |
| .format(PSQL_DBNAME, PSQL_USERNAME, PSQL_HOST, PSQL_PORT, PSQL_PASSWORD)) | |
| psql_cursor = conn.cursor() | |
| psql_cursor.execute("select * from indicator_meta") | |
| result = psql_cursor.fetchall() | |
| indicators_map = {} | |
| for row in result: | |
| db_indicator_name = row[1] | |
| for indicator in indicators: | |
| csv_indicator_name = indicator[1] | |
| csv_indicator_description = indicator[3] | |
| similarity_ratio = SequenceMatcher(None, db_indicator_name, csv_indicator_name).ratio() | |
| if (db_indicator_name not in indicators_map) or (similarity_ratio > indicators_map[db_indicator_name][0]): | |
| indicators_map[db_indicator_name] = {} | |
| indicators_map[db_indicator_name][0] = similarity_ratio | |
| indicators_map[db_indicator_name][1] = csv_indicator_name | |
| indicators_map[db_indicator_name][2] = csv_indicator_description | |
| count = 0 | |
| values = [] | |
| for indicator in indicators_map: | |
| if indicators_map[indicator][0] > MIN_SIMILARITY_RATIO: | |
| percentage = "%.0f" % (indicators_map[indicator][0] * 100) | |
| values.append([percentage, indicator, indicators_map[indicator][1]]) | |
| count += 1 | |
| # print "UPDATE indicators_meta SET description = '%s' WHERE meta_type = '%s';" % (indicators_map[indicator][2], indicator) | |
| print "" | |
| print "- Similarity minimum ratio: %.2f" % MIN_SIMILARITY_RATIO | |
| print "- DB Indicators : %d" % len(result) | |
| print "- CSV Indicators : %d" % (len(indicators) - 1) | |
| print "- Known indicators that can be updated: %d" % count | |
| print "- Unknown indicators: %d" % (len(indicators) - 1 - count) | |
| print "" | |
| print tabulate(values, headers=['Similarity %', "DB Indicator", "CSV Indicator"]) |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
[Similarity %, DB Indicaror name, CSV Indicator name|