Skip to content

Instantly share code, notes, and snippets.

@rhelmer
Created December 9, 2011 22:11
Show Gist options
  • Select an option

  • Save rhelmer/1453534 to your computer and use it in GitHub Desktop.

Select an option

Save rhelmer/1453534 to your computer and use it in GitHub Desktop.
diff --git a/socorro/processor/processor.py b/socorro/processor/processor.py
index b3964a1..1754215 100755
--- a/socorro/processor/processor.py
+++ b/socorro/processor/processor.py
@@ -123,6 +123,8 @@ class Processor(object):
self.prefixSignatureRegEx = re.compile(self.config.prefixSignatureRegEx)
self.signaturesWithLineNumbersRegEx = re.compile(self.config.signaturesWithLineNumbersRegEx)
+ self.productIdMap = []
+
self.reportsTable = sch.ReportsTable(logger=config.logger)
self.extensionsTable = sch.ExtensionsTable(logger=config.logger)
self.framesTable = sch.FramesTable(logger=config.logger)
@@ -170,7 +172,7 @@ class Processor(object):
logger.debug("success")
db_conn.commit()
except sdb.exceptions_eligible_for_retry:
- logger.debug("timout trouble")
+ logger.debug("timeout trouble")
raise
except sdb.db_module.ProgrammingError, x:
logger.debug('the priority jobs table (%s) already exists',
@@ -656,6 +658,19 @@ class Processor(object):
"""
#logger.debug("starting insertReportIntoDatabase")
product = Processor.getJsonOrWarn(jsonDocument,'ProductName',processorErrorMessages,None, 30)
+ productId = Processor.getJsonOrWarn(jsonDocument,'ProductId',processorErrorMessages,None, 30)
+
+ # in some cases, we will receive products with the same name but different ID. cache the result.
+ if (not self.productIdMap):
+ self.productIdMap = self.loadProductIdMap()
+ logger.debug("product ID map: " + str(self.productIdMap))
+ logger.debug([x[0] for x in productIdMap])
+ for p in productIdMap:
+ if productId == p.productId:
+ if product != p.product and p.rewrite:
+ logger.info("Product name %s rewritten to %s", (product, p.product_name))
+ product = p.product_name
+
version = Processor.getJsonOrWarn(jsonDocument,'Version', processorErrorMessages,None,16)
buildID = Processor.getJsonOrWarn(jsonDocument,'BuildID', processorErrorMessages,None,16)
url = sutil.lookupLimitedStringOrNone(jsonDocument, 'URL', 255)
@@ -836,3 +851,27 @@ class Processor(object):
showTraceback=False)
except KeyError:
self.config.logger.info('no Elastic Search URL has been configured')
+
+
+ #-----------------------------------------------------------------------------------------------------------------
+ def loadProductIdMap(self):
+ logger.debug('test1')
+ logger.debug(self.databaseConnectionPool.connectionCursorPair)
+ db_conn, db_cur = self.databaseConnectionPool.connectionCursorPair()
+ productIdMap = []
+ try:
+ logger.debug("attempting to load product_productid_map")
+ db_cur.execute("SELECT * FROM product_productid_map")
+ productIdList = db_cur.fetchall()
+ logger.debug("done loading product_productid_map")
+ db_conn.commit()
+ except:
+ logger.error('Unable to load product_productid_map')
+ db_conn.rollback()
+ raise
+
+ columns = ('product_name', 'productid', 'rewrite', 'version_began', 'version_ended')
+ for result in productIdList:
+ productIdMap.append(dict(x for x in zip(columns, result)))
+
+ return productIdMap
@peterbe
Copy link

peterbe commented Dec 9, 2011

30: Why the bracket?

31: Why assigning it to the instance? I.e. why not productIdMap = self.loadProductIdMap()?

50: The word "load" doesn't imply what it actually does. If it was prefixed "set" or "get" or "apply" I would understand what it does. At least, if it's called "load" then it shouldn't return anything.

51: What good does this do?

66: If you know the columns, stick that into the SELECT ... FROM ...

54: If it's called, a "somethingsomethingMap" I'd expect to be a dict type. Not a list.

61: When using commit/rollback I would much prefer a "try: finally:" approach simply because it's less about the error.

62: I'd prefer logger.error('Unable to load product_productid_map', exc_info=True) so you get the traceback in the logs.

35: Is p a dict instance coming from line 68? The p.productId would fail. However, p['productId'] would fail too because it's zipped up as p['productid']

57: In fact, if columns ever change, this script will fail because of the hardcoded columns tuple. Using the tuple instead of * would at least make us safe from newly added columns or change of internal order.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment