Skip to content

Instantly share code, notes, and snippets.

@patcon
Created February 15, 2016 02:39
Show Gist options
  • Save patcon/b237e631dfff35d919ee to your computer and use it in GitHub Desktop.
Save patcon/b237e631dfff35d919ee to your computer and use it in GitHub Desktop.
diff --git a/ca_on_toronto/__init__.py b/ca_on_toronto/__init__.py
index bda98f5..82d7281 100644
--- a/ca_on_toronto/__init__.py
+++ b/ca_on_toronto/__init__.py
@@ -4,6 +4,19 @@ from .jurisdiction import TorontoJurisdiction
import lxml.html
import requests
+# Monkey-patch pupa's BillImporter so that a DuplicateItemError raised while importing a bill is printed and skipped instead of propagating.
+import pupa.importers.bills
+from pupa.exceptions import DuplicateItemError
+
+class OverrideBillImporter(pupa.importers.bills.BillImporter):
+    """BillImporter variant that skips bills raising DuplicateItemError."""
+
+    def import_item(self, data):
+        """Import one bill, tolerating duplicates.
+
+        Delegates to the parent ``import_item`` and hands its result back
+        to the caller. If the parent raises ``DuplicateItemError``, the
+        offending ``data`` is printed and ``None`` is returned instead of
+        letting the exception propagate.
+        """
+        try:
+            # Propagate the parent's return value; dropping it would make
+            # this override always return None, even on success.
+            return super(OverrideBillImporter, self).import_item(data)
+        except DuplicateItemError:
+            print('Skipped a DuplicateItemError: {}'.format(data))
+            # NOTE(review): callers that unpack the result get None on this
+            # path -- confirm that is acceptable for the pupa version in use.
+            return None
+
+pupa.importers.bills.BillImporter = OverrideBillImporter
class Toronto(TorontoJurisdiction):
classification = 'legislature'
diff --git a/ca_on_toronto/events-incremental.py b/ca_on_toronto/events-incremental.py
index a65ff86..575c060 100644
--- a/ca_on_toronto/events-incremental.py
+++ b/ca_on_toronto/events-incremental.py
@@ -72,10 +72,12 @@ class TorontoIncrementalEventScraper(CanadianScraper):
headers = [sanitize_key(col.text) for col in rows.pop(0)]
for row in rows:
meeting_link = row.cssselect('a')[0].attrib['href']
+ org_name = row.cssselect('a')[0].text.strip()
values = [col.text_content().strip() for col in row]
item = dict(zip(headers, values))
item.update({'meeting': sanitize_org_name(item['meeting']) })
item.update({'meeting_link': meeting_link})
+ item.update({'org_name': sanitize_org_name(org_name)})
items.append(item)
return items
@@ -121,7 +123,7 @@ class TorontoIncrementalEventScraper(CanadianScraper):
time = dt.datetime.strptime(event['time'], '%I:%M %p')
start = tz.localize(date.replace(hour=time.hour, minute=time.minute, second=0, microsecond=0))
source_url = CALENDAR_DAY_TEMPLATE.format(start.year, start.month, start.day)
- org_name = event['meeting']
+ org_name = event['org_name']
e = Event(
name = org_name,
start_time = start,
@@ -143,7 +145,7 @@ class TorontoIncrementalEventScraper(CanadianScraper):
return event['publishing_status'] in ['Agenda Published', 'Minutes Published']
def is_council(event):
- return True if event['meeting'] == self.jurisdiction.name else False
+ return True if event['org_name'] == self.jurisdiction.name else False
if is_agenda_available(event):
template = AGENDA_FULL_COUNCIL_TEMPLATE if is_council(event) else AGENDA_FULL_STANDARD_TEMPLATE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment