Last active
August 3, 2018 08:36
-
-
Save higs4281/26447e03c0310c4930c9949b16013071 to your computer and use it in GitHub Desktop.
Reg link insertion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import unicode_literals | |
import re | |
from regulations3k.models import Part, Section | |
from regulations3k.scripts.ecfr_importer import LEGACY_PARTS | |
# URL template for a regulation's landing page; formatted with the part number.
REG_BASE = '/policy-compliance/rulemaking/regulations/{}/'

# Finds a citation marker (the section sign, "Section" or "12 CFR"), skips the
# non-word characters that follow it and captures the next run of
# non-whitespace characters as the section reference.
# The \N{...} escape is resolved by the string literal itself, so that piece
# stays a normal string; the regex escapes live in an adjacent raw string so
# "\W" and "\s" are not parsed as (invalid) string escape sequences.
SECTION_RE = re.compile(
    '(?:\N{SECTION SIGN}|Section|12 CFR)' r'\W+([^\s]+)')

# Splits a reference such as "1002.4(a)(1)" into a four-digit part, a section
# label and an optional trailing group of parenthesised paragraph ids.
PARTS_RE = re.compile(
    r'(?P<part>\d{4})[.-](?P<section>[0-9A-Z]+)(?P<ids>\([a-zA-Z0-9)(]+)?')

# Extracts each individual parenthesised id (1-4 alphanumeric characters).
ID_RE = re.compile(r'\(([a-zA-Z0-9]{1,4})\)')
def get_url(section_reference):
    """Build the regulation URL for a section reference like ``1002.4(a)``.

    The reference is parsed with PARTS_RE (anchored at its start). The result
    is the section URL under REG_BASE, with a ``#``-fragment made of the
    paragraph ids joined by ``-`` when any are present.

    Returns None when the reference does not match PARTS_RE or when its
    part number is not in LEGACY_PARTS.
    """
    matched = PARTS_RE.match(section_reference)
    if matched is None:
        return
    groups = matched.groupdict()

    part = groups.get('part')
    if part not in LEGACY_PARTS:
        return
    part_url = REG_BASE.format(part)

    section = groups.get('section')
    if not section:
        return part_url
    section_url = "{}{}/".format(part_url, section)

    # No ids group, or a group with no well-formed "(x)" ids, means the
    # plain section URL is returned without a fragment.
    ids = groups.get('ids')
    paragraph_id = "-".join(ID_RE.findall(ids)) if ids else ""
    if not paragraph_id:
        return section_url
    return "{}#{}".format(section_url, paragraph_id)
def insert_section_links(regdown):
    """Turn internal section references into links.

    Every reference captured by SECTION_RE is passed to get_url(); when a
    URL comes back, the captured text is wrapped in an ``<a>`` tag whose
    ``data-linktag`` is the reference's index among all captured references
    (linked or not).

    The replacement is done at the exact offsets of the regex match, so an
    identical string that appears earlier in the text without a citation
    marker is never linked by mistake.

    Returns the text with links inserted; empty or None input is returned
    unchanged.
    """
    if not regdown:
        return regdown
    pieces = []
    cursor = 0  # end offset of the last reference that was replaced
    for i, match in enumerate(SECTION_RE.finditer(regdown)):
        ref = match.group(1)
        url = get_url(ref)
        if not url:
            continue
        # Copy the untouched text up to the captured reference, then the link.
        pieces.append(regdown[cursor:match.start(1)])
        pieces.append(
            '<a href="{}" data-linktag="{}">{}</a> '.format(url, i, ref))
        cursor = match.end(1)
    pieces.append(regdown[cursor:])
    return "".join(pieces)
def insert_links(reg=None):
    """Insert section links into stored section contents and save them.

    With ``reg`` given, only Part rows whose ``part_number`` equals it are
    considered; otherwise all Part rows are. The ``effective_version`` of
    each selected part is collected, and every Section whose subpart belongs
    to one of those versions has its ``contents`` run through
    insert_section_links() and is then saved.

    Does nothing when no parts are selected.
    """
    if reg is not None:
        selected_parts = Part.objects.filter(part_number=reg)
    else:
        selected_parts = Part.objects.all()

    versions = [each.effective_version for each in selected_parts]
    if not versions:
        return

    for section in Section.objects.filter(subpart__version__in=versions):
        print("Inserting links in section {}".format(section))
        section.contents = insert_section_links(section.contents)
        section.save()
def run(*args):
    """Script entry point: insert links for one regulation or for all.

    The first positional argument, when present, is forwarded to
    insert_links() as ``reg``; with no arguments every part is processed.
    """
    # insert_links() defaults ``reg`` to None, so passing None explicitly is
    # the same as calling it with no arguments.
    reg = args[0] if args else None
    insert_links(reg=reg)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment