Skip to content

Instantly share code, notes, and snippets.

@osantana
Last active June 3, 2016 03:53
Show Gist options
  • Save osantana/568029e8892ce596e5e20aac593a4f36 to your computer and use it in GitHub Desktop.
Save osantana/568029e8892ce596e5e20aac593a4f36 to your computer and use it in GitHub Desktop.
import re
import wikipedia
def parse(raw_content):
    """Return the body text of *raw_content* without headings or reference sections.

    The input is processed line by line; every line is stripped of
    surrounding whitespace first.

    * Lines that look like a section heading (``== Title ==``, with any
      number of ``=`` signs) are never emitted.
    * A line containing ``= references =`` or ``= further reading =``
      (case-insensitive) starts a skipped section. Everything after it is
      dropped until the next heading that is not nested deeper than the
      heading that started the skip, so subsections such as
      ``=== Notes ===`` under ``== References ==`` stay skipped as well.

    Args:
        raw_content: The text to clean, as a single string.

    Returns:
        The kept lines joined with ``"\\n"`` and terminated by one trailing
        newline (so empty input yields ``"\\n"``).
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    heading_re = re.compile(r"^(=+)\s+.*\s+=+$")
    skipped_markers = ("= references =", "= further reading =")
    # Depth used when a marker appears on a line that is not itself a
    # heading: no heading is deeper than this, so any heading ends the skip.
    any_heading = float("inf")

    content = []
    skip_depth = None  # depth of the heading being skipped; None = not skipping
    for raw_line in raw_content.splitlines():
        line = raw_line.strip()
        heading = heading_re.match(line)
        depth = len(heading.group(1)) if heading else any_heading

        # A deeper heading inside a skipped section is still part of it.
        if heading and skip_depth is not None and depth > skip_depth:
            continue

        lowered = line.lower()
        if any(marker in lowered for marker in skipped_markers):
            # Do not let a non-heading marker line widen an active skip.
            if heading or skip_depth is None:
                skip_depth = depth
            continue

        if heading:
            # A same-level or shallower heading starts a section we keep.
            skip_depth = None
            continue

        if skip_depth is None:
            content.append(line)

    return "\n".join(content) + "\n"
# Fetch the "New York" page and print its content after passing it through parse().
ny = wikipedia.page("New York")
ny_text = parse(ny.content)
print(ny_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment