This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if char == '<': | |
# do something | |
elif char == '>': | |
# do another thing | |
else: | |
# do something completely different |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# obo.py | |
def stripTags(pageContents): | |
startLoc = pageContents.find("<hr/><h2>") | |
pageContents = pageContents[startLoc:] | |
inside = 0 | |
text = '' | |
for char in pageContents: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
inside = 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write a short greeting to helloworld.txt in the current directory.
# The with-statement closes the file even if the write raises, so the
# handle is never leaked and the data is flushed on exit.
with open('helloworld.txt', 'w') as f:
    f.write('hello world')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# string-to-list.py
# Turn a string into a list holding one element per character.

# some strings
s1 = 'hello world'
s2 = 'howdy world'

# list of characters: list() iterates over s1 and collects every character
# (the space included), which is what the explicit append loop produced
charlist = list(s1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# html-to-list1.py
# Fetch the page at `url`, pass it through obo.stripTags, split the result
# on whitespace and print the first 120 items.
import urllib2

import obo

url = 'http://www.oldbaileyonline.org/print.jsp?div=t17800628-33'

response = urllib2.urlopen(url)
try:
    html = response.read()
finally:
    # release the connection even if the read fails
    response.close()

text = obo.stripTags(html)
wordlist = text.split()

# parenthesised so the line parses under both Python 2 and Python 3
print(wordlist[0:120])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
['BENJAMIN', 'BOWSEY,', 'Breaking', 'Peace', '>', | |
'riot,', '28th', 'June', '1780.', '324.', 'BENJAMIN', | |
'BOWSEY', '(a', 'blackmoor', ')', 'was', 'indicted', | |
'for', 'that', 'he', 'together', 'with', 'five', | |
'hundred', 'other', 'persons', 'and', 'more,', 'did,', | |
'unlawfully,', 'riotously,', 'and', 'tumultuously', | |
'assemble', 'on', 'the', '6th', 'of', 'June', 'to', | |
'the', 'disturbance', 'of', 'the', 'public', 'peace', | |
'and', 'did', 'begin', 'to', 'demolish', 'and', 'pull', | |
'down', 'the', 'dwelling', 'house', 'of', 'Richard', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# html-to-list1.py
# Fetch the page at `url`, pass it through dh.stripTags, lower-case the
# result and split it on whitespace into `wordlist`.
import urllib2

import dh

url = 'http://www.oldbaileyonline.org/print.jsp?div=t17800628-33'

response = urllib2.urlopen(url)
try:
    xhtml = response.read()
finally:
    # release the connection even if the read fails
    response.close()

# the string method .lower() is applied to the stripped text before splitting
text = dh.stripTags(xhtml).lower()
wordlist = text.split()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# html-to-list1.py
# Fetch the page at `url`, pass it through obo.stripTags, lower-case the
# result and split it on whitespace into `wordlist`.
import urllib2

import obo

url = 'http://www.oldbaileyonline.org/print.jsp?div=t17800628-33'

response = urllib2.urlopen(url)
try:
    html = response.read()
finally:
    # release the connection even if the read fails
    response.close()

# the string method .lower() is applied to the stripped text before splitting
text = obo.stripTags(html).lower()
wordlist = text.split()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Delete each unwanted punctuation character from `text` (defined earlier
# in the script). To strip more characters, add them to the string below
# instead of writing another replace() line.
for unwanted in '[],':
    text = text.replace(unwanted, '')