This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import re | |
| import codecs # Unicode support | |
| from pymongo import Connection # For DB Connection | |
| from pymongo.errors import ConnectionFailure # For catching exceptions | |
| def main(): | |
| # MongoDB connection | |
| try: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def main(): | |
| # Command-line parsing supports filename*.txt | |
| # Make a list of command line arguments, omitting the [0] element which is the script itself. | |
| args = sys.argv[1:] | |
| if not args: | |
| print 'Some message.' | |
| sys.exit(1) | |
| for filename in args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| import re # Regular Expressions | |
| import collections # Data Types | |
| import sys # File operations | |
| import codecs # Unicode support | |
| def scrape(page): | |
| # Dump raw HTML into Soup | |
| raw_data = codecs.open(page, 'r', encoding='utf-8').read() | |
| soup = BeautifulSoup(raw_data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| soup(lambda tag: tag.name == 'div' and tag.get('class') == ['some-class']) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os, random | |
| random.choice(os.listdir("INSERT-DIR")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Non-recursive | |
| import os | |
| def load_directory(data_path): | |
| files_list = [] | |
| try: | |
| for file_name in os.listdir(data_path): | |
| if file_name.endswith(".html"): | |
| files_list.append(file_name) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| file_header = "<html>\n<head>\n<meta charset='utf-8'>\n</head>\n<body>\n" | |
| file_footer = "\n</body>\n</html>" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def longest_common_substring(s1, s2): | |
| m = [[0] * (1 + len(s2)) for i in range(1 + len(s1))] | |
| longest, x_up_to = 0, 0 | |
| for x in range(1, 1 + len(s1)): | |
| for y in range(1, 1 + len(s2)): # match every char in s2 against every char in s1 | |
| if s1[x - 1] == s2[y - 1]: # record a char match | |
| m[x][y] = m[x - 1][y - 1] + 1 # char match tally will accumulate if previous char also matched | |
| if m[x][y] > longest: | |
| longest = m[x][y] | |
| x_up_to = x # record char position of last found match |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| AA ɑ | |
| AA0 ɑ | |
| AA1 ɑ | |
| AA2 ɑ | |
| AE æ | |
| AE0 æ | |
| AE1 æ | |
| AE2 æ | |
| AH ə | |
| AH0 ə |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| import re # Regular Expressions | |
| import collections # Data Types | |
| import sys # File operations | |
| import codecs # Unicode support | |
| import os | |
| def clear_output_file(out_file): | |
| file_header ="""<html> | |
| <head> |
OlderNewer