This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shuffle before batching so that the elements grouped into a batch are drawn
# in randomized order. BUFFER_SIZE is passed straight to `shuffle`.
# NOTE(review): presumably `dataset` is a tf.data.Dataset - confirm.
shuffled = dataset.shuffle(BUFFER_SIZE)
# Group the shuffled elements into batches of BATCH_SIZE.
# NOTE(review): with tf.data, drop_remainder=True discards a final batch that
# holds fewer than BATCH_SIZE elements - confirm this is the intended API.
dataset = shuffled.batch(BATCH_SIZE, drop_remainder=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def gladiator_predict(model_list, start, end, sequences=10, vis=False): | |
| text = "" # initialise our text string | |
| meditations = {} # initialise generated text dictionary | |
| models = {} # initialise models dictionary | |
| # loop through each model in the model list and load the model itself and related char2idx mappings | |
| for modelname in model_list: | |
| models[modelname] = {} | |
| models[modelname]['model'] = dw.load_model(modelname) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from bs4 import BeautifulSoup | |
# Fetch the index page that links to all of Seneca's letters.
src = "https://en.wikisource.org/wiki/Moral_letters_to_Lucilius"
# A timeout stops the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of silently parsing an
# error page as if it were the index.
response = requests.get(src, timeout=30)
response.raise_for_status()
html = response.text  # page HTML as text
soup = BeautifulSoup(html, "html.parser")  # parse into a BeautifulSoup object
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # create function to pull letter from webpage (pulls text within <p> elements | |
| def pull_letter(http): | |
| # get html from webpage given by 'http' | |
| html = requests.get(http).text | |
| # parse into a beautiful soup object | |
| soup = BeautifulSoup(html, "html.parser") | |
| # build text contents within all p elements | |
| txt = '\n'.join([x.text for x in soup.find_all('p')]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiled pattern for link text such as 'Letter 12' or 'Letter 104': the word
# 'Letter', one or more whitespace characters, then one to three digits.
# A raw string is used so that `\s` reaches the regex engine untouched rather
# than being treated as an (invalid) string escape sequence.
letters_regex = re.compile(r"^Letter\s+[0-9]{1,3}$")
| # create dictionary containing letter number: [local href, letter contents] for all that satisfy above RegEx | |
| moral_letters = { | |
| x.contents[0]: | |
| [x.get('href'), pull_letter(f"https://en.wikisource.org{x.get('href')}")] | |
| for x in soup.find_all('a') | |
| if len(x.contents) > 0 | |
| if letters_regex.match(str(x.contents[0])) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join the text of every letter into one string. Each value in moral_letters
# is indexed at position 1 to get the letter's contents.
txt = "\n".join(entry[1] for entry in moral_letters.values())
# Vocabulary: every distinct character in the text, in sorted order.
vocab = sorted(set(txt))
# Map each character to its position in the vocabulary.
char2idx = {c: i for i, c in enumerate(vocab)}
| # converting data from characters to indexes |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# define the input/target data splitting function
def split_xy(seq):
    """Split a sequence into an (input, target) pair offset by one position.

    Args:
        seq: Any sliceable sequence.

    Returns:
        A tuple ``(input_data, target_data)`` where ``input_data`` is ``seq``
        without its last element and ``target_data`` is ``seq`` without its
        first element, so ``target_data[i]`` is the element that follows
        ``input_data[i]`` in ``seq``.
    """
    input_data = seq[:-1]
    target_data = seq[1:]
    return input_data, target_data
SEQLEN = 100  # the number of characters in a single sequence
BATCHSIZE = 64  # how many sequences in a single training batch
BUFFER = 10000  # how many elements are contained within a single shuffling space
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First-pass parameter pattern: a 'name : ' header followed by the shortest
# run of text (newlines included, via (?s)) that ends just before the next
# 'name :' header. The lookahead means a following header must exist.
param_re = re.compile(r"(?s)\w+ : .*?(?=\w+ :)")
# The pattern above cannot match the final parameter because no header
# follows it; this one takes a 'name : ' header and everything after it.
param_re2 = re.compile(r"(?s)\w+ : .*")
params = []  # initialise parameter list
| while True: | |
| # find a parameter | |
| new_param = param_re.search(text) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First-pass parameter pattern: a 'name : ' header followed by the shortest
# run of text (newlines included, via (?s)) that ends just before the next
# 'name :' header. The lookahead means a following header must exist.
param_re = re.compile(r"(?s)\w+ : .*?(?=\w+ :)")
# The pattern above cannot match the final parameter because no header
# follows it; this one takes a 'name : ' header and everything after it.
param_re2 = re.compile(r"(?s)\w+ : .*")
params = {}  # initialise parameter dictionary
| while True: | |
| # find a parameter | |
| new_param = param_re.search(text) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> | |
| <title>Docs</title> |