This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import torch.nn.functional as F | |
import torch.optim as optim | |
from tqdm import tqdm | |
def train_fixed_window(n, n_epochs=1, batch_size=3200, lr=1e-2): | |
# Vectorize the data | |
train_x, train_y = vectorize_fixed_window(wikitext.train, n) | |
valid_x, valid_y = vectorize_fixed_window(wikitext.valid, n) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# | |
# This Gist converts the Wall Street Journal part of the Penn Treebank
# (more specifically, sections 2–24) to CoNLL 2007 format using
# PennConverter. As suggested by the authors of PennConverter, the script
# first applies the NP bracketing patch by David Vadas.
# | |
# In order to make this script work, you will need the following files:
# | |
# * treebank-3.tar.gz, containing the standard distribution of the PTB |