start new:
tmux
start new with session name:
tmux new -s myname
```python
# Convert Traditional Chinese to Simplified Chinese with langconv.
# Copy these two modules next to this script:
# https://raw.githubusercontent.com/skydark/nstools/master/zhtools/langconv.py
# https://raw.githubusercontent.com/skydark/nstools/master/zhtools/zh_wiki.py
from langconv import Converter

def Traditional2Simplified(sentence):
    # 'zh-hans' maps Traditional Chinese characters to their Simplified forms.
    sentence = Converter('zh-hans').convert(sentence)
    return sentence
```
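The reverse direction works the same way. A minimal sketch (my own addition, assuming langconv's `zh-hant` ruleset for Traditional Chinese):

```python
from langconv import Converter

def Simplified2Traditional(sentence):
    # 'zh-hant' selects langconv's Traditional Chinese ruleset.
    return Converter('zh-hant').convert(sentence)

print(Traditional2Simplified(u'憂郁的臺灣烏龜'))
print(Simplified2Traditional(u'忧郁的台湾乌龟'))
```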
```python
# 1. Directly load a pre-trained model
# https://github.com/pytorch/vision/tree/master/torchvision/models
import torch.nn as nn
import torchvision.models as models

# pretrained=True downloads the ImageNet weights.
resnet50 = models.resnet50(pretrained=True)
# or start from a randomly initialized network:
model = models.resnet50(pretrained=False)

# Maybe you want to modify the last fc layer, e.g. for a 2-class task?
resnet50.fc = nn.Linear(2048, 2)
```
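When fine-tuning on a small dataset, a common follow-up (a sketch of my own, not part of the snippet above) is to freeze the pre-trained backbone and train only the new head:

```python
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Load ImageNet weights, then freeze every pre-trained parameter.
resnet50 = models.resnet50(pretrained=True)
for param in resnet50.parameters():
    param.requires_grad = False

# Replace the head; freshly constructed layers default to requires_grad=True.
resnet50.fc = nn.Linear(2048, 2)

# Optimize only the new head's parameters.
optimizer = optim.SGD(resnet50.fc.parameters(), lr=1e-3, momentum=0.9)
```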
```python
''' Script for downloading all GLUE data.
Note: for legal reasons, we are unable to host MRPC.
You can either use the version hosted by the SentEval team, which is already tokenized,
or you can download the original data from
https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi
and extract the data from it manually.
For Windows users, you can run the .msi file. For Mac and Linux users, consider an
external library such as 'cabextract' (see below for an example).
You should then rename and place specific files in a folder (see below for an example).
mkdir MRPC
cabextract MSRParaphraseCorpus.msi -d MRPC
'''
```
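For the other GLUE tasks, the work amounts to fetching and unpacking a per-task archive. A minimal sketch of that step (my own code, not the script's actual implementation; the URL below is a placeholder, not a real task link):

```python
import io
import os
import urllib.request
import zipfile

def download_and_extract(task_url, data_dir='glue_data'):
    # Fetch a task archive into memory and unpack it under data_dir.
    os.makedirs(data_dir, exist_ok=True)
    with urllib.request.urlopen(task_url) as response:
        archive = zipfile.ZipFile(io.BytesIO(response.read()))
    archive.extractall(data_dir)

# Hypothetical usage; substitute the real download link for each task.
download_and_extract('https://example.com/CoLA.zip')
```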
```python
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Toy batch: three character sequences of different lengths.
seqs = ['gigantic_string', 'tiny_str', 'medium_str']
# Build a character vocabulary; make <pad> idx 0.
vocab = ['<pad>'] + sorted(set(''.join(seqs)))
# Make model: a character embedding followed by an LSTM.
embed = nn.Embedding(len(vocab), 10)
lstm = nn.LSTM(10, 5)
```
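The snippet cuts off after the model definition; the usual continuation is to index, pad, pack, run the LSTM, and unpack. A sketch under those assumptions, reusing the `seqs`, `vocab`, `embed`, and `lstm` defined above:

```python
# Map characters to indices and record the true lengths.
vectorized = [torch.tensor([vocab.index(ch) for ch in seq]) for seq in seqs]
lengths = torch.tensor([len(v) for v in vectorized])

# Pad to a rectangular batch (padding value 0 == <pad>), then embed.
padded = nn.utils.rnn.pad_sequence(vectorized, batch_first=True)
embedded = embed(padded)                      # (batch, max_len, 10)

# Pack so the LSTM skips padded positions; enforce_sorted=False keeps
# the original batch order instead of requiring length-sorted input.
packed = pack_padded_sequence(embedded, lengths, batch_first=True, enforce_sorted=False)
packed_out, (h_n, c_n) = lstm(packed)

# Unpack back to a padded tensor plus the per-sequence lengths.
output, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
```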
```python
# encoding: utf-8
# Render Chinese characters to image files with pygame.
import os
import pygame

# PingFang ships with macOS; on other platforms point this at any CJK font file.
font_file = '/System/Library/Fonts/PingFang.ttc'

chinese_dir = 'chinese'
if not os.path.exists(chinese_dir):
    os.mkdir(chinese_dir)
```
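The fragment stops before any rendering happens. One plausible continuation (my own sketch, not the original script) renders each character of a sample string into the `chinese` directory:

```python
pygame.init()
font = pygame.font.Font(font_file, 64)

for ch in u'中文字符':
    # render(text, antialias, color, background) -> Surface
    surface = font.render(ch, True, (0, 0, 0), (255, 255, 255))
    pygame.image.save(surface, os.path.join(chinese_dir, ch + '.png'))
```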
```python
from graphviz import Digraph
import torch
from torch.autograd import Variable, Function

def iter_graph(root, callback):
    # Walk the autograd graph from `root`, invoking `callback` once per grad_fn node.
    queue = [root]
    seen = set()
    while queue:
        fn = queue.pop()
        if fn in seen:
            continue
        seen.add(fn)
        for next_fn, _ in fn.next_functions:
            if next_fn is not None:
                queue.append(next_fn)
        callback(fn)
```
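A hedged usage sketch (my own, not the original gist's full gradient checker): use `iter_graph` to dump the backward graph of a tiny computation to Graphviz.

```python
x = torch.randn(3, requires_grad=True)
y = (x * 2).sum()

dot = Digraph()

def add_node(fn):
    # One graph node per grad_fn, with edges from the grad_fns that feed it.
    dot.node(str(id(fn)), type(fn).__name__)
    for next_fn, _ in fn.next_functions:
        if next_fn is not None:
            dot.edge(str(id(next_fn)), str(id(fn)))

iter_graph(y.grad_fn, add_node)
# Requires the Graphviz binaries; writes backward_graph.png.
dot.render('backward_graph', format='png')
```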
```python
import re, collections

def get_stats(vocab):
    pairs = collections.defaultdict(int)
    for word, freq in vocab.items():
        symbols = word.split()
        for i in range(len(symbols) - 1):
            pairs[symbols[i], symbols[i + 1]] += freq
    return pairs
```
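`get_stats` is half of the classic byte-pair-encoding routine from Sennrich et al.; the usual companion is a merge step plus a small driver loop. A sketch along those lines (the toy vocabulary below is illustrative):

```python
def merge_vocab(pair, v_in):
    # Replace every occurrence of the chosen symbol pair with its merged form.
    v_out = {}
    bigram = re.escape(' '.join(pair))
    pattern = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')
    for word in v_in:
        v_out[pattern.sub(''.join(pair), word)] = v_in[word]
    return v_out

# Toy word-frequency vocabulary; '</w>' marks end-of-word.
vocab = {'l o w </w>': 5, 'l o w e r </w>': 2,
         'n e w e s t </w>': 6, 'w i d e s t </w>': 3}

for _ in range(10):                      # number of merges to learn
    pairs = get_stats(vocab)
    best = max(pairs, key=pairs.get)     # most frequent adjacent pair
    vocab = merge_vocab(best, vocab)
    print(best)
```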