Using Python's built-in defaultdict, we can easily define a tree data structure:

from collections import defaultdict
def tree(): return defaultdict(tree)

That's it!
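As a quick, hedged illustration (the keys below are invented for the example), accessing any missing key creates a new subtree on the fly, and the result still serializes like a plain dict:

import json

t = tree()
t['animals']['cats']          # intermediate nodes appear automatically
t['animals']['dogs']
t['plants']['trees']
print(json.dumps(t))          # {"animals": {"cats": {}, "dogs": {}}, "plants": {"trees": {}}}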
| """Short and sweet LSTM implementation in Tensorflow. | |
| Motivation: | |
| When Tensorflow was released, adding RNNs was a bit of a hack - it required | |
| building separate graphs for every number of timesteps and was a bit obscure | |
| to use. Since then TF devs added things like `dynamic_rnn`, `scan` and `map_fn`. | |
| Currently the APIs are decent, but all the tutorials that I am aware of are not | |
| making the best use of the new APIs. | |
| Advantages of this implementation: |
| """ | |
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
# data I/O
data = open('input.txt', 'r').read()  # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
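One natural next step (sketched here for completeness; it follows the standard character-vocabulary setup rather than anything shown in the excerpt above) is to map each character to an integer index and back, so the text can be fed to the model as index sequences:

char_to_ix = { ch: i for i, ch in enumerate(chars) }  # character -> integer index
ix_to_char = { i: ch for i, ch in enumerate(chars) }  # integer index -> character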
# Recall that, in regex, we use [\u4e00-\u9fa5] to match Chinese characters.
start = int("4e00", 16)
end = int("9fa5", 16)
for i in range(start, end + 1):
    # build the escape sequence "\u4e00" ... "\u9fa5" and decode it back into the character
    uni = r"\u" + hex(i)[2:]
    result = uni.encode("utf-8").decode("unicode_escape")
    print(result)  # equivalently: print(chr(i))
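To see the character class from the comment in action (a small sketch; the sample string is made up for the example):

import re

pattern = re.compile(r"[\u4e00-\u9fa5]+")              # one or more Chinese characters
print(pattern.findall("Hello 世界, this is 中文 text"))  # ['世界', '中文']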
| """ Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
| import numpy as np | |
| import cPickle as pickle | |
| import gym | |
| # hyperparameters | |
| H = 200 # number of hidden layer neurons | |
| batch_size = 10 # every how many episodes to do a param update? | |
| learning_rate = 1e-4 | |
| gamma = 0.99 # discount factor for reward |
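As a hedged sketch of how `gamma` is typically applied in this setup (the reset at a nonzero reward follows the usual Pong convention marking the end of a rally; the function itself is not shown in the excerpt above):

def discount_rewards(r):
    """Take a 1D float array of rewards and compute the discounted reward."""
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(len(r))):
        if r[t] != 0:
            running_add = 0  # reset the sum at a game boundary (Pong-specific)
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r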
import requests
import re
from urllib.request import urlopen
import multiprocessing as mp
import os

def process(pair):
    idx, title = pair
    idx = '0' * (4 - len(idx)) + idx  # left-pad the index to 4 digits, e.g. '7' -> '0007'
    for word in keywords:
#!/usr/bin/python
import sys
import json

if len(sys.argv) != 3:
    sys.exit("Usage: %s <gold_file> <result_file>" % sys.argv[0])

def loadAnswers(filename):
    """Load the answers into a list."""
| """Information Retrieval metrics | |
| Useful Resources: | |
| http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
| http://www.nii.ac.jp/TechReports/05-014E.pdf | |
| http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
| http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
| Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
| """ | |
| import numpy as np |
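As a small, hedged illustration of the kind of metric such a module typically provides (the function name and relevance vector below are invented for the example):

def precision_at_k(relevance, k):
    """Precision@k: fraction of the top-k results that are relevant (1 = relevant, 0 = not)."""
    top_k = np.asarray(relevance)[:k]
    return float(np.mean(top_k))

print(precision_at_k([1, 0, 1, 1, 0], 3))  # 0.666...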
# Here is a list of English stop words, taken from nltk.corpus.stopwords
# requirements: NLTK==3.2.4
stop_words = ['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at',
              'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by',
              'can', 'couldn', "couldn't",
              'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during',
              'each',
              'few', 'for', 'from', 'further',
              'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', 'her', 'here', 'hers', 'herself', 'him', 'himself', 'his', 'how',
# coding: utf-8
import logging
import re
from collections import Counter
import numpy as np
import torch
from sklearn.datasets import fetch_20newsgroups
from torch.autograd import Variable
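Given these imports, one plausible next step (a hedged sketch; the tokenization and frequency cutoff are simplifications invented for the example) is to fetch the corpus and count tokens to build a vocabulary:

newsgroups = fetch_20newsgroups(subset='train')          # downloads the data on first use
counter = Counter()
for doc in newsgroups.data:
    counter.update(re.findall(r"\w+", doc.lower()))      # crude word tokenization
vocab = [w for w, c in counter.most_common() if c >= 5]  # keep words seen at least 5 times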