- 实词:名词、动词、形容词、状态词、区别词、数词、量词、代词
- 虚词:副词、介词、连词、助词、拟声词、叹词。
n 名词
nr 人名
| # Initialize the scroll | |
| page = es.search( | |
| index = 'yourIndex', | |
| doc_type = 'yourType', | |
| scroll = '2m', | |
| search_type = 'scan', | |
| size = 1000, | |
| body = { | |
| # Your query's body | |
| }) |
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
| <?pde version="3.8"?><target name="simple" sequenceNumber="12"> | |
| <locations> | |
| <location path="${env_var:ECLIPSE_432_HOME}" type="Profile"/> | |
| <location path="${project_loc:builder_external}/builder/lib" type="Directory"/> | |
| <location includeAllPlatforms="false" includeConfigurePhase="true" includeMode="planner" includeSource="true" type="InstallableUnit"> | |
| <unit id="org.apache.commons.beanutils" version="1.8.0.v201205091237"/> | |
| <unit id="org.apache.commons.collections" version="3.2.0.v2013030210310"/> | |
| <unit id="com.google.guava" version="12.0.0.v201212092141"/> | |
| <unit id="com.google.gson" version="2.1.0.v201303041604"/> |
| """A simple implementation of a greedy transition-based parser. Released under BSD license.""" | |
| from os import path | |
| import os | |
| import sys | |
| from collections import defaultdict | |
| import random | |
| import time | |
| import pickle | |
| SHIFT = 0; RIGHT = 1; LEFT = 2; |
| mahout clusterdump \ | |
| -dt sequencefile \ # format: {Integer => String} | |
| -d reuters-vectors/dictionary.file-* \ # dictionary: {id => word} | |
| -i reuters-kmeans-clusters/clusters-3-final \ # input | |
| -o clusters.txt \ # output (local filesystem) | |
| -b 10 \ # format length | |
| -n 10 # number of top terms to print | |
| --distanceMeasure org.apache.mahout.common.distance.CosineDistanceMeasure # default is euclidean distance |
# A Collection of NLP notes
## N-grams
### Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
| git fetch upstream | |
| git reset --hard upstream/master |