This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# classifier method | |
# dataSet: the training data set | |
# inX: the test sample to classify | |
# labels: the labels corresponding to the samples in dataSet | |
# k: the number of nearest samples (by sorted distance) whose labels are used | |
def classify0(inX, dataSet, labels, k): | |
sortedDistIndicies = euclideanMetric(dataSet, inX) | |
classCount = {} | |
for i in range(k): | |
voteIlabel = labels[sortedDistIndicies[i]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# classifier method | |
# dataSet: the training data set | |
# inX: the test sample to classify | |
# labels: the labels corresponding to the samples in dataSet | |
# k: the number of nearest samples (by sorted distance) whose labels are used | |
def classify0(inX, dataSet, labels, k): | |
sortedDistIndicies = euclideanMetric(dataSet, inX) | |
classCount = {} | |
for i in range(k): | |
voteIlabel = labels[sortedDistIndicies[i]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on Oct 12, 2010 | |
Decision Tree Source Code for Machine Learning in Action Ch. 3 | |
@author: Peter Harrington | |
''' | |
from math import log | |
import operator | |
# prepare data set | |
# create data set and corresponding labels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def run(): | |
data = pd.read_csv('Desktop_Win10Cortana_TRAIN_en-US_Live_2017-04_Desktop_Win10Cortana_Standard_en-us.hyp', sep='\t') | |
wordCount = 0 | |
wordDict = {} | |
for index, item in data.iterrows(): | |
wordList = item[8].split(' ') | |
for word in wordList: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
https://microsoft.sharepoint.com/teams/stca/ipe/sr/_layouts/15/WopiFrame.aspx?sourcedoc={57bcddd2-341f-4151-84f7-f332fca4d07a}&action=edit&wd=target%28LearningCorner%2Eone%7CDF7F96A9-2186-462F-A0EC-8772881176AA%2FPerl%20and%20C%23%20Training%7C1C6AD0B9-765A-4DD5-A3C7-0822927FC9D1%2F%29 | |
''' | |
import xml.etree.ElementTree as ET | |
from operator import itemgetter | |
import argparse | |
SRC_FILE_NAME = 'ITA_Blind_R2.xml' | |
DES_FILE_NAME = 'task1_output.txt' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
https://microsoft.sharepoint.com/teams/stca/ipe/sr/_layouts/15/WopiFrame.aspx?sourcedoc={57bcddd2-341f-4151-84f7-f332fca4d07a}&action=edit&wd=target%28LearningCorner%2Eone%7CDF7F96A9-2186-462F-A0EC-8772881176AA%2FPerl%20and%20C%23%20Training%7C1C6AD0B9-765A-4DD5-A3C7-0822927FC9D1%2F%29 | |
''' | |
import pandas as pd | |
from operator import itemgetter | |
import argparse | |
SRC_FILE_NAME = 'Desktop_Merino_ThresholdCortana_Train_de-de_Li_1.hyp' | |
DES_FILE_NAME = 'task2_output.txt' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
''' | |
import argparse | |
import xml.etree.ElementTree as ET | |
from operator import itemgetter | |
import codecs | |
SRC_FILE_NAME = 'CortanaLiveData_ja-JP_2015_Audio_test.xml' | |
DES_FILE_NAME = 'task3_output.hyp' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
''' | |
import argparse | |
import codecs | |
import xml.etree.ElementTree as ET | |
import pandas as pd | |
INPUT_XML = 'CortanaLiveData_ja-JP_2015_Audio_test_Result.xml' | |
INPUT_HYP = 'task3_output.hyp' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
''' | |
import os | |
import codecs | |
import argparse | |
# import xml.etree.ElementTree as ET | |
from bs4 import BeautifulSoup | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Python Training | |
''' | |
import os | |
import codecs | |
import argparse | |
# import xml.etree.ElementTree as ET | |
from bs4 import BeautifulSoup | |
OlderNewer