Skip to content

Instantly share code, notes, and snippets.

View bryanyang0528's full-sized avatar
🎯
Focusing

Bryan Yang bryanyang0528

🎯
Focusing
View GitHub Profile
@bryanyang0528
bryanyang0528 / kmeans.py
Created July 27, 2016 02:15 — forked from dave-andersen/kmeans.py
k-means in Tensorflow
import tensorflow as tf
import numpy as np
import time
N=10000
K=4
MAX_ITERS = 1000
start = time.time()
@bryanyang0528
bryanyang0528 / tsvtoxlsx.py
Last active June 16, 2016 08:04
tsv to xlsx
__reference__ = 'http://stackoverflow.com/questions/16852655/convert-a-tsv-file-to-xls-xlsx-using-python'
__author__ = 'jmcnamara'
__contributor__ = 'bryan yang'
import sys
import csv
from xlsxwriter.workbook import Workbook
class Tsvtoxlsx(object):
import numpy as np
import operator
point = [0,0]
k = 3
listOfPoint=[(1,1),(1,0),(2,1),(0,-1),(2,2)]
def knn(point, k, lists):
#create a dic to store distance for each point
dic={}
@bryanyang0528
bryanyang0528 / sumOfZero.py
Created October 28, 2015 02:11
sumOfZero
input = [-3,-1, 0, 1, 2 ,3]
res = []
def sumOfZero(input, target, part=[]):
s = 0
if part:
for i in part:
s+=i
if s == target:
res.append(part)
import numpy as np
def roc(actual, pred):
fpr=np.array([1.0])
tpr=np.array([1.0])
n=float(len(actual)-sum(actual))
p=float(sum(actual))
for i in np.arange(min(pred), max(pred), 1.0/len(pred)):
TP=0.0
FP=0.0
for j in range(len(pred)):
@bryanyang0528
bryanyang0528 / roc.py
Created June 23, 2015 06:26
ROC CURVE
def roc(pred, actual):
roc=[]
p=float(len(actual)-sum(actual))
n=float(sum(actual))
for i in range(0,100,1) :
TP=0
FP=0
i = float(i)/100
for j in range(len(pred)):
if (pred[j] > i) & (actual[j]==0):
  1. General Background and Overview
http://d.stavrovski.net/blog/post/how-to-install-and-setup-oracle-java-jdk-in-centos-6
# rpm
wget --no-cookies \
--no-check-certificate \
--header "Cookie: oraclelicense=accept-securebackup-cookie" \
"http://download.oracle.com/otn-pub/java/jdk/7u55-b13/jdk-7u55-linux-x64.rpm" \
-O jdk-7-linux-x64.rpm
# ubuntu
core-site.xml
=================================
<property>
<name>fs.defaultFS</name>
<value>hdfs://ec2-54-148-213-237.us-west-2.compute.amazonaws.com</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/local/var/hadoop/tmp/hadoop-${user.name}</value>
</property>
# coding=utf-8
# Goal: parse house information for each district from websites.
# For each district, write 「土地區段位置或建物區門牌」,「建物型態」,「建物現況格局」,「坪數」,「屋齡」,「總價元」,「資料來源」 into a CSV file.
# Procedure:
# 1. Get the number of pages for each district by parsing the first HTML page.
# 2. For each district, combine all HTML pages, use htmlparser to parse the content, and save the data to a file.
import sys
import math