Skip to content

Instantly share code, notes, and snippets.

@HelloWorld017
Created May 13, 2018 14:11
Show Gist options
  • Save HelloWorld017/0b52b4486247c2802e9d314c06ca8b5a to your computer and use it in GitHub Desktop.
Save HelloWorld017/0b52b4486247c2802e9d314c06ca8b5a to your computer and use it in GitHub Desktop.
Using an SVM to classify houses in Seoul and Daejeon
import pandas as pd
import requests
import json
import os
# From https://gist.github.com/giftbott/6ab91e81a8e5ff67c631fcc7c97f1483
class ZigbangDl:
    """Collect room listings from the Zigbang items API and export them as CSV.

    Every fetched entry is accumulated in ``self.rooms``; nothing is ever
    removed, so use one instance per output file.
    """

    # Seconds to wait for the API before giving up. Without a timeout
    # ``requests.get`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Raw entries taken from the "items" array of every API response.
        self.rooms = []

    def crawl_item(self, zigbang_url):
        """Fetch one API URL and append its ``items`` entries to ``self.rooms``.

        Args:
            zigbang_url: Fully formatted items-API URL.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(zigbang_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        init_data = response.json()
        # A payload without an "items" key contributes nothing instead of
        # failing on ``list += None``.
        self.rooms += init_data.get('items') or []

    def crawl_items(self, room_id_list):
        """Fetch every room id in ``room_id_list`` in order, printing progress.

        Args:
            room_id_list: Sized iterable of integer room ids.
        """
        list_len = len(room_id_list)
        for idx, room_id in enumerate(room_id_list):
            print("%d / %d" % (idx, list_len))
            # NOTE(review): the "~~~" query parameters look like redacted
            # placeholders from the original gist - confirm before running.
            url = "https://api.zigbang.com/v1/items?detail=true&item_ids=%d&~~~&~~~&~~~" % room_id
            self.crawl_item(url)

    def save_info_to_csv(self, path):
        """Write size, deposit and rent of every collected room to ``path``.

        The CSV gets a positional index column followed by the three value
        columns; an empty ``self.rooms`` produces a header-only file.

        Args:
            path: Destination of the CSV file.
        """
        # Collect plain rows first and build the frame once; growing a
        # DataFrame row by row with ``.loc`` reallocates on every insert.
        rows = []
        for item in self.rooms:
            room = item.get('item')
            rows.append([
                room['size'],     # size in pyeong
                room['deposit'],  # deposit
                room['rent'],     # monthly rent
            ])
        df = pd.DataFrame(rows, columns=["평 수", "보증금", "월세"])
        df.to_csv(path)
# Load the room ids to crawl. The file is read inside a ``with`` block so the
# handle is closed deterministically; JSON text is UTF-8 by specification.
with open("./zigbang_integrated.json", encoding="utf-8") as id_file:
    zigbang_list = json.load(id_file)
zigbang_seoul = zigbang_list['seoul']
zigbang_daejeon = zigbang_list['daejeon']

# A fresh crawler per city keeps each CSV limited to that city's rooms,
# because a crawler never clears the rooms it has already collected.
print("Crawling Seoul...")
crawler = ZigbangDl()
crawler.crawl_items(zigbang_seoul)
crawler.save_info_to_csv('./zigbang_seoul.csv')

print("Crawling Daejeon...")
crawler = ZigbangDl()
crawler.crawl_items(zigbang_daejeon)
crawler.save_info_to_csv('./zigbang_daejeon.csv')
import pandas as pd
import requests
import json
import os
class ZigbangDl:
    """Collect room listings from the Zigbang items API and export them as CSV.

    Every fetched entry is accumulated in ``self.rooms``; nothing is ever
    removed, so use one instance per output file.
    """

    # Seconds to wait for the API before giving up. Without a timeout
    # ``requests.get`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Raw entries taken from the "items" array of every API response.
        self.rooms = []

    def crawl_item(self, zigbang_url):
        """Fetch one API URL and append its ``items`` entries to ``self.rooms``.

        Args:
            zigbang_url: Fully formatted items-API URL.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(zigbang_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        init_data = response.json()
        # A payload without an "items" key contributes nothing instead of
        # failing on ``list += None``.
        self.rooms += init_data.get('items') or []

    def crawl_items(self, room_id_list):
        """Fetch every room id in ``room_id_list`` in order, printing progress.

        Args:
            room_id_list: Sized iterable of integer room ids.
        """
        list_len = len(room_id_list)
        for idx, room_id in enumerate(room_id_list):
            print("%d / %d" % (idx, list_len))
            # NOTE(review): the "~~~" query parameters look like redacted
            # placeholders from the original gist - confirm before running.
            url = "https://api.zigbang.com/v1/items?detail=true&item_ids=%d&~~~&~~~&~~~" % room_id
            self.crawl_item(url)

    def save_info_to_csv(self, path):
        """Write size, deposit and rent of every collected room to ``path``.

        The CSV gets a positional index column followed by the three value
        columns; an empty ``self.rooms`` produces a header-only file.

        Args:
            path: Destination of the CSV file.
        """
        # Collect plain rows first and build the frame once; growing a
        # DataFrame row by row with ``.loc`` reallocates on every insert.
        rows = []
        for item in self.rooms:
            room = item.get('item')
            rows.append([
                room['size'],     # size in pyeong
                room['deposit'],  # deposit
                room['rent'],     # monthly rent
            ])
        df = pd.DataFrame(rows, columns=["평 수", "보증금", "월세"])
        df.to_csv(path)
# Load the room ids to crawl. The file is read inside a ``with`` block so the
# handle is closed deterministically; JSON text is UTF-8 by specification.
with open("./zigbang_integrated.json", encoding="utf-8") as id_file:
    zigbang_list = json.load(id_file)
zigbang_seoul = zigbang_list['seoul']
zigbang_daejeon = zigbang_list['daejeon']

# A fresh crawler per city keeps each CSV limited to that city's rooms,
# because a crawler never clears the rooms it has already collected.
print("Crawling Seoul...")
crawler = ZigbangDl()
crawler.crawl_items(zigbang_seoul)
crawler.save_info_to_csv('./zigbang_seoul.csv')

print("Crawling Daejeon...")
crawler = ZigbangDl()
crawler.crawl_items(zigbang_daejeon)
crawler.save_info_to_csv('./zigbang_daejeon.csv')
import csv
from sklearn.svm import SVC
import matplotlib.pyplot as plt
# noinspection PyUnresolvedReferences
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import random
# Feature rows; starts empty and is filled by read_csv(), one list of floats
# per CSV row.
x_data = list()
# Class labels, positionally paired with x_data: 100 zeros followed by 100
# ones. NOTE(review): this assumes each of the two CSV files read later
# contributes exactly 100 rows - confirm against the crawled data.
y_data = [label for label in (0, 1) for _ in range(100)]
def read_csv(file, target=None):
    """Append the numeric feature rows of a CSV file to a list.

    The first line is treated as a header and skipped, and the first column
    of every row (the index column) is dropped. Blank lines are ignored so
    they do not turn into empty feature rows.

    Args:
        file: Path of the CSV file to read.
        target: List that receives one ``list[float]`` per data row.
            Defaults to the module-level ``x_data``.

    Returns:
        The list the rows were appended to.

    Raises:
        ValueError: If a feature cell cannot be converted to ``float``.
    """
    if target is None:
        target = x_data
    # The header may contain non-ASCII column names, so decode explicitly as
    # UTF-8 instead of the locale default; newline='' is what the csv module
    # requires for correct handling of embedded newlines.
    with open(file, encoding="utf-8", newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            if not row:
                continue
            target.append([float(cell) for cell in row[1:]])
    return target
# noinspection PyUnresolvedReferences
def visualize():
    """Show a 3D scatter of the training samples with the SVM decision plane.

    Reads the module-level ``x_data`` / ``y_data`` lists and the fitted
    linear ``svm``. Samples labelled 0 are drawn with an 'x' marker, all
    others with 'o'. Blocks until the plot window is closed.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    for idx, x_datum in enumerate(x_data):
        marker = 'x' if y_data[idx] == 0 else 'o'
        ax.scatter(x_datum[0], x_datum[1], x_datum[2], marker=marker)

    # Grid over the (size, deposit) ranges on which the plane is drawn.
    plane_x, plane_y = np.meshgrid(np.linspace(0, 30, 50), np.linspace(0, 30000, 50))

    def plane_z(x, y):
        # Decision boundary of a linear SVM: w0*x + w1*y + w2*z + b = 0,
        # solved for z. The y term must be scaled by its coefficient,
        # otherwise the surface does not depend on the deposit axis at all.
        weights = svm.coef_[0]
        return (-svm.intercept_[0] - weights[0] * x - weights[1] * y) / weights[2]

    ax.plot_surface(plane_x, plane_y, plane_z(plane_x, plane_y))

    ax.set_xlabel('Size')
    ax.set_ylabel('Deposit')
    ax.set_zlabel('Rent')
    plt.show()
# Reading CSV data. Daejeon is read first, so its rows line up with the
# leading labels in y_data and the Seoul rows with the trailing ones.
read_csv('./zigbang_daejeon.csv')
read_csv('./zigbang_seoul.csv')

# The labels are paired with the features purely by position; fail loudly if
# the CSV files did not deliver as many rows as there are labels.
if len(x_data) != len(y_data):
    raise ValueError(
        'Expected %d samples but read %d' % (len(y_data), len(x_data))
    )

# Randomly moving TEST_SIZE samples out of the training data into a test set
TEST_SIZE = 20
x_test = []
y_test = []
for _ in range(TEST_SIZE):
    sample_index = random.randrange(len(x_data))
    # Pop from both lists with the same index to keep feature/label pairs.
    x_test.append(x_data.pop(sample_index))
    y_test.append(y_data.pop(sample_index))
y_test = np.array(y_test)

# Fitting SVM on the remaining samples
svm = SVC(kernel='linear')
svm.fit(x_data, y_data)

# Visualizing
visualize()

# Checking accuracy: percentage of test samples predicted correctly. The
# denominator follows the actual test-set size instead of repeating the
# sample count as a literal.
predict = np.array(svm.predict(x_test))
accuracy = np.sum(predict == y_test) / len(y_test) * 100
print(predict, y_test)
print('Accuracy: ' + str(accuracy) + '%')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment