import requests
# Download the raw UCI "imports-85" automobile data. The timeout keeps the
# script from hanging on a dead connection, and raise_for_status() stops us
# from saving an HTTP error page as if it were the dataset.
response = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data',
    timeout=30,
)
response.raise_for_status()

# Keep an untouched copy of the download on disk.
with open('imports-85.data', mode='wb') as f:
    f.write(response.content)
with open('imports-85.data', mode='r') as f:
    data = f.read()

# The downloaded text is prefixed with this header row so the result can be
# read as a CSV with named columns.
title = 'symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,curb-weight,engine-type,num-of-cylinders,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price'

# No padding around the newline: extra spaces here would become part of the
# last column name ('price ') and of the first data field.
data = f'{title}\n{data}'
with open('automobile.csv', mode='w') as f:
    f.write(data)
import pandas as pd
# Let pandas print up to 30 columns so the wide table is not truncated.
pd.set_option('display.max_columns', 30)

df = pd.read_csv('automobile.csv')
df.head()

# Overall size and column labels of the loaded table.
print('shape:', df.shape)
print('columns:', df.columns)

# Two equivalent selections of make/width/price for rows with width > 70:
# first via query(), then via a boolean mask.
df.query('width > 70')[['make', 'width', 'price']]
df.loc[df['width'] > 70, ['make', 'width', 'price']]
# Pandas (5): Retrieving data by specifying the index
# Rows labelled 10 through 15 (label-based slicing includes both ends),
# limited to the make and price columns.
df.loc[10:15, ['make', 'price']]

# Mean width for each make.
df.groupby(['make'])[['width']].mean()
# Pandas (9): Replacing a specific string with NaN and converting dtypes
import numpy as np
# Columns where the literal string '?' is swapped for NaN so that they can
# then be cast to float64.
numeric_columns = (
    'normalized-losses',
    'bore',
    'stroke',
    'horsepower',
    'peak-rpm',
    'price',
)
for column in numeric_columns:
    df[column] = df[column].replace('?', np.nan)

# Cast all of them in a single astype() call.
df = df.astype(dict.fromkeys(numeric_columns, 'float64'))
# Pandas (10): Narrowing down to quantitative data and viewing basic statistics
# Descriptive statistics for four quantitative columns.
df.loc[:, ['width', 'length', 'horsepower', 'price']].describe()

# Variance of horsepower and price.
df.loc[:, ['horsepower', 'price']].var()
# Pandas (11): Inspecting the distinct values of categorical (qualitative) data
# Distinct manufacturer names, and the type of a single selected column.
print(df['make'].unique())
print(type(df['make']))

# Non-null counts per column, before and after dropping every row that
# contains at least one NaN.
print('df count:', df.count())
df_a = df.dropna()
print('df_a count:', df_a.count())

# Alternative to dropping rows: fill the gaps in each listed column with
# that column's mean.
df_b = df.fillna({
    name: df[name].mean()
    for name in (
        'normalized-losses',
        'bore',
        'stroke',
        'horsepower',
        'peak-rpm',
        'price',
    )
})
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
# Number of rows per manufacturer.
make_count = df[['make']].value_counts()
make_count

# Correlation between horsepower and price only.
df_b[['horsepower', 'price']].corr()

# df_b still contains text-valued columns here (they are label-encoded only
# later), and DataFrame.corr() raises on such columns in pandas >= 2.0.
# Restricting to numeric/bool columns works on old and new pandas alike.
plt.figure(figsize=(9, 9))
sns.heatmap(df_b.select_dtypes(include=['number', 'bool']).corr(), annot=True)

# One-hot encode the manufacturer and swap it in for the original column.
df2 = pd.get_dummies(df_b[['make']])
df2
df3 = pd.concat([df_b.drop(['make'], axis=1), df2], axis=1)
df3

# df3 keeps the remaining text columns, so apply the same restriction;
# 'bool' keeps the dummy columns on pandas versions that emit booleans.
plt.figure(figsize=(12, 12))
sns.heatmap(df3.select_dtypes(include=['number', 'bool']).corr(), annot=True)
from sklearn .preprocessing import LabelEncoder
# Text-valued columns that are converted to integer codes in place.
categorical_columns = [
    'make',
    'fuel-type',
    'aspiration',
    'num-of-doors',
    'body-style',
    'drive-wheels',
    'engine-location',
    'engine-type',
    'num-of-cylinders',
    'fuel-system',
]

# Keep every fitted encoder. Without them the integer codes cannot be mapped
# back to labels (encoders[col].inverse_transform) and new raw inputs cannot
# be encoded consistently (encoders[col].transform).
encoders = {}
for category in categorical_columns:
    le = LabelEncoder()
    df_b[category] = le.fit_transform(df_b[category])
    encoders[category] = le
df_b
# Correlation heatmap across all columns of df_b.
plt.figure(figsize=(15, 15))
sns.heatmap(df_b.corr(), annot=True)

# Pairwise scatter plots for a hand-picked subset of columns.
scatter_columns = [
    'drive-wheels',
    'wheel-base',
    'length',
    'width',
    'curb-weight',
    'engine-size',
    'fuel-system',
    'bore',
    'horsepower',
    'city-mpg',
    'highway-mpg',
    'price',
]
pd.plotting.scatter_matrix(
    df_b[scatter_columns],
    figsize=(15, 15),
    range_padding=0.2,
)
plt.show()

# Save the converted table; the row index is not written.
df_b.to_csv('automobile_converted.csv', index=False)
import pandas as pd
df = pd.read_csv('automobile_converted.csv')
df.head()

# Explanatory variables: every column except the target.
X_var = df.drop(columns='price')
X_array = X_var.to_numpy()

# Target variable.
y_var = df['price']
y_array = y_var.to_numpy()
from sklearn .model_selection import train_test_split
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    y_array,
    train_size=0.8,
    random_state=0,
)
from sklearn import linear_model

# Ordinary least-squares linear regression; fit() returns the estimator.
model = linear_model.LinearRegression().fit(X_train, y_train)

# Learned coefficients and intercept (the printed labels are Japanese for
# "slope" and "intercept").
print('傾き: %s' % model.coef_)
print('切片: %s' % model.intercept_)

# Score on the training split, then on the test split.
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

# Prediction for the first test row, passed as a one-row 2-D slice.
model.predict(X_test[:1])
from sklearn .tree import DecisionTreeRegressor
# Decision-tree regression with the tree depth capped at 3.
dtr = DecisionTreeRegressor(max_depth=3)
dtr.fit(X_train, y_train)

# Score on the training split first, then on the test split.
for features, target in ((X_train, y_train), (X_test, y_test)):
    print(dtr.score(features, target))
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn .tree import plot_tree
# Draw the fitted decision tree.
plt.figure(figsize=(20, 10))
plot_tree(
    dtr,
    label='all',  # 'all', 'root' or 'none'
    max_depth=3,
    filled=True,
    # Name the nodes after the columns the tree was trained on. X_var
    # excludes the target 'price', unlike df.columns. A plain list is passed
    # because some scikit-learn releases reject a pandas Index here.
    feature_names=list(X_var.columns),
    fontsize=12,
)
plt.show()
from sklearn .ensemble import RandomForestRegressor
# Random-forest regression: 50 trees, each limited to depth 3.
# fit() returns the estimator, so construction and training are chained.
rfr = RandomForestRegressor(n_estimators=50, max_depth=3).fit(X_train, y_train)

# Score on the training split, then on the test split.
print(rfr.score(X_train, y_train))
print(rfr.score(X_test, y_test))
from xgboost import XGBRegressor
# XGBoost regression.
# `use_label_encoder` and the 'logloss' metric belong to classification and
# have no place on a regressor; RMSE is the matching regression metric.
xgb = XGBRegressor(
    n_estimators=50,
    eval_metric='rmse',
)
xgb.fit(X_train, y_train)

# Score on the training split, then on the test split.
print(xgb.score(X_train, y_train))
print(xgb.score(X_test, y_test))
import pandas as pd
from sklearn .model_selection import train_test_split
df = pd.read_csv('automobile_converted.csv')

# Seeded 80 % random sample for training; the rows that were not sampled
# become the test set. Both get a fresh 0..n-1 index.
train = df.sample(frac=0.8, random_state=111)
test = df.drop(train.index).reset_index(drop=True)
train = train.reset_index(drop=True)

print('train: ' + str(train.shape))
print('test: ' + str(test.shape))
from pycaret .regression import *
# Set up the PyCaret regression experiment on the training rows with
# 'price' as the target column.
reg01 = setup (data = train , target = 'price' )
# Compare the available models. The result is stored in best_model, but the
# visible code below does not use it and builds a ridge model instead.
best_model = compare_models ()
# Create a ridge model, tune it, then finalize the tuned model.
model = create_model ('ridge' )
tuned = tune_model (model )
final = finalize_model (tuned )
print (final )
# Apply the finalized model to the held-out test rows and preview the output.
test_predictions = predict_model (final , data = test )
test_predictions .head ()
# Save the finalized model under the name 'automobile_final'; other parts of
# this file load it again by that name.
save_model (final , 'automobile_final' )
from pycaret .regression import *
import pandas as pd
# Load the pipeline saved under the name 'automobile_final'.
model = load_model('automobile_final')

# Feature names, in the order used for the single sample row below.
columns = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
    'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
    'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg',
]
data = [[
    2.0, 164.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 99.4, 176.6, 66.4, 54.3,
    2824.0, 3.0, 1.0, 136.0, 5.0, 3.19, 3.4, 8.0, 115.0, 5500.0, 18.0, 22.0,
]]
df = pd.DataFrame(data, columns=columns)

# The predicted value is read from the 'prediction_label' column.
predictions = predict_model(model, df)
print('predicted price: %i' % predictions['prediction_label'].iloc[0])
!pip install flask flask-ngrok pyngrok
from pycaret .regression import *
import pandas as pd
from flask import Flask , request , jsonify
from flask_ngrok import run_with_ngrok
app = Flask(__name__)
run_with_ngrok(app)

# Holds the loaded pipeline so it is read from disk once instead of on
# every request.
_model_cache = {}


def _get_model():
    """Return the saved 'automobile_final' pipeline, loading it on first use."""
    if 'model' not in _model_cache:
        _model_cache['model'] = load_model('automobile_final')
    return _model_cache['model']


@app.route('/predict', methods=['POST'])
def predict():
    """Predict a price for the feature rows posted as JSON.

    The request body must be a JSON object of the form
    ``{"data": [[v1, v2, ...], ...]}`` where each row lists the feature
    values in the order given by ``columns`` below.

    Returns:
        A JSON object ``{"price": <float>}`` holding the prediction for the
        first row. If the body is missing, is not a JSON object, has no
        non-empty ``data`` list, or the rows cannot be arranged into the
        expected columns, a JSON ``{"error": ...}`` with HTTP status 400.
    """
    # silent=True yields None for a missing or malformed JSON body, which
    # the check below turns into a 400 response.
    payload = request.get_json(silent=True)
    rows = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(rows, list) or not rows:
        return jsonify({'error': "expected a JSON body like {'data': [[...], ...]}"}), 400

    columns = [
        'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
        'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
        'wheel-base', 'length', 'width', 'height', 'curb-weight',
        'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system',
        'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm',
        'city-mpg', 'highway-mpg',
    ]
    try:
        df = pd.DataFrame(rows, columns=columns)
    except ValueError as exc:
        # e.g. a row whose length does not match the number of columns.
        return jsonify({'error': str(exc)}), 400

    predictions = predict_model(_get_model(), df)
    # Only the first row's prediction is returned, as before.
    return jsonify({
        'price': float(predictions[['prediction_label']].values[0][0])
    })


app.run()
import json
import requests
# NOTE(review): this looks like a temporary ngrok tunnel address; confirm it
# matches the currently running server before executing.
predict_url = 'http://ce59a77d4613.ngrok.io/predict'

# `json=` serialises the payload and sets the JSON Content-Type header, so
# the manual json.dumps() and header are not needed. The timeout prevents an
# indefinite hang if the tunnel is down.
response = requests.post(
    predict_url,
    json={'data': [[
        2.0, 164.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 99.4, 176.6, 66.4,
        54.3, 2824.0, 3.0, 1.0, 136.0, 5.0, 3.19, 3.4, 8.0, 115.0, 5500.0,
        18.0, 22.0,
    ]]},
    timeout=30,
)
# Fail with a clear HTTP error rather than a confusing JSON decode error
# when the server answers with an error status.
response.raise_for_status()
result = response.json()
result