Pythonによるデータ分析・機械学習プログラミングとAIのローコード開発
def fizz_buzz(i):
    """Return the FizzBuzz label for the integer ``i``.

    Multiples of both 3 and 5 give 'Fizz Buzz!', multiples of 3 give
    'Fizz!', multiples of 5 give 'Buzz!'; any other value is returned as
    its decimal string.
    """
    # The combined case must be tested first, otherwise 15 would be
    # reported as a plain multiple of 3.
    if i % 3 == 0 and i % 5 == 0:
        return 'Fizz Buzz!'
    if i % 3 == 0:
        return 'Fizz!'
    if i % 5 == 0:
        return 'Buzz!'
    return str(i)


# Print the sequence for 1..100 (the upper bound of range() is exclusive).
for i in range(1, 101):
    print(fizz_buzz(i))
# print() separates its arguments with a single space unless sep is given.
print('abc', 'def', 'ghi')
print('abc', 'def', 'ghi', sep='::')

# Integer arithmetic.
a = (5 + 3 - 1) * 9
print(str(a))

# Float arithmetic: the result carries binary rounding error, and int()
# truncates it towards zero.
a = (5.0 + 3.0 - 1.0) * 9.0 / 10.0 % 6.0
print(str(a))
print(str(int(a)))

# Decimal arithmetic. The operands are built from strings so that each
# value is exact; constructing a Decimal from a float literal would copy
# whatever binary rounding error that float already has.
from decimal import Decimal
a = (Decimal('5.0') + Decimal('3.0') - Decimal('1.0')) * Decimal('9.0') / Decimal('10.0') % Decimal('6.0')
print(str(a))
# Single and double quotes both create str objects; double quotes make it
# easy to embed an apostrophe.
str1 = 'Hello'
print(str1)
str2 = "I'm sure that "
str3 = "you can write python."
# "+" concatenates strings.
str4 = str2 + str3
print(str4)
# Interpolate with an f-string (same output as the older "%s" formatting).
print(f'str2:{str2} str3:{str3}')
def fizz_buzz_message(i):
    """Return the FizzBuzz line for the integer ``i``.

    Multiples of both 3 and 5 give 'Fizz Buzz!', multiples of 3 give
    'Fizz!', multiples of 5 give 'Buzz!'; for any other value the number
    is embedded in a sentence with an f-string.
    """
    if i % 3 == 0 and i % 5 == 0:
        return 'Fizz Buzz!'
    if i % 3 == 0:
        return 'Fizz!'
    if i % 5 == 0:
        return 'Buzz!'
    return f'{i} は3または5では割り切れません'


# Print one line per number for 1..100.
for i in range(1, 101):
    print(fizz_buzz_message(i))
# Indexing and slicing: a single character, a half-open range, and
# "from index 4 to the end".
print(str4[4])
print(str4[4:10])
print(str4[4:])

# Inspect the type, then branch on it. isinstance() is the idiomatic
# check and also accepts subclasses of str.
print(type(str4))
if isinstance(str4, str):
    print('str4 is str')
else:
    print('str4 is not str')
# Form 1: import the module; the class is reached as datetime.datetime.
import datetime
# now() is called without a tz argument, so the result is a naive datetime.
# NOTE(review): the value is not assigned or printed - presumably this was
# a notebook cell that displays the last expression.
datetime .datetime .now ()
# Form 2: import the class directly under the short alias dt.
from datetime import datetime as dt
dt .now ()
!pip freeze
!pip freeze > requirements .txt
!pip install - r requirements .txt
import requests

# Download the raw data file (it has no header row).
response = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data',
    timeout=30,  # do not hang forever on a stalled connection
)
# Fail loudly on an HTTP error instead of saving an error page as data.
response.raise_for_status()
with open('imports-85.data', mode='wb') as f:
    f.write(response.content)

# Re-read the file as text so a header line can be prepended.
with open('imports-85.data', mode='r') as f:
    data = f.read()

title = 'symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,curb-weight,engine-type,num-of-cylinders,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price'
# No padding around the newline: a stray space would turn the last header
# into 'price ' and prefix the first data row with a blank.
data = f'{title}\n{data}'
with open('automobile.csv', mode='w') as f:
    f.write(data)
import pandas as pd

# Allow up to 30 columns to be displayed.
pd.set_option('display.max_columns', 30)

df = pd.read_csv('automobile.csv')
df.head()
print('shape:', df.shape)
print('columns:', df.columns)

# Two equivalent ways to pick the make/width/price columns of the rows
# whose width exceeds 70: a query string, and a boolean mask.
df[['make', 'width', 'price']].query('width > 70')
df.loc[df['width'] > 70, ['make', 'width', 'price']]
Pandas(5)indexを指定したデータの取得
# Label-based slice: .loc includes both end points, so this selects the
# rows labelled 10 through 15.
df.loc[10:15, ['make', 'price']]
# Mean width per make.
df.groupby('make')[['width']].mean()
Pandas(9)特定文字列のNaNへの置換と型変換
import numpy as np

# Columns in which the '?' marker is replaced before converting to float.
numeric_columns = [
    'normalized-losses',
    'bore',
    'stroke',
    'horsepower',
    'peak-rpm',
    'price',
]
# Turn the '?' placeholders into NaN ...
df[numeric_columns] = df[numeric_columns].replace('?', np.nan)
# ... then convert those columns to float64.
df = df.astype({column: 'float64' for column in numeric_columns})
Pandas(10)量的データに絞り込んで基本統計量を見る
# Summary statistics for four quantitative columns.
df[['width', 'length', 'horsepower', 'price']].describe()
# Variance of horsepower and price.
df[['horsepower', 'price']].var()
Pandas(11)カテゴリデータ(質的データ)の値の種類を見る
# Distinct values of the 'make' column, and the type of a single column.
print(df['make'].unique())
print(type(df['make']))

# Non-null counts per column, before and after dropping incomplete rows.
print('df count:', df.count())
df_a = df.dropna()
print('df_a count:', df_a.count())

# Alternative to dropping rows: fill each listed column's missing values
# with that column's median.
df_b = df.fillna({
    column: df[column].median()
    for column in (
        'normalized-losses',
        'bore',
        'stroke',
        'horsepower',
        'peak-rpm',
        'price',
    )
})
!pip install matplotlib seaborn
% matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
make_count = df [['make' ]].value_counts ()
make_count
df_b [['horsepower' , 'price' ]].corr ()
import seaborn as sns
plt .figure (figsize = (9 , 9 ))
sns .heatmap (df_b .corr (), annot = True )
df2 = pd .get_dummies (df_b [['make' ]])
df2
df3 = pd .concat ([df_b .drop (['make' ], axis = 1 ), df2 ], axis = 1 )
df3
plt .figure (figsize = (12 , 12 ))
sns .heatmap (df3 .corr (), annot = True )
from sklearn.preprocessing import LabelEncoder

# Replace each listed column with integer codes, using a fresh encoder
# per column.
for category in [
    'make',
    'fuel-type',
    'aspiration',
    'num-of-doors',
    'body-style',
    'drive-wheels',
    'engine-location',
    'engine-type',
    'num-of-cylinders',
    'fuel-system',
]:
    le = LabelEncoder()
    df_b[category] = le.fit_transform(df_b[category])
df_b
### 相関行列の作成(3)
# Correlation heatmap of df_b.
plt.figure(figsize=(15, 15))
sns.heatmap(df_b.corr(), annot=True)

# Pairwise scatter plots for a subset of columns.
pd.plotting.scatter_matrix(
    df_b[[
        'drive-wheels',
        'wheel-base',
        'length',
        'width',
        'curb-weight',
        'engine-size',
        'fuel-system',
        'bore',
        'horsepower',
        'city-mpg',
        'highway-mpg',
        'price',
    ]],
    figsize=(15, 15),
    range_padding=0.2,
)
plt.show()

# Save the frame without the index column.
df_b.to_csv('automobile_converted.csv', index=False)
import pandas as pd

df = pd.read_csv('automobile_converted.csv')
df.head()

# Explanatory variables: every column except 'price'.
X_var = df.drop(columns='price')
X_array = X_var.values

# Target variable: the 'price' column.
y_var = df['price']
y_array = y_var.values
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    y_array,
    train_size=0.8,
    random_state=0,
)

# fit() returns the estimator itself, so it can be chained.
model = linear_model.LinearRegression().fit(X_train, y_train)
print('傾き: %s' % model.coef_)
print('切⽚: %s' % model.intercept_)

# Score on the training data and on the held-out data.
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

# Predict for the first test row (kept two-dimensional via the slice).
model.predict(X_test[:1])
from sklearn.tree import DecisionTreeRegressor

# Decision tree regression, limited to depth 3; fit() returns the
# estimator, so construction and fitting are chained.
dtr = DecisionTreeRegressor(max_depth=3).fit(X_train, y_train)

# Score on the training data and on the held-out data.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(dtr.score(features, targets))
% matplotlib inline
from matplotlib import pyplot as plt
from sklearn .tree import plot_tree
plt .figure (figsize = (20 , 10 ))
plot_tree (
dtr ,
label = 'all' , # all, none
max_depth = 3 ,
filled = True ,
feature_names = df .columns ,
fontsize = 12 ,
)
plt .show ()
from sklearn.ensemble import RandomForestRegressor

# Random forest regression: 50 trees, each limited to depth 3.
rfr = RandomForestRegressor(n_estimators=50, max_depth=3)
rfr.fit(X_train, y_train)

# Score on the training data, then on the held-out data.
train_score = rfr.score(X_train, y_train)
print(train_score)
test_score = rfr.score(X_test, y_test)
print(test_score)
from xgboost import XGBRegressor

# XGBoost regression.
# - eval_metric is 'rmse': 'logloss' is a classification metric and does
#   not suit a continuous price target.
# - use_label_encoder is not passed: it was a classifier-only, deprecated
#   option and has no meaning for a regressor.
xgb = XGBRegressor(
    n_estimators=50,
    eval_metric='rmse',
)
xgb.fit(X_train, y_train)

# Score on the training data and on the held-out data.
print(xgb.score(X_train, y_train))
print(xgb.score(X_test, y_test))
4.ローコードAIツールPycaretの活用と機械学習の実践
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('automobile_converted.csv')

# Sample 80% of the rows (fixed seed) for training; the rows not sampled
# form the test set. The test set must be derived before the training
# index is reset.
train = df.sample(frac=0.8, random_state=111)
test = df.drop(train.index)

# Renumber both frames from 0, discarding the old index.
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)

print('train: ' + str(train.shape))
print('test: ' + str(test.shape))
# Every function called below comes from this wildcard import.
from pycaret .regression import *
# Initialise the experiment on the training frame with 'price' as the target.
# NOTE(review): the calls below presumably rely on the state setup()
# creates, so the order of this cell matters - confirm against PyCaret docs.
reg01 = setup (data = train , target = 'price' )
# The comparison result is stored in best_model but not used below;
# the 'ridge' model is created explicitly instead.
best_model = compare_models ()
model = create_model ('ridge' )
# Tune the created model, then finalize the tuned model.
tuned = tune_model (model )
final = finalize_model (tuned )
print (final )
# Predict on the held-out test frame and show the first rows.
test_predictions = predict_model (final , data = test )
test_predictions .head ()
# Persist the finalized model under the name 'automobile_final'
# (the same name is passed to load_model() later in this file).
save_model (final , 'automobile_final' )
from pycaret.regression import *
import pandas as pd

# Load the model saved under the name 'automobile_final'.
model = load_model('automobile_final')

# Column names for the single input row below (no 'price' column).
columns = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight',
    'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system',
    'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm',
    'city-mpg', 'highway-mpg',
]
# One row of input values, in the same order as `columns`.
data = [[
    2.0, 164.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 99.4, 176.6, 66.4,
    54.3, 2824.0, 3.0, 1.0, 136.0, 5.0, 3.19, 3.4, 8.0, 115.0, 5500.0,
    18.0, 22.0,
]]
df = pd.DataFrame(data, columns=columns)

# The prediction is read from the 'prediction_label' column of the result.
predictions = predict_model(model, df)
print('predicted price: %i' % predictions['prediction_label'].values[0])
!pip install flask flask - ngrok pyngrok
from pycaret .regression import *
import pandas as pd
from flask import Flask , request , jsonify
from flask_ngrok import run_with_ngrok
app = Flask (__name__ )
run_with_ngrok (app )
from functools import lru_cache

# Column names expected for each input row (no 'price' column).
FEATURE_COLUMNS = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight',
    'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system',
    'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm',
    'city-mpg', 'highway-mpg',
]


@lru_cache(maxsize=1)
def _load_price_model():
    """Load the model saved as 'automobile_final' once and reuse it."""
    return load_model('automobile_final')


@app.route('/predict', methods=['POST'])
def predict():
    """Predict a price for the first row of the posted JSON payload.

    Expects a JSON body of the form ``{"data": [[...25 values...]]}`` with
    the values in ``FEATURE_COLUMNS`` order.

    Returns:
        ``{"price": <float>}`` on success, or ``{"error": <message>}``
        with HTTP status 400 when the payload is missing or malformed.
    """
    # silent=True yields None for a missing or invalid JSON body instead
    # of raising, so the request can be rejected with a clear message.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'data' not in payload:
        return jsonify({'error': "JSON body with a 'data' field is required"}), 400

    rows = payload['data']
    if not rows:
        return jsonify({'error': "'data' must contain at least one row"}), 400

    try:
        df = pd.DataFrame(rows, columns=FEATURE_COLUMNS)
    except (ValueError, TypeError) as err:
        # Raised when a row does not match the expected column layout.
        return jsonify({'error': str(err)}), 400

    # The model is loaded on first use and cached, not on every request.
    predictions = predict_model(_load_price_model(), df)
    return jsonify({
        'price': float(predictions[['prediction_label']].values[0][0])
    })


app.run()
import json
import requests

# POST one input row to the /predict endpoint.
# NOTE(review): the ngrok host name is hard-coded and presumably differs
# on every tunnel start - update it before running.
response = requests.post(
    'http://ce59a77d4613.ngrok.io/predict',
    # json= serialises the payload and sets the Content-Type header.
    json={'data': [[2.0, 164.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 99.4, 176.6, 66.4, 54.3, 2824.0, 3.0, 1.0, 136.0, 5.0, 3.19, 3.4, 8.0, 115.0, 5500.0, 18.0, 22.0]]},
    timeout=30,  # do not hang forever if the tunnel is down
)
# Raise on an HTTP error status instead of decoding an error body.
response.raise_for_status()
result = response.json()
result