딥러닝

[딥러닝] Iris 품종 분류

퓨어맨 2022. 7. 18. 12:00
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the Iris dataset and list the fields available on the returned
# dictionary-like object.
iris = load_iris()
iris.keys()
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

 

 

# Pull the feature matrix and the class labels out of the dataset in one step.
X, y = iris['data'], iris['target']

# Show the dimensions of both arrays.
X.shape, y.shape
((150, 4), (150,))

 

 

# One-hot encode the target labels so that the error is computed on the same
# scale as the model's per-class outputs.
# dtype=int keeps the columns as 0/1 integers; newer pandas releases default
# to boolean columns, which would not match the table shown below.
y_one_hot = pd.get_dummies(y, dtype=int)
y_one_hot
	0	1	2
0	1	0	0
1	1	0	0
2	1	0	0
3	1	0	0
4	1	0	0
...	...	...	...
145	0	0	1
146	0	0	1
147	0	0	1
148	0	0	1
149	0	0	1
150 rows × 3 columns

 

 

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_one_hot,
    test_size=0.2,
    random_state=3,
)

# Print the shape of each split: train features, train labels,
# test features, test labels.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)
(120, 4)
(120, 3)
(30, 4)
(30, 3)

 

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Design the network: the layers are listed in order and stacked sequentially.
model = Sequential([
    # Input + first hidden layer (input_dim is the number of features in X_train).
    Dense(100, input_dim=4, activation='relu'),
    # Remaining hidden layers.
    Dense(50, activation='relu'),
    Dense(30, activation='relu'),
    Dense(10, activation='relu'),
    # Output layer: one neuron per one-hot encoded column.
    Dense(3, activation='softmax'),
])

# Print the layer-by-layer summary with parameter counts.
model.summary()
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_5 (Dense)             (None, 100)               500       
                                                                 
 dense_6 (Dense)             (None, 50)                5050      
                                                                 
 dense_7 (Dense)             (None, 30)                1530      
                                                                 
 dense_8 (Dense)             (None, 10)                310       
                                                                 
 dense_9 (Dense)             (None, 3)                 33        
                                                                 
=================================================================
Total params: 7,423
Trainable params: 7,423
Non-trainable params: 0
_________________________________________________________________

 

 

# Configure how the model is trained and evaluated.
# categorical_crossentropy: loss function used for multi-class classification.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',   # optimizer: widely used recently and generally performs well
    metrics=['acc'],    # metric: accuracy, because this is a classification problem
)

# Train for 100 epochs; the returned object keeps the per-epoch metrics
# in `h.history`.
h = model.fit(X_train, y_train, epochs=100)

# Plot the training accuracy recorded at each epoch.
plt.figure(figsize=(15, 5))
plt.plot(h.history['acc'], label='acc')
plt.legend()
# `plt.show` has to be called; a bare `plt.show` only references the function
# and never renders the figure outside inline notebook backends.
plt.show()

 

# Evaluate the trained model on the held-out test split; the result is the
# loss followed by the 'acc' metric configured in compile().
model.evaluate(X_test, y_test)
1/1 [==============================] - 0s 35ms/step - loss: 0.0516 - acc: 0.9667
[0.051631245762109756, 0.9666666388511658]