로지스틱 회귀¶
In [1]:
# Prepare the data: load the fish measurement dataset (CSV hosted at the
# book's short link) and preview the first rows.
import pandas as pd
fish = pd.read_csv('https://bit.ly/fish_csv')
fish.head()
Out[1]:
Species | Weight | Length | Diagonal | Height | Width | |
---|---|---|---|---|---|---|
0 | Bream | 242.0 | 25.4 | 30.0 | 11.5200 | 4.0200 |
1 | Bream | 290.0 | 26.3 | 31.2 | 12.4800 | 4.3056 |
2 | Bream | 340.0 | 26.5 | 31.1 | 12.3778 | 4.6961 |
3 | Bream | 363.0 | 29.0 | 33.5 | 12.7300 | 4.4555 |
4 | Bream | 430.0 | 29.0 | 34.0 | 12.4440 | 5.1340 |
In [2]:
print(pd.unique(fish['Species']))
['Bream' 'Roach' 'Whitefish' 'Parkki' 'Perch' 'Pike' 'Smelt']
In [3]:
fish_input = fish[['Weight','Length','Diagonal','Height','Height','Width']].to_numpy()
In [4]:
print(fish_input[:5])
[[242. 25.4 30. 11.52 11.52 4.02 ]
[290. 26.3 31.2 12.48 12.48 4.3056]
[340. 26.5 31.1 12.3778 12.3778 4.6961]
[363. 29. 33.5 12.73 12.73 4.4555]
[430. 29. 34. 12.444 12.444 5.134 ]]
In [5]:
fish_target = fish['Species'].to_numpy()
In [6]:
# Split into train and test sets (default 75/25; fixed seed for reproducibility).
# Cleanup: the import and the split call were accidentally duplicated — running
# train_test_split twice with the same random_state produces the identical split,
# so a single call is behaviorally equivalent.
from sklearn.model_selection import train_test_split
train_input, test_input, train_target, test_target = train_test_split(
    fish_input, fish_target, random_state=42)
In [7]:
# Standardize the features: learn the scaling statistics on the training
# split only, then apply the identical transformation to both splits so the
# test set never leaks into preprocessing.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
train_scaled = ss.fit_transform(train_input)
test_scaled = ss.transform(test_input)
In [8]:
# Probability prediction with a k-nearest-neighbors classifier (k=3).
from sklearn.neighbors import KNeighborsClassifier
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(train_scaled, train_target)
# Report accuracy on the training split, then on the held-out test split.
for features, labels in ((train_scaled, train_target), (test_scaled, test_target)):
    print(kn.score(features, labels))
0.8907563025210085
0.85
In [9]:
print(kn.classes_)
['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
In [10]:
print(kn.predict(test_scaled[:5]))
['Perch' 'Smelt' 'Pike' 'Roach' 'Perch']
In [13]:
# Class probabilities for the first five test samples, rounded to 4 decimals.
# With k=3 neighbors the probabilities can only be 0, 1/3, 2/3, or 1.
import numpy as np
proba = kn.predict_proba(test_scaled[:5])
print(np.round(proba,decimals=4))
[[0. 0. 1. 0. 0. 0. 0. ]
[0. 0. 0. 0. 0. 1. 0. ]
[0. 0. 0. 1. 0. 0. 0. ]
[0. 0. 0.3333 0. 0.6667 0. 0. ]
[0. 0. 0.6667 0. 0.3333 0. 0. ]]
In [14]:
# Inspect the 3 nearest neighbors of the 4th test sample to explain its
# predicted probabilities (2 Roach + 1 Perch matches the 0.6667/0.3333 above).
distances, indexes = kn.kneighbors(test_scaled[3:4])
print(train_target[indexes])
[['Roach' 'Perch' 'Roach']]
In [15]:
# Plot the logistic (sigmoid) function phi(z) = 1 / (1 + e^(-z))
# over z in [-5, 5).
import matplotlib.pyplot as plt
z = np.arange(-5, 5, 0.1)
phi = 1.0 / (1.0 + np.exp(-z))
fig, ax = plt.subplots()
ax.plot(z, phi)
ax.set_xlabel('z')
ax.set_ylabel('phi')
plt.show()
In [16]:
# Boolean-mask indexing demo: a boolean list selects exactly the
# positions where the mask is True.
char_arr = np.array(['A','B','C','D','E'])
mask = [True, False, True, False, False]
print(char_arr[mask])
['A' 'C']
In [17]:
# Keep only Bream and Smelt rows to set up a binary classification problem.
bream_smelt_indexes = np.isin(train_target, ['Bream', 'Smelt'])
train_bream_smelt = train_scaled[bream_smelt_indexes]
target_bream_smelt = train_target[bream_smelt_indexes]
In [18]:
# Train a binary logistic regression: Bream vs. Smelt (default hyperparameters).
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(train_bream_smelt,target_bream_smelt)
Out[18]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='auto', n_jobs=None, penalty='l2',
random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
warm_start=False)
In [19]:
print(lr.predict(train_bream_smelt[:5]))
['Bream' 'Smelt' 'Bream' 'Bream' 'Bream']
In [20]:
print(lr.predict_proba(train_bream_smelt[:5]))
[[0.99824878 0.00175122]
[0.02178029 0.97821971]
[0.99630423 0.00369577]
[0.98994018 0.01005982]
[0.99834045 0.00165955]]
In [21]:
print(lr.classes_)
['Bream' 'Smelt']
In [22]:
print(lr.coef_,lr.intercept_)
[[-0.30947441 -0.44377974 -0.51039454 -0.77801689 -0.77801689 -0.56428765]] [-1.9305838]
In [23]:
# Raw decision scores z; applying the sigmoid to these yields the
# positive-class ('Smelt') probabilities printed above.
decisions = lr.decision_function(train_bream_smelt[:5])
print(decisions)
[-6.34568758 3.80472879 -5.59686399 -4.58909483 -6.39954502]
In [24]:
# Multiclass logistic regression over all seven species.
# C=20 weakens the default L2 regularization (C defaults to 1.0);
# max_iter=1000 gives the lbfgs solver room to converge.
lr = LogisticRegression(C=20, max_iter=1000)
lr.fit(train_scaled,train_target)
print(lr.score(train_scaled, train_target))
print(lr.score(test_scaled,test_target))
0.9327731092436975
0.925
In [25]:
print(lr.predict(test_scaled[:5]))
['Perch' 'Smelt' 'Pike' 'Roach' 'Perch']
In [26]:
# Rounded class probabilities; columns follow the order in lr.classes_.
proba = lr.predict_proba(test_scaled[:5])
print(np.round(proba, decimals=3))
[[0. 0.01 0.846 0. 0.136 0.006 0.002]
[0. 0.001 0.048 0. 0.007 0.944 0. ]
[0. 0. 0.036 0.924 0.008 0.031 0. ]
[0.005 0.036 0.243 0.004 0.627 0. 0.085]
[0. 0. 0.929 0.004 0.063 0.004 0.001]]
In [27]:
print(lr.classes_)
['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
In [28]:
print(lr.coef_.shape, lr.intercept_.shape)
(7, 6) (7,)
In [30]:
# Per-class decision scores z for the first five test samples.
decision = lr.decision_function(test_scaled[:5])
print(np.round(decision,decimals=2))
[[ -8.11 1.02 5.51 -2.09 3.68 0.53 -0.54]
[-13.54 1.7 5.45 -1.27 3.5 8.42 -4.27]
[ -6.82 -6.99 4.09 7.32 2.62 3.92 -4.14]
[ -1.21 0.82 2.74 -1.34 3.68 -6.39 1.69]
[ -8.46 -2.39 6.39 0.82 3.69 0.87 -0.92]]
In [32]:
# Sanity check: applying softmax across each row of decision scores
# reproduces the predict_proba output shown earlier.
from scipy.special import softmax
proba = softmax(decision, axis=1)
print(np.round(proba, decimals=3))
[[0. 0.01 0.846 0. 0.136 0.006 0.002]
[0. 0.001 0.048 0. 0.007 0.944 0. ]
[0. 0. 0.036 0.924 0.008 0.031 0. ]
[0.005 0.036 0.243 0.004 0.627 0. 0.085]
[0. 0. 0.929 0.004 0.063 0.004 0.001]]
In [ ]:
출처 : 혼자 공부하는 머신러닝+딥러닝 github/박해선
'ML&DL(수정 중)' 카테고리의 다른 글
트리 알고리즘-1 (0) | 2022.03.22 |
---|---|
확률적 경사 하강법 (0) | 2022.03.22 |
회귀알고리즘과 모델 규제-3 (0) | 2022.03.22 |
회귀알고리즘과 모델 규제-2 (0) | 2022.03.22 |
회귀 알고리즘과 모델 규제-1 (0) | 2022.03.21 |