Implementing Logistic Regression with Gradient Descent (2)

Implementation with NumPy

Loading the Data

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
data = pd.read_csv('assignment2.csv')
data.head()

   Label  bias  experience  salary
0      1     1         0.7   48000
1      0     1         1.9   48000
2      1     1         2.5   60000
3      0     1         4.2   63000
4      0     1         6.0   76000

Splitting the Data into Train and Test Sets

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, 1:], data.iloc[:, 0], random_state = 0)
X_train.shape, X_test.shape, y_train.shape, y_test.shape
((150, 3), (50, 3), (150,), (50,))

Data Scaling

Scale the experience and salary columns. Since StandardScaler transforms every column, the bias column is saved first and restored to 1 afterwards.

X_train.head()

     bias  experience  salary
71      1         5.3   48000
124     1         8.1   66000
184     1         3.9   60000
97      1         0.2   45000
149     1         1.1   66000

scaler = StandardScaler()
bias_train = X_train["bias"]                   # save the bias column before scaling
bias_train = bias_train.reset_index()["bias"]  # reset the index to match the scaled frame
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)
X_train["bias"] = bias_train                   # restore bias to 1
X_train.head()

   bias  experience    salary
0     1    0.187893 -1.143335
1     1    1.185555  0.043974
2     1   -0.310938 -0.351795
3     1   -1.629277 -1.341220
4     1   -1.308600  0.043974

y_train = y_train.reset_index()["Label"]
y_test = y_test.reset_index()["Label"]
bias_test = X_test["bias"]
bias_test = bias_test.reset_index()["bias"]
X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)  # reuse the scaler fitted on the training set
X_test["bias"] = bias_test
X_test.head()

   bias  experience    salary
0     1   -1.344231 -0.615642
1     1    0.508570  0.307821
2     1   -0.310938  0.571667
3     1    1.363709  1.956862
4     1   -0.987923 -0.747565

1. sigmoid

import random
X_train = np.array(X_train[["bias", "experience", "salary"]])
beta = np.array([random.random(), random.random(), random.random()]) # generate a random initial beta
beta
array([0.90546443, 0.75530667, 0.65834986])
def sigmoid(x, beta):
    multiplier = 0
    for i in range(x.size):
        multiplier += x[i]*beta[i]        # linear combination of x and beta
    p = 1.0/(1.0+np.exp(-multiplier))     # logistic function
    return p
sigmoid(X_train[0], beta)
0.5731382785117154
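The feature loop above is just the dot product of x and beta, so the same computation can be vectorized. A minimal sketch (sigmoid_vec is an illustrative name, not part of the original code):

def sigmoid_vec(X, beta):
    return 1.0/(1.0 + np.exp(-np.dot(X, beta)))   # works on one row or the whole matrix at once

sigmoid_vec(X_train[0], beta)   # same value as sigmoid(X_train[0], beta)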

2. log likelihood

# Individual likelihood: the log likelihood contribution of a single observation j
def lg_likelihood_i(x, y, beta, j):
    p = sigmoid(x[j], beta)
    return y[j]*np.log(p) + (1-y[j])*np.log(1-p)
lg_likelihood_i(X_train, y_train, beta, 0)
-0.5566282676261158
def lg_likelihood(x, y, beta):
    log_p_hat = 0
    for i in range(y.size):
        log_p_hat += lg_likelihood_i(x, y, beta, i)  # accumulate the individual log likelihoods

    return log_p_hat
lg_likelihood(X_train, y_train, beta)
-168.57600337087965
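The loop over observations can likewise be collapsed into array operations. A sketch under the same definitions (lg_likelihood_vec is an illustrative name, not from the original code):

def lg_likelihood_vec(x, y, beta):
    p = 1.0/(1.0 + np.exp(-np.dot(x, beta)))             # p for every row at once
    return np.sum(y*np.log(p) + (1 - y)*np.log(1 - p))   # sum of the individual log likelihoods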

3. Gradient Ascent

get_gradients computes the partial derivative of the cost function (the log likelihood) with respect to each beta coefficient, i.e. the slope along each coordinate.

# compute the gradients once
def get_gradients(x, y, beta):
    gradients = []

    for i in range(x[0].size):
        gradient = 0                                  # slope for this coefficient
        for j in range(y.size):
            p = sigmoid(x[j], beta)
            gradient += (y[j] - p)*x[j][i]            # accumulate over the individual data points x

        gradient = gradient/y.size                    # divide by the total n
        gradients.append(gradient)

    gradients = np.array(gradients)

    return gradients

gradients = np.array(get_gradients(X_train, y_train, beta))
gradients
array([-0.37681215, -0.09655044, -0.3003301 ])
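For reference, the double loop above reduces to a single matrix product; a sketch (get_gradients_vec is an illustrative name, not part of the original code):

def get_gradients_vec(x, y, beta):
    p = 1.0/(1.0 + np.exp(-np.dot(x, beta)))   # predicted probability for every row
    return np.dot(x.T, y - p)/y.size           # averaged gradient, one entry per coefficient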

step takes the computed gradients and moves to the point from which the next iteration will proceed: beta_new = beta + stepsize*gradients.

def step(beta, gradients, stepsize=np.array([0.01,0.01,0.01])): # stepsize: learning rate, default 0.01
    beta = beta + stepsize*gradients
    return beta
step(beta, gradients)
array([0.90169631, 0.75434116, 0.65534656])
# max_cycle: maximum number of iterations
# tolerance: training stops once the change per step falls below this value
# theta_0: coefficients before the update
# theta: coefficients after the update

def gradientAscent(x, y, beta, max_cycle = 200000, tolerance = 0.000001, stepsize=np.array([0.01,0.01,0.01])):
    theta_0 = beta
    i = 0
    gradients = np.array([])
    while i < max_cycle:
        gradients = get_gradients(x, y, theta_0)
        theta = step(theta_0, gradients, stepsize)
        temp = theta_0 - theta                 # change produced by this step
        theta_0 = theta

        if i % 1000 == 0:
            print(gradients)                   # monitor the gradients every 1000 iterations
        if np.abs(temp.sum()) < tolerance:     # note: sums the signed changes; np.abs(temp).sum() would be a stricter test
            print("stop")
            break
        i += 1
    return theta_0
beta.sum()
2.3191209550302005

4. Fitting

cf.) Three common techniques for choosing the step size are listed below (a backtracking sketch follows the reference):

  • Fixed step size

  • Backtracking line search

  • Exact line search

Reference: https://wikidocs.net/18088
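As an illustration of the second technique, here is a minimal backtracking line search sketch adapted to this maximization setting; the function backtracking_stepsize and its parameters alpha and shrink are assumptions for illustration, not taken from the post or the linked reference.

def backtracking_stepsize(x, y, beta, gradients, t=1.0, alpha=0.5, shrink=0.5):
    current = lg_likelihood(x, y, beta)
    # Shrink t until the step raises the log likelihood by at least
    # alpha*t*||gradients||^2 (the Armijo condition, stated for ascent).
    while lg_likelihood(x, y, beta + t*gradients) < current + alpha*t*np.dot(gradients, gradients):
        t *= shrink
    return t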

# A learning rate of 0.01 progresses too slowly, so 0.1 is used instead; it makes little difference to the result.
beta = gradientAscent(X_train, y_train, beta, stepsize=np.array([0.1,0.1,0.1]))
beta
[-0.37681215 -0.09655044 -0.3003301 ]
[-0.0030989   0.0103664  -0.00977676]
[-0.00119877  0.0039189  -0.00364469]
[-0.00056101  0.00182317 -0.00168622]
[-0.00028209  0.0009145  -0.00084362]
[-0.00014655  0.00047454 -0.00043719]
[-7.73735736e-05  2.50391335e-04 -2.30529092e-04]
[-4.11904585e-05  1.33256706e-04 -1.22642622e-04]
[-2.20238194e-05  7.12383899e-05 -6.55517795e-05]
stop
array([-1.86332543,  4.25483493, -4.02439239])
lg_likelihood(X_train, y_train, beta) # converged log likelihood
-44.73076829152757
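As an optional sanity check (an assumption, not part of the original walkthrough), scikit-learn's LogisticRegression with very weak regularization should land near the same coefficients; fit_intercept=False because the bias column is already in X_train:

from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1e6, fit_intercept=False)   # large C = almost no regularization
clf.fit(X_train, y_train)
clf.coef_   # expected to be close to the beta found by gradientAscent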

5. Prediction

X_test = np.array(X_test[["bias", "experience", "salary"]])
Label_predict = []
for i in range(y_test.size):
    p = sigmoid(X_test[i], beta)    # estimate p with the learned beta
    if p > 0.5:
        Label_predict.append(1)     # classify as 1 when p is greater than 0.5
    else:
        Label_predict.append(0)
Label_predict = np.array(Label_predict)
Label_predict
array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])
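The prediction loop can also be vectorized; a minimal sketch (Label_predict_vec is an illustrative name, not from the original code):

p = 1.0/(1.0 + np.exp(-np.dot(X_test, beta)))       # p for every test row at once
Label_predict_vec = (p > 0.5).astype(int)           # threshold at 0.5
np.array_equal(Label_predict, Label_predict_vec)    # True when both agree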

6. confusion_matrix

from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(y_test, Label_predict).ravel()
confusion_matrix(y_test, Label_predict)
array([[38,  2],
       [ 1,  9]], dtype=int64)
#Accuracy
Accuracy = (tp+tn)/(tp+fn+fp+tn)
Accuracy
0.94
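The same figure can be cross-checked with scikit-learn's accuracy_score:

from sklearn.metrics import accuracy_score
accuracy_score(y_test, Label_predict)   # 0.94, matching the manual computation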

Data source: Data Science from Scratch: First Principles with Python (2015)
