beta = np.array([random.random(), random.random(), random.random()])  # generate a random initial beta vector
beta
array([0.90546443, 0.75530667, 0.65834986])
def sigmoid(x, beta):
    multiplier = 0
    for i in range(x.size):
        multiplier += x[i] * beta[i]
    p = 1.0 / (1.0 + np.exp(-multiplier))
    return p

sigmoid(X_train[0], beta)
0.5731382785117154
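The loop that builds `multiplier` is just an inner product, so the same probability can be reproduced with `np.dot` as a quick sanity check. A minimal sketch, assuming `X_train` and `beta` are the arrays defined above:

# Vectorized cross-check of sigmoid (assumes X_train and beta from above)
p0 = 1.0 / (1.0 + np.exp(-np.dot(X_train[0], beta)))
print(p0)  # should match sigmoid(X_train[0], beta)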
2. Log likelihood
# individual likelihood: compute the log-likelihood contribution of a single input x
def lg_likelihood_i(x, y, beta, j):
    p_hat = 0
    p = sigmoid(x[j], beta)
    p_hat += y[j] * np.log(p) + (1 - y[j]) * np.log(1 - p)
    return p_hat

lg_likelihood_i(X_train, y_train, beta, 0)
-0.5566282676261158
def lg_likelihood(x, y, beta):
    log_p_hat = 0
    for i in range(y.size):
        log_p_hat += lg_likelihood_i(x, y, beta, i)  # keep accumulating the estimated log likelihood
    return log_p_hat

lg_likelihood(X_train, y_train, beta)
-168.57600337087965
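The same total can also be computed without the Python loop by applying the sigmoid to every row at once. A minimal vectorized sketch, assuming `X_train`, `y_train`, and `beta` are as defined above:

# Vectorized cross-check of the total log likelihood (assumes X_train, y_train, beta from above)
z = X_train.dot(beta)          # linear combination for every sample at once
p = 1.0 / (1.0 + np.exp(-z))   # elementwise sigmoid
print(np.sum(y_train * np.log(p) + (1 - y_train) * np.log(1 - p)))  # should match lg_likelihood(X_train, y_train, beta)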
3. Gradient ascent
get_gradients is the function that computes, for each beta coefficient, the slope obtained by taking the partial derivative of the cost function (the log likelihood) with respect to that coefficient.
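For reference, this is the standard logistic-regression gradient. With n training samples, p_j = sigmoid(x_j, beta), and x_{j,i} the i-th feature of sample j, the averaged slope for coefficient beta_i that the code below accumulates is:

$$\frac{1}{n}\,\frac{\partial \ell(\beta)}{\partial \beta_i} \;=\; \frac{1}{n}\sum_{j=1}^{n}\bigl(y_j - p_j\bigr)\,x_{j,i}$$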
# compute the gradients once
def get_gradients(x, y, beta):
    gradients = []
    for i in range(x[0].size):
        gradient = 0                          # slope for each coefficient
        for j in range(y.size):
            p = sigmoid(x[j], beta)
            gradient += (y[j] - p) * x[j][i]  # accumulate the contribution of each data point x
        gradient = gradient / y.size          # divide by the total number of samples n
        gradients.append(gradient)
    gradients = np.array(gradients)
    return gradients

gradients = np.array(get_gradients(X_train, y_train, beta))
gradients
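The double loop can be collapsed into matrix operations as a cross-check. A minimal vectorized sketch, assuming `X_train` is an (n, 3) array and `y_train`, `beta` are as above:

# Vectorized cross-check of get_gradients (assumes X_train, y_train, beta from above)
p = 1.0 / (1.0 + np.exp(-X_train.dot(beta)))   # predicted probability for every sample
gradients_vec = X_train.T.dot(y_train - p) / y_train.size
print(gradients_vec)                           # should match get_gradients(X_train, y_train, beta)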