import numpy as np


def sum_squres_error(y, t):
    """Return the sum-of-squares error between a prediction and its target.

    Computes ``0.5 * sum((y - t) ** 2)`` over every element.

    Args:
        y: Predicted values; any array-like (list, tuple or ``np.ndarray``).
        t: Target values, broadcastable against ``y``.

    Returns:
        The scalar error as a NumPy float.
    """
    # Convert up front so plain Python sequences work as well as ndarrays
    # (subtracting two lists would raise TypeError).
    y = np.asarray(y)
    t = np.asarray(t)
    return 0.5 * np.sum((y - t) ** 2)


# Demo: sum-of-squares error of two predictions against the same target.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0] # one-hot target: the correct class is index 2
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.0, 0.1, 0.0, 0.0] # prediction favouring index 2 (score 0.6)
print(sum_squres_error(np.array(y), np.array(t)))
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0] # wrong prediction: favours index 7 (score 0.6)
print(sum_squres_error(np.array(y), np.array(t)))

# Output:
# 0.09250000000000003
# 0.5975


def cross_entropy_error(y, t):
    """Return the cross-entropy error ``-sum(t * log(y))`` for one sample.

    Args:
        y: Predicted values as an ``np.ndarray``.
        t: Target values, multiplied element-wise with ``log(y)``.

    Returns:
        The scalar error as a NumPy float.
    """
    # A tiny offset keeps np.log away from log(0) = -inf, which would make
    # the result unusable.
    eps = 1e-7
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)


# Demo: cross-entropy error of two predictions against the same target.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0] # one-hot target: the correct class is index 2
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.0, 0.1, 0.0, 0.0] # prediction favouring index 2 (score 0.6)
print(cross_entropy_error(np.array(y), np.array(t)))
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0] # wrong prediction: favours index 7 (score 0.6)
print(cross_entropy_error(np.array(y), np.array(t)))

# Output:
# 0.510825457099338
# 2.302584092994546


np.random.choice(60000, 10) # draw 10 random integers from the range 0 <= n < 60000

# Output (one example run):
# array([ 2324, 42164, 10912, 13455, 25609, 18090, 25591, 10044, 42262,
#        20433])


# Mini-batch sampling sketch: pick batch_size random row indices and use them
# to index the training inputs and targets. It is kept inside a string literal
# so it is never executed; x_train and t_train are not defined in the visible
# part of this file.
'''
# Batch 구현
train_size = x_train.shape[0]
batch_size =10
batch_mark = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mark]
t_batch = t_train[batch_mark]
'''

'\n# Batch 구현\ntrain_size = x_train.shape[0]\nbatch_size =10\nbatch_mark = np.random.choice(train_size, batch_size)\nx_batch = x_train[batch_mark]\nt_batch = t_train[batch_mark]\n'


# Cross-entropy error for one-hot targets, averaged over the batch.
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error of ``y`` against one-hot ``t``.

    Args:
        y: Predictions as an ``np.ndarray``; either one sample (1-D) or a
            batch with one row per sample (2-D).
        t: Targets with the same layout as ``y``.

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of rows in ``y``.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch holding exactly one row.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    n_rows = y.shape[0]
    # 1e-7 keeps np.log away from log(0) = -inf.
    total = -np.sum(t * np.log(y + 1e-7))
    return total / n_rows


# Cross-entropy error for targets given as class indices (one-hot is accepted too).
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error of ``y`` against the targets ``t``.

    Args:
        y: Predictions; one sample (1-D) or a batch with one row per sample
            (2-D).
        t: Targets, either integer class indices (a scalar for one sample,
            one index per row for a batch) or one-hot rows with the same
            shape as ``y``.

    Returns:
        ``-sum(log(y[i, t[i]] + 1e-7))`` divided by the number of rows.
    """
    y = np.asarray(y)
    t = np.asarray(t)

    if y.ndim == 1:
        # Promote a single sample to a batch holding exactly one row.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    if t.shape == y.shape:
        # Same shape as the predictions means one-hot rows: reduce them to
        # class indices. Indexing with the raw 0/1 values would silently read
        # columns 0 and 1 instead of the target column.
        t = t.argmax(axis=1)
    else:
        # Class indices: flatten so each row of y is paired with one index.
        t = t.reshape(-1)

    batch_size = y.shape[0]
    # 1e-7 keeps np.log away from log(0) = -inf.
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable taking a single argument.
        x: Point at which to differentiate.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    step = 1e-4
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)


def func(x):
    """Return the sum of squared elements of ``x``.

    For a two-element input this is f(x1, x2) = x1**2 + x2**2.
    """
    squared = x ** 2
    return np.sum(squared)


def numerical_gradient(f, x):
    """Approximate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h``
    (``h = 1e-4``), ``f(x)`` is evaluated for both, and the element is then
    restored, so ``x`` holds its original values again on return.

    Args:
        f: Callable taking the array ``x`` and returning a scalar.
        x: ``np.ndarray`` of any number of dimensions. It should have a
            floating dtype: the perturbed value is written back into ``x``,
            so an integer array would truncate it.

    Returns:
        An array with the same shape and dtype as ``x`` holding the partial
        derivative with respect to each element.
    """
    h = 1e-4
    grad = np.zeros_like(x)  # same shape as x

    # np.ndindex yields a full index tuple for every element, so this works
    # for matrices as well as vectors. Indexing with a flat integer would
    # address whole rows of a 2-D array.
    for idx in np.ndindex(x.shape):
        saved = x[idx]

        # f(x + h)
        x[idx] = saved + h
        f_plus = f(x)

        # f(x - h)
        x[idx] = saved - h
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * h)
        x[idx] = saved  # restore the original value

    return grad


# Demo: gradient of func (sum of squares) at three points; each result is 2 * x.
print(numerical_gradient(func, np.array([3.0, 4.0])))
print(numerical_gradient(func, np.array([0.0, 2.0])))
print(numerical_gradient(func, np.array([3.0, 0.0])))

# Output:
# [6. 8.]
# [0. 4.]
# [6. 0.]


def gradient_descent(f, init_x, lr=0.1, step_num=100):
    """Minimise ``f`` by plain gradient descent.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: Starting point (array-like). It is copied, so the caller's
            array is left unchanged.
        lr: Learning rate, i.e. the step size applied to each gradient.
        step_num: Number of update steps to run.

    Returns:
        A new array holding the position after ``step_num`` updates.
    """
    # np.array copies, so the in-place updates below never reach init_x.
    x = np.array(init_x)
    if not np.issubdtype(x.dtype, np.inexact):
        # Integer (or bool) input cannot take fractional updates in place.
        x = x.astype(float)

    for _ in range(step_num):
        x -= lr * numerical_gradient(f, x)
    return x


# Demo: minimise func starting from (-3, 4) with the default lr and step count.
# The returned array is not stored; a notebook cell would display it.
init_x = np.array([-3.0, 4.0])
gradient_descent(func, init_x=init_x)

# Output:
# array([-6.11110793e-10,  8.14814391e-10])


def softmax(a):
    """Return the softmax of ``a`` along its last axis.

    A 1-D input is treated as one sample. For a 2-D batch every row is
    normalised on its own, so each row of the result sums to 1.

    Args:
        a: Scores; array-like of any number of dimensions.

    Returns:
        An array of the same shape as ``a`` with non-negative entries that
        sum to 1 along the last axis.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce along, so reduce over everything.
    axis = -1 if a.ndim > 0 else None

    # Subtracting the maximum leaves the result unchanged but keeps np.exp
    # from overflowing on large scores.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)


class SimpleNet:
    """Single-layer network holding one 2x3 weight matrix and no bias."""

    def __init__(self):
        # Weights are drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores: the dot product of ``x`` and ``self.W``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy error of ``softmax(predict(x))`` against ``t``."""
        return cross_entropy_error(softmax(self.predict(x)), t)


# Demo: build a SimpleNet, show its random weights, and score one input.
net = SimpleNet()
print(net.W) # weight parameters

x = np.array([0.6, 0.9])
p = net.predict(x)
print('prediction:',p)

print('최댓값 인덱스: ',np.argmax(p)) # index of the largest score

# One-hot target: the correct class is index 2. The loss value is not stored;
# a notebook cell would display it.
t = np.array([0, 0 ,1])
net.loss(x, t)

# Output (one example run; the weights are random):
# [[0.048327   0.59915597 0.11820487]
#  [0.33692047 1.05549058 1.86691939]]
# prediction: [0.33222462 1.30943511 1.75115038]
# 최댓값 인덱스:  2
#
# 0.6338780242242011


class TwoLayerNet:
    """Fully connected network with one hidden layer.

    All parameters live in the ``self.params`` dict:
        ``W1``: (input_size, hidden_size) weights
        ``b1``: (hidden_size,) biases
        ``W2``: (hidden_size, output_size) weights
        ``b2``: (output_size,) biases
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are standard-normal samples scaled by ``weight_init_std``;
        biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        # NOTE(review): sigmoid is not defined in the visible part of this
        # file; it must be provided elsewhere before predict() can run.
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        return softmax(a2)

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x``.

        Args:
            x: Input data.
            t: Target labels.
        """
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax(axis=1)``, so
        ``t`` must hold one row of class scores (e.g. one-hot) per sample.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        return np.sum(predicted == expected) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the numerical gradient of the loss for every parameter.

        Returns:
            A dict with the keys ``W1``, ``b1``, ``W2`` and ``b2``, each
            holding the gradient for the parameter of the same name.
        """
        def loss_W(_):
            # The argument is ignored on purpose: the module-level
            # numerical_gradient perturbs the parameter array in place, and
            # self.loss reads that same array through self.params.
            return self.loss(x, t)

        # NOTE: W1 and W2 are 2-D, so the module-level numerical_gradient
        # must be able to handle multi-dimensional arrays.
        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            # Inside a method this name resolves to the module-level
            # function, not to this method.
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    # Backward-compatible alias for the original, misspelled method name.
    numrical_gradient = numerical_gradient

티스토리

[Deep Learning from Scratch] chapter 4.Training Neural Network

[Deep Learning from Scratch] chapter 4.Training Neural Network

데이터 주도 학습¶

훈련 데이터와 시험 데이터¶

손실함수¶

오차제곱합(Sum of squares for error, SSE)¶

Cross-Entropy Error(CEE)¶

Mini-batch¶

(배치용) 교차 엔트로피 구현¶

수치미분(Numerical differentiation)¶

기울기¶

Gradient descent¶

학습 알고리즘¶

Simple Net¶

Two layer Net¶