# author zyyFTD
# Github: https://github.com/YuyangZhangFTD/zyy_ML-DL
"""This code is for python3.Neural Network using numpy.For convenience, all matrices and vectors should be np.mat.================================== The whole architecture ===========================================input layer : x1 x2 x3 ... x784 ==> x[784, 1]|| full connection ==> w1[512, 784] b1[512, 1]hidden layer1 : a1 a2 a3 ... a512 ==> a[512, 1]|| full connection ==> w2[256, 512] b2[256, 1]hidden layer2 : b1 b2 b3 ... b256 ==> b[256, 1]|| full connection ==> w3[128, 256] b3[128, 1]hidden layer3 : c1 c2 c3 ... c128 ==> c[128, 1]|| full connection ==> w4[64, 128] b4[64, 1]hidden layer4 : d1 d2 d3 ... d64 ==> d[64, 1]|| softmax ==> w5[64, 10] b5[10, 1]output layer : y1 y2 y3 ... y10 ==> y[10, 1]====================================== One part of the network ======================================output f() weight input bias|y1| |w11 w12 w13| |x1| |b1||y2| = f( |w21 w22 w23| * |x2| |b2| ) f() is activation function|y3| |w31 w32 w33| |x3| |b3|The correction is C. If the loss function is\frac{1}{2}(\hat{y}-y)^2, C = \hat{y}-y.f' is the derivation of f.the gradient:dC/dw = C*f'*xdC/db = C*f'*1dC/dx = C*f'*wThe weight and bias can be updated with gradient of w and b.And the correction of the previous layer is the gradient of x in this layer.========================================== softmax ==================================================output f() weight input bias|y1| |w11 w12 w13| |x1| |b1||y2| = f( |w21 w22 w23| * |x2| |b2| ) f() is activation function|y3| |w31 w32 w33| |x3| |b3||z1| = y1/sum(y) |y1||z2| = y2/sum(y) y=|y2||z3| = y3/sum(y) |y3|"""
import numpy as np
import pandas as pd
# ================================= init parameter ========================================
layer_n = 3
input_size = 2
hidden1_size = 5
hidden2_size = 5
# hidden3_size = 128
hidden4_size = 5
output_size = 4
learning_rate = 0.1
# batch_size = 32  # batch size is usually a power of two for efficient GPU training
epoch_n = 1001
# =========================================================================================
# ================================= all function ==========================================
def one_hot(para_y, para_output):    # label one-hot encoding, e.g. [2] ==> 0,0,1,0,0,0,0,0,0,0
    tmp = []
    for ii in range(len(para_y)):
        data = [0] * para_output
        data[int(para_y[ii])] = 1
        tmp.append(data)
    return np.mat(tmp)
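# e.g. (illustration only) one_hot([2, 0], 4) ==> np.mat([[0, 0, 1, 0],
#                                                         [1, 0, 0, 0]])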
def one_encode(para_x):    # binarize the input data: [1-256] ==> 1, 0 ==> 0
    tmp = []
    for ii in range(len(para_x)):
        tmp.append(list(map(lambda x: 1 if x > 0 else 0, para_x[ii])))
    return np.mat(tmp)
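# e.g. (illustration only) one_encode([[0, 3, 255], [7, 0, 0]]) ==> np.mat([[0, 1, 1],
#                                                                           [1, 0, 0]])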
def sigmoid(para_x, para_weight, para_bias):
    tmp = np.exp(-1*(para_weight * para_x + para_bias))
    return 1/(1 + tmp)
def relu(para_x, para_weight, para_bias):
    tmp = para_weight * para_x + para_bias
    return np.mat(list(map(lambda x: x if x > 0 else 0, [float(x) for x in tmp]))).T
def softmax(para_x, para_weight, para_bias):
    tmp = np.exp(para_weight * para_x + para_bias)
    return tmp/np.sum(tmp)
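# e.g. (illustration only) if para_weight * para_x + para_bias = [1, 2, 3]^T, then
# softmax returns roughly [0.0900, 0.2447, 0.6652]^T, which sums to 1.0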
def feedforward(para_weight, para_bias, para_value, para_input_vector,
                activation_function=sigmoid, output_function=softmax):
    # the keys of the dicts must be ordered 0, 1, ..., n-1
    n = len(para_weight)    # number of layers
    for ii in range(n):
        # get x, w, b for each layer
        if ii == 0:
            tmp_x = para_input_vector
        else:
            tmp_x = para_value[ii-1]
        tmp_weight = para_weight[ii]
        tmp_bias = para_bias[ii]
        # y = f(w*x + b)
        # the output layer is a softmax layer
        if ii < n-1:
            para_value[ii] = activation_function(tmp_x, tmp_weight, tmp_bias)
        else:
            para_value[ii] = output_function(tmp_x, tmp_weight, tmp_bias)
    return para_value[n-1]
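# shape check (illustration only, using the size_list defined below, [2, 5, 5, 4]):
#   input x: (2, 1) --w0 (5, 2)--> hidden1: (5, 1) --w1 (5, 5)--> hidden2: (5, 1)
#   --w2 (4, 5)--> softmax output: (4, 1)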
def loss_func(para_hat, para_true):
    # a row vector is one piece of data
    # para_hat  : [0.1, 0.2, ..., 0.1]
    # para_true : [ 0 ,  1 , ...,  0 ]
    # each row of para_hat sums to 1.0
    # return the squared-error loss summed over the given data
    return np.sum(np.power((para_hat - para_true), 2) * 0.5)
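# e.g. (illustration only) para_hat = [0.1, 0.7, 0.2], para_true = [0, 1, 0]
#   loss = 0.5 * (0.1^2 + 0.3^2 + 0.2^2) = 0.07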
def backpropagation(para_weight, para_bias, para_value, para_true, para_input_vector,
                    para_eta=0.01, activation_function=sigmoid, loss_function=loss_func):
    n = len(para_weight)    # number of layers
    # the output layer
    # the derivative of the loss function
    if loss_function != loss_func:
        # get the derivative of the loss function which you set
        # error =
        print('Define your own derivative of the loss function')
        return None
    else:
        error = para_value[n-1] - para_true    # column vector
    para_weight[n-1] -= para_eta * error * para_value[n-2].T
    para_bias[n-1] -= para_eta * error
    # correction passed back to the previous layer (uses the updated weights, scaled by para_eta)
    tmp_delta = (para_eta * error.T * para_weight[n-1]).T
    # the hidden layers
    # the derivative of the activation function
    if activation_function == sigmoid:
        def gradient(para_para_x):
            return np.multiply(para_para_x, (1 - para_para_x))
    elif activation_function == relu:
        gradient = one_encode
    else:
        # get the derivative of the activation function which you set
        # gradient =
        print('Define your own derivative of the activation function')
        return None
    for ii in reversed(range(n-1)):
        tmp_delta = np.multiply(gradient(para_value[ii]), tmp_delta)
        if ii == 0:
            # para_weight[ii] -= para_eta * para_input_vector * tmp_delta.T
            para_weight[ii] -= para_eta * tmp_delta * para_input_vector.T
        else:
            # para_weight[ii] -= para_eta * para_value[ii-1] * tmp_delta.T
            para_weight[ii] -= para_eta * tmp_delta * para_value[ii-1].T
        para_bias[ii] -= para_eta * tmp_delta
        tmp_delta = para_eta * (tmp_delta.T * para_weight[ii]).T
    return para_weight, para_bias
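# minimal usage sketch (illustration only): the weight/bias dicts are updated in place,
# so the return value does not need to be captured, e.g.
#   backpropagation(weight, bias, tmp_value, y_tmp, x_tmp, para_eta=learning_rate)
# this assumes feedforward(...) was just called, so para_value holds the current activations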
def is_true(para_hat, para_true):
    # 1 if the predicted class matches the true class, else 0
    if np.argmax(para_hat) == np.argmax(para_true):
        return 1
    else:
        return 0
def accuracy(para_hat, para_true):
    # fraction of rows whose predicted class matches the true class
    true_num = 0
    for ii in range(len(para_hat)):
        true_num += is_true(para_hat[ii], para_true[ii])
    print(true_num)    # number of correct predictions
    return true_num / len(para_hat)
# ========================================================================================
file = pd.read_csv('Labeled_Data_4cls_100.csv')
train_y = one_hot(file['label'].values, output_size)
train_x = np.mat(file.drop('label', axis=1))
# size_list = [input_size, hidden1_size, hidden2_size, hidden4_size, output_size]
size_list = [input_size, hidden2_size, hidden4_size, output_size]
# size_list = [input_size, hidden4_size, output_size]
weight = dict()
bias = dict()
tmp_value = dict()
for i in range(layer_n):
    weight[i] = np.mat(np.random.rand(size_list[i+1], size_list[i]))
    bias[i] = np.mat(np.random.rand(size_list[i+1], 1))
    tmp_value[i] = np.mat(np.random.rand(size_list[i+1], 1))
data_n = len(train_y)
for epoch_i in range(epoch_n):
    loss = 0
    sum_true = 0
    for ii in range(data_n):
        x_tmp = train_x[ii].T
        y_tmp = train_y[ii].T
        hat_y = feedforward(weight, bias, tmp_value, x_tmp)
        backpropagation(weight, bias, tmp_value, y_tmp, x_tmp, para_eta=learning_rate)
        loss += loss_func(hat_y, y_tmp)
        sum_true += is_true(hat_y, y_tmp)
    if epoch_i % 20 == 0:
        print('epoch_i :', epoch_i)
        print('loss :', loss)
        print('average accuracy :', sum_true/data_n)
print('predict all data: ')
hat_test = np.mat(np.ones([len(train_y), output_size]))
for ii in range(len(train_y)):
    hat_test[ii] = feedforward(weight, bias, tmp_value, train_x[ii].T).T
print(accuracy(hat_test, train_y))