2017-07-04 07:37

深度神经网络训练过程中，损失函数的值没有变化

`````` # -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import tensorflow as tf
from numpy.random import RandomState

batch_size = 8
w1 = tf.Variable(tf.random_normal([3,300],stddev = 1,seed = 1))
w2 = tf.Variable(tf.random_normal([300,300],stddev = 1,seed = 1))
w3 = tf.Variable(tf.random_normal([300,300],stddev = 1,seed = 1))
w4 = tf.Variable(tf.random_normal([300,300],stddev = 1,seed = 1))
w5 = tf.Variable(tf.random_normal([300,300],stddev = 1,seed = 1))
w6 = tf.Variable(tf.random_normal([300,5],stddev = 1,seed = 1))
basis1 = tf.Variable(tf.zeros([1, 300]) + 0.1)
basis2 = tf.Variable(tf.zeros([1, 300]) + 0.1)
basis3 = tf.Variable(tf.zeros([1, 300]) + 0.1)
basis4 = tf.Variable(tf.zeros([1, 300]) + 0.1)
basis5 = tf.Variable(tf.zeros([1, 300]) + 0.1)
basis6 = tf.Variable(tf.zeros([1, 5]) + 0.1)
x = tf.placeholder(tf.float32,shape=(None,3))
y_= tf.placeholder(tf.float32,shape=(None,5))

a = tf.matmul(x,w1)+basis1
a = tf.nn.relu(a)
b = tf.matmul(a,w2)+basis2
b = tf.nn.relu(b)
c = tf.matmul(b,w3)+basis3
c = tf.nn.relu(c)
d = tf.matmul(c,w4)+basis4
d = tf.nn.relu(d)
e = tf.matmul(d,w5)+basis5
e = tf.nn.relu(e)
y = tf.matmul(e,w6)+basis6
y = tf.nn.softmax(y)

cross_entropy = -tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0)))

#rdm = RandomState(1)
#dataset_size = 128
#X = rdm.rand(dataset_size,2)
#Y = [ [int(x1+x2<1)] for (x1,x2) in X]

f2 = open("C:/Users/sony/Desktop/points(7).txt")
X = []
Y = []
rows2 = D.split('\n')
for row2 in range(len(rows2)):
split_row2 = rows2[row2].split()
if split_row2:
temp = []
templabel = []
i_label = int(split_row2[0])
for k in range(5):#每一行数据分拆
if k == i_label:#输入标签数据，这里的是表示one hot数据
templabel.append(1)
else:
templabel.append(0)
Y.append(templabel)
for i in range(3,6):
#if(int(split_row2[i]) - 0 < 1e-3):
#   split_row2[i] = 1
temp.append(float(split_row2[i]))
X.append(temp)

with tf.Session() as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
#print(w1)
#print(w2)
dataset_size = len(X)
STEPS = 500000
for i in range(STEPS):
start = (i*batch_size)%dataset_size
end = min(start+batch_size,dataset_size)
sess.run(train_step,feed_dict={x:X[start:end],y_:Y[start:end]})

if i %100 == 0:
total_cross = sess.run(cross_entropy,feed_dict={x:X,y_:Y})
print("After %d training ,loss is %g"%(i,total_cross))
#print(w1)
#print(w2)

``````
1条回答

• Fukui_YB_yb 2017-07-06 12:45
已采纳

问题解决了，是激活函数的问题，relu激活函数的激活率太低，很多神经元死掉了

