成功安装了GPU版的TensorFlow之后,尝试跑两个神经网络。
第一个:全连接的DNN
关键代码如下:
# Fully connected network built as a TF1 graph:
# 10 inputs -> 5000 -> 5000 -> 5000 (ReLU) -> 7 outputs (sigmoid).
# NOTE(review): weight_variable / bias_variable are not defined in this
# excerpt; presumably they are the helpers shown in the CNN excerpt further
# down the post -- confirm.

# Float32 placeholders; the leading None leaves the batch size unspecified.
xs=tf.placeholder(tf.float32,[None,10])
ys=tf.placeholder(tf.float32,[None,7])
# The bare string literals below are no-op expression statements that the
# author uses as section labels.
'layer1:ful connect'
# Hidden layer 1: [None,10] x [10,5000] + bias, then ReLU.
W_fc1=weight_variable([10,5000],name_data=None)
b_fc1=bias_variable([5000],name_data=None)
h_fc1=tf.nn.relu(tf.matmul(xs,W_fc1)+b_fc1)
'layer2:ful connect'
# Hidden layer 2: 5000 -> 5000, ReLU.
W_fc2=weight_variable([5000,5000],name_data=None)
b_fc2=bias_variable([5000],name_data=None)
h_fc2=tf.nn.relu(tf.matmul(h_fc1,W_fc2)+b_fc2)
'layer3:ful connect'
# Hidden layer 3: 5000 -> 5000, ReLU.
W_fc3=weight_variable([5000,5000],name_data=None)
b_fc3=bias_variable([5000],name_data=None)
h_fc3=tf.nn.relu(tf.matmul(h_fc2,W_fc3)+b_fc3)
'output layer::ful connect,maxsoft'
# Output layer: 5000 -> 7.
# NOTE(review): the label above says "maxsoft", but the activation actually
# applied is an element-wise sigmoid, not a softmax.
W_fc4=weight_variable([5000,7],name_data=None)
b_fc4=bias_variable([7],name_data=None)
output=tf.nn.sigmoid(tf.matmul(h_fc3,W_fc4)+b_fc4)
能够顺利地利用GPU加速,确实比CPU的计算速度快不少。
然而,在跑cnn的时候(部分代码如下)
'def weights'
def weight_variable(shape, name_data):
    """Create a float32 weight variable with truncated-normal initial values.

    Args:
        shape: Shape of the variable to create.
        name_data: Name passed to ``tf.Variable``; callers in this file also
            pass ``None``.

    Returns:
        A ``tf.Variable`` initialised from a truncated normal distribution
        with standard deviation 0.1.
    """
    random_start = tf.truncated_normal(shape, stddev=0.1)
    weights = tf.Variable(random_start, dtype=tf.float32, name=name_data)
    return weights
'def biases'
def bias_variable(shape, name_data):
    """Create a float32 bias variable whose entries all start at 0.1.

    Args:
        shape: Shape of the variable to create.
        name_data: Name passed to ``tf.Variable``; callers in this file also
            pass ``None``.

    Returns:
        A ``tf.Variable`` initialised from a constant tensor filled with 0.1.
    """
    constant_start = tf.constant(0.1, shape=shape)
    biases = tf.Variable(constant_start, dtype=tf.float32, name=name_data)
    return biases
'def conv2d layer'
def conv2d(x, W):
    """Apply a 2-D convolution with stride 1 in every dimension and 'SAME' padding.

    Args:
        x: Input tensor handed to ``tf.nn.conv2d``.
        W: Filter tensor handed to ``tf.nn.conv2d``.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved
'def pooling layer as max_pool'
def max_pool_2x2_v(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'VALID' padding.

    The window and stride are 2 along the second and third dimensions and 1
    along the first and last.

    Args:
        x: Input tensor handed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='VALID')
'def pooling layer as max_pool'
def max_pool_2x2_s(x):
    """Max-pool ``x`` with a 2x2 window, stride 1 and 'SAME' padding.

    The window is 2 along the second and third dimensions; the stride is 1
    along every dimension.

    Args:
        x: Input tensor handed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    window = [1, 2, 2, 1]
    step = [1, 1, 1, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Input layer.
# The bare string literals in this section are no-op expression statements
# that the author uses as section labels.
'placeholder xs & ys'
# Float32 placeholders: 64 values per input row, 1 target value per row;
# the leading None leaves the batch size unspecified.
xs=tf.placeholder(tf.float32,[None,64])
ys=tf.placeholder(tf.float32,[None,1])
'reshape the xs as x_image,which shape is 10*10'
# NOTE(review): the label above says 10*10, but the reshape below produces
# 8x8 single-channel images ([-1, 8, 8, 1]); the label looks stale.
x_image=tf.reshape(xs,[-1,8,8,1])
# Prints the symbolic tensor object, not evaluated values.
print('red input::',x_image)
# Layer 2: two parallel 3x3 convolutions over x_image, concatenated.
'patch1'
# Branch 1: 1 -> 20 channels, relu6 activation.
# This convolution takes the reshaped input and W_conv_r_1_1, which matches
# the inputs (Reshape, W_conv_r_1_1/read) of the Conv2D node named in the
# reported GPU error.
W_conv_r_1_1=weight_variable([3,3,1,20],name_data='W_conv_r_1_1')
b_conv_r_1_1=bias_variable([20],name_data='b_conv_r_1_1')
h_conv_r_1_1=tf.nn.relu6(conv2d(x_image,W_conv_r_1_1)+b_conv_r_1_1)
'patch2'
# Branch 2: 1 -> 10 channels, relu6 activation.
W_conv_r_1_2=weight_variable([3,3,1,10],name_data='W_conv_r_1_2')
b_conv_r_1_2=bias_variable([10],name_data='b_conv_r_1_2')
h_conv_r_1_2=tf.nn.relu6(conv2d(x_image,W_conv_r_1_2)+b_conv_r_1_2)
'concat to layer2'
# Join both branches along axis 3: 20 + 10 = 30 channels, matching the
# 30 input channels of the layer-3 filters below.
h_conv_r_1=tf.concat([h_conv_r_1_1,h_conv_r_1_2],3)
print("red layer2::",h_conv_r_1)
# Layer 3: one 5x5 convolution whose output is concatenated with
# h_conv_r_1_2 (the 10-channel branch from layer 2).
'patch1'
# 30 -> 30 channels, elu activation.
W_conv_r_2_1=weight_variable([5,5,30,30],name_data='W_conv_r_2_1')
b_conv_r_2_1=bias_variable([30],name_data='b_conv_r_2_1')
h_conv_r_2_1=tf.nn.elu(conv2d(h_conv_r_1,W_conv_r_2_1)+b_conv_r_2_1)
'patch for next layer'
# 30 -> 15 channels, elu activation. Not consumed anywhere in this excerpt;
# per the label it is presumably used by a later layer -- confirm.
W_conv_r_2_2=weight_variable([5,5,30,15],name_data='W_conv_r_2_2')
b_conv_r_2_2=bias_variable([15],name_data='b_conv_r_2_2')
h_conv_r_2_2=tf.nn.elu(conv2d(h_conv_r_1,W_conv_r_2_2)+b_conv_r_2_2)
'concat for layer3'
# Concatenates the layer-3 output with the layer-2 branch h_conv_r_1_2
# (not with h_conv_r_2_2) along axis 3: 30 + 10 = 40 channels.
h_conv_r_2=tf.concat([h_conv_r_2_1,h_conv_r_1_2],3)
print('red layer3;:',h_conv_r_2)
上述代码是一个利用CNN训练黑白棋的程序,可以在CPU环境下顺利地运行,但是在GPU环境下,运行时会报错:Failed to get convolution algorithm(无法获得卷积算法)。
完整的报错信息如下:
Traceback (most recent call last):
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1334, in _do_call
return fn(*args)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1319, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1407, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[{{node Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape, W_conv_r_1_1/read)]]
[[{{node Sigmoid/_75}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_105_Sigmoid", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\fengg\Desktop\Othello with ResNet 3\Othello with ResNet-large\Othello with ResNet-large\train_ResNet.py", line 326, in <module>
try_point=sess.run(prediction_r, feed_dict={xs:board_try,ys:[[0.0001]]})
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 929, in run
run_metadata_ptr)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1328, in _do_run
run_metadata)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node Conv2D (defined at C:\Users\fengg\Desktop\Othello with ResNet 3\Othello with ResNet-large\Othello with ResNet-large\train_ResNet.py:31) = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape, W_conv_r_1_1/read)]]
[[{{node Sigmoid/_75}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_105_Sigmoid", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'Conv2D', defined at:
File "<string>", line 1, in <module>
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\idlelib\run.py", line 130, in main
ret = method(*args, **kwargs)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\idlelib\run.py", line 357, in runcode
exec(code, self.locals)
File "C:\Users\fengg\Desktop\Othello with ResNet 3\Othello with ResNet-large\Othello with ResNet-large\train_ResNet.py", line 57, in <module>
h_conv_r_1_1=tf.nn.relu6(conv2d(x_image,W_conv_r_1_1)+b_conv_r_1_1)
File "C:\Users\fengg\Desktop\Othello with ResNet 3\Othello with ResNet-large\Othello with ResNet-large\train_ResNet.py", line 31, in conv2d
return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1044, in conv2d
data_format=data_format, dilations=dilations, name=name)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
op_def=op_def)
File "C:\Users\fengg\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
UnknownError (see above for traceback): Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node Conv2D (defined at C:\Users\fengg\Desktop\Othello with ResNet 3\Othello with ResNet-large\Othello with ResNet-large\train_ResNet.py:31) = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape, W_conv_r_1_1/read)]]
[[{{node Sigmoid/_75}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_105_Sigmoid", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
请问这个问题该如何解决,谢谢了!