Tensorflow Deep MNIST: Resource exhausted: OOM when allocating tensor with shape[10000,32,28,28]
    
        <ul class="article_tags clearfix csdn-tracking-statistics tracking-click" data-mod="popu_377">
            <li class="tit">标签:</li>
- 
     
 Deep Learning
 
 /
- 
     
 tensorflow
 
 /
- 
     
 oom
 
 /
- 
Today, while testing a convolutional neural network, I hit the error shown in the title. Here is the code I ran:

# Load the MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

import tensorflow as tf
sess = tf.InteractiveSession()

# Placeholders for the flattened 28x28 images and one-hot labels
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# Plain softmax layer (carried over from the beginners' tutorial)
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Helpers for weight/bias initialization
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# Convolution with stride 1 and 2x2 max pooling
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

# First convolutional layer: 1 input channel -> 32 feature maps
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 32 -> 64 feature maps
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer on the 7x7x64 pooled output
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Loss, optimizer, accuracy
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.initialize_all_variables()

config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'
with tf.Session(config=config) as s:
    sess.run(init)
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    # Evaluates the entire 10000-image test set in one run -- this is the line that OOMs
    print("test accuracy %g" % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

The error it produced was roughly as follows:
W tensorflow/core/common_runtime/bfc_allocator.cc:270] **********************************************************xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
W tensorflow/core/common_runtime/bfc_allocator.cc:271] Ran out of memory trying to allocate 957.03MiB. See logs for memory state.
W tensorflow/core/framework/op_kernel.cc:968] Resource exhausted: OOM when allocating tensor with shape[10000,32,28,28]
Traceback (most recent call last):
  File "trainer_deepMnist.py", line 109, in <module>
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 559, in eval
    return _eval_using_default_session(self, feed_dict, self.graph, session)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3648, in _eval_using_default_session
    return session.run(tensors, feed_dict)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 710, in run
    run_metadata_ptr)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 908, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 958, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 978, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10000,32,28,28]
    [[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](Reshape, Variable_2/read)]]
Caused by op u'Conv2D', defined at:
  File "trainer_deepMnist.py", line 61, in <module>
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
  File "trainer_deepMnist.py", line 46, in conv2d
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
    data_format=data_format, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2320, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
    self._traceback = _extract_stack()

The cause is a GPU OOM: the GPU cannot allocate enough memory to run the accuracy evaluation over the entire test set in one go, so the evaluation needs to be done in batches. The explanation given on the TensorFlow side:

"Here is how I solved this problem: the error means that the GPU runs out of memory during accuracy evaluation. Hence it needs a smaller sized dataset, which can be achieved by using data in batches. So, instead of running the code on the whole test dataset it needs to be run in batches."
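As a quick sanity check (my own back-of-the-envelope arithmetic, not part of the original post), a float32 tensor of shape [10000, 32, 28, 28] is exactly the 957.03 MiB the allocator complains about:

# Illustrative check: conv1 output for the full test set, at 4 bytes per float32
n, c, h, w = 10000, 32, 28, 28
mib = n * c * h * w * 4 / 1024.0 / 1024.0
print("%.2f MiB" % mib)   # prints 957.03 -- matches the allocator log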
        
Solution: replace that final print with these three lines, printing the accuracy batch by batch, and the problem goes away.

for i in xrange(10):   # if this raises a NameError on Python 3, change xrange to range
    testSet = mnist.test.next_batch(50)
    print("test accuracy %g" % accuracy.eval(feed_dict={x: testSet[0], y_: testSet[1], keep_prob: 1.0}))
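Note that the ten batches above only cover 500 of the 10000 test images and print ten separate numbers. If you instead want a single accuracy figure over the full test set, a minimal sketch is to average the per-batch accuracies (this assumes the mnist, accuracy, x, y_ and keep_prob defined above; since every batch here has the same size, the plain mean equals the overall accuracy):

# Evaluate the full test set in fixed-size chunks and average the results
batch_size = 500
num_batches = mnist.test.num_examples // batch_size   # 10000 // 500 = 20
acc = 0.0
for _ in range(num_batches):
    xs, ys = mnist.test.next_batch(batch_size)
    acc += accuracy.eval(feed_dict={x: xs, y_: ys, keep_prob: 1.0})
print("test accuracy %g" % (acc / num_batches))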
Reposted from Stack Overflow: https://stackoverflow.com/questions/39076388/tensorflow-deep-mnist-resource-exhausted-oom-when-alloc