Thank you for reading my issue.
I am building a DoReFa-Net model for the CIFAR image dataset.
I would like to use `.BatchNorm()` in my model, but an error occurs.
Here is the error:
```
Traceback (most recent call last):
  File "cifar-dorefa.py", line 197, in <module>
    SimpleTrainer(config).train()
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 84, in train
    self.main_loop()
  File "/home/tomohiro/github/tensorpack/tensorpack/train/base.py", line 108, in main_loop
    callbacks.setup_graph(self)  # TODO use weakref instead?
  File "/home/tomohiro/github/tensorpack/tensorpack/callbacks/base.py", line 52, in setup_graph
    self._setup_graph()
  File "/home/tomohiro/github/tensorpack/tensorpack/callbacks/group.py", line 126, in _setup_graph
    cb.setup_graph(self.trainer)
  File "/home/tomohiro/github/tensorpack/tensorpack/callbacks/base.py", line 52, in setup_graph
    self._setup_graph()
  File "/home/tomohiro/github/tensorpack/tensorpack/callbacks/inference.py", line 88, in _setup_graph
    input_names, self.output_tensors)
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 117, in get_predict_func
    return self.predictor_factory.get_predictor(input_names, output_names, 0)
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 42, in get_predictor
    self._build_predict_tower()
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 55, in _build_predict_tower
    self.model, self.towers, prefix=self.PREFIX)
  File "/home/tomohiro/github/tensorpack/tensorpack/predict/base.py", line 112, in build_multi_tower_prediction_graph
    model._build_graph(input_vars, False)
  File "cifar-dorefa.py", line 76, in _build_graph
    .BatchNorm('bn2')
  File "/home/tomohiro/github/tensorpack/tensorpack/models/__init__.py", line 53, in f
    ret = layer(name, self._t, *args, **kwargs)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/_common.py", line 54, in wrapped_func
    outputs = func(*args, **actual_args)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/batch_norm.py", line 70, in BatchNorm
    assert not use_local_stat
AssertionError
```
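If I read this traceback correctly, the `InferenceRunner` callback rebuilds my model as a prediction tower (`model._build_graph(input_vars, False)`), and in that tower `BatchNorm` asserts that it is not asked to use the per-batch ("local") statistics, since inference should use the moving averages. Below is a toy sketch of how I understand the failing check; it is my own simplification for illustration, not tensorpack's actual code (the default value of `use_local_stat` is my assumption):

```python
# Hypothetical, stripped-down stand-in for tensorpack's BatchNorm check.
# 'use_local_stat' mirrors the real argument name from the traceback;
# everything else here is simplified.
def BatchNorm(x, use_local_stat=True):  # assumed default, for illustration
    in_prediction_tower = True  # InferenceRunner builds the graph for inference
    if in_prediction_tower:
        # Inference must use moving-average statistics, so asking for
        # per-batch (local) statistics is rejected.
        assert not use_local_stat
    return x

BatchNorm('fake-input')  # AssertionError, like my bare '.BatchNorm('bn2')' call
```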
And here is my program:
```python
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# File: cifar-convnet.py
# Author: Yuxin Wu <[email protected]>
import tensorflow as tf
import argparse
import numpy as np
import os

from tensorpack import *
import tensorpack.tfutils.symbolic_functions as symbf
from tensorpack.tfutils.summary import *
from dorefa import get_dorefa

"""
A small convnet model for Cifar10 or Cifar100 dataset.
Cifar10:
    90% validation accuracy after 40k step.
    91% accuracy after 80k step.
    19.3 step/s on Tesla M40
Not a good model for Cifar100, just for demonstration.
"""

BITW = 1
BITA = 2
BITG = 6
BATCH_SIZE = 32


class Model(ModelDesc):
    def __init__(self, cifar_classnum):
        super(Model, self).__init__()
        self.cifar_classnum = cifar_classnum

    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 30, 30, 3], 'input'),
                InputVar(tf.int32, [None], 'label')]

    def _build_graph(self, input_vars, is_training):
        image, label = input_vars
        image = image / 4.0  # just to make range smaller

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        old_get_variable = tf.get_variable
        def new_get_variable(name, shape=None, **kwargs):
            v = old_get_variable(name, shape, **kwargs)
            # don't binarize first and last layer
            if name != 'W' or 'conv0' in v.op.name or 'fct' in v.op.name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)
        tf.get_variable = new_get_variable

        def nonlin(x):
            if BITA == 32:
                return tf.nn.relu(x)  # still use relu for 32bit cases
            return tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return fa(nonlin(x))

        def cabs(x):
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.image_summary("train_image", image, 10)

        with argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(FullyConnected, use_bias=False, nl=tf.identity), \
                argscope(Conv2D, nl=BNReLU(is_training), use_bias=False, kernel_shape=3):
            logits = LinearWrap(image) \
                .Conv2D('conv1.1', out_channel=64) \
                .Conv2D('conv1.2', out_channel=64) \
                .BatchNorm('bn2') \
                .apply(fg) \
                .apply(activate) \
                .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                .apply(activate) \
                .Conv2D('conv2.1', out_channel=128) \
                .apply(activate) \
                .Conv2D('conv2.2', out_channel=128) \
                .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                .apply(activate) \
                .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                .apply(fg) \
                .BatchNorm('bn1') \
                .apply(activate) \
                .Conv2D('conv3.2', out_channel=128, padding='VALID') \
                .apply(activate) \
                .FullyConnected('fc0', 1024 + 512,
                                b_init=tf.constant_initializer(0.1)) \
                .tf.nn.dropout(keep_prob) \
                .FullyConnected('fc1', 512,
                                b_init=tf.constant_initializer(0.1)) \
                .apply(cabs) \
                .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        tf.get_variable = old_get_variable

        prob = tf.nn.softmax(logits, name='output')

        # compute the number of failed samples, for ClassificationError to use at test time
        wrong = symbf.prediction_incorrect(logits, label)
        nr_wrong = tf.reduce_sum(wrong, name='wrong')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = tf.mul(0.004,
                         regularize_cost('fc.*/W', tf.nn.l2_loss),
                         name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary([('.*/W', ['histogram'])])  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')


def get_data(train_or_test, cifar_classnum):
    isTrain = train_or_test == 'train'
    if cifar_classnum == 10:
        ds = dataset.Cifar10(train_or_test)
    else:
        ds = dataset.Cifar100(train_or_test)
    if isTrain:
        augmentors = [
            imgaug.RandomCrop((30, 30)),
            imgaug.Flip(horiz=True),
            imgaug.Brightness(63),
            imgaug.Contrast((0.2, 1.8)),
            imgaug.GaussianDeform(
                [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
                (30, 30), 0.2, 3),
            imgaug.MeanVarianceNormalize(all_channel=True)
        ]
    else:
        augmentors = [
            imgaug.CenterCrop((30, 30)),
            imgaug.MeanVarianceNormalize(all_channel=True)
        ]
    ds = AugmentImageComponent(ds, augmentors)
    ds = BatchData(ds, 128, remainder=not isTrain)
    if isTrain:
        ds = PrefetchData(ds, 3, 2)
    return ds


def get_config(cifar_classnum):
    logger.auto_set_dir()

    # prepare dataset
    dataset_train = get_data('train', cifar_classnum)
    step_per_epoch = dataset_train.size()
    dataset_test = get_data('test', cifar_classnum)

    sess_config = get_default_sess_config(0.5)

    nr_gpu = get_nr_gpu()
    lr = tf.train.exponential_decay(
        learning_rate=1e-2,
        global_step=get_global_step_var(),
        decay_steps=step_per_epoch * (30 if nr_gpu == 1 else 20),
        decay_rate=0.5, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_train, ClassificationError())  # change this to dataset_test
        ]),
        session_config=sess_config,
        model=Model(cifar_classnum),
        step_per_epoch=step_per_epoch,
        max_epoch=250,
    )


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')  # nargs='*' in multi mode
    parser.add_argument('--load', help='load model')
    parser.add_argument('--classnum', help='10 for cifar10 or 100 for cifar100',
                        type=int, default=10)
    args = parser.parse_args()

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    with tf.Graph().as_default():
        config = get_config(args.classnum)
        if args.load:
            config.session_init = SaverRestore(args.load)
        if args.gpu:
            config.nr_tower = len(args.gpu.split(','))
        # QueueInputTrainer(config).train()
        SimpleTrainer(config).train()
```
I then changed the `with` statement to `with argscope(BatchNorm, decay=0.9, epsilon=1e-4, use_local_stat=is_training), ...` (and ran it with `QueueInputTrainer`), and another error occurred:
```
Traceback (most recent call last):
  File "cifar-dorefa.py", line 204, in <module>
    QueueInputTrainer(config).train()
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 222, in train
    grads = self._single_tower_grad()
  File "/home/tomohiro/github/tensorpack/tensorpack/train/trainer.py", line 204, in _single_tower_grad
    self.model.build_graph(self.dequed_inputs, True)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/model_desc.py", line 60, in build_graph
    self._build_graph(model_inputs, is_training)
  File "cifar-dorefa.py", line 79, in _build_graph
    .Conv2D('conv1.1', out_channel=64)\
  File "/home/tomohiro/github/tensorpack/tensorpack/models/__init__.py", line 53, in f
    ret = layer(name, self._t, *args, **kwargs)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/_common.py", line 54, in wrapped_func
    outputs = func(*args, **actual_args)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/conv2d.py", line 62, in Conv2D
    return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
  File "/home/tomohiro/github/tensorpack/tensorpack/models/nonlin.py", line 74, in BNReLU
    x = BatchNorm('bn', x, is_training, **kwargs)
  File "/home/tomohiro/github/tensorpack/tensorpack/models/_common.py", line 54, in wrapped_func
    outputs = func(*args, **actual_args)
TypeError: BatchNorm() got multiple values for keyword argument 'use_local_stat'
```
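If I understand this second traceback, `BNReLU` already forwards `is_training` positionally into `BatchNorm`'s `use_local_stat` slot (`BatchNorm('bn', x, is_training, **kwargs)` in nonlin.py), so the extra `use_local_stat=is_training` injected by my `argscope` collides with it. Here is a toy reproduction of the same `TypeError`; it is my own simplification, in which a direct keyword argument stands in for argscope's keyword injection:

```python
# Toy stand-in functions; only the argument flow matches tensorpack.
def BatchNorm(name, x, use_local_stat=None, **kwargs):
    return x

def BNReLU(is_training, **kwargs):
    def f(x, name=None):
        # is_training already fills the use_local_stat slot positionally...
        return BatchNorm('bn', x, is_training, **kwargs)
    return f

# ...so supplying use_local_stat as a keyword as well (as my argscope does)
# collides with the positional value:
nl = BNReLU(True, use_local_stat=True)
nl('fake-tensor')
# TypeError: BatchNorm() got multiple values for keyword argument 'use_local_stat'
```

So it seems `use_local_stat` cannot be supplied through `argscope(BatchNorm, ...)` while `Conv2D`'s nonlinearity is `BNReLU(is_training)`; the keyword reaches `BatchNorm` twice.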
Sorry for the very long post, but once I get this code working, I would like to contribute it back to your project.