#!/usr/local/bin/python
# -*- coding: utf-8 -*-
'''3D convolutional neural network trained
to reduce the false positive rate on the LUNA datasets.
The LUNA datasets are stored in a CIFAR-style binary record format
(label bytes followed by image bytes per record).
Author: Kong Haiyang
'''
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import math
import numpy as np
from six.moves import xrange
import tensorflow as tf
import csv
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('IMAGE_SIZE', 40, "Size of input image.")
tf.app.flags.DEFINE_integer('PIXEL_DATA_SIZE', 4, "Byte size of an image pixel.")
tf.app.flags.DEFINE_integer('CHANNEL_NUMBER', 1, "Number of input image channels.")
tf.app.flags.DEFINE_integer('LABEL_NUMBER', 2, "Number of label classes.")
tf.app.flags.DEFINE_integer('BATCH_SIZE', 128, "Size of a batch.")
tf.app.flags.DEFINE_integer('NUM_EPOCHS', 4, "Number of epochs.")
tf.app.flags.DEFINE_integer('EVAL_BATCH_SIZE', 64, "Size of an evaluation batch.")
tf.app.flags.DEFINE_integer('SEED', 66478, "Seed of shuffle.")
tf.app.flags.DEFINE_string('TOWER_NAME', 'JP', "Name of tower.")
tf.app.flags.DEFINE_integer('NUM_GPU', 1, "How many GPUs to use.")
tf.app.flags.DEFINE_integer('NUM_PREPROCESS_THREADS', 12,
                            "Number of preprocessing threads.")
tf.app.flags.DEFINE_integer('NUM_LABEL', 1, "How many label values in a unit of the bin file.")
tf.app.flags.DEFINE_integer(
    'NUM_IMAGE', 40 ** 3, "How many image values in a unit of the bin file.")
tf.app.flags.DEFINE_integer('PIXEL_LENGTH', 4, "Byte length of a label or image value.")
tf.app.flags.DEFINE_string(
    'CSV_FILE', '/home/kong/4T/official3D_110W/Shuffle.csv', "CSV file path and name.")
tf.app.flags.DEFINE_string(
    'BIN_FILE', '/home/kong/4T/official3D_110W/shuffle3D64.bin', "Bin file path and name.")
tf.app.flags.DEFINE_string('XAVIER_INIT',
                           'tf.contrib.layers.xavier_initializer(seed=FLAGS.SEED)',
                           "Initialize with XAVIER_INIT.")
tf.app.flags.DEFINE_bool('SAVE_MODEL', True, 'Save model or not.')
def readCSV(filename):
  '''Read lines from a CSV file.
  '''
  lines = []
  with open(filename, "rb") as f:
    csvreader = csv.reader(f)
    for line in csvreader:
      lines.append(line)
  return lines
def get_noaug_first(no, count):
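  '''Load the non-augmented positive samples of subset `no` straight from
  the bin file, using the shuffle CSV to locate their record indices.
  `count` is the number of such samples (see get_size). Per the CSV layout
  assumed here, line[-2] == '1' marks a positive candidate and
  line[-1] == '0' marks a non-augmented one, so the returned labels are
  all ones.
  '''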
  lines = readCSV(FLAGS.CSV_FILE)[1:]
  data = np.empty([count, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE,
                   FLAGS.IMAGE_SIZE, 1], dtype=np.float32)
  i = count_ = 0
  length = (FLAGS.NUM_LABEL + FLAGS.NUM_IMAGE) * FLAGS.PIXEL_LENGTH
  with open(FLAGS.BIN_FILE, 'rb') as f:
    for line in lines:
      if line[1] == str(no) and line[-1] == '0' and line[-2] == '1':
        f.seek(i * length)
        buf = f.read(length)
        # Skip the 4-byte label at the head of the record.
        data[count_, ...] = (np.frombuffer(buf[4:], dtype=np.float32)).reshape(
            FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, 1)
        count_ += 1
      i += 1
  labels = np.ones(count, dtype=np.int64)
  return data, labels
def init_bin_file():
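  '''Create a filename queue and a fixed-length record reader for the bin file.
  '''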
  bin_file_name = [FLAGS.BIN_FILE]
  for f in bin_file_name:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)
  fqb = tf.train.string_input_producer(bin_file_name)
  record_bytes = (FLAGS.NUM_LABEL + FLAGS.NUM_IMAGE) * FLAGS.PIXEL_LENGTH
  rb = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  return fqb, rb
def init_csv_file():
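  '''Create a filename queue and a line reader for the shuffle CSV file,
  skipping its header line.
  '''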
  csv_file_name = [FLAGS.CSV_FILE]
  for f in csv_file_name:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)
  fqc = tf.train.string_input_producer(csv_file_name)
  rc = tf.TextLineReader(skip_header_lines=True)
  return fqc, rc
def get_data_without_no(fqb, rb, fqc, rc, val_no, test_no):
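  '''Read records whose subset id is neither `val_no` nor `test_no`, i.e.
  the training subsets. The CSV and bin readers are intended to advance in
  lockstep, so the subset id parsed from a CSV line describes the bin
  record read in the same call; records from the two held-out subsets are
  skipped via tf.while_loop.
  '''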
  def getBIN():
    def getID():
      key_raw, value = rc.read(fqc)
      value_raw = tf.reshape(value, [1])
      split_values = tf.string_split(value_raw, delimiter=',')
      subsetid = tf.string_to_number(split_values.values[1], out_type=tf.int32)
      return subsetid
    key, value = rb.read(fqb)
    record_bytes = tf.decode_raw(value, tf.float32)
    label = tf.cast(tf.slice(record_bytes, [0], [FLAGS.NUM_LABEL]), tf.int64)
    image = tf.reshape(tf.slice(record_bytes, [FLAGS.NUM_LABEL], [FLAGS.NUM_IMAGE]),
                       shape=[40, 40, 40, 1])
    return getID(), label, image
  subsetid, label, image = getBIN()
  cond = lambda subsetid, label, image: tf.logical_or(tf.equal(subsetid, tf.constant(
      val_no, dtype=tf.int32)), tf.equal(subsetid, tf.constant(test_no, dtype=tf.int32)))
  doRead = lambda subsetid, label, image: getBIN()
  result = tf.while_loop(cond, doRead, [subsetid, label, image])
  return result
def get_data_with_no(fqb, rb, fqc, rc, no):
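  '''Read records belonging to subset `no` only (validation or test);
  records from every other subset are skipped via tf.while_loop.
  '''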
  def getBIN():
    def getID():
      key_raw, value = rc.read(fqc)
      value_raw = tf.reshape(value, [1])
      split_values = tf.string_split(value_raw, delimiter=',')
      subsetid = tf.string_to_number(split_values.values[1], out_type=tf.int32)
      return subsetid
    key, value = rb.read(fqb)
    record_bytes = tf.decode_raw(value, tf.float32)
    label = tf.cast(tf.slice(record_bytes, [0], [FLAGS.NUM_LABEL]), tf.int64)
    image = tf.reshape(tf.slice(record_bytes, [FLAGS.NUM_LABEL], [FLAGS.NUM_IMAGE]),
                       shape=[40, 40, 40, 1])
    return getID(), label, image
  subsetid, label, image = getBIN()
  cond = lambda subsetid, label, image: tf.not_equal(
      subsetid, tf.constant(no, dtype=tf.int32))
  doRead = lambda subsetid, label, image: getBIN()
  result = tf.while_loop(cond, doRead, [subsetid, label, image])
  return result
def get_train_data(fqb, rb, fqc, rc, val_no, test_no):
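  '''Batch training samples drawn from all subsets except `val_no` and `test_no`.
  '''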
  subsetid, label, image = get_data_without_no(fqb, rb, fqc, rc, val_no, test_no)
  min_queue_examples = FLAGS.BATCH_SIZE * 10
  sis, labels, images = tf.train.batch(
      [subsetid, label, image],
      batch_size=FLAGS.BATCH_SIZE,
      num_threads=FLAGS.NUM_PREPROCESS_THREADS,
      capacity=min_queue_examples + 3 * FLAGS.BATCH_SIZE)
  labels = tf.reshape(labels, [-1])
  return labels, images
def get_test_data(fqb, rb, fqc, rc, no):
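  '''Batch validation or test samples drawn from subset `no` only.
  '''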
  subsetid, label, image = get_data_with_no(fqb, rb, fqc, rc, no)
  min_queue_examples = FLAGS.BATCH_SIZE * 5
  sis, labels, images = tf.train.batch(
      [subsetid, label, image],
      batch_size=FLAGS.BATCH_SIZE,
      num_threads=FLAGS.NUM_PREPROCESS_THREADS,
      capacity=min_queue_examples + 3 * FLAGS.BATCH_SIZE)
  labels = tf.reshape(labels, [-1])
  return labels, images
def get_size():
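  '''Count, per subset, the total number of samples (`ss`) and the number
  of non-augmented positive samples (`noaug`) listed in the shuffle CSV.
  '''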
  ss = [0] * 10
  noaug = [0] * 10
  with open(FLAGS.CSV_FILE) as f:
    csvreader = csv.reader(f)
    for line in csvreader:
      if line[0] != 'candidateID':
        ss[int(line[1])] += 1
        if line[-1] == '0' and line[-2] == '1':
          noaug[int(line[1])] += 1
  return ss, noaug
xavier_init = tf.contrib.layers.xavier_initializer(seed=FLAGS.SEED)
Wb = {
    'W11': tf.get_variable('W11', [3, 3, 3, FLAGS.CHANNEL_NUMBER, 16], tf.float32, xavier_init),
    'b11': tf.Variable(tf.zeros([16])),
    'W12': tf.get_variable('W12', [3, 3, 3, 16, 24], tf.float32, xavier_init),
    'b12': tf.Variable(tf.zeros([24])),
    'W2': tf.get_variable('W2', [3, 3, 3, 24, 32], tf.float32, xavier_init),
    'b2': tf.Variable(tf.zeros([32])),
    'W3': tf.get_variable('W3', [3, 3, 3, 32, 48], tf.float32, xavier_init),
    'b3': tf.Variable(tf.zeros([48])),
    'W4': tf.get_variable('W4', [3, 3, 3, 48, 64], tf.float32, xavier_init),
    'b4': tf.Variable(tf.zeros([64])),
    'fcw1': tf.get_variable('fcw1', [2 ** 3 * 64, 32], tf.float32, xavier_init),
    'fcb1': tf.Variable(tf.zeros([32])),
    'fcw2': tf.get_variable('fcw2', [32, FLAGS.LABEL_NUMBER], tf.float32, xavier_init),
    'fcb2': tf.Variable(tf.zeros([FLAGS.LABEL_NUMBER]))
}
def model(data, keep_prob):
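  '''The 3D CNN: four conv/pool stages (spatial size 40 -> 20 -> 10 -> 5 -> 2
  after pooling) followed by two fully connected layers with dropout.
  '''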
  with tf.variable_scope('conv1'):
    conv = tf.nn.conv3d(data, Wb['W11'], strides=[1, 1, 1, 1, 1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, Wb['b11']))
    conv = tf.nn.conv3d(relu, Wb['W12'], strides=[1, 1, 1, 1, 1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, Wb['b12']))
    pool = tf.nn.max_pool3d(relu, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='VALID')
  with tf.variable_scope('conv2'):
    conv = tf.nn.conv3d(pool, Wb['W2'], strides=[1, 1, 1, 1, 1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, Wb['b2']))
    pool = tf.nn.max_pool3d(relu, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='VALID')
  with tf.variable_scope('conv3'):
    conv = tf.nn.conv3d(pool, Wb['W3'], strides=[1, 1, 1, 1, 1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, Wb['b3']))
    pool = tf.nn.max_pool3d(relu, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='VALID')
  with tf.variable_scope('conv4'):
    conv = tf.nn.conv3d(pool, Wb['W4'], strides=[1, 1, 1, 1, 1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, Wb['b4']))
    pool = tf.nn.max_pool3d(relu, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='VALID')
  with tf.variable_scope('reshape'):
    ps = pool.get_shape().as_list()
    reshape = tf.reshape(pool, [-1, ps[1] * ps[2] * ps[3] * ps[4]])
  with tf.variable_scope('fc1'):
    hidden = tf.nn.relu(tf.matmul(reshape, Wb['fcw1']) + Wb['fcb1'])
  with tf.variable_scope('dropout'):
    hidden = tf.nn.dropout(hidden, keep_prob, seed=FLAGS.SEED)
  with tf.variable_scope('fc2'):
    out = tf.matmul(hidden, Wb['fcw2']) + Wb['fcb2']
  return out
def eval_in_batches(data, sess, eval_prediction, eval_data):
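  '''Run eval_prediction over `data` in EVAL_BATCH_SIZE chunks. The final
  partial chunk is handled by re-running the last EVAL_BATCH_SIZE rows and
  keeping only the tail, so every fed batch is full-sized.
  '''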
  size = data.shape[0]
  if size < FLAGS.EVAL_BATCH_SIZE:
    raise ValueError("batch size for evals larger than dataset: %d" % size)
  predictions = np.ndarray(shape=(size, FLAGS.LABEL_NUMBER), dtype=np.float32)
  for begin in xrange(0, size, FLAGS.EVAL_BATCH_SIZE):
    end = begin + FLAGS.EVAL_BATCH_SIZE
    if end <= size:
      predictions[begin:end, :] = sess.run(eval_prediction, feed_dict={
          eval_data: data[begin:end, ...]})
    else:
      batch_predictions = sess.run(eval_prediction, feed_dict={
          eval_data: data[-FLAGS.EVAL_BATCH_SIZE:, ...]})
      predictions[begin:, :] = batch_predictions[begin - size:, :]
  return predictions
def error_rate(predictions, labels):
  """Return the error rate based on dense predictions and sparse labels."""
  return 100.0 - (100.0 * np.sum(np.argmax(predictions, 1) == labels) /
                  predictions.shape[0])
def lunaTrain(VIEW_DIRECTORY, imgName, csvName, ss_list, noaug_list):
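  '''Run 10-fold cross validation: for each fold, subset `cross` is the
  test set, subset `(cross + 1) % 10` is the validation set, and the
  remaining eight subsets are used for training.
  '''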
  for cross in range(10):
    sssstttt = time.time()
    print('Cross {}...'.format(cross))
    WORK_DIRECTORY = os.path.join(VIEW_DIRECTORY, 'Cross{}'.format(cross))
    testNo = cross
    valNo = (cross + 1) % 10
    st = time.time()
    train_size = sum(ss_list) - ss_list[testNo] - ss_list[valNo]
    val_size = ss_list[valNo]
    test_size = ss_list[testNo]
    test_no_aug_data, test_no_aug_label = get_noaug_first(testNo, noaug_list[testNo])
    val_no_aug_data, val_no_aug_label = get_noaug_first(valNo, noaug_list[valNo])
    print('Reading no aug data finished in {:.2f} seconds...'.format(time.time() - st))
    fqbt, rbt = init_bin_file()
    fqct, rct = init_csv_file()
    fqbv, rbv = init_bin_file()
    fqcv, rcv = init_csv_file()
    fqbe, rbe = init_bin_file()
    fqce, rce = init_csv_file()
    data_node = tf.placeholder(tf.float32, shape=(
        None, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, FLAGS.CHANNEL_NUMBER))
    labels_node = tf.placeholder(tf.int64, shape=(None,))
    eval_data = tf.placeholder(tf.float32, shape=(
        None, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, FLAGS.IMAGE_SIZE, FLAGS.CHANNEL_NUMBER))
    keep_hidden = tf.placeholder(tf.float32)
    logits = model(data_node, keep_hidden)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, labels_node))
    batch = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.01, batch * FLAGS.BATCH_SIZE,
                                               train_size / 5, 0.95, staircase=True)
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, 0.9).minimize(loss, global_step=batch)
    eval_predictions = tf.nn.softmax(model(eval_data, 1))
    train_label_node, train_data_node = get_train_data(
        fqbt, rbt, fqct, rct, valNo, testNo)
    val_label_node, val_data_node = get_test_data(fqbv, rbv, fqcv, rcv, valNo)
    test_label_node, test_data_node = get_test_data(fqbe, rbe, fqce, rce, testNo)
    saver = tf.train.Saver(tf.all_variables())
    TRAIN_FREQUENCY = train_size // FLAGS.BATCH_SIZE // 20
    VAL_FREQUENCY = train_size // FLAGS.BATCH_SIZE // 20
    TEST_FREQUENCY = train_size // FLAGS.BATCH_SIZE // 20
    with tf.Session() as sess:
      sess.run(tf.initialize_local_variables())
      sess.run(tf.initialize_all_variables())
      summary_writer = tf.train.SummaryWriter(WORK_DIRECTORY, sess.graph)
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      try:
        while not coord.should_stop():
          start_time = time.time()
          valPE = test_errorP = 100
          for step in xrange(int(FLAGS.NUM_EPOCHS * train_size) // FLAGS.BATCH_SIZE):
            train_data, train_label = sess.run([train_data_node, train_label_node])
            feed_dict = {data_node: train_data,
                         labels_node: train_label, keep_hidden: 0.5}
            _, l, lr = sess.run([optimizer, loss, learning_rate], feed_dict=feed_dict)
            if step != 0 and step % TRAIN_FREQUENCY == 0:
              et = time.time() - start_time
              print('Step %d (epoch %.2f), %.1f ms' %
                    (step, float(step) * FLAGS.BATCH_SIZE / train_size,
                     1000 * et / TRAIN_FREQUENCY))
              print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
              start_time = time.time()
            if step != 0 and VAL_FREQUENCY != 0 and step % VAL_FREQUENCY == 0:
              valPE = error_rate(eval_in_batches(
                  val_no_aug_data, sess, eval_predictions, eval_data), val_no_aug_label)
              print('Validation error of no aug Positive: %.3f%%' % valPE)
              if valPE <= 5 and test_errorP <= 5:
                st = time.time()
                val_label_total = []
                prediction_total = []
                for vi in xrange(val_size // FLAGS.BATCH_SIZE):
                  val_data, val_label = sess.run([val_data_node, val_label_node])
                  predictions = eval_in_batches(
                      val_data, sess, eval_predictions, eval_data)
                  val_label_total.extend(val_label)
                  prediction_total.extend(predictions)
                val_label_total = np.array(val_label_total)
                prediction_total = np.array(prediction_total)
                valE = error_rate(prediction_total, val_label_total)
                print('Validation error: %.3f%%' % valE)
                print('Validation costs {:.2f} seconds.'.format(time.time() - st))
                val_data = 0
              start_time = time.time()
            if step != 0 and TEST_FREQUENCY != 0 and step % TEST_FREQUENCY == 0:
              test_errorP = error_rate(eval_in_batches(
                  test_no_aug_data, sess, eval_predictions, eval_data), test_no_aug_label)
              print('Test error of no aug Positive: %.3f%%' % test_errorP)
              if valPE <= 5 and test_errorP <= 5:
                st = time.time()
                test_label_total = []
                prediction_total = []
                for ti in xrange(test_size // FLAGS.BATCH_SIZE):
                  test_data, test_label = sess.run([test_data_node, test_label_node])
                  predictions = eval_in_batches(
                      test_data, sess, eval_predictions, eval_data)
                  test_label_total.extend(test_label)
                  prediction_total.extend(predictions)
                test_label_total = np.array(test_label_total)
                prediction_total = np.array(prediction_total)
                test_error = error_rate(prediction_total, test_label_total)
                print('Test error: %.3f%%' % test_error)
                print('Test costs {:.2f} seconds.'.format(time.time() - st))
                if FLAGS.SAVE_MODEL:
                  checkpoint_path = os.path.join(WORK_DIRECTORY, 'model.ckpt')
                  saver.save(sess, checkpoint_path, global_step=step)
                VAL_FREQUENCY = train_size // FLAGS.BATCH_SIZE
                TEST_FREQUENCY = train_size // FLAGS.BATCH_SIZE * 2
                test_data = 0
              start_time = time.time()
          else:
            # for-else: all training steps finished without a break.
            if FLAGS.SAVE_MODEL:
              checkpoint_path = os.path.join(WORK_DIRECTORY, 'model.ckpt')
              saver.save(sess, checkpoint_path, global_step=step)
            coord.request_stop()
      except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
      finally:
        pass
      coord.join(threads)
      print('All costs {:.2f} seconds...'.format(time.time() - sssstttt))
  train_data = val_data = test_data = 0
  train_labels = val_labels = test_labels = 0
def main(_):
  # Alternate local paths (overridden by the assignments below):
  # viewPath = '/home/kong/4T/official3D_110W'
  # csvName = '/home/kong/4T/official3D_110W/Shuffle.csv'
  # imgName = '/home/kong/4T/official3D_110W/shuffle3D.bin'
  viewPath = '/home/admin6/3d'
  csvName = '/home/admin6/3d/Shuffle.csv'
  imgName = '/home/admin6/3d/shuffle3D64.bin'
  ss_list, noaug_list = get_size()
  lunaTrain(viewPath, imgName, csvName, ss_list, noaug_list)
if __name__ == '__main__':
  tf.app.run()