# Builds a softmax-regression graph over 3-element vectors using the
# TensorFlow graph-mode API (tf.placeholder / tf.Variable).
#
# Data:
#   input          - three one-hot rows (a 3x3 identity matrix).
#   winning_hands  - the target row for each input row; the hot index of
#                    row i is (i + 1) % 3.
#                    NOTE(review): presumably rock-paper-scissors "what beats
#                    this hand" labels - confirm against the rest of the file.
#   NOTE(review): `input` shadows the Python builtin of the same name; it is
#   left as-is here because later code may reference it.
#
# Graph:
#   x   - float placeholder, shape [None, 3] (any number of rows).
#   W,b - trainable variables initialised to zeros, shapes [3, 3] and [3].
#   y   - softmax(x @ W + b).
#   y_  - float placeholder, shape [None, 3], multiplied element-wise with
#         log(y) in the loss.
#   cross_entropy - negative sum over all elements of y_ * log(y).
#   NOTE(review): log(y) is undefined if any softmax output reaches 0;
#   consider a fused softmax-cross-entropy op on the logits - confirm before
#   changing.
#
# NOTE(review): the final `train_step = ...` assignment is cut off in the
# reviewed excerpt, so it is not described here.
import tensorflow as tf input = [ [1., 0., 0.], [0., 1., 0.], [0., 0., 1.] ] winning_hands = [ [0., 1., 0.], [0., 0., 1.], [1., 0., 0.] ] x = tf.placeholder("float", [None, 3]) W = tf.Variable(tf.zeros([3, 3])) b = tf.Variable(tf.zeros([3])) y = tf.nn.softmax(tf.matmul(x, W) + b) y_ = tf.placeholder("float", [None, 3]) cross_entropy = -tf.reduce_sum(y_ * tf.log(y)) train_step = tf.train.GradientDe