Python 3 - 94,3% de précision, 6447 points sur un ensemble de validation de 20% des données
Utilise 3 réseaux de neurones, un régresseur des voisins les plus proches, une forêt aléatoire et un boost de gradient. Ces prédictions sont combinées avec une forêt aléatoire qui a également accès aux données.
import collections
import numpy as np
import numpy.ma as ma
import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Conv2D, Flatten
from keras.layers.noise import GaussianDropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import tensorflow
tensorflow.set_random_seed(1)
np.random.seed(1)
random.seed(1)
def load_data():
with open('test_cases.txt', 'r') as f:
for line in f:
yield line.split(',')
def parse_data(rows):
black_pieces = "kqrnbp"
white_pieces = black_pieces.upper()
for i, row in enumerate(rows):
if len(row) >= 2:
board = row[0]
board = np.array([1 if c == 'W' else -1 if c == 'B' else 0 for c in board], dtype=np.float32)
pieces = row[1]
counts = collections.Counter(pieces)
white_counts = np.array([counts[c] for c in white_pieces], dtype=np.float32)
black_counts = np.array([counts[c] for c in black_pieces], dtype=np.float32)
yield (outcome, white_counts, black_counts, board)
else:
if 'White' in row[0]:
outcome = 1
else:
outcome = 0
data = list(parse_data(load_data()))
random.shuffle(data)
data = list(zip(*data))
y = np.array(data[0])
x = list(zip(*data[1:]))
conv_x = []
for white_counts, black_counts, board in x:
board = board.reshape((1, 8, 8))
white_board = board > 0
black_board = board < 0
counts = [white_counts, black_counts]
for i, c in enumerate(counts):
n = c.shape[0]
counts[i] = np.tile(c, 64).reshape(n, 8, 8)
features = np.concatenate([white_board, black_board] + counts, axis=0)
conv_x.append(features)
conv_x = np.array(conv_x)
x = np.array([np.concatenate(xi) for xi in x])
s = x.std(axis=0)
u = x.mean(axis=0)
nz = s != 0
x = x[:,nz]
u = u[nz]
s = s[nz]
x = (x - u) / s
i = 2 * len(y) // 10
x_test, x_train = x[:i], x[i:]
conv_x_test, conv_x_train = conv_x[:i], conv_x[i:]
y_test, y_train = y[:i], y[i:]
model = Sequential()
def conv(n, w=3, shape=None):
if shape is None:
model.add(Conv2D(n, w, padding="same"))
else:
model.add(Conv2D(n, w, padding="same", input_shape=shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
conv(128, shape=conv_x[0].shape)
conv(128)
conv(128)
conv(128)
conv(128)
conv(128)
conv(128)
conv(128)
conv(128)
conv(128)
conv(2, w=1)
model.add(Flatten())
model.add(GaussianDropout(0.5))
model.add(Dense(256))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(GaussianDropout(0.5))
model.add(Dense(1))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.compile(loss='mse', optimizer=Adam())
model5 = model
model = Sequential()
model.add(Dense(50, input_shape=(x.shape[1],)))
model.add(Activation('sigmoid'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='mse', optimizer=Adam())
model0 = model
model = Sequential()
model.add(Dense(1024, input_shape=(x.shape[1],)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(GaussianDropout(0.5))
model.add(Dense(1024))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(GaussianDropout(0.5))
model.add(Dense(1024))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(GaussianDropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='mse', optimizer=Adam())
model4 = model
use_all_data = False
x_valid, y_valid = x_test, y_test
if use_all_data:
x_train, y_train = x_test, y_test = x, y
validation_data=None
else:
validation_data=(x_test, y_test)
def subsample(x, y, p=0.9, keep_rest=False):
m = np.random.binomial(1, p, size=len(y)).astype(np.bool)
r = (x[m,:], y[m])
if not keep_rest:
return r
m = ~m
return r + (x[m,:], y[m])
epochs=100
x0, y0, x_valid, y_valid = subsample(conv_x_train, y_train, keep_rest=True)
model5.fit(x0, y0, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[EarlyStopping(patience=1)])
x0, y0, x_valid, y_valid = subsample(x_train, y_train, keep_rest=True)
model0.fit(x0, y0, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[EarlyStopping(patience=1)])
x0, y0, x_valid, y_valid = subsample(x_train, y_train, keep_rest=True)
model4.fit(x0, y0, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[EarlyStopping(patience=1)])
model1 = RandomForestRegressor(n_estimators=400, n_jobs=-1, verbose=1)
model1.fit(*subsample(x_train, y_train))
model2 = GradientBoostingRegressor(learning_rate=0.2, n_estimators=5000, verbose=1)
model2.fit(*subsample(x_train, y_train))
model3 = KNeighborsRegressor(n_neighbors=2, weights='distance', p=1)
model3.fit(*subsample(x_train, y_train))
models = (model0, model1, model2, model3, model4, model5)
model_names = [
"shallow neural net",
"random forest",
"gradient boosting",
"k-nearest neighbors",
"deep neural net",
"conv-net",
"ensemble"
]
def combine(predictions):
clip = lambda x: np.clip(x, 0, 1)
return clip(np.array([y.flatten() for y in predictions]).T)
def augment(x, conv_x):
p = combine([m.predict(x) for m in models[:-1]] + [models[-1].predict(conv_x)])
return np.concatenate((x, p), axis=1)
model = RandomForestRegressor(n_estimators=200, n_jobs=-1, verbose=1)
model.fit(augment(x_train, conv_x_train), y_train)
def accuracy(prediction):
class_prediction = np.where(prediction > 0.5, 1, 0)
return np.sum(class_prediction == y_test) / len(y_test)
predictions = [m.predict(x_test).flatten() for m in models[:-1]] + [models[-1].predict(conv_x_test).flatten()]+ [model.predict(augment(x_test, conv_x_test))]
for s, p in zip(model_names, predictions):
print(s + " accuracy: ", accuracy(p))
def evaluate(prediction):
return np.sum(1 - (prediction - y_test) ** 2) * (len(y) / len(y_test))
for s, p in zip(model_names, predictions):
print(s + " score: ", evaluate(p))