Моя реализация функции оценки и отсечения альфа-бета для Connect Four недостаточно умна

Я пытаюсь правильно реализовать игровой ИИ Connect Four, но пока не пользуюсь своими глупыми действиями:

Он не блокирует игру противника, которая может привести к провалу ИИ,

Он не принимает ходов, которые могут привести к победе ИИ.

Мой проект состоит из следующих двух GitHub-репозиториев:

где GameAI содержит:

SortingAlphaBetaPruningGameEngine

package net.coderodde.zerosum.ai.impl; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import net.coderodde.zerosum.ai.EvaluatorFunction; import net.coderodde.zerosum.ai.GameEngine; import net.coderodde.zerosum.ai.State; /** * This class implements the * <a href="#" onclick="location.href='https://en.wikipedia.org/wiki/Minimax'; return false;">Minimax</a> algorithm for * zero-sum two-player games. * * @param <S> the game state type. * @param the player color type. * @author Rodion "rodde" Efremov * @version 1.6 (May 26, 2019) */ public final class SortingAlphaBetaPruningGameEngine <S extends State<S>, P extends Enum> extends GameEngine<S, P> { /** * Stores the terminal node or a node at the depth zero with the best value * so far, which belongs to the maximizing player moves. */ private S bestTerminalMaximizingState; /** * Stores the value of {@code bestTerminalMaximizingState}. */ private double bestTerminalMaximizingStateValue; /** * Stores the terminal node or a node at the depth zero with the best value * so far, which belongs to the minimizing player moves. */ private S bestTerminalMinimizingState; /** * Stores the value of {@code bestTerminalMinimizingState}. */ private double bestTerminalMinimizingStateValue; /** * Indicates whether we are computing a next ply for the minimizing player * or not. If not, we are computing a next ply for the maximizing player. */ private boolean makingPlyForMinimizingPlayer; /** * Maps each visited state to its parent state. */ private final Map<S, S> parents = new HashMap<>(); /** * Constructs this minimax game engine. * @param evaluatorFunction the evaluator function. * @param depth the search depth. */ public SortingAlphaBetaPruningGameEngine( EvaluatorFunction<S> evaluatorFunction, int depth) { super(evaluatorFunction, depth, Integer.MAX_VALUE); } /** * {@inheritDoc } */ @Override public S makePly(S state, P minimizingPlayer, P maximizingPlayer, P initialPlayer) { // Reset the best known values: bestTerminalMaximizingStateValue = Double.NEGATIVE_INFINITY; bestTerminalMinimizingStateValue = Double.POSITIVE_INFINITY; makingPlyForMinimizingPlayer = initialPlayer != minimizingPlayer; // Do the game tree search: makePlyImpl(state, depth, Double.NEGATIVE_INFINITY, // intial alpha Double.POSITIVE_INFINITY, // intial beta minimizingPlayer, maximizingPlayer, initialPlayer); // Find the next game state starting from 'state': S returnState = inferBestState( initialPlayer == minimizingPlayer ? bestTerminalMinimizingState : bestTerminalMaximizingState); // Release the resources: parents.clear(); bestTerminalMaximizingState = null; bestTerminalMinimizingState = null; // We are done with a single move: return returnState; } private S inferBestState(S bestTerminalState) { List<S> statePath = new ArrayList<>(); S state = bestTerminalState; while (state != null) { statePath.add(state); state = parents.get(state); } if (statePath.size() == 1) { // The root node is terminal. Return null: return null; } // Return the second upmost state: Collections.<S>reverse(statePath); return statePath.get(1); } /** * Performs a single step down the game tree branch. * * @param state the starting state. * @param depth the maximum depth of the game tree. * @param minimizingPlayer the minimizing player. * @param maximizingPlayer the maximizing player. * @param currentPlayer the current player. * @return the value of the best ply. */ private double makePlyImpl(S state, int depth, double alpha, double beta, P minimizingPlayer, P maximizingPlayer, P currentPlayer) { if (depth == 0 || state.isTerminal()) { double value = evaluatorFunction.evaluate(state); if (!makingPlyForMinimizingPlayer) { if (bestTerminalMinimizingStateValue > value) { bestTerminalMinimizingStateValue = value; bestTerminalMinimizingState = state; } } else { if (bestTerminalMaximizingStateValue < value) { bestTerminalMaximizingStateValue = value; bestTerminalMaximizingState = state; } } return value; } if (currentPlayer == maximizingPlayer) { double value = Double.NEGATIVE_INFINITY; List<S> children = state.children(); children.sort((S a, S b) -> { double valueA = super.evaluatorFunction.evaluate(a); double valueB = super.evaluatorFunction.evaluate(b); return Double.compare(valueB, valueA); }); for (S child : children) { value = Math.max( value, makePlyImpl(child, depth - 1, alpha, beta, minimizingPlayer, maximizingPlayer, minimizingPlayer)); parents.put(child, state); alpha = Math.max(alpha, value); if (alpha >= beta) { break; } } return value; } else { // Here, 'initialPlayer == minimizingPlayer'. double value = Double.POSITIVE_INFINITY; List<S> children = state.children(); children.sort((S a, S b) -> { double valueA = super.evaluatorFunction.evaluate(a); double valueB = super.evaluatorFunction.evaluate(b); return Double.compare(valueA, valueB); }); for (S child : children) { value = Math.min( value, makePlyImpl(child, depth - 1, alpha, beta, minimizingPlayer, maximizingPlayer, maximizingPlayer)); parents.put(child, state); beta = Math.min(beta, value); if (alpha >= beta) { break; } } return value; } } }

и у меня есть две функции оценки из Интернета/моей головы. Первый (см. Ниже) находит все паттерны длины 2, 3 и 4 и умножает их число появлений на константы, которые будут благоприятствовать более длинным из них. Не похоже на работу. Другой поддерживает матрицу целых чисел; каждое целое число обозначает количество шаблонов, которые могут занимать слот этих целых чисел. Тоже не сработало.

BruteForceConnectFourStateEvaluatorFunction

package net.coderodde.games.connect.four.impl; import net.coderodde.games.connect.four.ConnectFourState; import net.coderodde.games.connect.four.PlayerColor; import net.coderodde.zerosum.ai.EvaluatorFunction; /** * This class implements the default Connect Four state evaluator. The white * player wants to maximize, the red player wants to minimize. * * @author Rodion "rodde" Efremov * @version 1.6 (May 24, 2019) */ public final class BruteForceConnectFourStateEvaluatorFunction implements EvaluatorFunction<ConnectFourState> { private static final double POSITIVE_WIN_VALUE = 1e9; private static final double NEGATIVE_WIN_VALUE = -1e9; private static final double POSITIVE_CLOSE_TO_WIN_VALUE = 1e6; private static final double NEGATIVE_CLOSE_TO_WIN_VALUE = -1e6; private static final double BASE_VALUE = 1e1; /** * The weight matrix. Maps each position to its weight. We need this in * order to */ private final double[][] weightMatrix; /** * The winning length. */ private final int winningLength; /** * Constructs the default heuristic function for Connect Four game states. * * @param width the game board width. * @param height the game board height. * @param maxWeight the maximum weight in the weight matrix. * @param winningPatternLength the winning pattern length. */ public BruteForceConnectFourStateEvaluatorFunction(final int width, final int height, final double maxWeight, final int winningPatternLength) { this.weightMatrix = getWeightMatrix(width, height, maxWeight); this.winningLength = winningPatternLength; } /** * Evaluates the given input {@code state} and returns the estimate. * @param state the state to estimate. * @return the estimate. */ @Override public double evaluate(ConnectFourState state) { PlayerColor winnerPlayerColor = state.checkVictory(); if (winnerPlayerColor == PlayerColor.MAXIMIZING_PLAYER) { return POSITIVE_WIN_VALUE - state.getDepth(); } if (winnerPlayerColor == PlayerColor.MINIMIZING_PLAYER) { return NEGATIVE_WIN_VALUE + state.getDepth(); } // 'minimizingPatternCounts[i]' gives the number of patterns of // length 'i': int[] minimizingPatternCounts = new int[state.getWinningLength() + 1]; int[] maximizingPatternCounts = new int[minimizingPatternCounts.length]; // Do not consider patterns of length one! for (int targetLength = 2; targetLength <= winningLength; targetLength++) { int count = findMinimizingPatternCount(state, targetLength); if (count == 0) { // Once here, it is not possible to find patterns of larger // length than targetLength: break; } minimizingPatternCounts[targetLength] = count; } for (int targetLength = 2; targetLength <= state.getWinningLength(); targetLength++) { int count = findMaximizingPatternCount(state, targetLength); if (count == 0) { // Once here, it is not possible to find patterns of larger // length than targetLength: break; } maximizingPatternCounts[targetLength] = count; } double score = computeBaseScore(minimizingPatternCounts, maximizingPatternCounts); score += computeAlmostFullPatternScores(state, winningLength); return score + getWeights(weightMatrix, state); } private static final double computeAlmostFullPatternScores(ConnectFourState state, int winningLength) { final int targetLength = winningLength - 2; double score = 0.0; for (int y = state.getHeight() - 1; y >= 0; y--) { loop: for (int x = 0; x < state.getWidth() - targetLength; x++) { if (state.readCell(x, y) == null) { // Try to find 'targetLength' marks: PlayerColor targetPlayerColor = state.readCell(x + 1, y); if (targetPlayerColor == null) { continue loop; } int currentLength = 1; for (int xx = x + 1; xx < state.getWidth() - 1; xx++) { if (state.readCell(xx, y) == targetPlayerColor) { currentLength++; if (currentLength == targetLength) { if (state.getPlayerColor() == PlayerColor.MINIMIZING_PLAYER) { score += NEGATIVE_CLOSE_TO_WIN_VALUE; } else { score += POSITIVE_CLOSE_TO_WIN_VALUE; } continue loop; } } } } } return score; } return score; } /** * Finds the number of red patterns of length {@code targetLength}. * @param state the target state. * @param targetLength the length of the pattern to find. * @return the number of red patterns of length {@code targetLength}. */ private static final int findMinimizingPatternCount(ConnectFourState state, int targetLength) { return findPatternCount(state, targetLength, PlayerColor.MINIMIZING_PLAYER); } /** * Finds the number of white patterns of length {@code targetLength}. * @param state the target state. * @param targetLength the length of the pattern to find. * @return the number of white patterns of length {@code targetLength}. */ private static final int findMaximizingPatternCount(ConnectFourState state, int targetLength) { return findPatternCount(state, targetLength, PlayerColor.MAXIMIZING_PLAYER); } /** * Implements the target pattern counting function for both the player * colors. * @param state the state to search. * @param targetLength the length of the patterns to count. * @param playerColor the target player color. * @return the number of patterns of length {@code targetLength} and color * {@code playerColor}. */ private static final int findPatternCount(ConnectFourState state, int targetLength, PlayerColor playerColor) { int count = 0; count += findHorizontalPatternCount(state, targetLength, playerColor); count += findVerticalPatternCount(state, targetLength, playerColor); count += findAscendingDiagonalPatternCount(state, targetLength, playerColor); count += findDescendingDiagonalPatternCount(state, targetLength, playerColor); return count; } /** * Scans the input state for diagonal descending patterns and * returns the number of such patterns. * @param state the target state. * @param patternLength the target pattern length. * @param playerColor the target player color. * @return the number of patterns. */ private static final int findDescendingDiagonalPatternCount(ConnectFourState state, int patternLength, PlayerColor playerColor) { int patternCount = 0; for (int y = 0; y < state.getWinningLength() - 1; y++) { inner: for (int x = 0; x <= state.getWidth() - state.getWinningLength(); x++) { for (int i = 0; i < patternLength; i++) { if (state.readCell(x + i, y + i) != playerColor) { continue inner; } } patternCount++; } } return patternCount; } /** * Scans the input state for diagonal ascending patterns and returns * the number of such patterns. * @param state the target state. * @param patternLength the target pattern length. * @param playerColor the target player color. * @return the number of patterns. */ private static final int findAscendingDiagonalPatternCount(ConnectFourState state, int patternLength, PlayerColor playerColor) { int patternCount = 0; for (int y = state.getHeight() - 1; y > state.getHeight() - state.getWinningLength(); y--) { inner: for (int x = 0; x <= state.getWidth() - state.getWinningLength(); x++) { for (int i = 0; i < patternLength; i++) { if (state.readCell(x + i, y - i) != playerColor) { continue inner; } } patternCount++; } } return patternCount; } /** * Scans the input state for diagonal horizontal patterns and returns * the number of such patterns. * @param state the target state. * @param patternLength the target pattern length. * @param playerColor the target player color. * @return the number of patterns. */ private static final int findHorizontalPatternCount( ConnectFourState state, int patternLength, PlayerColor playerColor) { int patternCount = 0; for (int y = state.getHeight() - 1; y >= 0; y--) { inner: for (int x = 0; x <= state.getWidth() - patternLength; x++) { if (state.readCell(x, y) == null) { continue inner; } for (int i = 0; i < patternLength; i++) { if (state.readCell(x + i, y) != playerColor) { continue inner; } } patternCount++; } } return patternCount; } /** * Scans the input state for diagonal vertical patterns and returns * the number of such patterns. * @param state the target state. * @param patternLength the target pattern length. * @param playerColor the target player color. * @return the number of patterns. */ private static final int findVerticalPatternCount(ConnectFourState state, int patternLength, PlayerColor playerColor) { int patternCount = 0; outer: for (int x = 0; x < state.getWidth(); x++) { inner: for (int y = state.getHeight() - 1; y > state.getHeight() - state.getWinningLength(); y--) { if (state.readCell(x, y) == null) { continue outer; } for (int i = 0; i < patternLength; i++) { if (state.readCell(x, y - i) != playerColor) { continue inner; } } patternCount++; } } return patternCount; } /** * Gets the state weight. We use this in order to discourage the positions * that are close to borders/far away from the center of the game board. * @param weightMatrix the weighting matrix. * @param state the state to weight. * @return the state weight. */ private static final double getWeights(final double[][] weightMatrix, final ConnectFourState state) { double score = 0.0; outer: for (int x = 0; x < state.getWidth(); x++) { for (int y = state.getHeight() - 1; y >= 0; y--) { PlayerColor playerColor = state.readCell(x, y); if (playerColor == null) { continue outer; } if (playerColor == PlayerColor.MINIMIZING_PLAYER) { score -= weightMatrix[y][x]; } else { score += weightMatrix[y][x]; } } } return score; } /** * Computes the base scorer that relies on number of patterns. For example, * {@code redPatternCounts[i]} will denote the number of patterns of length * [@code i}. * @param minimizingPatternCounts the pattern count map for red patterns. * @param maximizingPatternCounts the pattern count map for white patterns. * @return the base estimate. */ private static final double computeBaseScore( int[] minimizingPatternCounts, int[] maximizingPatternCounts) { final int winningLength = minimizingPatternCounts.length - 1; double value = 0.0; if (minimizingPatternCounts[winningLength] != 0) { value = NEGATIVE_WIN_VALUE; } if (maximizingPatternCounts[winningLength] != 0) { value = POSITIVE_WIN_VALUE; } for (int length = 2; length < minimizingPatternCounts.length; length++) { int minimizingCount = minimizingPatternCounts[length]; value -= minimizingCount * Math.pow(BASE_VALUE, length); int maximizingCount = maximizingPatternCounts[length]; value += maximizingCount * Math.pow(BASE_VALUE, length); } return value; } /** * Computes the weight matrix. The closer the entry in the board is to the * center of the board, the closer the weight of that position will be to * {@code maxWeight}. * * @param width the width of the matrix. * @param height the height of the matrix. * @param maxWeight the maximum weight. The minimum weight will be always * 1.0. * @return the weight matrix. */ private static final double[][] getWeightMatrix(final int width, final int height, final double maxWeight) { final double[][] weightMatrix = new double[height][width]; for (int y = 0; y < weightMatrix.length; y++) { for (int x = 0; x < weightMatrix[0].length; x++) { int left = x; int right = weightMatrix[0].length - x - 1; int top = y; int bottom = weightMatrix.length - y - 1; int horizontalDifference = Math.abs(left - right); int verticalDifference = Math.abs(top - bottom); weightMatrix[y][x] = 1.0 + (maxWeight - 1.0) / (horizontalDifference + verticalDifference); } } return weightMatrix; } }

WeightMatrixConnectFourStateEvaluatorFunction

Я совершенно не понимаю, почему обе функции оценщика не обеспечивают интеллектуальных игр. Любой совет?

Ответы

Ответ 2

Я прочитал, и я не могу себе представить, как помочь вам, я признаюсь, что я был потерян, но я нашел вопрос, который может помочь вам в стеке https://codereview.stackexchange.com/questions/150541/connect-four -game-с-минимаксной-д.в.

Ответ 3

Выигрышные и проигрышные ходы в таком случае не являются эвристическими функциями, они являются двоичными ответами да/нет. Вы не должны относиться к ним эвристически для простой игры, такой как connect 4. Вы проверяете каждый ход "победит ли это?" (если так, то сделай это). Если нет, проверяйте каждый ход: "Это мешает другому игроку выиграть на следующем ходу?" (опять же, если так, сделайте это). После этого вы применяете эвристику, чтобы найти лучший ход из доступных.

Я подозреваю, что вы сталкиваетесь с такими проблемами, как "выигрышный ход в углу (значение 3) никогда не превзойдет проигрышный ход в середине (значение 13)".