返回tictactoe的minimax算法中的bestMove

Return bestMove in minimax algorithm for tictactoe

本文关键字：bestMove、算法、minimax、tictactoe、返回　更新时间：2023-10-16

我尝试为Russell和Norvig的《人工智能》一书中给出的井字游戏编写极小极大算法。除了将bestMove返回给用户之外，其他功能都已实现。我正在努力返回最佳移动，但无法确定应该在什么时候选定最佳移动。有人能帮忙吗？

// Entry point of the search: runs max_move on `state` and returns whatever
// move ended up in bestMove.
moveT MiniMax(stateT state)
{
// No explicit initial value: if max_move returns early because the game is
// already over, nothing is ever written to this variable.
moveT bestMove;
// The score returned by max_move is discarded; only the out-parameter is used.
// NOTE(review): max_move is declared with `int &`, so this call only binds if
// moveT is int -- confirm against the typedef.
max_move(state,bestMove);
return bestMove;
}
// Maximizing half of the search: returns the largest value min_move reports
// over every move generated for `state`, or the static evaluation when the
// game is over. The move that produced the largest value is written to bestMove.
int max_move(stateT state,int & bestMove)
{
// Starting bound; any evaluation greater than this replaces it.
int v = -10000;
if(GameIsOver(state))
{
// Terminal position: bestMove is left untouched on this path.
return EvaluateStaticPosition(state);
}
vector<moveT> moveList;
GenerateMoveList(state, moveList);
int nMoves = moveList.size();
for(int i = 0 ; i < nMoves ; i++)
{
moveT move = moveList[i];
MakeMove(state, move);
// NOTE(review): the caller's bestMove reference is handed straight to the
// recursive call, so deeper levels can overwrite a move recorded by an
// earlier iteration of this loop.
int curValue = min_move(state,bestMove);
if(curValue > v)
{
v = curValue;
bestMove = move;
}
// Undo the trial move before trying the next one.
RetractMove(state, move);
}
return v;
}
// Minimizing half of the search: intended to return the smallest value
// max_move reports over every move generated for `state`, or the static
// evaluation when the game is over.
int min_move(stateT state, int &bestMove)
{
// Starting bound for the minimum.
int v = 10000;
if(GameIsOver(state))
{
return EvaluateStaticPosition(state);
}
vector<moveT> moveList;
GenerateMoveList(state, moveList);
int nMoves = moveList.size();
for(int i = 0 ; i < nMoves; i++)
{
moveT move = moveList[i];
MakeMove(state, move);
// NOTE(review): `depth` is not declared in this function, and max_move is
// defined with two parameters, not three -- this call does not match it.
int curValue = max_move(state,depth+1,bestMove);
if(curValue < v)
{
// NOTE(review): the assignment is reversed. It overwrites the local curValue
// and never updates v, so v is still 10000 when it is returned below.
curValue = v;
}
// Undo the trial move before trying the next one.
RetractMove(state, move);
}
return v;
}

附注(P.S.)：还有其他用于查找极小极大值的伪代码。然而，它们只针对井字游戏，而我正试图将其扩展到其他游戏。谢谢

更新：整个代码可以在这里找到：http://ideone.com/XPswCl

在最简单的minimax版本中，第一个玩家希望最大化他的分数，第二个玩家希望最小化第一个玩家的分数。由于第一和第二玩家都只关心第一玩家的分数，EvaluateStaticPosition应该返回一个值，指示棋盘状态对第一玩家有多好。轮到谁是无关紧要的。

// Scores a board from FIRST_PLAYER's point of view, independent of whose turn
// it is: WINNING_POSITION if FIRST_PLAYER has won, LOSING_POSITION if the
// opponent has won, NEUTRAL_POSITION otherwise.
int EvaluateStaticPosition(stateT state)
{
    if (CheckForWin(state, FIRST_PLAYER))
        return WINNING_POSITION;

    // The opponent is only checked once a first-player win has been ruled out.
    return CheckForWin(state, Opponent(FIRST_PLAYER)) ? LOSING_POSITION
                                                      : NEUTRAL_POSITION;
}

现在，当你想要最适合第一个玩家的移动时，请调用MaxMove。当你想要对第二个玩家最好的移动时，调用MinMove。

// Chooses a move for the side to play in `state`. FIRST_PLAYER searches with
// MaxMove, the other side with MinMove; the resulting rating is printed and
// the move written by the search is returned.
moveT MiniMax(stateT state)
{
    moveT bestMove;
    const bool firstPlayerToMove = (state.whoseTurn == FIRST_PLAYER);
    const int rating = firstPlayerToMove ? MaxMove(state, bestMove)
                                         : MinMove(state, bestMove);
    cout << "i is " << rating << endl;
    return bestMove;
}

最后，MinMove和MaxMove内部存在一些问题。在这两个函数中给curRating赋值时，不应将bestMove作为第二个参数传递给MaxMove或MinMove，否则对手的最佳移动会被写入bestMove，这是没有意义的。相反，应声明一个opponentsBestMove对象并将其作为第二个参数传递。(你实际上不会使用这个对象，甚至不会在之后查看它的值，但这没关系)。做了这个更改之后，MinMove中就不会再有任何地方给bestMove赋值，所以应该在if(curRating < v)块中给bestMove赋值。

// Maximizing search step. Returns the static evaluation when the game is
// over; otherwise tries every generated move, asks MinMove for the resulting
// rating, and returns the highest rating found. The move that achieved it is
// written to bestMove.
int MaxMove(stateT state, moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> candidates;
    GenerateMoveList(state, candidates);

    int highest = -1000;
    for (vector<moveT>::iterator it = candidates.begin(); it != candidates.end(); ++it)
    {
        moveT candidate = *it;
        MakeMove(state, candidate);

        // The opponent's reply is collected in a throwaway variable so the
        // recursion cannot overwrite the caller's bestMove.
        moveT ignoredReply;
        int rating = MinMove(state, ignoredReply);
        if (rating > highest)
        {
            highest = rating;
            bestMove = candidate;
        }

        RetractMove(state, candidate);
    }
    return highest;
}
// Minimizing search step. Returns the static evaluation when the game is
// over; otherwise tries every generated move, asks MaxMove for the resulting
// rating, and returns the lowest rating found. The move that achieved it is
// written to bestMove.
int MinMove(stateT state,  moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> candidates;
    GenerateMoveList(state, candidates);

    int lowest = 1000;
    for (vector<moveT>::iterator it = candidates.begin(); it != candidates.end(); ++it)
    {
        moveT candidate = *it;
        MakeMove(state, candidate);

        // The opponent's reply is collected in a throwaway variable so the
        // recursion cannot overwrite the caller's bestMove.
        moveT ignoredReply;
        int rating = MaxMove(state, ignoredReply);
        if (rating < lowest)
        {
            lowest = rating;
            bestMove = candidate;
        }

        RetractMove(state, candidate);
    }
    return lowest;
}

在这一点上，你应该有一个无与伦比的人工智能！

The final position looks like this:
O | O | X
---+---+---
X | X | O
---+---+---
O | X | X
Cat's game.

另一种方法利用了井字游戏是零和游戏的事实。换句话说，在游戏结束时，玩家的分数之和将等于零。对于双人游戏，这意味着一名玩家的分数将始终为另一名玩家分数的负数。这对我们来说很方便，因为最小化其他玩家的分数就等于最大化自己的分数。因此，我们可以让两名玩家都尝试最大化自己的得分，而不是让一名玩家最大化自己的得分、另一名玩家最小化对方的得分。

将EvaluateStaticPosition更改回其原始形式，以便根据当前玩家的棋盘状态进行评分。

// Scores a board from the point of view of the player whose turn it is
// (state.whoseTurn): WINNING_POSITION if that player has won, LOSING_POSITION
// if their opponent has won, NEUTRAL_POSITION otherwise.
int EvaluateStaticPosition(stateT state)
{
    // Checks run in order and stop at the first match.
    return CheckForWin(state, state.whoseTurn)           ? WINNING_POSITION
         : CheckForWin(state, Opponent(state.whoseTurn)) ? LOSING_POSITION
                                                         : NEUTRAL_POSITION;
}

删除MinMove，因为我们只关心最大化。重写MaxMove，使其选择给对手最差分数的动作。最佳移动的分数是其他玩家最差分数的负数。

// Single search routine used by both players. Returns the static evaluation
// when the game is over; otherwise tries every generated move and rates it as
// the negation of what the recursive call returns for the resulting position.
// The highest rating is returned and the move that achieved it is written to
// bestMove.
int MaxMove(stateT state, moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> candidates;
    GenerateMoveList(state, candidates);

    int highest = -1000;
    for (vector<moveT>::size_type idx = 0; idx < candidates.size(); ++idx)
    {
        moveT candidate = candidates[idx];
        MakeMove(state, candidate);

        // The reply found by the recursive call is not needed by this level.
        moveT ignoredReply;
        int rating = -MaxMove(state, ignoredReply);
        if (rating > highest)
        {
            highest = rating;
            bestMove = candidate;
        }

        RetractMove(state, candidate);
    }
    return highest;
}

由于MaxMove同时用于两名玩家，因此我们不再需要在MiniMax函数中区分玩家。

// Chooses a move for whichever side is to play in `state` by running MaxMove,
// prints the rating it returned, and returns the move the search wrote.
moveT MiniMax(stateT state)
{
    moveT bestMove;
    const int rating = MaxMove(state, bestMove);
    cout << "i is " << rating << endl;
    return bestMove;
}

看起来MiniMax已经为您正确地选出了它，只需使用初始状态和深度来调用即可。(除非根据状态，先走的是第二个玩家；那样的话，你应该在MiniMax中调用min_move。)

编辑：是的，我忽略了一些东西，bestMove目前没有多大意义。在max_move内的程序中，您可以这样更改循环：

for(int i = 0 ; i < nMoves ; i++)
{
moveT move = moveList[i];
MakeMove(state, move);
int new_value = min_move(state, depth+1);
if(new_value > v)
{
v=new_value;
}
RetractMove(state, move);
}

在那之后，你可以思考bestMove是什么意思？我的想法是，你有兴趣为井字游戏找到一个"尽可能好的"系列动作。为此，你需要一个向量，甚至更好的是一个堆栈。但这也意味着将std::stack<int>* best_moves作为最后一个参数。

对于堆栈实现，在min_move中返回下一步，如果它们的值最好，则将move推到best_moves堆栈的顶部。当然，在游戏结束时，你只需要返回空堆栈。它需要OOP方法来正确地完成它，我会在有时间的时候做。

如果您所需要的只是最好的下一步，那么我建议您将min_move和max_move的返回类型更改为如下结构：

// Pairs a search score with the move that produced it.
struct Value_move{
// Score of the position.
int value;
// Move that achieved `value`.
moveT best_move;
};

然后max_move的新实现看起来如下：

// Sentinel stored in best_move before any move has been examined.
const int MOVE_INVALID = -12345;
// Sentinel stored in best_move when the game is already over.
const int MOVE_NOTHING = -12346;
// Maximizing search step that returns both the score and the move.
//
// state: position to search from.
// depth: current search depth; it is only forwarded (incremented) to min_move.
//
// Returns a Value_move whose `value` is the highest score min_move reports
// over all generated moves and whose `best_move` is the move that achieved it.
// When the game is over, `value` is the static evaluation and `best_move` is
// MOVE_NOTHING. If no generated move beats the initial bound, `best_move`
// stays MOVE_INVALID.
Value_move max_move(stateT state, int depth)
{
    Value_move best;
    best.value = -10000;
    best.best_move = MOVE_INVALID;

    if (GameIsOver(state))
    {
        best.value = EvaluateStaticPosition(state);
        best.best_move = MOVE_NOTHING;
        return best;
    }

    vector<moveT> moveList;
    GenerateMoveList(state, moveList);
    int nMoves = moveList.size();
    for (int i = 0; i < nMoves; i++)
    {
        moveT move = moveList[i];
        MakeMove(state, move);
        Value_move curr = min_move(state, depth+1);
        if (curr.value > best.value)
        {
            // Record the move played at this level, not the reply stored in curr.
            best.value = curr.value;
            best.best_move = move;
        }
        RetractMove(state, move);
    }

    // Return the accumulated result. The original returned `v`, which is not
    // declared in this function.
    return best;
}

您只需要在MiniMax函数中的返回结构中拾取best_move字段。

备注：
不过你得承认，这段代码在很多方面都不像C++程序，而更像C程序。否则，所有采用CapitalCamelCase命名的函数都应该是类方法，并且您应该通过(const)引用而不是按值传递状态——只有当状态实际上是typedef背后的指针时，整个代码才有意义。

您的代码找到了正确的值，但随后通过向下传递相同的引用来覆盖它。

// Passes the caller's own bestMove reference into the recursive call.
int curValue = min_move(state,bestMove);

应该成为

moveT nextMove; // Throwaway out-parameter for the recursive call; its value is never read afterwards.
int curValue = min_move(state,nextMove);

您还需要对min_move函数进行同样的更改。

注意：在min_move中，您的代码调用max_move的参数比您为函数定义的参数多。