当前位置：首页 > news >正文

南平建设网站wordpress 登录注册

news 2026/6/11 13:51:24

南平建设网站,wordpress 登录注册,技术培训ui设计,都江堰建设局官方网站

目录：前言；模型成果演示；训练过程演示；代码实现（deep_network、tetris、test、train）

前言

当今，深度学习在各个领域展现出了惊人的应用潜力，而游戏开发领域也不例外。俄罗斯方块作为经典的益智游戏，一直以来深受玩家喜爱。在这个项目中，我将深度学习与游戏开发相结合，通过使用PyTorch为俄罗斯方块赋予了智能化的能力。

这个深度学习项目的目标是训练一个模型，使其能够自动玩俄罗斯方块，并且在游戏中取得高分。通过使用神经网络，我以游戏的状态作为输入，然后模型将预测最佳的移动策略，从而使方块能够正确地落下并消除行。通过反复训练和优化，我希望能够让模型达到专业玩家的水平，并且掌握一些高级策略。

本博客将详细介绍我在这个项目中所采用的深度学习方法和技术。我将分享我的代码实现，并解释我在训练过程中所遇到的挑战和解决方案。无论你是对深度学习感兴趣，还是对俄罗斯方块情有独钟，这个项目都能够给你带来一些启发和思考。

我相信，通过将深度学习和游戏开发相结合，我们能够为游戏带来全新的可能性。让我们一起探索这个项目，看看深度学习如何在俄罗斯方块这个经典游戏中展现其强大的应用能力吧！

模型成果演示

Pytorch个人学习记录总结俄罗斯方块の深度学习小项目

训练过程演示

Pytorch个人学习记录总结俄罗斯方块の深度学习小项目

代码实现

deep_network

import torch.nn as nnclass DeepQNetwork(nn.Module):def __init__(self):super(DeepQNetwork, self).__init__()self.conv1 nn.Sequential(nn.Linear(4, 64), nn.ReLU(inplaceTrue))self.conv2 nn.Sequential(nn.Linear(64, 64), nn.ReLU(inplaceTrue))self.conv3 nn.Sequential(nn.Linear(64, 1))self._create_weights()def _create_weights(self):for m in self.modules():if isinstance(m, nn.Linear):nn.init.xavier_uniform_(m.weight)nn.init.constant_(m.bias, 0)def forward(self, x):x self.conv1(x)x self.conv2(x)x self.conv3(x)return x

tetris

import numpy as np from PIL import Image import cv2 from matplotlib import style import torch import randomstyle.use(ggplot)class Tetris:piece_colors [(0, 0, 0),(255, 255, 0),(147, 88, 254),(54, 175, 144),(255, 0, 0),(102, 217, 238),(254, 151, 32),(0, 0, 255)]pieces [[[1, 1],[1, 1]],[[0, 2, 0],[2, 2, 2]],[[0, 3, 3],[3, 3, 0]],[[4, 4, 0],[0, 4, 4]],[[5, 5, 5, 5]],[[0, 0, 6],[6, 6, 6]],[[7, 0, 0],[7, 7, 7]]]def __init__(self, height20, width10, block_size20):self.height heightself.width widthself.block_size block_sizeself.extra_board np.ones((self.height * self.block_size, self.width * int(self.block_size / 2), 3),dtypenp.uint8) * np.array([204, 204, 255], dtypenp.uint8)self.text_color (200, 20, 220)self.reset()def reset(self):self.board [[0] * self.width for _ in
range(self.height)]self.score 0self.tetrominoes 0self.cleared_lines 0self.bag list(range(len(self.pieces)))random.shuffle(self.bag)self.ind self.bag.pop()self.piece [row[:] for row in self.pieces[self.ind]]self.current_pos {x: self.width // 2 - len(self.piece[0]) // 2, y: 0}self.gameover Falsereturn self.get_state_properties(self.board)def rotate(self, piece):num_rows_orig num_cols_new len(piece)num_rows_new len(piece[0])rotated_array []for i in range(num_rows_new):new_row [0] * num_cols_newfor j in range(num_cols_new):new_row[j] piece[(num_rows_orig - 1) - j][i]rotated_array.append(new_row)return rotated_arraydef get_state_properties(self, board):lines_cleared, board self.check_cleared_rows(board)holes self.get_holes(board)bumpiness, height self.get_bumpiness_and_height(board)return torch.FloatTensor([lines_cleared, holes, bumpiness, height])def get_holes(self, board):num_holes 0for col in zip(*board):row 0while row self.height and col[row] 0:row 1num_holes len([x for x in col[row 1:] if x 0])return num_holesdef get_bumpiness_and_height(self, board):board np.array(board)mask board ! 
0invert_heights np.where(mask.any(axis0), np.argmax(mask, axis0), self.height)heights self.height - invert_heightstotal_height np.sum(heights)currs heights[:-1]nexts heights[1:]diffs np.abs(currs - nexts)total_bumpiness np.sum(diffs)return total_bumpiness, total_heightdef get_next_states(self):states {}piece_id self.indcurr_piece [row[:] for row in self.piece]if piece_id 0: # O piecenum_rotations 1elif piece_id 2 or piece_id 3 or piece_id 4:num_rotations 2else:num_rotations 4for i in range(num_rotations):valid_xs self.width - len(curr_piece[0])for x in range(valid_xs 1):piece [row[:] for row in curr_piece]pos {x: x, y: 0}while not self.check_collision(piece, pos):pos[y] 1self.truncate(piece, pos)board self.store(piece, pos)states[(x, i)] self.get_state_properties(board)curr_piece self.rotate(curr_piece)return statesdef get_current_board_state(self):board [x[:] for x in self.board]for y in range(len(self.piece)):for x in range(len(self.piece[y])):board[y self.current_pos[y]][x self.current_pos[x]] self.piece[y][x]return boarddef new_piece(self):if not len(self.bag):self.bag list(range(len(self.pieces)))random.shuffle(self.bag)self.ind self.bag.pop()self.piece [row[:] for row in self.pieces[self.ind]]self.current_pos {x: self.width // 2 - len(self.piece[0]) // 2,y: 0}if self.check_collision(self.piece, self.current_pos):self.gameover Truedef check_collision(self, piece, pos):future_y pos[y] 1for y in range(len(piece)):for x in range(len(piece[y])):if future_y y self.height - 1 or self.board[future_y y][pos[x] x] and piece[y][x]:return Truereturn Falsedef truncate(self, piece, pos):gameover Falselast_collision_row -1for y in range(len(piece)):for x in range(len(piece[y])):if self.board[pos[y] y][pos[x] x] and piece[y][x]:if y last_collision_row:last_collision_row yif pos[y] - (len(piece) - last_collision_row) 0 and last_collision_row -1:while last_collision_row 0 and len(piece) 1:gameover Truelast_collision_row -1del piece[0]for y in range(len(piece)):for x in 
range(len(piece[y])):if self.board[pos[y] y][pos[x] x] and piece[y][x] and y last_collision_row:last_collision_row yreturn gameoverdef store(self, piece, pos):board [x[:] for x in self.board]for y in range(len(piece)):for x in range(len(piece[y])):if piece[y][x] and not board[y pos[y]][x pos[x]]:board[y pos[y]][x pos[x]] piece[y][x]return boarddef check_cleared_rows(self, board):to_delete []for i, row in enumerate(board[::-1]):if 0 not in row:to_delete.append(len(board) - 1 - i)if len(to_delete) 0:board self.remove_row(board, to_delete)return len(to_delete), boarddef remove_row(self, board, indices):for i in indices[::-1]:del board[i]board [[0 for _ in range(self.width)]] boardreturn boarddef step(self, action, renderTrue, videoNone):x, num_rotations actionself.current_pos {x: x, y: 0}for _ in range(num_rotations):self.piece self.rotate(self.piece)while not self.check_collision(self.piece, self.current_pos):self.current_pos[y] 1if render:self.render(video)overflow self.truncate(self.piece, self.current_pos)if overflow:self.gameover Trueself.board self.store(self.piece, self.current_pos)lines_cleared, self.board self.check_cleared_rows(self.board)score 1 (lines_cleared ** 2) * self.widthself.score scoreself.tetrominoes 1self.cleared_lines lines_clearedif not self.gameover:self.new_piece()if self.gameover:self.score - 2return score, self.gameoverdef render(self, videoNone):if not self.gameover:img [self.piece_colors[p] for row in self.get_current_board_state() for p in row]else:img [self.piece_colors[p] for row in self.board for p in row]img np.array(img).reshape((self.height, self.width, 3)).astype(np.uint8)img img[..., ::-1]img Image.fromarray(img, RGB)img img.resize((self.width * self.block_size, self.height * self.block_size), 0)img np.array(img)img[[i * self.block_size for i in range(self.height)], :, :] 0img[:, [i * self.block_size for i in range(self.width)], :] 0img np.concatenate((img, self.extra_board), axis1)cv2.putText(img, Score:, (self.width * 
self.block_size int(self.block_size / 2), self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)cv2.putText(img, str(self.score),(self.width * self.block_size int(self.block_size / 2), 2 * self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)cv2.putText(img, Pieces:, (self.width * self.block_size int(self.block_size / 2), 4 * self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)cv2.putText(img, str(self.tetrominoes),(self.width * self.block_size int(self.block_size / 2), 5 * self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)cv2.putText(img, Lines:, (self.width * self.block_size int(self.block_size / 2), 7 * self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)cv2.putText(img, str(self.cleared_lines),(self.width * self.block_size int(self.block_size / 2), 8 * self.block_size),fontFacecv2.FONT_HERSHEY_DUPLEX, fontScale1.0, colorself.text_color)if video:video.write(img)cv2.imshow(Deep Q-Learning Tetris, img)cv2.waitKey(1)test import argparse import torch import cv2 from src.tetris import Tetrisdef get_args():parser argparse.ArgumentParser(Implementation of Deep Q Network to play Tetris)parser.add_argument(--width, typeint, default10, helpThe common width for all images)parser.add_argument(--height, typeint, default20, helpThe common height for all images)parser.add_argument(--block_size, typeint, default30, helpSize of a block)parser.add_argument(--fps, typeint, default300, helpframes per second)parser.add_argument(--saved_path, typestr, defaulttrained_models)parser.add_argument(--output, typestr, defaultoutput.mp4)args parser.parse_args()return argsdef run_test(opt):if torch.cuda.is_available():torch.cuda.manual_seed(123)else:torch.manual_seed(123)if torch.cuda.is_available():model torch.load({}/tetris.format(opt.saved_path))else:model torch.load({}/tetris.format(opt.saved_path), map_locationlambda storage, 
loc: storage)model.eval()env Tetris(widthopt.width, heightopt.height, block_sizeopt.block_size)env.reset()if torch.cuda.is_available():model.cuda()out cv2.VideoWriter(opt.output, cv2.VideoWriter_fourcc(*MJPG), opt.fps,(int(1.5*opt.width*opt.block_size), opt.height*opt.block_size))while True:next_steps env.get_next_states()next_actions, next_states zip(*next_steps.items())next_states torch.stack(next_states)if torch.cuda.is_available():next_states next_states.cuda()predictions model(next_states)[:, 0]index torch.argmax(predictions).item()action next_actions[index]_, done env.step(action, renderTrue, videoout)if done:out.release()breakif __name__ __main__:opt get_args()run_test(opt)train import argparse import os import shutil from random import random, randint, sampleimport numpy as np import torch import torch.nn as nn from tensorboardX import SummaryWriterfrom src.deep_q_network import DeepQNetwork from src.tetris import Tetris from collections import dequedef get_args():parser argparse.ArgumentParser(Implementation of Deep Q Network to play Tetris)parser.add_argument(--width, typeint, default10, helpThe common width for all images)parser.add_argument(--height, typeint, default20, helpThe common height for all images)parser.add_argument(--block_size, typeint, default30, helpSize of a block)parser.add_argument(--batch_size, typeint, default512, helpThe number of images per batch)parser.add_argument(--lr, typefloat, default1e-3)parser.add_argument(--gamma, typefloat, default0.99)parser.add_argument(--initial_epsilon, typefloat, default1)parser.add_argument(--final_epsilon, typefloat, default1e-3)parser.add_argument(--num_decay_epochs, typefloat, default2000)parser.add_argument(--num_epochs, typeint, default3000)parser.add_argument(--save_interval, typeint, default1000)parser.add_argument(--replay_memory_size, typeint, default30000,helpNumber of epoches between testing phases)parser.add_argument(--log_path, typestr, 
defaulttensorboard)parser.add_argument(--saved_path, typestr, defaulttrained_models)args parser.parse_args()return argsdef train(opt):if torch.cuda.is_available():torch.cuda.manual_seed(123)else:torch.manual_seed(123)if os.path.isdir(opt.log_path):shutil.rmtree(opt.log_path)os.makedirs(opt.log_path)writer SummaryWriter(opt.log_path)env Tetris(widthopt.width, heightopt.height, block_sizeopt.block_size)model DeepQNetwork()optimizer torch.optim.Adam(model.parameters(), lropt.lr)criterion nn.MSELoss()state env.reset()if torch.cuda.is_available():model.cuda()state state.cuda()replay_memory deque(maxlenopt.replay_memory_size)epoch 0while epoch opt.num_epochs:next_steps env.get_next_states()# Exploration or exploitationepsilon opt.final_epsilon (max(opt.num_decay_epochs - epoch, 0) * (opt.initial_epsilon - opt.final_epsilon) / opt.num_decay_epochs)u random()random_action u epsilonnext_actions, next_states zip(*next_steps.items())next_states torch.stack(next_states)if torch.cuda.is_available():next_states next_states.cuda()model.eval()with torch.no_grad():predictions model(next_states)[:, 0]model.train()if random_action:index randint(0, len(next_steps) - 1)else:index torch.argmax(predictions).item()next_state next_states[index, :]action next_actions[index]reward, done env.step(action, renderTrue)if torch.cuda.is_available():next_state next_state.cuda()replay_memory.append([state, reward, next_state, done])if done:final_score env.scorefinal_tetrominoes env.tetrominoesfinal_cleared_lines env.cleared_linesstate env.reset()if torch.cuda.is_available():state state.cuda()else:state next_statecontinueif len(replay_memory) opt.replay_memory_size / 10:continueepoch 1batch sample(replay_memory, min(len(replay_memory), opt.batch_size))state_batch, reward_batch, next_state_batch, done_batch zip(*batch)state_batch torch.stack(tuple(state for state in state_batch))reward_batch torch.from_numpy(np.array(reward_batch, dtypenp.float32)[:, None])next_state_batch torch.stack(tuple(state for 
state in next_state_batch))if torch.cuda.is_available():state_batch state_batch.cuda()reward_batch reward_batch.cuda()next_state_batch next_state_batch.cuda()q_values model(state_batch)model.eval()with torch.no_grad():next_prediction_batch model(next_state_batch)model.train()y_batch torch.cat(tuple(reward if done else reward opt.gamma * prediction for reward, done, prediction inzip(reward_batch, done_batch, next_prediction_batch)))[:, None]optimizer.zero_grad()loss criterion(q_values, y_batch)loss.backward()optimizer.step()print(Epoch: {}/{}, Action: {}, Score: {}, Tetrominoes {}, Cleared lines: {}.format(epoch,opt.num_epochs,action,final_score,final_tetrominoes,final_cleared_lines))writer.add_scalar(Train/Score, final_score, epoch - 1)writer.add_scalar(Train/Tetrominoes, final_tetrominoes, epoch - 1)writer.add_scalar(Train/Cleared lines, final_cleared_lines, epoch - 1)if epoch 0 and epoch % opt.save_interval 0:torch.save(model, {}/tetris_{}.format(opt.saved_path, epoch))torch.save(model, {}/tetris.format(opt.saved_path))if __name__ __main__:opt get_args()train(opt)

查看全文

http://www.eeditor.cn/news/120351/