()
self.screen = pygame.display.set_mode((640, 480), 0, 32)
self.background = pygame.Surface...((640, 480)).convert()
self.background.fill((0, 0, 0))
self.paddle_1 = pygame.Surface...((10, 50)).convert()
self.paddle_1.fill((0, 255, 255))
self.paddle_2 = pygame.Surface...((10, 50)).convert()
self.paddle_2.fill((255, 255, 0))
ball_surface = pygame.Surface(...self.ball_pos = (312.5, 232.5)
2、行动决策机制:
首先,在程序框架中设定不同的行动(action),作为训练对象:
# Action for paddle_1 (the object being trained)
if action[0] == 1: