大佬教程收集整理的这篇文章主要介绍了在自定义环境中运行的 Tensorflow C51 示例代码给出了形状错误,大佬教程大佬觉得挺不错的,现在分享给大家,也给大家做个参考。
我正在尝试从本教程中获取 Tensorflow 示例代码:https://www.tensorflow.org/agents/tutorials/9_c51_tutorial 在使用 pychrono 引擎的自定义环境中运行。目前,一个简单的测试代码以随机动作运行,但是当我尝试运行完整代码时出现错误:
InvalIDArgumentError: Must have updates.shape = inDices.shape + params.shape[1:]
or updates.shape = [],got updates.shape [1],inDices.shape [1],params.shape [100000,1] [Op:resourceScatterupdate]
自定义环境的代码为:
import pychrono as chrono
from pychrono import irrlicht as chronoirr
import numpy as np
#from __future__ import absolute_import
#from __future__ import division
#from __future__ import print_function
#import abc
#import tensorflow as tf
#import numpy as np
from tf_agents.environments import py_environment
#from tf_agents.environments import tf_environment
#from tf_agents.environments import tf_py_environment
#from tf_agents.environments import utils
from tf_agents.specs import array_spec
#from tf_agents.environments import wrappers
#from tf_agents.environments import suite_gym
from tf_agents.trajectorIEs import time_step as ts
from tyPing import Text #Any,Optional,chrono.SetChronoDataPath('/Users/maxwelllittle/opt/anaconda3/pkgs/pychrono-6.0.0-py37_0/share/chrono/data/')
class PyEnv(py_environment.PyEnvironment):
def __init__(self):
self.render = 0
self._action_spec = array_spec.boundedArraySpec(
shape=(),dtype=np.int32,minimum=0,maximum=1,name='action')
self._observation_spec = array_spec.boundedArraySpec(
shape=(1,1,4),dtype=np.float64,minimum=-1000,maximum=1000,name='observation')
self._time_step_spec = ts.timestep(array_spec.ArraySpec(shape=(1,),dtype = np.int32,name='step_type'),array_spec.ArraySpec(shape=(1,dtype = np.float32,name='reWARD'),name='discount'),self._observation_speC)
self._episode_ended = false
self.timestep = 0.01
self.omega = 0
# ---------------------------------------------------------------------
#
# Create the simulation system and add items
#
self.rev_pend_sys = chrono.ChSystemNSC()
chrono.ChCollisionModel.SetDefaultSuggestedEnvelope(0.001)
chrono.ChCollisionModel.SetDefaultSuggestedmargin(0.001)
#rev_pend_sys.SetSolverType(chrono.ChSolver.Type_barZILAIBORWEIN) # precise,more slow
self.rev_pend_sys.SetSolverMaxIterations(70)
# Create a contact material (surface property)to share between all objects.
self.rod_material = chrono.ChMaterialSurfaceNSC()
self.rod_material.SetFriction(0.5)
self.rod_material.SetdamPingF(0.2)
self.rod_material.SetCompliance (0.0000001)
self.rod_material.SetComplianCET(0.0000001)
# Create the set of rods in a vertical stack,along Y axis
self.size_rod_y = 2.0
self.radius_rod = 0.05
self.density_rod = 50; # kg/m^3
self.mass_rod = self.density_rod * self.size_rod_y *chrono.CH_C_PI* (self.radius_rod**2);
self.inertia_rod_y = (self.radius_rod**2) * self.mass_rod/2;
self.inertia_rod_x = (self.mass_rod/12)*((self.size_rod_y**2)+3*(self.radius_rod**2))
self.size_table_x = 0.3;
self.size_table_y = 0.3;
self.size_table_z = 0.3;
if self.render:
self.myapplication = chronoirr.ChIrrApp(self.rev_pend_sys)
self.myapplication.AddShadowAll();
self.myapplication.Settimestep(0.01)
self. myapplication.SetTryRealtime(true)
self.myapplication.AddTypicalSky()
self.myapplication.AddTypicallogo(chrono.GetChronoDATAFILE('logo_pychrono_Alpha.png'))
self.myapplication.AddTypicalCamera(chronoirr.vector3df(0.5,0.5,1.0))
self.myapplication.AddlightWithShadow(chronoirr.vector3df(2,4,2),# point
chronoirr.vector3df(0,0),# aimpoint
9,# radius (power)
1,9,# near,far
30) # angle of FOV
def _reset(self):
#print("reset")
self.isdone = false
self.rev_pend_sys.Clear()
# create it
self.body_rod = chrono.ChBody()
# set initial position
self.body_rod.SetPos(chrono.ChVectorD(0,self.size_rod_y/2,0 ))
# set mass propertIEs
self.body_rod.SetMass(self.mass_rod)
self.body_rod.SeTinertiaXX(chrono.ChVectorD(self.inertia_rod_x,self.inertia_rod_y,self.inertia_rod_X))
# Visualization shape,for rendering animation
self.cyl_base1= chrono.ChVectorD(0,-self.size_rod_y/2,0 )
self.cyl_base2= chrono.ChVectorD(0,0 )
self.body_rod_shape = chrono.ChCylinderShape()
self.body_rod_shape.GetCylinderGeometry().p1= self.cyl_base1
self.body_rod_shape.GetCylinderGeometry().p2= self.cyl_base2
self.body_rod_shape.GetCylinderGeometry().rad= self.radius_rod
self.body_rod.AddAsset(self.body_rod_shapE)
self.rev_pend_sys.Add(self.body_rod)
self.body_floor = chrono.ChBody()
self.body_floor.SetbodyFixed(true)
self.body_floor.SetPos(chrono.ChVectorD(0,-5,0 ))
if self.render:
self.body_floor_shape = chrono.ChBoxShape()
self.body_floor_shape.GetBoxGeometry().Size = chrono.ChVectorD(3,3)
self.body_floor.GetAssets().push_BACk(self.body_floor_shapE)
self.body_floor_texture = chrono.ChTexture()
self.body_floor_texture.SetTexturefilename(chrono.GetChronoDATAFILE('textures/concrete.jpg'))
self.body_floor.GetAssets().push_BACk(self.body_floor_texturE)
self.rev_pend_sys.Add(self.body_floor)
self.body_table = chrono.ChBody()
self.body_table.SetPos(chrono.ChVectorD(0,-self.size_table_y/2,0 ))
if self.render:
self.body_table_shape = chrono.ChBoxShape()
self.body_table_shape.GetBoxGeometry().Size = chrono.ChVectorD(self.size_table_x/2,self.size_table_y/2,self.size_table_z/2)
self.body_table_shape.Setcolor(chrono.Chcolor(0.4,0.4,0.5))
self.body_table.GetAssets().push_BACk(self.body_table_shapE)
self.body_table_texture = chrono.ChTexture()
self.body_table_texture.SetTexturefilename(chrono.GetChronoDATAFILE('textures/concrete.jpg'))
self.body_table.GetAssets().push_BACk(self.body_table_texturE)
self.body_table.SetMass(0.1)
self.rev_pend_sys.Add(self.body_tablE)
self.link_slIDer = chrono.ChlinkLockprismatic()
z2x = chrono.ChQuaternionD()
z2x.Q_from_AngAxis(-chrono.CH_C_PI / 2,chrono.ChVectorD(0,0))
self.link_slIDer.Initialize(self.body_table,self.body_floor,chrono.ChCoordsysD(chrono.ChVectorD(0,z2X))
self.rev_pend_sys.Add(self.link_slIDer)
self.act_initpos = chrono.ChVectorD(0,0)
self.actuator = chrono.ChlinkMotorlinearForce()
self.actuator.Initialize(self.body_table,chrono.ChFrameD(self.act_initpos))
self.rev_pend_sys.Add(self.actuator)
self.rod_pin = chrono.ChMarker()
self.body_rod.AddMarker(self.rod_pin)
self.rod_pin.Impose_Abs_Coord(chrono.ChCoordsysD(chrono.ChVectorD(0,0)))
self.table_pin = chrono.ChMarker()
self.body_table.AddMarker(self.table_pin)
self.table_pin.Impose_Abs_Coord(chrono.ChCoordsysD(chrono.ChVectorD(0,0)))
self.pin_joint = chrono.ChlinkLockRevolute()
self.pin_joint.Initialize(self.rod_pin,self.table_pin)
self.rev_pend_sys.Add(self.pin_joint)
if self.render:
# ---------------------------------------------------------------------
#
# Create an Irrlicht application to visualize the system
#
# ==important!== Use this function for adding a ChIrrNodeAsset to all items
# in the system. these ChIrrNodeAsset assets are 'proxIEs' to the Irrlicht meshes.
# If you need a finer control on which item really needs a visualization proxy
# Irrlicht,just use application.AssetBind(myitem); on a per-item basis.
self.myapplication.AssetBindAll();
# ==important!== Use this function for 'converTing' into Irrlicht meshes the assets
# that you added to the bodIEs into 3D shapes,they can be visualized by Irrlicht!
self.myapplication.Assetupdateall();
self.omega = self.pin_joint.GetRelWvel().Length()
self.isdone= false
self.steps= 0
self.timestepVal = 0
self._state = [self.link_slIDer.Getdist(),self.link_slIDer.Getdist_dt(),self.pin_joint.GetRelAngle(),self.omega]
#return np.array(self.statE)
return ts.restart(np.array([self._state],dtype=np.float64))
def _step(self,action):
forCEToApply = 0.0
self.timestepVal += 1
if action == 1:
forCEToApply = chrono.ChFunction_Const(1)
else:
forCEToApply = chrono.ChFunction_Const(-1)
self.actuator.SetForceFunction(forCEToApply)
self.omega = self.pin_joint.GetRelWvel().Length()
if self.render:
self.myapplication.GetDevice().run()
self.myapplication.beginScene()
self.myapplication.DrawAll()
self.myapplication.DoStep()
else:
self.rev_pend_sys.DoStepDynamics(self.timestep)
if self.render:
self.myapplication.EndScene()
self._state = [self.link_slIDer.Getdist(),self.omega]
if abs(self.link_slIDer.Getdist()) > 2 or abs(self.pin_joint.GetRelAngle()) > 0.2: #Went out of bounds
#return np.array(self.statE),np.array([0]),np.array([1])
return ts.termination(np.array([self._state],dtype=np.float64),reWARD=0.0)
elif self.steps > 100: #Finished
#return np.array(self.statE),np.array([1]),reWARD=1.0)
else: #Not done yet
return ts.Transition(np.array([self._state],reWARD=0.0,discount=1.0) #discount = 1.0 means ignore this reWARD.
#return np.array(self.statE),np.array([1])
def action_spec(self):
return self._action_spec
def observation_spec(self):
return self._observation_spec
def time_step_spec(self):
return self._time_step_spec
#these two arent in py_environment.PyEnvironment,the rest are
def close(self):
if self.render:
self.myapplication.GetDevice().closeDevice()
print('Destructor called,Device deleted.')
else:
print('Destructor called,No device to delete.')
def render(self,mode: Text = 'rgb_array'):
try:
self.myapplication.SetVIDeoframeSave(true)
#self.myapplication.SetVIDeoframeSaveInterval(interval)
except:
print('No ChIrrApp found. CAnnot save vIDeo frames.')
失败的代码是:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import imageio
import IPython
import matplotlib
import matplotlib.pyplot as plt
import PIl.Image
import pyvirtualdisplay
import PyEnv
import tensorflow as tf
from tf_agents.agents.categorical_dqn import categorical_dqn_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import suite_gym
from tf_agents.environments import tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.networks import categorical_q_network
from tf_agents.policIEs import random_tf_policy
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.trajectorIEs import trajectory
from tf_agentS.Utils import common
tf.compat.v1.enable_v2_behavior()
# Set up a virtual display for rendering OpenAI gym environments.
#display = pyvirtualdisplay.display(visible=0,size=(1400,900)).start()
#env_name = "CartPole-v1" # @param {type:"String"}
num_iterations = 15000 # @param {type:"Integer"}
initial_collect_steps = 1000 # @param {type:"Integer"}
collect_steps_per_iteration = 1 # @param {type:"Integer"}
replay_buffer_capacity = 100000 # @param {type:"Integer"}
fc_layer_params = (100,)
batch_size = 64 # @param {type:"Integer"}
learning_rate = 1e-3 # @param {type:"number"}
g@R_197_11624@a = 0.99
log_interval = 200 # @param {type:"Integer"}
num_atoms = 51 # @param {type:"Integer"}
min_q_value = -20 # @param {type:"Integer"}
max_q_value = 20 # @param {type:"Integer"}
n_step_update = 2 # @param {type:"Integer"}
num_eval_episodes = 10 # @param {type:"Integer"}
eval_interval = 1000 # @param {type:"Integer"}
Train_py_env = PyEnv.PyEnv()
eval_py_env = PyEnv.PyEnv()
Train_env = tf_py_environment.TFPyEnvironment(Train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
categorical_q_net = categorical_q_network.CategoricalQNetwork(
Train_env.observation_spec(),Train_env.action_spec(),num_atoms=num_atoms,fc_layer_params=fc_layer_params)
optimizer = tf.compat.v1.Train.AdamOptimizer(learning_rate=learning_ratE)
Train_step_counter = tf.compat.v2.Variable(0)
agent = categorical_dqn_agent.CategoricalDqnAgent(
Train_env.time_step_spec(),categorical_q_network=categorical_q_net,optimizer=optimizer,min_q_value=min_q_value,max_q_value=max_q_value,n_step_update=n_step_update,td_errors_loss_fn=common.element_wise_squared_loss,g@R_197_11624@a=g@R_197_11624@a,Train_step_counter=Train_step_counter)
agent.initialize()
def compute_avg_return(environment,policy,num_episodes=10):
@R_569_10586@l_return = 0.0
for _ in range(num_episodes):
time_step = environment.reset()
episode_return = 0.0
while not time_step.is_last():
action_step = policy.action(time_step)
time_step = environment.step(action_step.action)
episode_return += time_step.reWARD
@R_569_10586@l_return += episode_return
avg_return = @R_569_10586@l_return / num_episodes
return avg_return.numpy()[0]
random_policy = random_tf_policy.RandomTFPolicy(Train_env.time_step_spec(),Train_env.action_spec())
compute_avg_return(eval_env,random_policy,num_eval_episodes)
# Please also see the metrics module for standard implementations of different
# metrics.
replay_buffer = tf_uniform_replay_buffer.TFUniformreplayBuffer(
data_spec=agent.collect_data_spec,batch_size=Train_env.batch_size,max_length=replay_buffer_capacity)
def collect_step(environment,policy):
time_step = environment.current_time_step()
action_step = policy.action(time_step)
next_time_step = environment.step(action_step.action)
traj = trajectory.from_Transition(time_step,action_step,next_time_step)
# Add trajectory to the replay buffer
replay_buffer.add_batch(traj)
for _ in range(initial_collect_steps):
collect_step(Train_env,random_policy)
错误跟踪:
runfile('/Users/maxwelllittle/opt/anaconda3/pkgs/pychrono-6.0.0-py37_0/lib/python3.7/site-packages/pychrono/Auxon1/C51LongTest.py',wdir='/Users/maxwelllittle/opt/anaconda3/pkgs/pychrono-6.0.0-py37_0/lib/python3.7/site-packages/pychrono/Auxon1')
Reloaded modules: PyEnv
TraceBACk (most recent call last):
file "/Users/maxwelllittle/opt/anaconda3/pkgs/pychrono-6.0.0-py37_0/lib/python3.7/site-packages/pychrono/Auxon1/C51LongTest.py",line 143,in <module>
collect_step(Train_env,random_policy)
file "/Users/maxwelllittle/opt/anaconda3/pkgs/pychrono-6.0.0-py37_0/lib/python3.7/site-packages/pychrono/Auxon1/C51LongTest.py",line 140,in collect_step
replay_buffer.add_batch(traj)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tf_agents/replay_buffers/replay_buffer.py",line 83,in add_batch
return self._add_batch(items)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tf_agents/replay_buffers/tf_uniform_replay_buffer.py",line 205,in _add_batch
write_data_op = self._data_table.write(write_rows,items)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tf_agents/replay_buffers/table.py",line 131,in write
for (slot,value) in zip(flattened_slots,flattened_values)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tf_agents/replay_buffers/table.py",in <Listcomp>
for (slot,flattened_values)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tensorflow/python/ops/state_ops.py",line 306,in scatter_update
name=Name))
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py",line 1120,in resource_scatter_update
_ops.raise_from_not_ok_status(e,Name)
file "/Users/maxwelllittle/opt/anaconda3/envs/snubh/lib/python3.7/site-packages/tensorflow/python/framework/ops.py",line 6862,in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code,messagE),NonE)
file "<String>",line 3,in raise_from
InvalIDArgumentError: Must have updates.shape = inDices.shape + params.shape[1:] or updates.shape = [],1] [Op:resourceScatterupdate]
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)
以上是大佬教程为你收集整理的在自定义环境中运行的 Tensorflow C51 示例代码给出了形状错误全部内容,希望文章能够帮你解决在自定义环境中运行的 Tensorflow C51 示例代码给出了形状错误所遇到的程序开发问题。
如果觉得大佬教程网站内容还不错,欢迎将大佬教程推荐给程序员好友。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ:384754419,请注明来意。