have the wrong name for the weather.csv file; it should be weather_data.csv according to the schema.json in the same folders.
print(observation)
>>>
[
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 21.88, 40.35, 70.91, 11.447196, 1.0, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 22.94, 33.11, 11.41, 1.0, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 21.05, 43.22, 7.61, 0.9813014190740775, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 20.92, 41.61, 1.55, 3.815732, 0.9813886486921994, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 22.57, 41.81, 16.8, 2.3848325, 1.0, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 21.95, 43.22, 12.8, 1.907866, 1.0, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 23.18, 41.62, 12.3, 0.9266858922799234, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 22.94, 41.5, 21.0, 0.9904502885063199, 0.0, 0.0, 0.0, nan],
[1, 4, 9, 11.04, 16.74, 12.48, 7.76, 80.12, 55.33, 67.31, 85.33, 115.19, 164.94, 0.0, 88.31, 16.88, 413.68, 0.0, 450.45, 0.5453005045, 23.1, 41.84, 10.1, 1.0, 0.0, 0.0, 0.0, nan]
]
Traceback (most recent call last):
File "\citylearn_playground\citylearn_sb3.py", line 70, in <module>
agent.learn(total_timesteps=100)
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\ppo\ppo.py", line 317, in learn
return super().learn(
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 262, in learn
continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 172, in collect_rollouts
actions, values, log_probs = self.policy(obs_tensor)
File "\citylearn_playground\venv\lib\site-packages\torch\nn\modules\module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\common\policies.py", line 590, in forward
distribution = self._get_action_dist_from_latent(latent_pi)
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\common\policies.py", line 606, in _get_action_dist_from_latent
return self.action_dist.proba_distribution(mean_actions, self.log_std)
File "\citylearn_playground\venv\lib\site-packages\stable_baselines3\common\distributions.py", line 153, in proba_distribution
self.distribution = Normal(mean_actions, action_std)
File "\citylearn_playground\venv\lib\site-packages\torch\distributions\normal.py", line 56, in __init__
super(Normal, self).__init__(batch_shape, validate_args=validate_args)
File "\citylearn_playground\venv\lib\site-packages\torch\distributions\distribution.py", line 56, in __init__
raise ValueError(
ValueError: Expected parameter loc (Tensor of shape (1, 9)) of distribution Normal(loc: torch.Size([1, 9]), scale: torch.Size([1, 9])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0')
Process finished with exit code 1
The package installed with pip is considerably different from the one found in this repository. Is the pip package not "official"?
What is the recommended way to install CityLearn for usage?
This is the class I am using to transform the action and observation spaces for gym. If there is an official or better way, I would like to ask for a bit of help.
from citylearn.citylearn import CityLearnEnv
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
import gym
import numpy as np
class EnvCityGym(gym.Env):
    """Present a multi-building environment as one flat, single-agent gym env.

    The per-building action and observation spaces of the wrapped environment
    are concatenated into a single ``gym.spaces.Box`` each, and the
    per-building rewards are summed into one scalar.
    """

    def __init__(self, env):
        """Build the flat action and observation spaces from ``env``.

        Args:
            env: The wrapped environment. It must provide ``action_spaces``,
                ``observation_spaces``, ``buildings_states_actions``,
                ``reset``, ``step`` and ``render``.
        """
        super().__init__()
        self.env = env
        self.num_envs = 1
        self.num_buildings = len(env.action_spaces)

        # Number of enabled actions for each building, in building order.
        # ``step`` uses these counts to split the flat action vector back
        # into one list per building.
        self._action_counts = [
            int(sum(spec["actions"].values()))
            for spec in env.buildings_states_actions.values()
        ]

        # Every individual action is bounded to [-1, 1].
        total_actions = sum(self._action_counts)
        self.act_lows = np.full(total_actions, -1.0, dtype=np.float32)
        self.act_highs = np.full(total_actions, 1.0, dtype=np.float32)
        self.action_space = gym.spaces.Box(
            low=self.act_lows, high=self.act_highs, dtype=np.float32
        )

        self.obs_lows = self._flatten(
            box.low for box in env.observation_spaces
        )
        self.obs_highs = self._flatten(
            box.high for box in env.observation_spaces
        )
        self.observation_space = gym.spaces.Box(
            low=self.obs_lows, high=self.obs_highs, dtype=np.float32
        )

    @staticmethod
    def _flatten(arrays):
        """Concatenate 1-D sequences into a new float32 vector.

        An empty iterable yields an empty vector. The per-building sequences
        may have different lengths.
        """
        parts = [np.asarray(part, dtype=np.float32).ravel() for part in arrays]
        if not parts:
            return np.empty(0, dtype=np.float32)
        return np.concatenate(parts)

    def reset(self):
        """Reset the wrapped environment and return the flat observation."""
        return self.get_observation(self.env.reset())

    def get_observation(self, obs):
        """Flatten per-building observations into one float32 vector.

        Args:
            obs: Iterable with one observation sequence per building.

        Returns:
            A 1-D ``np.float32`` array, matching the dtype declared by
            ``observation_space``, with NaN entries replaced by ``0.0``.
        """
        flat = self._flatten(obs)
        # A NaN fed to the policy network makes every output NaN, so it is
        # replaced here before the observation leaves the wrapper.
        # NOTE(review): the original author reported that NaN replacement
        # alone did not resolve their training failure - the source of the
        # NaN values in the dataset still needs to be confirmed and fixed.
        flat[np.isnan(flat)] = 0.0
        return flat

    def step(self, action):
        """Apply a flat action vector and return the flattened transition.

        Args:
            action: 1-D sequence holding the actions of all buildings, laid
                out in the same order used to build ``action_space``.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is
            the flat vector from ``get_observation`` and ``reward`` is the
            sum of the per-building rewards.
        """
        flat_action = np.asarray(action).ravel()

        # Regroup by building so every building receives exactly the number
        # of actions that was counted for it when the action space was built.
        per_building = []
        offset = 0
        for count in self._action_counts:
            per_building.append(list(flat_action[offset:offset + count]))
            offset += count

        obs, reward, done, info = self.env.step(per_building)
        return self.get_observation(obs), sum(reward), done, info

    def render(self, mode='human'):
        """Delegate rendering to the wrapped environment."""
        return self.env.render(mode)
if __name__ == "__main__":
    import torch as th

    # Enable autograd anomaly detection while the NaN failure is investigated.
    th.autograd.set_detect_anomaly(True)

    city_env = CityLearnEnv(schema="citylearn_challenge_2020_climate_zone_1")
    env = EnvCityGym(city_env)

    agent = PPO(policy=MlpPolicy, env=env)
    agent.learn(total_timesteps=100)

    # Roll out one episode with the trained policy. Stable-Baselines3 agents
    # expose ``predict`` for inference; it returns ``(action, state)`` and
    # the second item is only relevant for recurrent policies.
    state = env.reset()
    done = False
    while not done:
        action, _ = agent.predict(state)
        state, _, done, _ = env.step(action)

    # ``EnvCityGym`` defines no ``cost`` method, so the call is made on the
    # wrapped environment instead.
    # NOTE(review): confirm that the installed CityLearn version provides
    # ``cost()`` on ``CityLearnEnv``.
    city_env.cost()