Commit ec9b09e
Parent(s): 16f5fe6

dataset v2 and pybullet

Files changed:
- app.py (+1 -1)
- src/backend.py (+57 -73)
app.py CHANGED
@@ -63,7 +63,7 @@ pre, code {
 
 
 REPO_ID = "open-rl-leaderboard/leaderboard"
-RESULTS_REPO = "open-rl-leaderboard/…
+RESULTS_REPO = "open-rl-leaderboard/results_v2"
 
 
 links_md = f"""
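The app.py side of the commit only repoints the UI at the new results_v2 dataset. For context, here is a rough sketch of what consuming that dataset could look like, assuming the row schema the backend writes below (user_id, model_id, sha, status, env_id, episodic_returns); this is illustrative, not the actual app.py code:

from datasets import load_dataset

# Hypothetical consumer of results_v2; the real app.py may aggregate differently.
dataset = load_dataset("open-rl-leaderboard/results_v2", split="train")
for row in dataset:
    if row["status"] == "DONE":  # FAILED rows carry no episodic returns
        mean_return = sum(row["episodic_returns"]) / len(row["episodic_returns"])
        print(f"{row['user_id']}/{row['model_id']} on {row['env_id']}: {mean_return:.1f}")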
src/backend.py CHANGED
@@ -2,11 +2,9 @@ import fnmatch
 import importlib
 import json
 import os
-import …
+import random
 import shutil
 import sys
-import tempfile
-import time
 import zipfile
 from pathlib import Path
 from typing import Optional
@@ -15,7 +13,8 @@ import numpy as np
 import rl_zoo3.import_envs  # noqa: F401 pylint: disable=unused-import
 import torch as th
 import yaml
-from …
+from datasets import load_dataset
+from huggingface_hub import HfApi
 from huggingface_hub.utils import EntryNotFoundError
 from huggingface_sb3 import EnvironmentName, ModelName, ModelRepoId, load_from_hub
 from requests.exceptions import HTTPError
@@ -118,6 +117,16 @@ ALL_ENV_IDS = [
     "Reacher-v4",
     "Swimmer-v4",
     "Walker2d-v4",
+    # PyBullet
+    "AntBulletEnv-v0",
+    "HalfCheetahBulletEnv-v0",
+    "HopperBulletEnv-v0",
+    "HumanoidBulletEnv-v0",
+    "InvertedDoublePendulumBulletEnv-v0",
+    "InvertedPendulumSwingupBulletEnv-v0",
+    "MinitaurBulletEnv-v0",
+    "ReacherBulletEnv-v0",
+    "Walker2DBulletEnv-v0",
 ]
 
 
@@ -504,85 +513,59 @@ def evaluate(
 logger = setup_logger(__name__)
 
 API = HfApi(token=os.environ.get("TOKEN"))
-RESULTS_REPO = "open-rl-leaderboard/…
+RESULTS_REPO = "open-rl-leaderboard/results_v2"
 
 
 def _backend_routine():
     # List only the text classification models
-[4 lines truncated]
-        compatible_models.append((model.modelId, model.sha))
-
-    logger.info(f"Found {len(compatible_models)} compatible models")
-
+    sb3_models = [
+        (model.modelId, model.sha) for model in API.list_models(filter=["reinforcement-learning", "stable-baselines3"])
+    ]
+    logger.info(f"Found {len(sb3_models)} SB3 models")
     # Get the results
-[5 lines truncated]
-    for filename in filenames:
-        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
-        with open(path) as fp:
-            report = json.load(fp)
-        evaluated_models.add((report["config"]["model_id"], report["config"]["model_sha"]))
-
-    # Find the models that are not associated with any results
-    pending_models = list(set(compatible_models) - evaluated_models)
+    dataset = load_dataset(
+        RESULTS_REPO, split="train", download_mode="force_redownload", verification_mode="no_checks"
+    )
+    evaluated_models = [("/".join([x["user_id"], x["model_id"]]), x["sha"]) for x in dataset]
+    pending_models = list(set(sb3_models) - set(evaluated_models))
     logger.info(f"Found {len(pending_models)} pending models")
 
     if len(pending_models) == 0:
         return None
 
+    # Select a random model
+    repo_id, sha = random.choice(pending_models)
+    user_id, model_id = repo_id.split("/")
+    row = {"model_id": model_id, "user_id": user_id, "sha": sha}
+
     # Run an evaluation on the models
-[27 lines truncated]
-    if evaluations is not None:
-        report["results"] = evaluations
-        report["status"] = "DONE"
-    else:
-        report["status"] = "FAILED"
-
-    # Update the results
-    dumped = json.dumps(report, indent=2)
-    path_in_repo = f"{model_id}/results_{sha}.json"
-    local_path = os.path.join(tmp_dir, path_in_repo)
-    os.makedirs(os.path.dirname(local_path), exist_ok=True)
-    with open(local_path, "w") as f:
-        f.write(dumped)
-
-    commits.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))
-
-    API.create_commit(
-        repo_id=RESULTS_REPO, commit_message="Add evaluation results", operations=commits, repo_type="dataset"
-    )
+    model_info = API.model_info(repo_id, revision=sha)
+
+    # Extract the environment IDs from the tags (usually only one)
+    env_ids = pattern_match(model_info.tags, ALL_ENV_IDS)
+    if len(env_ids) > 0:
+        env = env_ids[0]
+        logger.info(f"Running evaluation on {user_id}/{model_id}")
+        algo = model_info.model_index[0]["name"].lower()
+
+        try:
+            episodic_returns = evaluate(user_id, model_id, env, "rl-trained-agents", algo, no_render=True, verbose=1)
+            row["status"] = "DONE"
+            row["env_id"] = env
+            row["episodic_returns"] = episodic_returns
+        except Exception as e:
+            logger.error(f"Error evaluating {model_id}: {e}")
+            row["status"] = "FAILED"
+
+    else:
+        logger.error(f"No environment found for {model_id}")
+        row["status"] = "FAILED"
+
+    dataset = load_dataset(
+        RESULTS_REPO, split="train", download_mode="force_redownload", verification_mode="no_checks"
+    )  # Reload the dataset, in case it was updated
+    dataset = dataset.add_item(row)
+    dataset.push_to_hub(RESULTS_REPO, split="train")
 
 
 def backend_routine():
@@ -593,4 +576,5 @@ def backend_routine():
 
 
 if __name__ == "__main__":
-[1 line truncated]
+    while True:
+        backend_routine()
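The routine above filters model tags against ALL_ENV_IDS with a pattern_match helper that is defined elsewhere in backend.py and not shown in this diff. Given the import fnmatch at the top of the file, it plausibly behaves like the following sketch (an assumption for illustration, not the file's actual definition):

import fnmatch

def pattern_match(patterns, candidates):
    # Assumed behavior: treat each tag as an fnmatch-style pattern and return
    # every known env ID it matches, keeping the order of the candidate list.
    matched = set()
    for pattern in patterns:
        matched.update(fnmatch.filter(candidates, pattern))
    return [c for c in candidates if c in matched]

A literal tag such as "HalfCheetahBulletEnv-v0" contains no wildcards, so it simply matches the identical env ID.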
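The __main__ block now loops forever instead of exiting after one pass (the removed import time also suggests the old run-then-sleep pattern is gone). backend_routine itself is unchanged by this commit, so its body does not appear in the diff; judging from the hunk context it wraps _backend_routine, plausibly with error handling so a single failed pass cannot kill the loop. A hypothetical sketch, not the actual body:

def backend_routine():
    # Hypothetical wrapper: log and swallow errors so the while-True loop
    # in __main__ keeps polling for pending models.
    try:
        _backend_routine()
    except Exception:
        logger.exception("backend routine failed")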