diff --git a/games/wordle/master.py b/games/wordle/master.py
index 53ac2211942c284ed7517f6dc6952072129c9db4..4a8c94be1e0217c9d749a93e299d470cf274e5a1 100644
--- a/games/wordle/master.py
+++ b/games/wordle/master.py
@@ -2,7 +2,7 @@ from typing import List, Tuple, Dict
 import numpy as np
 
 from backends import Model, HumanModel
-from clemgame.clemgame import GameMaster, GameBenchmark
+from clemgame.clemgame import GameMaster, GameBenchmark, GameScorer
 from clemgame import get_logger
 import clemgame.metrics as metrics
 from games.wordle.game import WordleGame
@@ -19,8 +19,6 @@ class WordleGameMaster(GameMaster):
         self.config = experiment
         self.player_model_names = [player_model.get_name() for player_model in player_models]
 
-        self.cm = ComputeMetrics()
-
     def setup(self, game_id, target_word, target_word_clue, target_word_difficulty):
         self.game_id = game_id
 
@@ -475,6 +473,65 @@ class WordleGameMaster(GameMaster):
             f"game_result = {self.game_final_status}", data_for_computation
         )
 
+
+class WordleGameScorer(GameScorer):
+
+    def __init__(self, experiment: Dict, game_instance: Dict):
+        super().__init__(GAME_NAME, experiment, game_instance)
+        self.cm = ComputeMetrics()
+
+    def compute_scores(self, episode_interactions: Dict) -> None:
+        for key, val in episode_interactions.items():
+            if key == "turns":
+                # Scoring data lives in the last entry of the last turn, under 'action' -> 'data_for_computation'
+                if (
+                    val
+                    and val[-1]
+                    and "action" in val[-1][-1]
+                    and "data_for_computation" in val[-1][-1]["action"]
+                ):
+                    data_to_compute_scores = val[-1][-1]["action"][
+                        "data_for_computation"
+                    ]
+                    if data_to_compute_scores:
+                        aborted, loss = self._compute_game_status(
+                            data_to_compute_scores["game_final_status"]
+                        )
+                        self._compute_req_count(
+                            data_to_compute_scores["guesser_req_count"],
+                            data_to_compute_scores["critic_req_count"],
+                            data_to_compute_scores["guesser_parsed_req_count"],
+                            data_to_compute_scores["critic_parsed_req_count"],
+                            data_to_compute_scores["turns_req_count"],
+                            data_to_compute_scores["turns_parse_count"],
+                        )
+                        self._compute_game_specific_metrics(
+                            aborted,
+                            loss,
+                            data_to_compute_scores["turns_guess_feedback"],
+                            data_to_compute_scores["use_critic"],
+                            data_to_compute_scores["critic_guesses_change"],
+                            data_to_compute_scores["target_word_difficulty"],
+                        )
+                        return
+
+    def _compute_game_status(self, status):
+        aborted = 0
+        loss = 0
+        success = 0
+
+        if status == "ABORTED":
+            aborted = 1
+        elif status == "LOSS":
+            loss = 1
+        else:
+            success = 1
+
+        self.log_episode_score(metrics.METRIC_ABORTED, aborted)
+        self.log_episode_score(metrics.METRIC_LOSE, loss)
+        self.log_episode_score(metrics.METRIC_SUCCESS, success)
+        return aborted, loss
+
     def _compute_req_count(
         self,
         guesser_req_count,
@@ -530,23 +587,6 @@ class WordleGameMaster(GameMaster):
             for idx, score in enumerate(turns_violate_count):
                 self.log_turn_score(idx + 1, "Violated Request Count", score)
 
-    def _compute_game_status(self, status):
-        aborted = 0
-        loss = 0
-        success = 0
-
-        if status == "ABORTED":
-            aborted = 1
-        elif status == "LOSS":
-            loss = 1
-        else:
-            success = 1
-
-        self.log_episode_score(metrics.METRIC_ABORTED, aborted)
-        self.log_episode_score(metrics.METRIC_LOSE, loss)
-        self.log_episode_score(metrics.METRIC_SUCCESS, success)
-        return aborted, loss
-
     def _compute_game_specific_metrics(
         self,
         aborted,
@@ -610,7 +650,7 @@ class WordleGameMaster(GameMaster):
         self.log_episode_score(metrics.BENCH_SCORE, speed)
         self.log_episode_score("repeats guess", repeats_guess)
         self.log_episode_score("total guess repetitions", num_guess_repeats)
-        self.log_key("Target Word Difficulty", target_word_difficulty)
+        # TODO: scoring should not change the interaction, so this call is disabled: self.log_key("Target Word Difficulty", target_word_difficulty)
 
         for idx, score in enumerate(turn_score):
             self.log_turn_score(idx + 1, "closeness score", score)
@@ -665,41 +705,6 @@ class WordleGameMaster(GameMaster):
                         "Non-Repetition-Guesser-On-Critic-Disagreement", 0
                     )
 
-    def compute_scores(self, episode_interactions: Dict) -> None:
-        for key, val in episode_interactions.items():
-            if key == "turns":
-                # Look for last turn data and in that 'action' key
-                if (
-                    val
-                    and val[-1]
-                    and "action" in val[-1][-1]
-                    and "data_for_computation" in val[-1][-1]["action"]
-                ):
-                    data_to_compute_scores = val[-1][-1]["action"][
-                        "data_for_computation"
-                    ]
-                    if data_to_compute_scores:
-                        aborted, loss = self._compute_game_status(
-                            data_to_compute_scores["game_final_status"]
-                        )
-                        self._compute_req_count(
-                            data_to_compute_scores["guesser_req_count"],
-                            data_to_compute_scores["critic_req_count"],
-                            data_to_compute_scores["guesser_parsed_req_count"],
-                            data_to_compute_scores["critic_parsed_req_count"],
-                            data_to_compute_scores["turns_req_count"],
-                            data_to_compute_scores["turns_parse_count"],
-                        )
-                        self._compute_game_specific_metrics(
-                            aborted,
-                            loss,
-                            data_to_compute_scores["turns_guess_feedback"],
-                            data_to_compute_scores["use_critic"],
-                            data_to_compute_scores["critic_guesses_change"],
-                            data_to_compute_scores["target_word_difficulty"],
-                        )
-                        return
-
 
 class WordleGameBenchmark(GameBenchmark):
     def __init__(self):
@@ -711,6 +716,9 @@ class WordleGameBenchmark(GameBenchmark):
     def create_game_master(self, experiment: Dict, player_models: List[Model]) -> GameMaster:
         return WordleGameMaster(self.name, experiment, player_models)
 
+    def create_game_scorer(self, experiment: Dict, game_instance: Dict) -> GameScorer:
+        return WordleGameScorer(experiment, game_instance)
+
     def is_single_player(self) -> bool:
         return True
 
diff --git a/games/wordle_withclue/master.py b/games/wordle_withclue/master.py
index 5c3a746090234cb572351525df125e12a0447510..aaa40748a108cca5a07dc2d33295b8b94ef27708 100644
--- a/games/wordle_withclue/master.py
+++ b/games/wordle_withclue/master.py
@@ -1,8 +1,8 @@
 from typing import Dict, List
 
 from backends import Model
-from clemgame.clemgame import GameBenchmark, GameMaster
-from games.wordle.master import WordleGameMaster
+from clemgame.clemgame import GameBenchmark, GameMaster, GameScorer
+from games.wordle.master import WordleGameMaster, WordleGameScorer
 
 # this will resolve into subdirectories to find the instances
 GAME_NAME = "wordle_withclue"
@@ -18,5 +18,8 @@ class WordleWithClueGameBenchmark(GameBenchmark):
     def create_game_master(self, experiment: Dict, player_models: List[Model]) -> GameMaster:
         return WordleGameMaster(self.name, experiment, player_models)
 
+    def create_game_scorer(self, experiment: Dict, game_instance: Dict) -> GameScorer:
+        return WordleGameScorer(experiment, game_instance)
+
     def is_single_player(self) -> bool:
         return True
diff --git a/games/wordle_withcritic/master.py b/games/wordle_withcritic/master.py
index 79632d3939e5ce9cd4326a280b536b5722936644..4d5cd6c0c4ab20aaa3c1690982be25aada50ce5f 100644
--- a/games/wordle_withcritic/master.py
+++ b/games/wordle_withcritic/master.py
@@ -1,8 +1,8 @@
 from typing import Dict, List
 
 from backends import Model
-from clemgame.clemgame import GameBenchmark, GameMaster
-from games.wordle.master import WordleGameMaster
+from clemgame.clemgame import GameBenchmark, GameMaster, GameScorer
+from games.wordle.master import WordleGameMaster, WordleGameScorer
 
 GAME_NAME = "wordle_withcritic"
 
@@ -16,3 +16,6 @@ class WordleWithClueAndCriticGameBenchmark(GameBenchmark):
 
     def create_game_master(self, experiment: Dict, player_models: List[Model]) -> GameMaster:
         return WordleGameMaster(self.name, experiment, player_models)
+
+    def create_game_scorer(self, experiment: Dict, game_instance: Dict) -> GameScorer:
+        return WordleGameScorer(experiment, game_instance)
\ No newline at end of file