use action aggregation and rewards tracker :)
cove9988 committed Jan 12, 2025
1 parent 9559324 commit 2a7dce9
Showing 11 changed files with 405 additions and 92 deletions.
184 changes: 184 additions & 0 deletions doc/action_aggregation_strategy.md
@@ -0,0 +1,184 @@
1. Define an Action Series Window
You create a fixed-size sliding window (e.g., 1 hour or N steps) to aggregate action signals. Each step appends the latest action to this window, and you evaluate the aggregate behavior of the actions.
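
For instance, a fixed-size window can be kept with collections.deque, which automatically evicts the oldest entry once the window is full. A minimal sketch (the 12-bar window length is purely illustrative):

```python
from collections import deque

# Sliding window of the last 12 raw actions (e.g. 12 five-minute bars ~ 1 hour)
action_window = deque(maxlen=12)

for latest_action in ["buy", "hold", "buy"]:
    action_window.append(latest_action)  # once full, the oldest action is dropped

print(list(action_window))  # the aggregate behavior is evaluated over this window
```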

2. Action Aggregation Logic
Count the Frequency: Count the number of buy, sell, and hold signals within the window.
Set a Threshold: Define thresholds for triggering actions. For example:

Execute a buy only if more than 70% of actions in the window are buy.

Execute a sell if more than 70% of actions in the window are sell.

Otherwise, hold (see the sketch below).
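
A minimal sketch of this counting-and-threshold logic; the aggregate_actions helper, the string action labels, and the 70% threshold are illustrative assumptions, not the project's code:

```python
from collections import Counter, deque

def aggregate_actions(window, threshold=0.7):
    """Aggregate a window of raw action strings into a single signal.

    'buy' or 'sell' is emitted only when it accounts for more than
    `threshold` of the actions in the window; otherwise 'hold'.
    """
    if not window:
        return "hold"
    counts = Counter(window)
    if counts["buy"] / len(window) > threshold:
        return "buy"
    if counts["sell"] / len(window) > threshold:
        return "sell"
    return "hold"

# Example: a 10-step window of raw per-step actions
window = deque(["buy"] * 8 + ["hold", "sell"], maxlen=10)
print(aggregate_actions(window))  # -> "buy" (8/10 = 80% > 70%)
```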

3. Incorporating into PPO
Modify the PPO reward function to account for aggregated actions instead of raw actions at every step. For example:
Reward consistency of aggregated actions.
Penalize frequent changes in aggregated signals.
In the environment, only open positions based on the aggregated action.
4. Advantages
Noise Reduction: Avoids reacting to random, temporary market fluctuations.
Trend-Based Actions: Focuses on broader trends instead of single-step decisions.
Position Management: Reduces the number of simultaneous positions and makes them more deliberate.
5. Optimizations
Dynamic Window Size: Adjust window size based on volatility or time of day.
Weighted Actions: Give more weight to recent actions in the window.
Include Price Trends: Use additional criteria (e.g., moving averages, RSI) in combination with action aggregation.

Below is a full implementation of the ActionAggregator with all three optimizations:

Dynamic Window Size: Adjust the window size based on volatility or time of day.
Weighted Actions: Give more weight to recent actions in the window.
Include Price Trends: Combine action aggregation with price-based indicators like moving averages or RSI.
```python
from collections import deque
import numpy as np

class ActionAggregator:
    def __init__(self, base_window_size=10, volatility_threshold=0.01):
        self.base_window_size = base_window_size
        self.volatility_threshold = volatility_threshold
        self.action_window = deque(maxlen=base_window_size)
        self.weighted_window = deque(maxlen=base_window_size)

    def adjust_window_size(self, volatility):
        """Adjust the window size dynamically based on volatility."""
        new_size = (
            self.base_window_size // 2
            if volatility > self.volatility_threshold
            else self.base_window_size
        )
        if new_size != self.action_window.maxlen:
            # Keep the most recent entries when resizing instead of discarding
            # the whole window (deque retains the last `maxlen` items passed in)
            self.action_window = deque(self.action_window, maxlen=new_size)
            self.weighted_window = deque(self.weighted_window, maxlen=new_size)

    def add_action(self, action, weight=1.0):
        """Add the latest action and its weight to the window."""
        self.action_window.append(action)
        self.weighted_window.append(weight)

    def calculate_price_trend(self, prices):
        """Calculate a simple trend indicator (moving average difference)."""
        if len(prices) < 5:  # Ensure enough data
            return 0
        short_ma = np.mean(prices[-3:])
        long_ma = np.mean(prices[-5:])
        return short_ma - long_ma

    def get_aggregated_action(self, prices):
        """Decide the action based on aggregated signals and price trends."""
        if len(self.action_window) < self.action_window.maxlen:
            return "hold"  # Not enough data yet

        # Sum the weights of each action type in the window
        buy_weight = sum(w for a, w in zip(self.action_window, self.weighted_window) if a == "buy")
        sell_weight = sum(w for a, w in zip(self.action_window, self.weighted_window) if a == "sell")
        hold_weight = sum(w for a, w in zip(self.action_window, self.weighted_window) if a == "hold")

        # Price trend adjustment
        trend = self.calculate_price_trend(prices)

        # Decide based on weighted majority and trend
        if buy_weight > sell_weight and trend > 0:
            return "buy"
        elif sell_weight > buy_weight and trend < 0:
            return "sell"
        else:
            return "hold"


# Example usage
if __name__ == "__main__":
    # Simulate price data and actions
    prices = [1.1010, 1.1020, 1.1030, 1.1015, 1.1040, 1.1050]
    actions = ["buy", "hold", "sell", "buy", "buy", "buy", "hold", "sell", "sell", "buy"]
    volatilities = [0.009, 0.012, 0.008, 0.015, 0.007]  # Simulated volatilities

    aggregator = ActionAggregator(base_window_size=10, volatility_threshold=0.01)

    for i, action in enumerate(actions):
        # Simulate volatility adjustment
        volatility = volatilities[i % len(volatilities)]
        aggregator.adjust_window_size(volatility)

        # Add action with weight
        weight = 1.5 if i > len(actions) // 2 else 1.0  # Example weight strategy
        aggregator.add_action(action, weight)

        # Get aggregated action
        aggregated_action = aggregator.get_aggregated_action(prices[: i + 1])
        print(f"Step {i + 1}: Aggregated Action: {aggregated_action}")

```
Explanation
Dynamic Window Size: The window size is halved when volatility exceeds the volatility_threshold, making the model more responsive during high volatility.
Weighted Actions: Recent actions can be given higher weights, while older actions are assigned lower weights and carry less importance.
Price Trends: A simple moving-average difference is used as the trend indicator; positive trends favor buy, and negative trends favor sell.
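
The trend filter above is a moving-average difference; RSI, mentioned as an alternative criterion, could be combined in the same way. Below is a minimal sketch of a plain (non-smoothed) RSI over raw close prices; the simple_rsi name and the 14-bar period are illustrative assumptions, not part of the project:

```python
import numpy as np

def simple_rsi(prices, period=14):
    """Plain RSI over the last `period` price changes (no Wilder smoothing)."""
    prices = np.asarray(prices, dtype=float)
    if len(prices) < period + 1:
        return 50.0  # neutral value until enough data is available
    deltas = np.diff(prices[-(period + 1):])
    gains = deltas[deltas > 0].sum()
    losses = -deltas[deltas < 0].sum()
    if losses == 0:
        return 100.0
    rs = gains / losses
    return 100.0 - 100.0 / (1.0 + rs)
```

Such a value could then gate the aggregated signal, for example suppressing a buy when RSI is above 70 (overbought) or a sell when it is below 30 (oversold).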

To penalize frequent changes in the aggregated signal and reward consistency, track the stability of the action over time and fold a change penalty and a consistency bonus into the reward. Here's how you can implement this:

Approach
Track Action Stability: Maintain a history of aggregated actions, count how many consecutive actions are the same, and reset the count when the action changes.
Penalize Changes: Each time the aggregated action changes (e.g., from "buy" to "sell" or "hold"), apply a penalty.
Reward Consistency: For every consecutive step with the same action, add a small positive value to the reward.
Incorporate into PPO Reward: Modify the reward function to include the penalties and rewards for action stability.

```python
class ActionStabilityTracker:
    def __init__(self):
        self.last_action = None
        self.consecutive_count = 0
        self.change_penalty = -1.0  # Penalty for frequent changes
        self.consistency_reward = 0.5  # Reward for consistent actions

    def calculate_stability_reward(self, current_action):
        """Calculate reward/penalty based on action stability."""
        if self.last_action is None:
            # First observed action: neither reward nor penalize it
            reward = 0.0
            self.consecutive_count = 1
        elif current_action == self.last_action:
            self.consecutive_count += 1
            reward = self.consistency_reward * self.consecutive_count
        else:
            reward = self.change_penalty
            self.consecutive_count = 1  # Reset the counter for the new action

        self.last_action = current_action
        return reward


# Integrate into the PPO reward
def calculate_ppo_reward(base_reward, current_action, stability_tracker):
    """Calculate the final reward from the base reward and action stability."""
    stability_reward = stability_tracker.calculate_stability_reward(current_action)
    return base_reward + stability_reward


# Example usage
if __name__ == "__main__":
    actions = ["buy", "buy", "sell", "sell", "sell", "hold", "buy", "buy", "sell"]
    base_rewards = [10, 15, 5, 20, 25, -5, 30, 35, 10]  # Example base rewards

    stability_tracker = ActionStabilityTracker()
    total_rewards = []

    for action, base_reward in zip(actions, base_rewards):
        final_reward = calculate_ppo_reward(base_reward, action, stability_tracker)
        total_rewards.append(final_reward)
        print(f"Action: {action}, Base Reward: {base_reward}, Final Reward: {final_reward}")

    print("Total Rewards:", total_rewards)
```
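
To tie the two pieces together, here is a minimal sketch of how the ActionAggregator and ActionStabilityTracker above could be combined when shaping the PPO reward. The price series, raw actions, and base rewards are made-up placeholders, and the loop stands in for the environment step, which is not shown here:

```python
# Assumes ActionAggregator and ActionStabilityTracker from the sections above
# are already defined/imported. All values below are illustrative placeholders.
aggregator = ActionAggregator(base_window_size=6, volatility_threshold=0.01)
tracker = ActionStabilityTracker()

prices = [1.1010, 1.1020, 1.1030, 1.1015, 1.1040, 1.1050, 1.1065, 1.1070]
raw_actions = ["buy", "buy", "hold", "buy", "buy", "sell", "buy", "buy"]
base_rewards = [0.0, 1.0, -0.5, 2.0, 1.5, -1.0, 2.5, 3.0]

for step, (raw, base) in enumerate(zip(raw_actions, base_rewards), start=1):
    aggregator.add_action(raw)
    aggregated = aggregator.get_aggregated_action(prices[:step])
    # Only the aggregated action would be allowed to open a position here.
    reward = base + tracker.calculate_stability_reward(aggregated)
    print(f"step {step}: raw={raw}, aggregated={aggregated}, reward={reward:.2f}")
```
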
2 changes: 1 addition & 1 deletion src/configure.json
@@ -17,7 +17,7 @@
"close_60_sma"
],
"over_night_cash_penalty": 5,
"balance": 1000,
"balance": 10000,
"symbol_col": "symbol",
"time_col": "time",
"random_start": true,
2 changes: 1 addition & 1 deletion src/data_processor.py
@@ -118,7 +118,7 @@ def split_timeserious(df, key_ts='dt', freq='W', symbol=''):
for n, g in df.groupby(pd.Grouper(level=key_ts,freq=freq)):
p =f'./data/split/{symbol}/{freq_name[freq]}'
os.makedirs(p, exist_ok=True)
- #fname = f'{symbol}_{n:%Y%m%d}_{freq}_{count}.csv'
+ # fname = f'{symbol}_{n:%Y%m%d}_{freq}_{count}.csv'
fname = f'{symbol}_{n:%Y}_{count}.csv'
fn = f'{p}/{fname}'
print(f'save to:{fn} -- row {len(g)}')
4 changes: 2 additions & 2 deletions src/eval.py
@@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)

features = ['open', 'high', 'low', 'close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
- model_file = '/home/paulg/github/tradesformer/data/model/AUDUSD/weekly/AUDUSD_2023_66.zip'
+ model_file = '/home/paulg/github/tradesformer/data/model/AUDUSD/weekly/AUDUSD_2024_120.zip'
# csv_file = "/home/paulg/github/tradesformer/data/split/EURUSD/weekly/EURUSD_2024_103.csv"
data_directory = "/home/paulg/github/tradesformer/data/split/AUDUSD/weekly"
csv_files = glob.glob(os.path.join(data_directory, "*.csv"))
@@ -28,7 +28,7 @@
total_buy = 0
total_sell = 0
totoal_rewards = 0
- mode = 'human' #'graph'
+ mode = 'graph' #'graph', 'human'
while not done:
action, _states = model.predict(observation)
observation, reward, terminated, truncated, info = env.step(action)