Zimbra · ashishkataria86 · Nov 21, 2024 · silentsakky · Nov 23, 2024 · ashishkataria86
diff --git a/build.xml b/build.xml
@@ -28,7 +28,7 @@
  <property name='build.data.dir'	value='${build.dir}/data/output' />
  <property name='build.lib.dir'     value='${build.dir}/lib' />
 
- <property name='jar.file'         value='${build.lib.dir}/${name}-${version}z2.jar'/>
+ <property name='jar.file'         value='${build.lib.dir}/${name}-${version}z3.jar'/>
 
  <target name='compile'
 		description="compiles the source"

diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java
@@ -97,6 +97,28 @@ public AntiSamyDOMScanner(Policy policy) {
     public AntiSamyDOMScanner() throws PolicyException {
         super();
     }
+    /**
+     * Decodes each backslash followed by exactly four hex digits into the UTF-16 code unit
+     * with that value, then removes every backslash left in the result.
+     * @param input text to decode
+     * @return the decoded text, or {@code input} itself if decoding throws (e.g. null input)
+     */
+    private String decodeUnicodeEscapes(String input) {
+        try {
+            StringBuffer decodedString = new StringBuffer();
+            Matcher matcher = Pattern.compile("\\\\([0-9a-fA-F]{4})").matcher(input);
+            while (matcher.find()) {
+                String decoded = String.valueOf((char) Integer.parseInt(matcher.group(1), 16));
+                // Quote the replacement: a decoded '$' or '\' is otherwise parsed as replacement
+                // syntax, throws, and the catch below would return the input undecoded.
+                matcher.appendReplacement(decodedString, Matcher.quoteReplacement(decoded));
+            }
+            matcher.appendTail(decodedString);
+            return decodedString.toString().replaceAll("\\\\", "");
+        } catch (Exception e) {
+            return input; // best effort: keep the original text when decoding fails
+        }
+    }
 
     /**
      * This is where the magic lives.
@@ -167,7 +189,7 @@ public CleanResults scan(String html) throws ScanException {
              */
 
 
-            final String trimmedHtml = html;
+            final String trimmedHtml = decodeUnicodeEscapes(html);
 
             StringWriter out = new StringWriter();