From 63da35d257d990e38bf77c4ccb2c90ff1f5210a9 Mon Sep 17 00:00:00 2001
From: jasongwq <jasongwq@gmail.com>
Date: Sat, 3 Sep 2022 08:52:18 +0800
Subject: [PATCH] WebCrawl: support URL encoding of included/excluded URLs and disabling it via #DISABLE_URL_ENCODE (#2678)

---
 .../fess/helper/WebFsIndexHelper.java         | 28 ++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
index d16fe85d0..2dc7491df 100644
--- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
@@ -162,22 +162,42 @@ public class WebFsIndexHelper {
             }));
 
             // set included urls
-            split(includedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(urlValue -> {
-                if (!urlValue.startsWith("#")) {
+            final AtomicBoolean urlEncodeDisabled = new AtomicBoolean(false);
+            split(includedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(line -> {
+                if (!line.startsWith("#")) {
+                    final String urlValue;
+                    if (urlEncodeDisabled.get()) {
+                        urlValue = line;
+                        urlEncodeDisabled.set(false);
+                    } else {
+                        urlValue = systemHelper.encodeUrlFilter(line);
+                    }
                     crawler.addIncludeFilter(urlValue);
                     if (logger.isInfoEnabled()) {
                         logger.info("Included URL: {}", urlValue);
                     }
+                } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
+                    urlEncodeDisabled.set(true);
                 }
             }));
 
             // set excluded urls
-            split(excludedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(urlValue -> {
-                if (!urlValue.startsWith("#")) {
+            urlEncodeDisabled.set(false);
+            split(excludedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(line -> {
+                if (!line.startsWith("#")) {
+                    final String urlValue;
+                    if (urlEncodeDisabled.get()) {
+                        urlValue = line;
+                        urlEncodeDisabled.set(false);
+                    } else {
+                        urlValue = systemHelper.encodeUrlFilter(line);
+                    }
                     crawler.addExcludeFilter(urlValue);
                     if (logger.isInfoEnabled()) {
                         logger.info("Excluded URL: {}", urlValue);
                     }
+                } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
+                    urlEncodeDisabled.set(true);
                 }
             }));
 
-- 
GitLab