From 63da35d257d990e38bf77c4ccb2c90ff1f5210a9 Mon Sep 17 00:00:00 2001 From: jasongwq <jasongwq@gmail.com> Date: Sat, 3 Sep 2022 08:52:18 +0800 Subject: [PATCH] WebCrawl support URLEncode and Disabled (#2678) --- .../fess/helper/WebFsIndexHelper.java | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java index d16fe85d0..2dc7491df 100644 --- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java @@ -162,22 +162,42 @@ public class WebFsIndexHelper { })); // set included urls - split(includedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(urlValue -> { - if (!urlValue.startsWith("#")) { + final AtomicBoolean urlEncodeDisabled = new AtomicBoolean(false); + split(includedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(line -> { + if (!line.startsWith("#")) { + final String urlValue; + if (urlEncodeDisabled.get()) { + urlValue = line; + urlEncodeDisabled.set(false); + } else { + urlValue = systemHelper.encodeUrlFilter(line); + } crawler.addIncludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Included URL: {}", urlValue); } + } else if (line.startsWith("#DISABLE_URL_ENCODE")) { + urlEncodeDisabled.set(true); } })); // set excluded urls - split(excludedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(urlValue -> { - if (!urlValue.startsWith("#")) { + urlEncodeDisabled.set(false); + split(excludedUrlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(line -> { + if (!line.startsWith("#")) { + final String urlValue; + if (urlEncodeDisabled.get()) { + urlValue = line; + urlEncodeDisabled.set(false); + } else { + urlValue = systemHelper.encodeUrlFilter(line); + } crawler.addExcludeFilter(urlValue); if (logger.isInfoEnabled()) { logger.info("Excluded URL: {}", urlValue); } + } else if (line.startsWith("#DISABLE_URL_ENCODE")) { + urlEncodeDisabled.set(true); } })); -- GitLab