diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index 45c4d2e068b3d8eda89d72e361153440d39f7d63..25f13ec4668e7a5a8577d818dcebc2ee5380bbcc 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -36,7 +36,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,7 +52,6 @@ import org.codelibs.fesen.search.sort.SortBuilder; import org.codelibs.fesen.search.sort.SortBuilders; import org.codelibs.fesen.search.sort.SortOrder; import org.codelibs.fess.Constants; -import org.codelibs.fess.exception.FessSystemException; import org.codelibs.fess.helper.PermissionHelper; import org.codelibs.fess.mylasta.action.FessUserBean; import org.codelibs.fess.taglib.FessFunctions; @@ -759,28 +757,7 @@ public interface FessProp { default PrunedTag[] getCrawlerDocumentHtmlPrunedTagsAsArray() { PrunedTag[] tags = (PrunedTag[]) propMap.get("crawlerDocumentHtmlPrunedTags"); if (tags == null) { - tags = split(getCrawlerDocumentHtmlPrunedTags(), ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> { - final Pattern pattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.\\w+)?(#\\w+)?"); - final Matcher matcher = pattern.matcher(v.trim()); - if (matcher.matches()) { - final PrunedTag tag = new PrunedTag(matcher.group(1)); - if (matcher.group(2) != null) { - final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1); - final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair); - if (equalMatcher.matches()) { - tag.setAttr(equalMatcher.group(1), equalMatcher.group(2)); - } - } - if (matcher.group(3) != null) { - tag.setCss(matcher.group(3).substring(1)); - } - if (matcher.group(4) != null) { - tag.setId(matcher.group(4).substring(1)); - } - return tag; - } - throw new FessSystemException("Invalid pruned tag: " + v); - }).toArray(n -> new PrunedTag[n])); + tags = PrunedTag.parse(getCrawlerDocumentHtmlPrunedTags()); propMap.put("crawlerDocumentHtmlPrunedTags", tags); } return tags; diff --git a/src/main/java/org/codelibs/fess/util/PrunedTag.java b/src/main/java/org/codelibs/fess/util/PrunedTag.java index aa793ce7d05d13b87937f3ab34fab45673ea55a1..4e4672b591688f8375580f1324ad29bcfb57b39b 100644 --- a/src/main/java/org/codelibs/fess/util/PrunedTag.java +++ b/src/main/java/org/codelibs/fess/util/PrunedTag.java @@ -15,11 +15,16 @@ */ package org.codelibs.fess.util; +import static org.codelibs.core.stream.StreamUtil.split; + import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.codelibs.core.lang.StringUtil; import org.codelibs.core.stream.StreamUtil; +import org.codelibs.fess.exception.FessSystemException; import org.w3c.dom.Node; public class PrunedTag { @@ -105,4 +110,29 @@ public class PrunedTag { public String toString() { return "PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]"; } + + public static PrunedTag[] parse(final String value) { + return split(value, ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> { + final Pattern pattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.[\\w\\-]+)?(#[\\w\\-]+)?"); + final Matcher matcher = pattern.matcher(v.trim()); + if (matcher.matches()) { + final PrunedTag tag = new PrunedTag(matcher.group(1)); + if (matcher.group(2) != null) { + final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1); + final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair); + if (equalMatcher.matches()) { + tag.setAttr(equalMatcher.group(1), equalMatcher.group(2)); + } + } + if (matcher.group(3) != null) { + tag.setCss(matcher.group(3).substring(1)); + } + if (matcher.group(4) != null) { + tag.setId(matcher.group(4).substring(1)); + } + return tag; + } + throw new FessSystemException("Invalid pruned tag: " + v); + }).toArray(n -> new PrunedTag[n])); + } } diff --git a/src/test/java/org/codelibs/fess/util/PrunedTagTest.java b/src/test/java/org/codelibs/fess/util/PrunedTagTest.java index db8c6d00e5284472133892820aef757d4a9dd70c..baf16de7ca6c135e7cf0a5d498ab22c360fda441 100644 --- a/src/test/java/org/codelibs/fess/util/PrunedTagTest.java +++ b/src/test/java/org/codelibs/fess/util/PrunedTagTest.java @@ -44,6 +44,39 @@ public class PrunedTagTest extends UnitFessTestCase { prunedtag.setCss(css); assertEquals("PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]", prunedtag.toString()); + } + + public void test_parse() { + PrunedTag[] tags = PrunedTag.parse(""); + assertEquals(0, tags.length); + + tags = PrunedTag.parse("a"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=null, css=null, attrName=null, attrValue=null]", tags[0].toString()); + + tags = PrunedTag.parse("a#test"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=test, css=null, attrName=null, attrValue=null]", tags[0].toString()); + + tags = PrunedTag.parse("a.test"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=null, css=test, attrName=null, attrValue=null]", tags[0].toString()); + + tags = PrunedTag.parse("a[target=_blank]"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=null, css=null, attrName=target, attrValue=_blank]", tags[0].toString()); + + tags = PrunedTag.parse("a.link,div#123"); + assertEquals(2, tags.length); + assertEquals("PrunedTag [tag=a, id=null, css=link, attrName=null, attrValue=null]", tags[0].toString()); + assertEquals("PrunedTag [tag=div, id=123, css=null, attrName=null, attrValue=null]", tags[1].toString()); + + tags = PrunedTag.parse("a#test-a"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=test-a, css=null, attrName=null, attrValue=null]", tags[0].toString()); + tags = PrunedTag.parse("a.test-a"); + assertEquals(1, tags.length); + assertEquals("PrunedTag [tag=a, id=null, css=test-a, attrName=null, attrValue=null]", tags[0].toString()); } } \ No newline at end of file