diff --git a/pom.xml b/pom.xml index aa018d88e5834b78f68ce276a0bddee85c7f9c1e..009b4653f9718585d2ebca133236ccfdb2d85ab0 100644 --- a/pom.xml +++ b/pom.xml @@ -69,7 +69,7 @@ <database>h2</database> <databaseGroupId>com.h2database</databaseGroupId> <databaseArtifactId>h2</databaseArtifactId> - <databaseVersion>1.3.172</databaseVersion> + <databaseVersion>1.4.178</databaseVersion> <databaseDriver>org.h2.Driver</databaseDriver> <databaseUrl>jdbc:h2:file:${basedir}/src/main/webapp/WEB-INF/db/fess</databaseUrl> <databaseTestUrl>jdbc:h2:file:${basedir}/target/test-classes/db/fess</databaseTestUrl> @@ -83,7 +83,7 @@ <database>mysql</database> <databaseGroupId>mysql</databaseGroupId> <databaseArtifactId>mysql-connector-java</databaseArtifactId> - <databaseVersion>5.1.30</databaseVersion> + <databaseVersion>5.1.31</databaseVersion> <databaseDriver>com.mysql.jdbc.Driver</databaseDriver> <databaseUrl>jdbc:mysql://localhost:3306/fess_db?noDatetimeStringSync=true&zeroDateTimeBehavior=convertToNull&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true</databaseUrl> <databaseTestUrl>jdbc:mysql://localhost:3306/fess_testdb?noDatetimeStringSync=true&zeroDateTimeBehavior=convertToNull&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true</databaseTestUrl> @@ -108,7 +108,7 @@ </profiles> <properties> <dbflute.version>1.0.4K</dbflute.version> - <s2robot.version>0.7.2</s2robot.version> + <s2robot.version>0.8.0-SNAPSHOT</s2robot.version> <solr.version>4.8.1</solr.version> <slf4j.version>1.7.7</slf4j.version> <poi.version>3.10-FINAL</poi.version> @@ -601,15 +601,15 @@ <scope>provided</scope> </dependency> <dependency> - <groupId>javax.servlet.jsp</groupId> - <artifactId>jsp-api</artifactId> - <version>2.2</version> - <scope>provided</scope> + <groupId>javax.servlet.jsp</groupId> + <artifactId>jsp-api</artifactId> + <version>2.2</version> + <scope>provided</scope> </dependency> <dependency> <groupId>javax.mail</groupId> <artifactId>javax.mail-api</artifactId> - <version>1.5.1</version> + <version>1.5.2</version> </dependency> <dependency> <groupId>javax.activation</groupId> @@ -658,7 +658,7 @@ <dependency> <groupId>args4j</groupId> <artifactId>args4j</artifactId> - <version>2.0.26</version> + <version>2.0.29</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> @@ -688,7 +688,7 @@ <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> - <version>1.8.4</version> + <version>1.8.6</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> @@ -714,22 +714,22 @@ <dependency> <groupId>org.javassist</groupId> <artifactId>javassist</artifactId> - <version>3.18.1-GA</version> + <version>3.18.2-GA</version> </dependency> <dependency> <groupId>net.arnx</groupId> <artifactId>jsonic</artifactId> - <version>1.3.3</version> + <version>1.3.5</version> </dependency> <dependency> <groupId>com.github.jknack</groupId> <artifactId>handlebars</artifactId> - <version>1.3.0</version> + <version>1.3.1</version> </dependency> <dependency> <groupId>org.codehaus.groovy</groupId> <artifactId>groovy-all</artifactId> - <version>2.2.2</version> + <version>2.3.3</version> </dependency> <dependency> <groupId>com.github.detro</groupId> diff --git a/src/main/config/h2/fess.ddl b/src/main/config/h2/fess.ddl index 3ab4d5a8280d30ce35460b4b0bfe66c923e9625a..0702f5d1f56bc1bd45080bc83de477e07c03d1c0 100644 --- a/src/main/config/h2/fess.ddl +++ b/src/main/config/h2/fess.ddl @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS CLICK_LOG; DROP TABLE IF EXISTS LABEL_TYPE_TO_ROLE_TYPE_MAPPING; DROP TABLE IF EXISTS SEARCH_LOG; DROP TABLE IF EXISTS USER_INFO; -DROP TABLE IF EXISTS DATA_CONFIG_TO_BROWSER_TYPE_MAPPING +DROP TABLE IF EXISTS DATA_CONFIG_TO_BROWSER_TYPE_MAPPING; DROP TABLE IF EXISTS DATA_CONFIG_TO_LABEL_TYPE_MAPPING; DROP TABLE IF EXISTS DATA_CONFIG_TO_ROLE_TYPE_MAPPING; DROP TABLE IF EXISTS DATA_CRAWLING_CONFIG; diff --git a/src/main/h2/webapp/WEB-INF/db/fess.mv.db b/src/main/h2/webapp/WEB-INF/db/fess.mv.db new file mode 100644 index 0000000000000000000000000000000000000000..c3c2f19efd3ff820fb2dc7edb3e80a5e0cedfd70 Binary files /dev/null and b/src/main/h2/webapp/WEB-INF/db/fess.mv.db differ diff --git a/src/main/h2/webapp/WEB-INF/db/robot.mv.db b/src/main/h2/webapp/WEB-INF/db/robot.mv.db new file mode 100644 index 0000000000000000000000000000000000000000..46277a00ad8d5c3e01233ab56c0dc69ad76401c3 Binary files /dev/null and b/src/main/h2/webapp/WEB-INF/db/robot.mv.db differ diff --git a/src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java b/src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java index b51ecf2770345778b886c9d642e42162558edc71..2b20498e3c7aae000de61b60799d1a336e191265 100644 --- a/src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java +++ b/src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java @@ -34,6 +34,7 @@ import org.codelibs.solr.lib.SolrGroup; import org.seasar.framework.container.SingletonS2Container; import org.seasar.framework.util.SerializeUtil; import org.seasar.robot.RobotSystemException; +import org.seasar.robot.builder.RequestDataBuilder; import org.seasar.robot.client.S2RobotClient; import org.seasar.robot.client.S2RobotClientFactory; import org.seasar.robot.entity.ResponseData; @@ -196,7 +197,9 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { } final long startTime = System.currentTimeMillis(); - final ResponseData responseData = client.doGet(url); + final ResponseData responseData = client + .execute(RequestDataBuilder.newRequestData().get() + .url(url).build()); responseData.setExecutionTime(System.currentTimeMillis() - startTime); responseData.setSessionId((String) dataMap diff --git a/src/main/java/jp/sf/fess/helper/CrawlingConfigHelper.java b/src/main/java/jp/sf/fess/helper/CrawlingConfigHelper.java index e31be865a0505c8be71659caea1b700271af79c7..7baf05cb8952762ddb83bc82ed594c568552e0ba 100644 --- a/src/main/java/jp/sf/fess/helper/CrawlingConfigHelper.java +++ b/src/main/java/jp/sf/fess/helper/CrawlingConfigHelper.java @@ -42,6 +42,7 @@ import jp.sf.fess.util.ComponentUtil; import org.apache.commons.io.IOUtils; import org.seasar.framework.container.SingletonS2Container; import org.seasar.framework.util.Base64Util; +import org.seasar.robot.builder.RequestDataBuilder; import org.seasar.robot.client.S2RobotClient; import org.seasar.robot.client.S2RobotClientFactory; import org.seasar.robot.entity.ResponseData; @@ -179,7 +180,8 @@ public class CrawlingConfigHelper implements Serializable { throw new FessSystemException("No S2RobotClient: " + configIdObj + ", url: " + url); } - final ResponseData responseData = client.doGet(url); + final ResponseData responseData = client.execute(RequestDataBuilder + .newRequestData().get().url(url).build()); final HttpServletResponse response = ResponseUtil.getResponse(); writeFileName(response, responseData); writeContentType(response, responseData); diff --git a/src/main/java/jp/sf/fess/robot/FessS2RobotThread.java b/src/main/java/jp/sf/fess/robot/FessS2RobotThread.java index 449d6708d7370249460a066344fea0a84bc5503f..724e7dd05236801e30f9959ae219e0947bfce400 100644 --- a/src/main/java/jp/sf/fess/robot/FessS2RobotThread.java +++ b/src/main/java/jp/sf/fess/robot/FessS2RobotThread.java @@ -47,8 +47,10 @@ import org.codelibs.solr.lib.SolrGroup; import org.codelibs.solr.lib.SolrGroupManager; import org.codelibs.solr.lib.policy.QueryType; import org.seasar.robot.S2RobotThread; +import org.seasar.robot.builder.RequestDataBuilder; import org.seasar.robot.client.S2RobotClient; import org.seasar.robot.client.smb.SmbClient; +import org.seasar.robot.entity.RequestData; import org.seasar.robot.entity.ResponseData; import org.seasar.robot.entity.UrlQueue; import org.seasar.robot.log.LogType; @@ -88,7 +90,9 @@ public class FessS2RobotThread extends S2RobotThread { ResponseData responseData = null; try { // head method - responseData = client.doHead(urlQueue.getUrl()); + responseData = client + .execute(RequestDataBuilder.newRequestData().head() + .url(urlQueue.getUrl()).build()); if (responseData == null) { return true; } @@ -217,7 +221,7 @@ public class FessS2RobotThread extends S2RobotThread { } protected void storeChildUrlsToQueue(final UrlQueue urlQueue, - final Set<String> childUrlSet) { + final Set<RequestData> childUrlSet) { if (childUrlSet != null) { synchronized (robotContext.getAccessCountLock()) { // add an url @@ -229,7 +233,7 @@ public class FessS2RobotThread extends S2RobotThread { } @SuppressWarnings("unchecked") - protected Set<String> getAnchorSet(final Object obj) { + protected Set<RequestData> getAnchorSet(final Object obj) { List<String> anchorList; if (obj instanceof String) { anchorList = new ArrayList<String>(); @@ -244,9 +248,10 @@ public class FessS2RobotThread extends S2RobotThread { return null; } - final Set<String> childUrlSet = new LinkedHashSet<String>(); + final Set<RequestData> childUrlSet = new LinkedHashSet<>(); for (final String anchor : anchorList) { - childUrlSet.add(anchor); + childUrlSet.add(RequestDataBuilder.newRequestData().get() + .url(anchor).build()); } return childUrlSet; } @@ -294,7 +299,7 @@ public class FessS2RobotThread extends S2RobotThread { return null; } - protected Set<String> getChildUrlSet(final String id) { + protected Set<RequestData> getChildUrlSet(final String id) { final SolrGroupManager solrGroupManager = ComponentUtil .getSolrGroupManager(); final SolrGroup solrGroup = solrGroupManager @@ -313,11 +318,12 @@ public class FessS2RobotThread extends S2RobotThread { if (logger.isDebugEnabled()) { logger.debug("Found solr documents: " + docList); } - final Set<String> urlSet = new HashSet<String>(docList.size()); + final Set<RequestData> urlSet = new HashSet<>(docList.size()); for (final SolrDocument doc : docList) { final Object obj = doc.get("url"); if (obj != null) { - urlSet.add(obj.toString()); + urlSet.add(RequestDataBuilder.newRequestData().get() + .url(obj.toString()).build()); } } return urlSet; diff --git a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java index 8fcccb15d4a9211117a959c0502b7d3a05908876..59d3a94ce965e8cc0ea91b5403bc99ea7a5b1d17 100644 --- a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java +++ b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java @@ -54,8 +54,10 @@ import org.seasar.framework.util.InputStreamUtil; import org.seasar.framework.util.SerializeUtil; import org.seasar.robot.RobotCrawlAccessException; import org.seasar.robot.RobotSystemException; +import org.seasar.robot.builder.RequestDataBuilder; import org.seasar.robot.client.fs.ChildUrlsException; import org.seasar.robot.entity.AccessResultData; +import org.seasar.robot.entity.RequestData; import org.seasar.robot.entity.ResponseData; import org.seasar.robot.entity.ResultData; import org.seasar.robot.entity.UrlQueue; @@ -66,7 +68,6 @@ import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; -import org.w3c.dom.traversal.NodeIterator; import org.xml.sax.InputSource; public class FessXpathTransformer extends AbstractFessXpathTransformer { @@ -201,8 +202,9 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { final String canonicalUrl = getCanonicalUrl(responseData, document); if (canonicalUrl != null && !canonicalUrl.equals(responseData.getUrl())) { - final Set<String> childUrlSet = new HashSet<String>(); - childUrlSet.add(canonicalUrl); + final Set<RequestData> childUrlSet = new HashSet<>(); + childUrlSet.add(RequestDataBuilder.newRequestData().get() + .url(canonicalUrl).build()); throw new ChildUrlsException(childUrlSet); } } @@ -498,18 +500,21 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { } } - protected List<String> getAnchorList(final Document document, + protected List<RequestData> getAnchorList(final Document document, final ResponseData responseData) { - List<String> anchorList = new ArrayList<String>(); + List<RequestData> anchorList = new ArrayList<>(); final String baseHref = getBaseHref(document); try { final URL url = new URL(baseHref != null ? baseHref : responseData.getUrl()); for (final Map.Entry<String, String> entry : childUrlRuleMap .entrySet()) { - anchorList.addAll(getUrlFromTagAttribute(url, document, + for (String u : getUrlFromTagAttribute(url, document, entry.getKey(), entry.getValue(), - responseData.getCharSet())); + responseData.getCharSet())) { + anchorList.add(RequestDataBuilder.newRequestData().get() + .url(u).build()); + } } anchorList = convertChildUrlList(anchorList); } catch (final Exception e) { @@ -521,20 +526,19 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { } @Override - protected List<String> convertChildUrlList(final List<String> urlList) { - - final List<String> newUrlList = new ArrayList<String>(); + protected List<RequestData> convertChildUrlList( + final List<RequestData> urlList) { if (urlList != null) { - for (String url : urlList) { + for (RequestData requestData : urlList) { + String url = requestData.getUrl(); for (final Map.Entry<String, String> entry : convertUrlMap .entrySet()) { url = url.replaceAll(entry.getKey(), entry.getValue()); } - - newUrlList.add(replaceOverlappingHost(url)); + requestData.setUrl(replaceOverlappingHost(url)); } } - return newUrlList; + return urlList; } public void addPrunedTag(final String tagName) { diff --git a/src/main/webapp/WEB-INF/db/fess.h2.db b/src/main/webapp/WEB-INF/db/fess.h2.db deleted file mode 100644 index 40d8d2327f5d19d2cd84ad12a258162ab201190c..0000000000000000000000000000000000000000 Binary files a/src/main/webapp/WEB-INF/db/fess.h2.db and /dev/null differ diff --git a/src/main/webapp/WEB-INF/db/robot.h2.db b/src/main/webapp/WEB-INF/db/robot.h2.db deleted file mode 100644 index 2241baa8855b4031c658d86b7949ad30b7ae903f..0000000000000000000000000000000000000000 Binary files a/src/main/webapp/WEB-INF/db/robot.h2.db and /dev/null differ diff --git a/src/test/java/jp/sf/fess/transformer/FessXpathTransformerTest.java b/src/test/java/jp/sf/fess/transformer/FessXpathTransformerTest.java index 9e073eebe8fd6ee765bc62d9d1cc415e813fd2f9..1a66b843ada4945053cba0fed74e55e64895308f 100644 --- a/src/test/java/jp/sf/fess/transformer/FessXpathTransformerTest.java +++ b/src/test/java/jp/sf/fess/transformer/FessXpathTransformerTest.java @@ -33,7 +33,9 @@ import javax.xml.transform.stream.StreamResult; import org.cyberneko.html.parsers.DOMParser; import org.seasar.extension.unit.S2TestCase; import org.seasar.framework.container.ComponentNotFoundRuntimeException; +import org.seasar.robot.builder.RequestDataBuilder; import org.seasar.robot.client.fs.ChildUrlsException; +import org.seasar.robot.entity.RequestData; import org.seasar.robot.entity.ResponseData; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -178,32 +180,37 @@ public class FessXpathTransformerTest extends S2TestCase { } public void test_convertChildUrlList() { - List<String> urlList = new ArrayList<String>(); + List<RequestData> urlList = new ArrayList<>(); urlList = fessXpathTransformer.convertChildUrlList(urlList); assertEquals(0, urlList.size()); urlList.clear(); - urlList.add("http://www.example.com"); + urlList.add(RequestDataBuilder.newRequestData().get() + .url("http://www.example.com").build()); urlList = fessXpathTransformer.convertChildUrlList(urlList); assertEquals(1, urlList.size()); - assertEquals("http://www.example.com", urlList.get(0)); + assertEquals("http://www.example.com", urlList.get(0).getUrl()); urlList.clear(); - urlList.add("http://www.example.com"); - urlList.add("http://www.test.com"); + urlList.add(RequestDataBuilder.newRequestData().get() + .url("http://www.example.com").build()); + urlList.add(RequestDataBuilder.newRequestData().get() + .url("http://www.test.com").build()); urlList = fessXpathTransformer.convertChildUrlList(urlList); assertEquals(2, urlList.size()); - assertEquals("http://www.example.com", urlList.get(0)); - assertEquals("http://www.test.com", urlList.get(1)); + assertEquals("http://www.example.com", urlList.get(0).getUrl()); + assertEquals("http://www.test.com", urlList.get(1).getUrl()); urlList.clear(); - urlList.add("feed://www.example.com"); - urlList.add("http://www.test.com"); + urlList.add(RequestDataBuilder.newRequestData().get() + .url("feed://www.example.com").build()); + urlList.add(RequestDataBuilder.newRequestData().get() + .url("http://www.test.com").build()); urlList = fessXpathTransformer.convertChildUrlList(urlList); assertEquals(2, urlList.size()); - assertEquals("http://www.example.com", urlList.get(0)); - assertEquals("http://www.test.com", urlList.get(1)); + assertEquals("http://www.example.com", urlList.get(0).getUrl()); + assertEquals("http://www.test.com", urlList.get(1).getUrl()); } @@ -277,10 +284,10 @@ public class FessXpathTransformerTest extends S2TestCase { transformer.putAdditionalData(dataMap, responseData, document); fail(); } catch (final ChildUrlsException e) { - final Set<String> childUrlList = e.getChildUrlList(); + final Set<RequestData> childUrlList = e.getChildUrlList(); assertEquals(1, childUrlList.size()); assertEquals("http://example.com/hoge", childUrlList.iterator() - .next()); + .next().getUrl()); } data = "<html><link rel=\"canonical\" href=\"http://example.com/hoge\"><body>aaa</body></html>"; @@ -289,10 +296,10 @@ public class FessXpathTransformerTest extends S2TestCase { transformer.putAdditionalData(dataMap, responseData, document); fail(); } catch (final ChildUrlsException e) { - final Set<String> childUrlList = e.getChildUrlList(); + final Set<RequestData> childUrlList = e.getChildUrlList(); assertEquals(1, childUrlList.size()); assertEquals("http://example.com/hoge", childUrlList.iterator() - .next()); + .next().getUrl()); } }