Skip to content
This repository was archived by the owner on Nov 14, 2019. It is now read-only.

Commit acbe544

Browse files
committed
Use a clean crawler instance and modify error handling
1 parent 9ce99a0 commit acbe544

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

src/main/java/org/codelibs/riverweb/RiverWeb.java

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
import org.codelibs.fess.crawler.client.http.RequestHeader;
3232
import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
3333
import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
34-
import org.codelibs.fess.crawler.exception.EsAccessException;
3534
import org.codelibs.riverweb.app.service.ScriptService;
3635
import org.codelibs.riverweb.entity.RiverConfig;
3736
import org.codelibs.riverweb.interval.WebRiverIntervalController;
@@ -176,6 +175,7 @@ private int execute() {
176175
if (configId instanceof String) {
177176
print("Config %s is started with Session %s.", configId, sessionId);
178177
try {
178+
crawler = SingletonLaContainer.getComponent(Crawler.class);
179179
crawl(configId.toString(), sessionId);
180180
} finally {
181181
print("Config %s is finished.", configId);
@@ -359,15 +359,23 @@ private int crawl(String configId, String sessionId) {
359359
final List<String> includeFilterList = (List<String>) crawlSettings.get("include_urls");
360360
if (includeFilterList != null) {
361361
for (final String regex : includeFilterList) {
362-
crawler.addIncludeFilter(regex);
362+
try {
363+
crawler.addIncludeFilter(regex);
364+
} catch (DocumentAlreadyExistsException e) {
365+
logger.warn(regex + " exists in " + sessionId);
366+
}
363367
}
364368
}
365369
// exclude regex
366370
@SuppressWarnings("unchecked")
367371
final List<String> excludeFilterList = (List<String>) crawlSettings.get("exclude_urls");
368372
if (excludeFilterList != null) {
369373
for (final String regex : excludeFilterList) {
370-
crawler.addExcludeFilter(regex);
374+
try {
375+
crawler.addExcludeFilter(regex);
376+
} catch (DocumentAlreadyExistsException e) {
377+
logger.warn(regex + " exists in " + sessionId);
378+
}
371379
}
372380
}
373381

0 commit comments

Comments
 (0)