41 lines
1.4 KiB
Java
41 lines
1.4 KiB
Java
package eu.dnetlib.bioschemas.api.scraper;
|
|
|
|
import eu.dnetlib.bioschemas.api.ServiceScrapeDriver;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
import javax.servlet.http.HttpServletRequest;
|
|
|
|
@Component
|
|
public class ScrapingExecutor {
|
|
|
|
private final ScrapingExecution lastScrapingExecution = new ScrapingExecution();
|
|
|
|
public ScrapingExecution getLastScrapingExecution() {
|
|
return lastScrapingExecution;
|
|
}
|
|
|
|
public ScrapingExecution startScraping(final String datasourceKey, final String sitemapUrl, final String outputDataPattern, final String remoteAddr) {
|
|
synchronized (lastScrapingExecution) {
|
|
if (lastScrapingExecution.getStatus() != ScrapingStatus.RUNNING) {
|
|
lastScrapingExecution.startNew("Scraping for " + datasourceKey + " " + sitemapUrl + " - request from " + remoteAddr);
|
|
new Thread(() -> {
|
|
try {
|
|
String sitemapUrlKey = "loc";
|
|
String outputFilename = datasourceKey.concat(outputDataPattern);
|
|
ServiceScrapeDriver service = new ServiceScrapeDriver(sitemapUrl, sitemapUrlKey, null, outputFilename);
|
|
service.start();
|
|
lastScrapingExecution.complete();
|
|
} catch (final Throwable e) {
|
|
lastScrapingExecution.fail(e);
|
|
}
|
|
}).start();
|
|
} else {
|
|
final long now = System.currentTimeMillis();
|
|
return new ScrapingExecution(null, now, now, ScrapingStatus.NOT_LAUNCHED, "An other scraping is running");
|
|
}
|
|
|
|
}
|
|
return lastScrapingExecution;
|
|
}
|
|
}
|