oai + refactoring
This commit is contained in:
parent
05e0723d19
commit
25cdca97e1
18
pom.xml
18
pom.xml
|
@ -29,9 +29,14 @@
|
|||
<dependency>
|
||||
<groupId>com.h2database</groupId>
|
||||
<artifactId>h2</artifactId>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.12.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
|
@ -49,6 +54,17 @@
|
|||
<version>2.1.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.dom4j</groupId>
|
||||
<artifactId>dom4j</artifactId>
|
||||
<version>2.1.4</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents.client5</groupId>
|
||||
<artifactId>httpclient5</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
package eu.dnetlib.apps.oai2ftp;
|
||||
package eu.dnetlib.apps.oai2ftp.controller;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -14,7 +16,7 @@ import org.springframework.web.bind.annotation.ResponseBody;
|
|||
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
|
||||
import eu.dnetlib.apps.oai2ftp.service.Oai2FtpService;
|
||||
|
||||
@RestController
|
||||
|
@ -27,14 +29,16 @@ public class Oai2FtpController {
|
|||
private Oai2FtpService service;
|
||||
|
||||
@GetMapping("/collect")
|
||||
public CollectionStatus startCollection(@RequestParam final String baseUrl,
|
||||
@RequestParam(required = false, defaultValue = "oai_dc") final String format,
|
||||
@RequestParam(required = false) final String setSpec) {
|
||||
return service.startCollection(baseUrl, format, setSpec);
|
||||
public CollectionInfo startCollection(@RequestParam final String oaiBaseUrl,
|
||||
@RequestParam(required = false, defaultValue = "oai_dc") final String oaiFormat,
|
||||
@RequestParam(required = false) final String oaiSet,
|
||||
@RequestParam(required = false) final LocalDateTime oaiFrom,
|
||||
@RequestParam(required = false) final LocalDateTime oaiUntil) {
|
||||
return service.startCollection(oaiBaseUrl, oaiFormat, oaiSet, oaiFrom, oaiUntil);
|
||||
}
|
||||
|
||||
@GetMapping("/status/{id}")
|
||||
public CollectionStatus getExecutionStatus(@PathVariable final String id) {
|
||||
public CollectionInfo getExecutionStatus(@PathVariable final String id) {
|
||||
return service.getStatus(id);
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package eu.dnetlib.apps.oai2ftp;
|
||||
package eu.dnetlib.apps.oai2ftp.controller;
|
||||
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
|
@ -8,9 +8,9 @@ public class CollectionCall implements Serializable {
|
|||
private static final long serialVersionUID = 4915954425467830605L;
|
||||
|
||||
private String url;
|
||||
private ExecutionStatus status;
|
||||
private ExecutionStatus status = ExecutionStatus.READY;
|
||||
private int responseCode;
|
||||
private long savedRecords;
|
||||
private long numberOfRecords = 0;
|
||||
|
||||
public String getUrl() {
|
||||
return url;
|
||||
|
@ -36,12 +36,12 @@ public class CollectionCall implements Serializable {
|
|||
this.responseCode = responseCode;
|
||||
}
|
||||
|
||||
public long getSavedRecords() {
|
||||
return savedRecords;
|
||||
public long getNumberOfRecords() {
|
||||
return numberOfRecords;
|
||||
}
|
||||
|
||||
public void setSavedRecords(final long savedRecords) {
|
||||
this.savedRecords = savedRecords;
|
||||
public void setNumberOfRecords(final long numberOfRecords) {
|
||||
this.numberOfRecords = numberOfRecords;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -57,4 +57,5 @@ public class CollectionCall implements Serializable {
|
|||
final CollectionCall other = (CollectionCall) obj;
|
||||
return Objects.equals(url, other.url);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
package eu.dnetlib.apps.oai2ftp.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class CollectionInfo implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -8467778040892221645L;
|
||||
|
||||
private String id;
|
||||
private String oaiBaseUrl;
|
||||
private String oaiFormat;
|
||||
private String oaiSet;
|
||||
private LocalDateTime oaiFrom;
|
||||
private LocalDateTime oaiUntil;
|
||||
private String ftpServer;
|
||||
private String ftpDir;
|
||||
private LocalDateTime start;
|
||||
private LocalDateTime end;
|
||||
private ExecutionStatus executionStatus;
|
||||
private long total;
|
||||
private final List<CollectionCall> calls = new ArrayList<>();
|
||||
private String message;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getOaiBaseUrl() {
|
||||
return oaiBaseUrl;
|
||||
}
|
||||
|
||||
public void setOaiBaseUrl(final String oaiBaseUrl) {
|
||||
this.oaiBaseUrl = oaiBaseUrl;
|
||||
}
|
||||
|
||||
public String getOaiFormat() {
|
||||
return oaiFormat;
|
||||
}
|
||||
|
||||
public void setOaiFormat(final String oaiFormat) {
|
||||
this.oaiFormat = oaiFormat;
|
||||
}
|
||||
|
||||
public String getOaiSet() {
|
||||
return oaiSet;
|
||||
}
|
||||
|
||||
public void setOaiSet(final String oaiSet) {
|
||||
this.oaiSet = oaiSet;
|
||||
}
|
||||
|
||||
public LocalDateTime getOaiFrom() {
|
||||
return oaiFrom;
|
||||
}
|
||||
|
||||
public void setOaiFrom(final LocalDateTime oaiFrom) {
|
||||
this.oaiFrom = oaiFrom;
|
||||
}
|
||||
|
||||
public LocalDateTime getOaiUntil() {
|
||||
return oaiUntil;
|
||||
}
|
||||
|
||||
public void setOaiUntil(final LocalDateTime oaiUntil) {
|
||||
this.oaiUntil = oaiUntil;
|
||||
}
|
||||
|
||||
public String getFtpServer() {
|
||||
return ftpServer;
|
||||
}
|
||||
|
||||
public void setFtpServer(final String ftpServer) {
|
||||
this.ftpServer = ftpServer;
|
||||
}
|
||||
|
||||
public String getFtpDir() {
|
||||
return ftpDir;
|
||||
}
|
||||
|
||||
public void setFtpDir(final String ftpDir) {
|
||||
this.ftpDir = ftpDir;
|
||||
}
|
||||
|
||||
public LocalDateTime getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public void setStart(final LocalDateTime start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public LocalDateTime getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(final LocalDateTime end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
public ExecutionStatus getExecutionStatus() {
|
||||
return executionStatus;
|
||||
}
|
||||
|
||||
public void setExecutionStatus(final ExecutionStatus executionStatus) {
|
||||
this.executionStatus = executionStatus;
|
||||
}
|
||||
|
||||
public long getTotal() {
|
||||
return total;
|
||||
}
|
||||
|
||||
public void setTotal(final long total) {
|
||||
this.total = total;
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return message;
|
||||
}
|
||||
|
||||
public void setMessage(final String message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
public List<CollectionCall> getCalls() {
|
||||
return calls;
|
||||
}
|
||||
|
||||
}
|
|
@ -18,14 +18,26 @@ public class CollectionLogEntry implements Serializable {
|
|||
@Column(name = "id")
|
||||
private String id;
|
||||
|
||||
@Column(name = "base_url")
|
||||
private String baseUrl;
|
||||
@Column(name = "oai_base_url")
|
||||
private String oaiBaseUrl;
|
||||
|
||||
@Column(name = "format")
|
||||
private String format;
|
||||
@Column(name = "oai_format")
|
||||
private String oaiFormat;
|
||||
|
||||
@Column(name = "set_spec")
|
||||
private String setSpec;
|
||||
@Column(name = "oai_set")
|
||||
private String oaiSet;
|
||||
|
||||
@Column(name = "oai_from")
|
||||
private LocalDateTime oaiFrom;
|
||||
|
||||
@Column(name = "oai_until")
|
||||
private LocalDateTime oaiUntil;
|
||||
|
||||
@Column(name = "ftp_server")
|
||||
private String ftpServer;
|
||||
|
||||
@Column(name = "ftp_dir")
|
||||
private String ftpDir;
|
||||
|
||||
@Column(name = "start_date")
|
||||
private LocalDateTime start;
|
||||
|
@ -53,28 +65,60 @@ public class CollectionLogEntry implements Serializable {
|
|||
this.id = id;
|
||||
}
|
||||
|
||||
public String getBaseUrl() {
|
||||
return baseUrl;
|
||||
public String getOaiBaseUrl() {
|
||||
return oaiBaseUrl;
|
||||
}
|
||||
|
||||
public void setBaseUrl(final String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
public void setOaiBaseUrl(final String oaiBaseUrl) {
|
||||
this.oaiBaseUrl = oaiBaseUrl;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
public String getOaiFormat() {
|
||||
return oaiFormat;
|
||||
}
|
||||
|
||||
public void setFormat(final String format) {
|
||||
this.format = format;
|
||||
public void setOaiFormat(final String oaiFormat) {
|
||||
this.oaiFormat = oaiFormat;
|
||||
}
|
||||
|
||||
public String getSetSpec() {
|
||||
return setSpec;
|
||||
public String getOaiSet() {
|
||||
return oaiSet;
|
||||
}
|
||||
|
||||
public void setSetSpec(final String setSpec) {
|
||||
this.setSpec = setSpec;
|
||||
public void setOaiSet(final String oaiSet) {
|
||||
this.oaiSet = oaiSet;
|
||||
}
|
||||
|
||||
public LocalDateTime getOaiFrom() {
|
||||
return oaiFrom;
|
||||
}
|
||||
|
||||
public void setOaiFrom(final LocalDateTime oaiFrom) {
|
||||
this.oaiFrom = oaiFrom;
|
||||
}
|
||||
|
||||
public LocalDateTime getOaiUntil() {
|
||||
return oaiUntil;
|
||||
}
|
||||
|
||||
public void setOaiUntil(final LocalDateTime oaiUntil) {
|
||||
this.oaiUntil = oaiUntil;
|
||||
}
|
||||
|
||||
public String getFtpServer() {
|
||||
return ftpServer;
|
||||
}
|
||||
|
||||
public void setFtpServer(final String ftpServer) {
|
||||
this.ftpServer = ftpServer;
|
||||
}
|
||||
|
||||
public String getFtpDir() {
|
||||
return ftpDir;
|
||||
}
|
||||
|
||||
public void setFtpDir(final String ftpDir) {
|
||||
this.ftpDir = ftpDir;
|
||||
}
|
||||
|
||||
public LocalDateTime getStart() {
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
package eu.dnetlib.apps.oai2ftp.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.LinkedHashSet;
|
||||
|
||||
public class CollectionStatus implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -8467778040892221645L;
|
||||
|
||||
private String id;
|
||||
private String baseUrl;
|
||||
private String format;
|
||||
private String setSpec;
|
||||
private LocalDateTime start;
|
||||
private LocalDateTime end;
|
||||
private ExecutionStatus executionStatus;
|
||||
private long total;
|
||||
private final LinkedHashSet<CollectionCall> calls = new LinkedHashSet<>();
|
||||
private String message;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getBaseUrl() {
|
||||
return baseUrl;
|
||||
}
|
||||
|
||||
public void setBaseUrl(final String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setFormat(final String format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public String getSetSpec() {
|
||||
return setSpec;
|
||||
}
|
||||
|
||||
public void setSetSpec(final String setSpec) {
|
||||
this.setSpec = setSpec;
|
||||
}
|
||||
|
||||
public LocalDateTime getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public void setStart(final LocalDateTime start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public LocalDateTime getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(final LocalDateTime end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
public ExecutionStatus getExecutionStatus() {
|
||||
return executionStatus;
|
||||
}
|
||||
|
||||
public void setExecutionStatus(final ExecutionStatus executionStatus) {
|
||||
this.executionStatus = executionStatus;
|
||||
}
|
||||
|
||||
public long getTotal() {
|
||||
return total;
|
||||
}
|
||||
|
||||
public void setTotal(final long total) {
|
||||
this.total = total;
|
||||
}
|
||||
|
||||
public LinkedHashSet<CollectionCall> getCalls() {
|
||||
return calls;
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return message;
|
||||
}
|
||||
|
||||
public void setMessage(final String message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
package eu.dnetlib.apps.oai2ftp.service;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
|
||||
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
|
||||
|
||||
public class CollectionJob {
|
||||
|
||||
private final CollectionStatus status;
|
||||
private final BiConsumer<String, String> saveRecord;
|
||||
private final Consumer<CollectionStatus> onEnd;
|
||||
|
||||
public CollectionJob(final String id, final String baseUrl, final String format, final String setSpec, final BiConsumer<String, String> saveRecord,
|
||||
final Consumer<CollectionStatus> onEnd) {
|
||||
|
||||
this.status = new CollectionStatus();
|
||||
status.setId(id);
|
||||
|
||||
status.setBaseUrl(baseUrl);
|
||||
status.setFormat(format);
|
||||
status.setSetSpec(setSpec);
|
||||
|
||||
status.setStart(LocalDateTime.now());
|
||||
status.setEnd(null);
|
||||
|
||||
status.setExecutionStatus(ExecutionStatus.READY);
|
||||
status.setTotal(0);
|
||||
|
||||
status.setMessage("");
|
||||
|
||||
this.saveRecord = saveRecord;
|
||||
this.onEnd = onEnd;
|
||||
}
|
||||
|
||||
public void oaiCollect() {
|
||||
// TODO
|
||||
}
|
||||
|
||||
public CollectionStatus getStatus() {
|
||||
return status;
|
||||
}
|
||||
|
||||
}
|
|
@ -3,9 +3,9 @@ package eu.dnetlib.apps.oai2ftp.service;
|
|||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -14,17 +14,22 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionCall;
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
|
||||
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
|
||||
import eu.dnetlib.apps.oai2ftp.repository.CollectionLogEntryRepository;
|
||||
import eu.dnetlib.apps.oai2ftp.utils.ConvertUtils;
|
||||
import eu.dnetlib.apps.oai2ftp.utils.FtpClientFactory;
|
||||
import eu.dnetlib.apps.oai2ftp.utils.FtpClientWrapper;
|
||||
import eu.dnetlib.apps.oai2ftp.utils.HttpFetcher;
|
||||
import eu.dnetlib.apps.oai2ftp.utils.SimpleUtils;
|
||||
|
||||
@Service
|
||||
public class Oai2FtpService {
|
||||
|
@ -33,73 +38,125 @@ public class Oai2FtpService {
|
|||
|
||||
private final ExecutorService jobExecutor = Executors.newFixedThreadPool(100);
|
||||
|
||||
private final Map<String, CollectionJob> runningJobs = new LinkedHashMap<>();
|
||||
private final Map<String, CollectionInfo> infoMap = new LinkedHashMap<>();
|
||||
|
||||
@Autowired
|
||||
private FtpClientFactory ftpClientFactory;
|
||||
|
||||
@Value("${oai2ftp.conf.execution.expirationTime}")
|
||||
private long fullStatusExpirationTime; // in hours
|
||||
private long fullInfoExpirationTime; // in hours
|
||||
|
||||
@Autowired
|
||||
private CollectionLogEntryRepository collectionLogEntryRepository;
|
||||
|
||||
public CollectionStatus startCollection(final String baseUrl, final String format, final String setSpec) {
|
||||
final String jobId = generateNewJobId();
|
||||
public CollectionInfo startCollection(final String baseUrl,
|
||||
final String format,
|
||||
final String setSpec,
|
||||
final LocalDateTime from,
|
||||
final LocalDateTime until) {
|
||||
final String jobId = SimpleUtils.generateNewJobId();
|
||||
|
||||
final FtpClientWrapper ftp = ftpClientFactory.newClientForJob(jobId);
|
||||
|
||||
final CollectionJob job = new CollectionJob(jobId,
|
||||
baseUrl,
|
||||
format,
|
||||
setSpec,
|
||||
(id, xml) -> ftp.saveFile(ConvertUtils.oaiIdToFilename(id), xml),
|
||||
(status) -> {
|
||||
final CollectionInfo info = new CollectionInfo();
|
||||
info.setId(jobId);
|
||||
|
||||
info.setOaiBaseUrl(baseUrl);
|
||||
info.setOaiFormat(format);
|
||||
info.setOaiSet(setSpec);
|
||||
info.setOaiFrom(from);
|
||||
info.setOaiUntil(until);
|
||||
|
||||
info.setStart(LocalDateTime.now());
|
||||
info.setEnd(null);
|
||||
|
||||
info.setExecutionStatus(ExecutionStatus.READY);
|
||||
info.setTotal(0);
|
||||
info.setMessage("");
|
||||
|
||||
infoMap.put(jobId, info);
|
||||
|
||||
jobExecutor.execute(() -> {
|
||||
try {
|
||||
info.setExecutionStatus(ExecutionStatus.RUNNING);
|
||||
oaiCollect(baseUrl, format, setSpec, from, until, ftp, info);
|
||||
info.setExecutionStatus(ExecutionStatus.COMPLETED);
|
||||
} catch (final Throwable e) {
|
||||
info.setExecutionStatus(ExecutionStatus.FAILED);
|
||||
info.setMessage(e.getMessage());
|
||||
} finally {
|
||||
ftp.disconnect();
|
||||
collectionLogEntryRepository.save(ConvertUtils.statusToLog(status));
|
||||
});
|
||||
collectionLogEntryRepository.save(SimpleUtils.infoToLog(info));
|
||||
}
|
||||
});
|
||||
|
||||
runningJobs.put(jobId, job);
|
||||
|
||||
jobExecutor.execute(() -> job.oaiCollect());
|
||||
|
||||
return job.getStatus();
|
||||
};
|
||||
|
||||
private String generateNewJobId() {
|
||||
return "job-" + UUID.randomUUID();
|
||||
return info;
|
||||
}
|
||||
|
||||
public CollectionStatus getStatus(final String jobId) {
|
||||
final CollectionJob job = runningJobs.get(jobId);
|
||||
if (job != null) {
|
||||
return job.getStatus();
|
||||
public void oaiCollect(final String baseUrl,
|
||||
final String format,
|
||||
final String setSpec,
|
||||
final LocalDateTime from,
|
||||
final LocalDateTime until,
|
||||
final FtpClientWrapper ftp,
|
||||
final CollectionInfo info)
|
||||
throws Exception {
|
||||
|
||||
String url = SimpleUtils.oaiFirstUrl(baseUrl, format, setSpec, from, until);
|
||||
|
||||
while (StringUtils.isNotBlank(url)) {
|
||||
final CollectionCall call = new CollectionCall();
|
||||
call.setUrl(url);
|
||||
info.getCalls().add(call);
|
||||
|
||||
final String xml = HttpFetcher.download(call);
|
||||
final Document doc = DocumentHelper.parseText(xml);
|
||||
|
||||
final List<Node> records = doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']");
|
||||
call.setNumberOfRecords(records.size());
|
||||
|
||||
for (final Node n : records) {
|
||||
final String id = n.valueOf("/*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']");
|
||||
ftp.saveFile(SimpleUtils.oaiIdToFilename(id), n.asXML());
|
||||
info.setTotal(info.getTotal() + 1);
|
||||
}
|
||||
|
||||
final String rtoken = doc.valueOf("//*[local-name()='resumptionToken']").trim();
|
||||
|
||||
url = SimpleUtils.oaiNextUrl(baseUrl, rtoken);
|
||||
}
|
||||
}
|
||||
|
||||
public CollectionInfo getStatus(final String jobId) {
|
||||
final CollectionInfo info = infoMap.get(jobId);
|
||||
if (info != null) {
|
||||
return info;
|
||||
} else {
|
||||
return collectionLogEntryRepository.findById(jobId)
|
||||
.map(ConvertUtils::logToStatus)
|
||||
.map(SimpleUtils::logToInfo)
|
||||
.orElse(null);
|
||||
}
|
||||
}
|
||||
|
||||
@Scheduled(fixedRate = 30, timeUnit = TimeUnit.MINUTES)
|
||||
public void cronCleanJobs() throws Exception {
|
||||
final Set<String> toDelete = runningJobs.entrySet()
|
||||
final Set<String> toDelete = infoMap.entrySet()
|
||||
.stream()
|
||||
.filter(e -> {
|
||||
final ExecutionStatus status = e.getValue().getStatus().getExecutionStatus();
|
||||
final ExecutionStatus status = e.getValue().getExecutionStatus();
|
||||
return status == ExecutionStatus.COMPLETED || status == ExecutionStatus.FAILED;
|
||||
})
|
||||
.filter(e -> {
|
||||
final LocalDateTime end = e.getValue().getStatus().getEnd();
|
||||
final LocalDateTime end = e.getValue().getEnd();
|
||||
final long hours = Duration.between(end, LocalDateTime.now()).toHours();
|
||||
return Math.abs(hours) > fullStatusExpirationTime;
|
||||
return Math.abs(hours) > fullInfoExpirationTime;
|
||||
})
|
||||
.map(e -> e.getKey())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
log.info("Cleaning expired jobs: " + StringUtils.join(toDelete, ", "));
|
||||
|
||||
toDelete.forEach(runningJobs::remove);
|
||||
toDelete.forEach(infoMap::remove);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
package eu.dnetlib.apps.oai2ftp.utils;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry;
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
|
||||
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
|
||||
|
||||
public class ConvertUtils {
|
||||
|
||||
public static CollectionStatus logToStatus(final CollectionLogEntry log) {
|
||||
final CollectionStatus status = new CollectionStatus();
|
||||
status.setId(log.getId());
|
||||
status.setBaseUrl(log.getBaseUrl());
|
||||
status.setFormat(log.getFormat());
|
||||
status.setSetSpec(log.getSetSpec());
|
||||
status.setStart(log.getStart());
|
||||
status.setEnd(log.getEnd());
|
||||
status.setExecutionStatus(log.isSuccess() ? ExecutionStatus.COMPLETED : ExecutionStatus.FAILED);
|
||||
status.setTotal(log.getTotal());
|
||||
status.setMessage(log.getMessage());
|
||||
return status;
|
||||
}
|
||||
|
||||
public static CollectionLogEntry statusToLog(final CollectionStatus status) {
|
||||
final CollectionLogEntry log = new CollectionLogEntry();
|
||||
log.setId(status.getId());
|
||||
log.setBaseUrl(status.getBaseUrl());
|
||||
log.setFormat(status.getFormat());
|
||||
log.setSetSpec(status.getSetSpec());
|
||||
log.setStart(status.getStart());
|
||||
log.setEnd(status.getEnd());
|
||||
log.setSuccess(status.getExecutionStatus() == ExecutionStatus.COMPLETED);
|
||||
log.setTotal(status.getTotal());
|
||||
log.setNumberOfCalls(status.getCalls().size());
|
||||
log.setMessage(status.getMessage());
|
||||
return log;
|
||||
}
|
||||
|
||||
public static String oaiIdToFilename(final String id) {
|
||||
return DigestUtils.md5Hex(id) + ".xml";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package eu.dnetlib.apps.oai2ftp.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hc.client5.http.classic.methods.HttpGet;
|
||||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
|
||||
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
|
||||
import org.apache.hc.core5.http.io.entity.EntityUtils;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionCall;
|
||||
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
|
||||
|
||||
public class HttpFetcher {
|
||||
|
||||
public static String download(final CollectionCall call) throws IOException {
|
||||
|
||||
try (final CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
|
||||
call.setStatus(ExecutionStatus.RUNNING);
|
||||
return httpClient.execute(new HttpGet(call.getUrl()), response -> {
|
||||
final int code = response.getCode();
|
||||
call.setResponseCode(response.getCode());
|
||||
|
||||
if (code >= 200 && code < 300 && response.getEntity() != null) {
|
||||
call.setStatus(ExecutionStatus.COMPLETED);
|
||||
return EntityUtils.toString(response.getEntity());
|
||||
} else {
|
||||
call.setStatus(ExecutionStatus.FAILED);
|
||||
throw new IOException("Invalid http response");
|
||||
}
|
||||
});
|
||||
} catch (final Throwable e) {
|
||||
call.setStatus(ExecutionStatus.FAILED);
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
package eu.dnetlib.apps.oai2ftp.utils;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
|
||||
import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry;
|
||||
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
|
||||
|
||||
public class SimpleUtils {
|
||||
|
||||
private static final String UTF_8 = StandardCharsets.UTF_8.toString();
|
||||
|
||||
private static final DateTimeFormatter oaiDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
|
||||
|
||||
public static String generateNewJobId() {
|
||||
return "job-" + UUID.randomUUID();
|
||||
}
|
||||
|
||||
public static String oaiFirstUrl(final String baseUrl, final String format, final String setSpec, final LocalDateTime from, final LocalDateTime until) {
|
||||
try {
|
||||
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(format, UTF_8);
|
||||
|
||||
if (setSpec != null && !setSpec.isEmpty()) {
|
||||
url += "&set=" + URLEncoder.encode(setSpec, UTF_8);
|
||||
}
|
||||
if (from != null) {
|
||||
url += "&from=" + URLEncoder.encode(from.format(oaiDateFormatter), UTF_8);
|
||||
}
|
||||
if (until != null) {
|
||||
url += "&until=" + URLEncoder.encode(until.format(oaiDateFormatter), UTF_8);
|
||||
}
|
||||
return url;
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static String oaiNextUrl(final String baseUrl, final String rtoken) {
|
||||
try {
|
||||
if (StringUtils.isNotBlank(rtoken)) {
|
||||
return baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(rtoken, UTF_8);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static CollectionInfo logToInfo(final CollectionLogEntry log) {
|
||||
final CollectionInfo info = new CollectionInfo();
|
||||
info.setId(log.getId());
|
||||
info.setOaiBaseUrl(log.getOaiBaseUrl());
|
||||
info.setOaiFormat(log.getOaiFormat());
|
||||
info.setOaiSet(log.getOaiSet());
|
||||
info.setOaiFrom(log.getOaiFrom());
|
||||
info.setOaiUntil(log.getOaiUntil());
|
||||
info.setFtpServer(log.getFtpServer());
|
||||
info.setFtpDir(log.getFtpDir());
|
||||
info.setStart(log.getStart());
|
||||
info.setEnd(log.getEnd());
|
||||
info.setExecutionStatus(log.isSuccess() ? ExecutionStatus.COMPLETED : ExecutionStatus.FAILED);
|
||||
info.setTotal(log.getTotal());
|
||||
info.setMessage(log.getMessage());
|
||||
return info;
|
||||
}
|
||||
|
||||
public static CollectionLogEntry infoToLog(final CollectionInfo info) {
|
||||
final CollectionLogEntry log = new CollectionLogEntry();
|
||||
log.setId(info.getId());
|
||||
log.setOaiBaseUrl(info.getOaiBaseUrl());
|
||||
log.setOaiFormat(info.getOaiFormat());
|
||||
log.setOaiSet(info.getOaiSet());
|
||||
log.setOaiFrom(info.getOaiFrom());
|
||||
log.setOaiUntil(info.getOaiUntil());
|
||||
log.setFtpServer(info.getFtpServer());
|
||||
log.setFtpDir(info.getFtpDir());
|
||||
log.setStart(info.getStart());
|
||||
log.setEnd(info.getEnd());
|
||||
log.setSuccess(info.getExecutionStatus() == ExecutionStatus.COMPLETED);
|
||||
log.setTotal(info.getTotal());
|
||||
log.setNumberOfCalls(info.getCalls().size());
|
||||
log.setMessage(info.getMessage());
|
||||
return log;
|
||||
}
|
||||
|
||||
public static String oaiIdToFilename(final String id) {
|
||||
return DigestUtils.md5Hex(id) + ".xml";
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue