diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..27e6117
Binary files /dev/null and b/.DS_Store differ
diff --git a/pom.xml b/pom.xml
index a5baa8a..19c9ace 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,9 +29,14 @@
com.h2database
h2
- runtime
+
+ commons-io
+ commons-io
+ 2.12.0
+
+
commons-codec
commons-codec
@@ -49,6 +54,17 @@
2.1.0
+
+ org.dom4j
+ dom4j
+ 2.1.4
+
+
+
+ org.apache.httpcomponents.client5
+ httpclient5
+
+
org.springframework.boot
spring-boot-starter-test
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java
similarity index 76%
rename from src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java
rename to src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java
index a982189..7a8fd95 100644
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java
@@ -1,4 +1,6 @@
-package eu.dnetlib.apps.oai2ftp;
+package eu.dnetlib.apps.oai2ftp.controller;
+
+import java.time.LocalDateTime;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.logging.Log;
@@ -14,7 +16,7 @@ import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.ResponseStatus;
import org.springframework.web.bind.annotation.RestController;
-import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
+import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
import eu.dnetlib.apps.oai2ftp.service.Oai2FtpService;
@RestController
@@ -27,14 +29,16 @@ public class Oai2FtpController {
private Oai2FtpService service;
@GetMapping("/collect")
- public CollectionStatus startCollection(@RequestParam final String baseUrl,
- @RequestParam(required = false, defaultValue = "oai_dc") final String format,
- @RequestParam(required = false) final String setSpec) {
- return service.startCollection(baseUrl, format, setSpec);
+ public CollectionInfo startCollection(@RequestParam final String oaiBaseUrl,
+ @RequestParam(required = false, defaultValue = "oai_dc") final String oaiFormat,
+ @RequestParam(required = false) final String oaiSet,
+ @RequestParam(required = false) final LocalDateTime oaiFrom,
+ @RequestParam(required = false) final LocalDateTime oaiUntil) {
+ return service.startCollection(oaiBaseUrl, oaiFormat, oaiSet, oaiFrom, oaiUntil);
}
@GetMapping("/status/{id}")
- public CollectionStatus getExecutionStatus(@PathVariable final String id) {
+ public CollectionInfo getExecutionStatus(@PathVariable final String id) {
return service.getStatus(id);
}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java
similarity index 87%
rename from src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java
rename to src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java
index 56cc150..cd0668c 100644
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.apps.oai2ftp;
+package eu.dnetlib.apps.oai2ftp.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java
index 37132d7..28fbb4d 100644
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java
@@ -8,9 +8,9 @@ public class CollectionCall implements Serializable {
private static final long serialVersionUID = 4915954425467830605L;
private String url;
- private ExecutionStatus status;
+ private ExecutionStatus status = ExecutionStatus.READY;
private int responseCode;
- private long savedRecords;
+ private long numberOfRecords = 0;
public String getUrl() {
return url;
@@ -36,12 +36,12 @@ public class CollectionCall implements Serializable {
this.responseCode = responseCode;
}
- public long getSavedRecords() {
- return savedRecords;
+ public long getNumberOfRecords() {
+ return numberOfRecords;
}
- public void setSavedRecords(final long savedRecords) {
- this.savedRecords = savedRecords;
+ public void setNumberOfRecords(final long numberOfRecords) {
+ this.numberOfRecords = numberOfRecords;
}
@Override
@@ -57,4 +57,5 @@ public class CollectionCall implements Serializable {
final CollectionCall other = (CollectionCall) obj;
return Objects.equals(url, other.url);
}
+
}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java
new file mode 100644
index 0000000..8e5ac29
--- /dev/null
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java
@@ -0,0 +1,135 @@
+package eu.dnetlib.apps.oai2ftp.model;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Mutable description of one OAI-to-FTP collection job: the OAI request parameters,
+ * the FTP target, the start/end timestamps, the outcome and the HTTP calls issued.
+ *
+ * NOTE(review): instances appear to be updated by the job thread while being returned
+ * by the REST layer; no synchronization is done here - confirm this is acceptable.
+ */
+public class CollectionInfo implements Serializable {
+
+	private static final long serialVersionUID = -8467778040892221645L;
+
+	/** Job identifier. */
+	private String id;
+
+	/** OAI request parameters. */
+	private String oaiBaseUrl;
+	private String oaiFormat;
+	private String oaiSet;
+	private LocalDateTime oaiFrom;
+	private LocalDateTime oaiUntil;
+
+	/** FTP destination of the collected records. */
+	private String ftpServer;
+	private String ftpDir;
+
+	/** Job timing; end stays null until somebody sets it. */
+	private LocalDateTime start;
+	private LocalDateTime end;
+
+	private ExecutionStatus executionStatus;
+
+	/** Number of records counted so far. */
+	private long total;
+
+	/** Calls performed by the job, in insertion order (typed: was a raw List). */
+	private final List<CollectionCall> calls = new ArrayList<>();
+
+	/** Free-text message, e.g. an error description. */
+	private String message;
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	public String getOaiBaseUrl() {
+		return oaiBaseUrl;
+	}
+
+	public void setOaiBaseUrl(final String oaiBaseUrl) {
+		this.oaiBaseUrl = oaiBaseUrl;
+	}
+
+	public String getOaiFormat() {
+		return oaiFormat;
+	}
+
+	public void setOaiFormat(final String oaiFormat) {
+		this.oaiFormat = oaiFormat;
+	}
+
+	public String getOaiSet() {
+		return oaiSet;
+	}
+
+	public void setOaiSet(final String oaiSet) {
+		this.oaiSet = oaiSet;
+	}
+
+	public LocalDateTime getOaiFrom() {
+		return oaiFrom;
+	}
+
+	public void setOaiFrom(final LocalDateTime oaiFrom) {
+		this.oaiFrom = oaiFrom;
+	}
+
+	public LocalDateTime getOaiUntil() {
+		return oaiUntil;
+	}
+
+	public void setOaiUntil(final LocalDateTime oaiUntil) {
+		this.oaiUntil = oaiUntil;
+	}
+
+	public String getFtpServer() {
+		return ftpServer;
+	}
+
+	public void setFtpServer(final String ftpServer) {
+		this.ftpServer = ftpServer;
+	}
+
+	public String getFtpDir() {
+		return ftpDir;
+	}
+
+	public void setFtpDir(final String ftpDir) {
+		this.ftpDir = ftpDir;
+	}
+
+	public LocalDateTime getStart() {
+		return start;
+	}
+
+	public void setStart(final LocalDateTime start) {
+		this.start = start;
+	}
+
+	public LocalDateTime getEnd() {
+		return end;
+	}
+
+	public void setEnd(final LocalDateTime end) {
+		this.end = end;
+	}
+
+	public ExecutionStatus getExecutionStatus() {
+		return executionStatus;
+	}
+
+	public void setExecutionStatus(final ExecutionStatus executionStatus) {
+		this.executionStatus = executionStatus;
+	}
+
+	public long getTotal() {
+		return total;
+	}
+
+	public void setTotal(final long total) {
+		this.total = total;
+	}
+
+	public String getMessage() {
+		return message;
+	}
+
+	public void setMessage(final String message) {
+		this.message = message;
+	}
+
+	/**
+	 * @return the live (not copied) list of calls; callers append to it directly
+	 */
+	public List<CollectionCall> getCalls() {
+		return calls;
+	}
+
+}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java
index 2281f11..9393851 100644
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java
@@ -18,14 +18,26 @@ public class CollectionLogEntry implements Serializable {
@Column(name = "id")
private String id;
- @Column(name = "base_url")
- private String baseUrl;
+ @Column(name = "oai_base_url")
+ private String oaiBaseUrl;
- @Column(name = "format")
- private String format;
+ @Column(name = "oai_format")
+ private String oaiFormat;
- @Column(name = "set_spec")
- private String setSpec;
+ @Column(name = "oai_set")
+ private String oaiSet;
+
+ @Column(name = "oai_from")
+ private LocalDateTime oaiFrom;
+
+ @Column(name = "oai_until")
+ private LocalDateTime oaiUntil;
+
+ @Column(name = "ftp_server")
+ private String ftpServer;
+
+ @Column(name = "ftp_dir")
+ private String ftpDir;
@Column(name = "start_date")
private LocalDateTime start;
@@ -53,28 +65,60 @@ public class CollectionLogEntry implements Serializable {
this.id = id;
}
- public String getBaseUrl() {
- return baseUrl;
+ public String getOaiBaseUrl() {
+ return oaiBaseUrl;
}
- public void setBaseUrl(final String baseUrl) {
- this.baseUrl = baseUrl;
+ public void setOaiBaseUrl(final String oaiBaseUrl) {
+ this.oaiBaseUrl = oaiBaseUrl;
}
- public String getFormat() {
- return format;
+ public String getOaiFormat() {
+ return oaiFormat;
}
- public void setFormat(final String format) {
- this.format = format;
+ public void setOaiFormat(final String oaiFormat) {
+ this.oaiFormat = oaiFormat;
}
- public String getSetSpec() {
- return setSpec;
+ public String getOaiSet() {
+ return oaiSet;
}
- public void setSetSpec(final String setSpec) {
- this.setSpec = setSpec;
+ public void setOaiSet(final String oaiSet) {
+ this.oaiSet = oaiSet;
+ }
+
+ public LocalDateTime getOaiFrom() {
+ return oaiFrom;
+ }
+
+ public void setOaiFrom(final LocalDateTime oaiFrom) {
+ this.oaiFrom = oaiFrom;
+ }
+
+ public LocalDateTime getOaiUntil() {
+ return oaiUntil;
+ }
+
+ public void setOaiUntil(final LocalDateTime oaiUntil) {
+ this.oaiUntil = oaiUntil;
+ }
+
+ public String getFtpServer() {
+ return ftpServer;
+ }
+
+ public void setFtpServer(final String ftpServer) {
+ this.ftpServer = ftpServer;
+ }
+
+ public String getFtpDir() {
+ return ftpDir;
+ }
+
+ public void setFtpDir(final String ftpDir) {
+ this.ftpDir = ftpDir;
}
public LocalDateTime getStart() {
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java
deleted file mode 100644
index 95eb0fd..0000000
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java
+++ /dev/null
@@ -1,98 +0,0 @@
-package eu.dnetlib.apps.oai2ftp.model;
-
-import java.io.Serializable;
-import java.time.LocalDateTime;
-import java.util.LinkedHashSet;
-
-public class CollectionStatus implements Serializable {
-
- private static final long serialVersionUID = -8467778040892221645L;
-
- private String id;
- private String baseUrl;
- private String format;
- private String setSpec;
- private LocalDateTime start;
- private LocalDateTime end;
- private ExecutionStatus executionStatus;
- private long total;
- private final LinkedHashSet calls = new LinkedHashSet<>();
- private String message;
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public String getBaseUrl() {
- return baseUrl;
- }
-
- public void setBaseUrl(final String baseUrl) {
- this.baseUrl = baseUrl;
- }
-
- public String getFormat() {
- return format;
- }
-
- public void setFormat(final String format) {
- this.format = format;
- }
-
- public String getSetSpec() {
- return setSpec;
- }
-
- public void setSetSpec(final String setSpec) {
- this.setSpec = setSpec;
- }
-
- public LocalDateTime getStart() {
- return start;
- }
-
- public void setStart(final LocalDateTime start) {
- this.start = start;
- }
-
- public LocalDateTime getEnd() {
- return end;
- }
-
- public void setEnd(final LocalDateTime end) {
- this.end = end;
- }
-
- public ExecutionStatus getExecutionStatus() {
- return executionStatus;
- }
-
- public void setExecutionStatus(final ExecutionStatus executionStatus) {
- this.executionStatus = executionStatus;
- }
-
- public long getTotal() {
- return total;
- }
-
- public void setTotal(final long total) {
- this.total = total;
- }
-
- public LinkedHashSet getCalls() {
- return calls;
- }
-
- public String getMessage() {
- return message;
- }
-
- public void setMessage(final String message) {
- this.message = message;
- }
-
-}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java
deleted file mode 100644
index e59ba90..0000000
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package eu.dnetlib.apps.oai2ftp.service;
-
-import java.time.LocalDateTime;
-import java.util.function.BiConsumer;
-import java.util.function.Consumer;
-
-import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
-import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
-
-public class CollectionJob {
-
- private final CollectionStatus status;
- private final BiConsumer saveRecord;
- private final Consumer onEnd;
-
- public CollectionJob(final String id, final String baseUrl, final String format, final String setSpec, final BiConsumer saveRecord,
- final Consumer onEnd) {
-
- this.status = new CollectionStatus();
- status.setId(id);
-
- status.setBaseUrl(baseUrl);
- status.setFormat(format);
- status.setSetSpec(setSpec);
-
- status.setStart(LocalDateTime.now());
- status.setEnd(null);
-
- status.setExecutionStatus(ExecutionStatus.READY);
- status.setTotal(0);
-
- status.setMessage("");
-
- this.saveRecord = saveRecord;
- this.onEnd = onEnd;
- }
-
- public void oaiCollect() {
- // TODO
- }
-
- public CollectionStatus getStatus() {
- return status;
- }
-
-}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java
index 4b6ef19..3e64209 100644
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java
@@ -3,9 +3,9 @@ package eu.dnetlib.apps.oai2ftp.service;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -14,17 +14,22 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.dom4j.Document;
+import org.dom4j.DocumentHelper;
+import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
-import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
+import eu.dnetlib.apps.oai2ftp.model.CollectionCall;
+import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
import eu.dnetlib.apps.oai2ftp.repository.CollectionLogEntryRepository;
-import eu.dnetlib.apps.oai2ftp.utils.ConvertUtils;
import eu.dnetlib.apps.oai2ftp.utils.FtpClientFactory;
import eu.dnetlib.apps.oai2ftp.utils.FtpClientWrapper;
+import eu.dnetlib.apps.oai2ftp.utils.HttpFetcher;
+import eu.dnetlib.apps.oai2ftp.utils.SimpleUtils;
@Service
public class Oai2FtpService {
@@ -33,73 +38,125 @@ public class Oai2FtpService {
private final ExecutorService jobExecutor = Executors.newFixedThreadPool(100);
- private final Map runningJobs = new LinkedHashMap<>();
+ private final Map infoMap = new LinkedHashMap<>();
@Autowired
private FtpClientFactory ftpClientFactory;
@Value("${oai2ftp.conf.execution.expirationTime}")
- private long fullStatusExpirationTime; // in hours
+ private long fullInfoExpirationTime; // in hours
@Autowired
private CollectionLogEntryRepository collectionLogEntryRepository;
- public CollectionStatus startCollection(final String baseUrl, final String format, final String setSpec) {
- final String jobId = generateNewJobId();
+	/**
+	 * Registers a new collection job and runs it asynchronously on the job executor.
+	 *
+	 * @param baseUrl OAI base url
+	 * @param format  OAI metadata prefix
+	 * @param setSpec optional OAI set
+	 * @param from    optional lower bound of the harvest
+	 * @param until   optional upper bound of the harvest
+	 * @return the info object of the job; it keeps being updated while the job runs
+	 */
+	public CollectionInfo startCollection(final String baseUrl,
+		final String format,
+		final String setSpec,
+		final LocalDateTime from,
+		final LocalDateTime until) {
+		final String jobId = SimpleUtils.generateNewJobId();
final FtpClientWrapper ftp = ftpClientFactory.newClientForJob(jobId);
- final CollectionJob job = new CollectionJob(jobId,
- baseUrl,
- format,
- setSpec,
- (id, xml) -> ftp.saveFile(ConvertUtils.oaiIdToFilename(id), xml),
- (status) -> {
+		final CollectionInfo info = new CollectionInfo();
+		info.setId(jobId);
+
+		info.setOaiBaseUrl(baseUrl);
+		info.setOaiFormat(format);
+		info.setOaiSet(setSpec);
+		info.setOaiFrom(from);
+		info.setOaiUntil(until);
+		// NOTE(review): ftpServer / ftpDir are never filled in here, so they are persisted as null - confirm.
+
+		info.setStart(LocalDateTime.now());
+		info.setEnd(null);
+
+		info.setExecutionStatus(ExecutionStatus.READY);
+		info.setTotal(0);
+		info.setMessage("");
+
+		infoMap.put(jobId, info);
+
+		jobExecutor.execute(() -> {
+			// Stays FAILED unless the harvest returns normally.
+			ExecutionStatus outcome = ExecutionStatus.FAILED;
+			try {
+				info.setExecutionStatus(ExecutionStatus.RUNNING);
+				oaiCollect(baseUrl, format, setSpec, from, until, ftp, info);
+				outcome = ExecutionStatus.COMPLETED;
+			} catch (final Throwable e) {
+				// Boundary of the worker thread: log the failure instead of silently dropping the stack trace.
+				log.error("Collection job " + jobId + " failed", e);
+				info.setMessage(StringUtils.defaultIfBlank(e.getMessage(), e.toString()));
+			} finally {
+				// The end date is set BEFORE the terminal status becomes visible: cronCleanJobs reads
+				// the end date of every COMPLETED/FAILED job, and the log entry needs it as well.
+				info.setEnd(LocalDateTime.now());
+				info.setExecutionStatus(outcome);
ftp.disconnect();
- collectionLogEntryRepository.save(ConvertUtils.statusToLog(status));
- });
+				collectionLogEntryRepository.save(SimpleUtils.infoToLog(info));
+			}
+		});
- runningJobs.put(jobId, job);
-
- jobExecutor.execute(() -> job.oaiCollect());
-
- return job.getStatus();
- };
-
- private String generateNewJobId() {
- return "job-" + UUID.randomUUID();
+		return info;
}
- public CollectionStatus getStatus(final String jobId) {
- final CollectionJob job = runningJobs.get(jobId);
- if (job != null) {
- return job.getStatus();
+	/**
+	 * Harvests an OAI-PMH ListRecords sequence and stores every record on the FTP server.
+	 * A new {@link CollectionCall} is appended to the info for each HTTP request, and the
+	 * loop follows the resumption token until the server stops returning one.
+	 *
+	 * @param baseUrl OAI base url
+	 * @param format  OAI metadata prefix
+	 * @param setSpec optional OAI set
+	 * @param from    optional lower bound of the harvest
+	 * @param until   optional upper bound of the harvest
+	 * @param ftp     destination of the record files
+	 * @param info    job info updated with the calls and the running total
+	 * @throws Exception if a download, the XML parsing or an FTP write fails
+	 */
+	public void oaiCollect(final String baseUrl,
+		final String format,
+		final String setSpec,
+		final LocalDateTime from,
+		final LocalDateTime until,
+		final FtpClientWrapper ftp,
+		final CollectionInfo info)
+		throws Exception {
+
+		String url = SimpleUtils.oaiFirstUrl(baseUrl, format, setSpec, from, until);
+
+		while (StringUtils.isNotBlank(url)) {
+			final CollectionCall call = new CollectionCall();
+			call.setUrl(url);
+			info.getCalls().add(call);
+
+			final String xml = HttpFetcher.download(call);
+			final Document doc = DocumentHelper.parseText(xml);
+
+			final List<Node> records = doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']");
+			call.setNumberOfRecords(records.size());
+
+			for (final Node n : records) {
+				// The path must be RELATIVE to the record node: an absolute path ("/...") is resolved
+				// from the document root (OAI-PMH), matches nothing and yields "" for every record,
+				// which made all the records collide on the same file name.
+				final String id = n.valueOf("*[local-name()='header']/*[local-name()='identifier']").trim();
+				ftp.saveFile(SimpleUtils.oaiIdToFilename(id), n.asXML());
+				info.setTotal(info.getTotal() + 1);
+			}
+
+			final String rtoken = doc.valueOf("//*[local-name()='resumptionToken']").trim();
+
+			// oaiNextUrl returns null for a blank token, which ends the loop.
+			url = SimpleUtils.oaiNextUrl(baseUrl, rtoken);
+		}
+	}
+
+ public CollectionInfo getStatus(final String jobId) {
+ final CollectionInfo info = infoMap.get(jobId);
+ if (info != null) {
+ return info;
} else {
return collectionLogEntryRepository.findById(jobId)
- .map(ConvertUtils::logToStatus)
+ .map(SimpleUtils::logToInfo)
.orElse(null);
}
}
+	/**
+	 * Scheduled cleanup: removes from the in-memory map the COMPLETED/FAILED jobs whose end
+	 * date is older than the configured expiration time (in hours).
+	 */
@Scheduled(fixedRate = 30, timeUnit = TimeUnit.MINUTES)
public void cronCleanJobs() throws Exception {
- final Set toDelete = runningJobs.entrySet()
+		final Set<String> toDelete = infoMap.entrySet()
.stream()
.filter(e -> {
- final ExecutionStatus status = e.getValue().getStatus().getExecutionStatus();
+			final ExecutionStatus status = e.getValue().getExecutionStatus();
return status == ExecutionStatus.COMPLETED || status == ExecutionStatus.FAILED;
})
.filter(e -> {
- final LocalDateTime end = e.getValue().getStatus().getEnd();
+			final LocalDateTime end = e.getValue().getEnd();
+			// A finished job without end date cannot be aged: keep it instead of letting
+			// Duration.between throw a NullPointerException and abort the whole cleanup.
+			if (end == null) {
+				return false;
+			}
final long hours = Duration.between(end, LocalDateTime.now()).toHours();
- return Math.abs(hours) > fullStatusExpirationTime;
+			return Math.abs(hours) > fullInfoExpirationTime;
})
.map(e -> e.getKey())
.collect(Collectors.toSet());
log.info("Cleaning expired jobs: " + StringUtils.join(toDelete, ", "));
- toDelete.forEach(runningJobs::remove);
+		toDelete.forEach(infoMap::remove);
}
}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java
deleted file mode 100644
index cf33c64..0000000
--- a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java
+++ /dev/null
@@ -1,43 +0,0 @@
-package eu.dnetlib.apps.oai2ftp.utils;
-
-import org.apache.commons.codec.digest.DigestUtils;
-
-import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry;
-import eu.dnetlib.apps.oai2ftp.model.CollectionStatus;
-import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
-
-public class ConvertUtils {
-
- public static CollectionStatus logToStatus(final CollectionLogEntry log) {
- final CollectionStatus status = new CollectionStatus();
- status.setId(log.getId());
- status.setBaseUrl(log.getBaseUrl());
- status.setFormat(log.getFormat());
- status.setSetSpec(log.getSetSpec());
- status.setStart(log.getStart());
- status.setEnd(log.getEnd());
- status.setExecutionStatus(log.isSuccess() ? ExecutionStatus.COMPLETED : ExecutionStatus.FAILED);
- status.setTotal(log.getTotal());
- status.setMessage(log.getMessage());
- return status;
- }
-
- public static CollectionLogEntry statusToLog(final CollectionStatus status) {
- final CollectionLogEntry log = new CollectionLogEntry();
- log.setId(status.getId());
- log.setBaseUrl(status.getBaseUrl());
- log.setFormat(status.getFormat());
- log.setSetSpec(status.getSetSpec());
- log.setStart(status.getStart());
- log.setEnd(status.getEnd());
- log.setSuccess(status.getExecutionStatus() == ExecutionStatus.COMPLETED);
- log.setTotal(status.getTotal());
- log.setNumberOfCalls(status.getCalls().size());
- log.setMessage(status.getMessage());
- return log;
- }
-
- public static String oaiIdToFilename(final String id) {
- return DigestUtils.md5Hex(id) + ".xml";
- }
-}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java
new file mode 100644
index 0000000..38188e3
--- /dev/null
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java
@@ -0,0 +1,36 @@
+package eu.dnetlib.apps.oai2ftp.utils;
+
+import java.io.IOException;
+
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+
+import eu.dnetlib.apps.oai2ftp.model.CollectionCall;
+import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
+
+/**
+ * Minimal HTTP GET helper that records the outcome of the request on a {@link CollectionCall}.
+ */
+public class HttpFetcher {
+
+	/**
+	 * Downloads the url of the given call and returns the response body as text.
+	 * The call is marked RUNNING when the request starts, COMPLETED once the body has been read,
+	 * and FAILED on any error; the HTTP status code is stored on the call as soon as it is known.
+	 *
+	 * @param call the call to execute; its url is read, its status and response code are updated
+	 * @return the response body
+	 * @throws IOException if the request fails, the status code is not 2xx or the response has no body
+	 */
+	public static String download(final CollectionCall call) throws IOException {
+
+		call.setStatus(ExecutionStatus.RUNNING);
+
+		try (final CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
+			final String body = httpClient.execute(new HttpGet(call.getUrl()), response -> {
+				final int code = response.getCode();
+				call.setResponseCode(code);
+
+				if (code < 200 || code >= 300 || response.getEntity() == null) {
+					throw new IOException("Invalid http response (code " + code + ") from " + call.getUrl());
+				}
+				return EntityUtils.toString(response.getEntity());
+			});
+			call.setStatus(ExecutionStatus.COMPLETED);
+			return body;
+		} catch (final IOException e) {
+			// Already the declared type: rethrow as-is so the original message is not buried in a wrapper.
+			call.setStatus(ExecutionStatus.FAILED);
+			throw e;
+		} catch (final RuntimeException e) {
+			// e.g. a malformed url rejected by HttpGet
+			call.setStatus(ExecutionStatus.FAILED);
+			throw new IOException("Error downloading " + call.getUrl(), e);
+		}
+	}
+}
diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java
new file mode 100644
index 0000000..d71122c
--- /dev/null
+++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java
@@ -0,0 +1,98 @@
+package eu.dnetlib.apps.oai2ftp.utils;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.UUID;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.apps.oai2ftp.model.CollectionInfo;
+import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry;
+import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus;
+
+/**
+ * Static helpers of the oai2ftp application: job ids, OAI-PMH urls, conversions between
+ * {@link CollectionInfo} and {@link CollectionLogEntry}, and record file names.
+ */
+public class SimpleUtils {
+
+	private static final String UTF_8 = StandardCharsets.UTF_8.toString();
+
+	private static final DateTimeFormatter oaiDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+
+	/**
+	 * @return a new job id: the prefix "job-" followed by a random UUID
+	 */
+	public static String generateNewJobId() {
+		final UUID uuid = UUID.randomUUID();
+		return "job-" + uuid;
+	}
+
+	/**
+	 * Builds the first ListRecords url of a harvest. The set is added only when not empty,
+	 * from/until only when not null; dates are rendered as yyyy-MM-dd.
+	 *
+	 * @return the url, with all parameter values url-encoded
+	 */
+	public static String oaiFirstUrl(final String baseUrl, final String format, final String setSpec, final LocalDateTime from, final LocalDateTime until) {
+		final StringBuilder sb = new StringBuilder();
+		sb.append(baseUrl).append("?verb=ListRecords&metadataPrefix=").append(encode(format));
+
+		final boolean hasSet = setSpec != null && setSpec.length() > 0;
+		if (hasSet) {
+			sb.append("&set=").append(encode(setSpec));
+		}
+		if (from != null) {
+			sb.append("&from=").append(encode(from.format(oaiDateFormatter)));
+		}
+		if (until != null) {
+			sb.append("&until=").append(encode(until.format(oaiDateFormatter)));
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Builds the ListRecords url that resumes a harvest.
+	 *
+	 * @return the url, or null when the resumption token is blank
+	 */
+	public static String oaiNextUrl(final String baseUrl, final String rtoken) {
+		if (StringUtils.isBlank(rtoken)) {
+			return null;
+		}
+		return baseUrl + "?verb=ListRecords&resumptionToken=" + encode(rtoken);
+	}
+
+	/** Url-encodes a value as UTF-8; the (checked) unsupported-encoding error is rethrown unchecked. */
+	private static String encode(final String value) {
+		try {
+			return URLEncoder.encode(value, UTF_8);
+		} catch (final UnsupportedEncodingException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Converts a persisted log entry to an info object. The status is COMPLETED when the
+	 * entry is flagged as success, FAILED otherwise; the list of calls is left empty.
+	 */
+	public static CollectionInfo logToInfo(final CollectionLogEntry log) {
+		final ExecutionStatus outcome;
+		if (log.isSuccess()) {
+			outcome = ExecutionStatus.COMPLETED;
+		} else {
+			outcome = ExecutionStatus.FAILED;
+		}
+
+		final CollectionInfo result = new CollectionInfo();
+		result.setId(log.getId());
+		result.setOaiBaseUrl(log.getOaiBaseUrl());
+		result.setOaiFormat(log.getOaiFormat());
+		result.setOaiSet(log.getOaiSet());
+		result.setOaiFrom(log.getOaiFrom());
+		result.setOaiUntil(log.getOaiUntil());
+		result.setFtpServer(log.getFtpServer());
+		result.setFtpDir(log.getFtpDir());
+		result.setStart(log.getStart());
+		result.setEnd(log.getEnd());
+		result.setExecutionStatus(outcome);
+		result.setTotal(log.getTotal());
+		result.setMessage(log.getMessage());
+		return result;
+	}
+
+	/**
+	 * Converts an info object to a log entry. Only a COMPLETED status counts as success,
+	 * and the calls are reduced to their number.
+	 */
+	public static CollectionLogEntry infoToLog(final CollectionInfo info) {
+		final boolean completed = info.getExecutionStatus() == ExecutionStatus.COMPLETED;
+		final int calls = info.getCalls().size();
+
+		final CollectionLogEntry entry = new CollectionLogEntry();
+		entry.setId(info.getId());
+		entry.setOaiBaseUrl(info.getOaiBaseUrl());
+		entry.setOaiFormat(info.getOaiFormat());
+		entry.setOaiSet(info.getOaiSet());
+		entry.setOaiFrom(info.getOaiFrom());
+		entry.setOaiUntil(info.getOaiUntil());
+		entry.setFtpServer(info.getFtpServer());
+		entry.setFtpDir(info.getFtpDir());
+		entry.setStart(info.getStart());
+		entry.setEnd(info.getEnd());
+		entry.setSuccess(completed);
+		entry.setTotal(info.getTotal());
+		entry.setNumberOfCalls(calls);
+		entry.setMessage(info.getMessage());
+		return entry;
+	}
+
+	/**
+	 * @return the file name of a record: the MD5 hex digest of its OAI identifier plus ".xml"
+	 */
+	public static String oaiIdToFilename(final String id) {
+		final String digest = DigestUtils.md5Hex(id);
+		return digest + ".xml";
+	}
+}