From 25cdca97e14553e977fddd24424303cc1e05f6e9 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 26 May 2023 11:22:15 +0200 Subject: [PATCH] oai + refatoring --- .DS_Store | Bin 0 -> 6148 bytes pom.xml | 18 ++- .../{ => controller}/Oai2FtpController.java | 18 ++- .../{ => controller}/SwaggerController.java | 2 +- .../apps/Oai2ftp/model/CollectionCall.java | 13 +- .../apps/Oai2ftp/model/CollectionInfo.java | 135 ++++++++++++++++++ .../Oai2ftp/model/CollectionLogEntry.java | 80 ++++++++--- .../apps/Oai2ftp/model/CollectionStatus.java | 98 ------------- .../apps/Oai2ftp/service/CollectionJob.java | 46 ------ .../apps/Oai2ftp/service/Oai2FtpService.java | 125 +++++++++++----- .../apps/Oai2ftp/utils/ConvertUtils.java | 43 ------ .../apps/Oai2ftp/utils/HttpFetcher.java | 36 +++++ .../apps/Oai2ftp/utils/SimpleUtils.java | 98 +++++++++++++ 13 files changed, 458 insertions(+), 254 deletions(-) create mode 100644 .DS_Store rename src/main/java/eu/dnetlib/apps/Oai2ftp/{ => controller}/Oai2FtpController.java (76%) rename src/main/java/eu/dnetlib/apps/Oai2ftp/{ => controller}/SwaggerController.java (87%) create mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java delete mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java delete mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java delete mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java create mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java create mode 100644 src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..27e6117deaa67c693ad74d72e034a9e2fa05616c GIT binary patch literal 6148 zcmeHKOG?B*5Up}X2)J+%1ec9Bm>aZ1!~@I)BnjvuLk|w_a}n?0CA^Er@YP36Ft{8< zs-XK->X-C`baxREZ=Sm~(Ta#F3_%uUM#MbpI`QCBAWMz5Z%XOKx^UN@2l@5|9q5VL z?lAs-HT4hcZCiGGT?Sv@eFyJKT{nGcV5u(O)5XjA+s)Y9{o>s}j;p?E@tA^vU?3O> z27-ZOF@Q5$q&hQma0cpz-4 zKucwBG1$^!Pac;Y2SZCI_U42A&7aK+>)TO3X*h8MeV~9xHAPNNfA<)MC?PQg14m%s11hp9#Q*>R literal 0 HcmV?d00001 diff --git a/pom.xml b/pom.xml index a5baa8a..19c9ace 100644 --- a/pom.xml +++ b/pom.xml @@ -29,9 +29,14 @@ com.h2database h2 - runtime + + commons-io + commons-io + 2.12.0 + + commons-codec commons-codec @@ -49,6 +54,17 @@ 2.1.0 + + org.dom4j + dom4j + 2.1.4 + + + + org.apache.httpcomponents.client5 + httpclient5 + + org.springframework.boot spring-boot-starter-test diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java similarity index 76% rename from src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java rename to src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java index a982189..7a8fd95 100644 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/Oai2FtpController.java +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/Oai2FtpController.java @@ -1,4 +1,6 @@ -package eu.dnetlib.apps.oai2ftp; +package eu.dnetlib.apps.oai2ftp.controller; + +import java.time.LocalDateTime; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.logging.Log; @@ -14,7 +16,7 @@ import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.ResponseStatus; import org.springframework.web.bind.annotation.RestController; -import eu.dnetlib.apps.oai2ftp.model.CollectionStatus; +import eu.dnetlib.apps.oai2ftp.model.CollectionInfo; import eu.dnetlib.apps.oai2ftp.service.Oai2FtpService; @RestController @@ -27,14 +29,16 @@ public class Oai2FtpController { private Oai2FtpService service; @GetMapping("/collect") - public CollectionStatus startCollection(@RequestParam final String baseUrl, - @RequestParam(required = false, defaultValue = "oai_dc") final String format, - @RequestParam(required = false) final String setSpec) { - return service.startCollection(baseUrl, format, setSpec); + public CollectionInfo startCollection(@RequestParam final String oaiBaseUrl, + @RequestParam(required = false, defaultValue = "oai_dc") final String oaiFormat, + @RequestParam(required = false) final String oaiSet, + @RequestParam(required = false) final LocalDateTime oaiFrom, + @RequestParam(required = false) final LocalDateTime oaiUntil) { + return service.startCollection(oaiBaseUrl, oaiFormat, oaiSet, oaiFrom, oaiUntil); } @GetMapping("/status/{id}") - public CollectionStatus getExecutionStatus(@PathVariable final String id) { + public CollectionInfo getExecutionStatus(@PathVariable final String id) { return service.getStatus(id); } diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java similarity index 87% rename from src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java rename to src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java index 56cc150..cd0668c 100644 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/SwaggerController.java +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/controller/SwaggerController.java @@ -1,4 +1,4 @@ -package eu.dnetlib.apps.oai2ftp; +package eu.dnetlib.apps.oai2ftp.controller; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java index 37132d7..28fbb4d 100644 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionCall.java @@ -8,9 +8,9 @@ public class CollectionCall implements Serializable { private static final long serialVersionUID = 4915954425467830605L; private String url; - private ExecutionStatus status; + private ExecutionStatus status = ExecutionStatus.READY; private int responseCode; - private long savedRecords; + private long numberOfRecords = 0; public String getUrl() { return url; @@ -36,12 +36,12 @@ public class CollectionCall implements Serializable { this.responseCode = responseCode; } - public long getSavedRecords() { - return savedRecords; + public long getNumberOfRecords() { + return numberOfRecords; } - public void setSavedRecords(final long savedRecords) { - this.savedRecords = savedRecords; + public void setNumberOfRecords(final long numberOfRecords) { + this.numberOfRecords = numberOfRecords; } @Override @@ -57,4 +57,5 @@ public class CollectionCall implements Serializable { final CollectionCall other = (CollectionCall) obj; return Objects.equals(url, other.url); } + } diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java new file mode 100644 index 0000000..8e5ac29 --- /dev/null +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionInfo.java @@ -0,0 +1,135 @@ +package eu.dnetlib.apps.oai2ftp.model; + +import java.io.Serializable; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +public class CollectionInfo implements Serializable { + + private static final long serialVersionUID = -8467778040892221645L; + + private String id; + private String oaiBaseUrl; + private String oaiFormat; + private String oaiSet; + private LocalDateTime oaiFrom; + private LocalDateTime oaiUntil; + private String ftpServer; + private String ftpDir; + private LocalDateTime start; + private LocalDateTime end; + private ExecutionStatus executionStatus; + private long total; + private final List calls = new ArrayList<>(); + private String message; + + public String getId() { + return id; + } + + public void setId(final String id) { + this.id = id; + } + + public String getOaiBaseUrl() { + return oaiBaseUrl; + } + + public void setOaiBaseUrl(final String oaiBaseUrl) { + this.oaiBaseUrl = oaiBaseUrl; + } + + public String getOaiFormat() { + return oaiFormat; + } + + public void setOaiFormat(final String oaiFormat) { + this.oaiFormat = oaiFormat; + } + + public String getOaiSet() { + return oaiSet; + } + + public void setOaiSet(final String oaiSet) { + this.oaiSet = oaiSet; + } + + public LocalDateTime getOaiFrom() { + return oaiFrom; + } + + public void setOaiFrom(final LocalDateTime oaiFrom) { + this.oaiFrom = oaiFrom; + } + + public LocalDateTime getOaiUntil() { + return oaiUntil; + } + + public void setOaiUntil(final LocalDateTime oaiUntil) { + this.oaiUntil = oaiUntil; + } + + public String getFtpServer() { + return ftpServer; + } + + public void setFtpServer(final String ftpServer) { + this.ftpServer = ftpServer; + } + + public String getFtpDir() { + return ftpDir; + } + + public void setFtpDir(final String ftpDir) { + this.ftpDir = ftpDir; + } + + public LocalDateTime getStart() { + return start; + } + + public void setStart(final LocalDateTime start) { + this.start = start; + } + + public LocalDateTime getEnd() { + return end; + } + + public void setEnd(final LocalDateTime end) { + this.end = end; + } + + public ExecutionStatus getExecutionStatus() { + return executionStatus; + } + + public void setExecutionStatus(final ExecutionStatus executionStatus) { + this.executionStatus = executionStatus; + } + + public long getTotal() { + return total; + } + + public void setTotal(final long total) { + this.total = total; + } + + public String getMessage() { + return message; + } + + public void setMessage(final String message) { + this.message = message; + } + + public List getCalls() { + return calls; + } + +} diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java index 2281f11..9393851 100644 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionLogEntry.java @@ -18,14 +18,26 @@ public class CollectionLogEntry implements Serializable { @Column(name = "id") private String id; - @Column(name = "base_url") - private String baseUrl; + @Column(name = "oai_base_url") + private String oaiBaseUrl; - @Column(name = "format") - private String format; + @Column(name = "oai_format") + private String oaiFormat; - @Column(name = "set_spec") - private String setSpec; + @Column(name = "oai_set") + private String oaiSet; + + @Column(name = "oai_from") + private LocalDateTime oaiFrom; + + @Column(name = "oai_until") + private LocalDateTime oaiUntil; + + @Column(name = "ftp_server") + private String ftpServer; + + @Column(name = "ftp_dir") + private String ftpDir; @Column(name = "start_date") private LocalDateTime start; @@ -53,28 +65,60 @@ public class CollectionLogEntry implements Serializable { this.id = id; } - public String getBaseUrl() { - return baseUrl; + public String getOaiBaseUrl() { + return oaiBaseUrl; } - public void setBaseUrl(final String baseUrl) { - this.baseUrl = baseUrl; + public void setOaiBaseUrl(final String oaiBaseUrl) { + this.oaiBaseUrl = oaiBaseUrl; } - public String getFormat() { - return format; + public String getOaiFormat() { + return oaiFormat; } - public void setFormat(final String format) { - this.format = format; + public void setOaiFormat(final String oaiFormat) { + this.oaiFormat = oaiFormat; } - public String getSetSpec() { - return setSpec; + public String getOaiSet() { + return oaiSet; } - public void setSetSpec(final String setSpec) { - this.setSpec = setSpec; + public void setOaiSet(final String oaiSet) { + this.oaiSet = oaiSet; + } + + public LocalDateTime getOaiFrom() { + return oaiFrom; + } + + public void setOaiFrom(final LocalDateTime oaiFrom) { + this.oaiFrom = oaiFrom; + } + + public LocalDateTime getOaiUntil() { + return oaiUntil; + } + + public void setOaiUntil(final LocalDateTime oaiUntil) { + this.oaiUntil = oaiUntil; + } + + public String getFtpServer() { + return ftpServer; + } + + public void setFtpServer(final String ftpServer) { + this.ftpServer = ftpServer; + } + + public String getFtpDir() { + return ftpDir; + } + + public void setFtpDir(final String ftpDir) { + this.ftpDir = ftpDir; } public LocalDateTime getStart() { diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java deleted file mode 100644 index 95eb0fd..0000000 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/model/CollectionStatus.java +++ /dev/null @@ -1,98 +0,0 @@ -package eu.dnetlib.apps.oai2ftp.model; - -import java.io.Serializable; -import java.time.LocalDateTime; -import java.util.LinkedHashSet; - -public class CollectionStatus implements Serializable { - - private static final long serialVersionUID = -8467778040892221645L; - - private String id; - private String baseUrl; - private String format; - private String setSpec; - private LocalDateTime start; - private LocalDateTime end; - private ExecutionStatus executionStatus; - private long total; - private final LinkedHashSet calls = new LinkedHashSet<>(); - private String message; - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getBaseUrl() { - return baseUrl; - } - - public void setBaseUrl(final String baseUrl) { - this.baseUrl = baseUrl; - } - - public String getFormat() { - return format; - } - - public void setFormat(final String format) { - this.format = format; - } - - public String getSetSpec() { - return setSpec; - } - - public void setSetSpec(final String setSpec) { - this.setSpec = setSpec; - } - - public LocalDateTime getStart() { - return start; - } - - public void setStart(final LocalDateTime start) { - this.start = start; - } - - public LocalDateTime getEnd() { - return end; - } - - public void setEnd(final LocalDateTime end) { - this.end = end; - } - - public ExecutionStatus getExecutionStatus() { - return executionStatus; - } - - public void setExecutionStatus(final ExecutionStatus executionStatus) { - this.executionStatus = executionStatus; - } - - public long getTotal() { - return total; - } - - public void setTotal(final long total) { - this.total = total; - } - - public LinkedHashSet getCalls() { - return calls; - } - - public String getMessage() { - return message; - } - - public void setMessage(final String message) { - this.message = message; - } - -} diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java deleted file mode 100644 index e59ba90..0000000 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/CollectionJob.java +++ /dev/null @@ -1,46 +0,0 @@ -package eu.dnetlib.apps.oai2ftp.service; - -import java.time.LocalDateTime; -import java.util.function.BiConsumer; -import java.util.function.Consumer; - -import eu.dnetlib.apps.oai2ftp.model.CollectionStatus; -import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus; - -public class CollectionJob { - - private final CollectionStatus status; - private final BiConsumer saveRecord; - private final Consumer onEnd; - - public CollectionJob(final String id, final String baseUrl, final String format, final String setSpec, final BiConsumer saveRecord, - final Consumer onEnd) { - - this.status = new CollectionStatus(); - status.setId(id); - - status.setBaseUrl(baseUrl); - status.setFormat(format); - status.setSetSpec(setSpec); - - status.setStart(LocalDateTime.now()); - status.setEnd(null); - - status.setExecutionStatus(ExecutionStatus.READY); - status.setTotal(0); - - status.setMessage(""); - - this.saveRecord = saveRecord; - this.onEnd = onEnd; - } - - public void oaiCollect() { - // TODO - } - - public CollectionStatus getStatus() { - return status; - } - -} diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java index 4b6ef19..3e64209 100644 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/service/Oai2FtpService.java @@ -3,9 +3,9 @@ package eu.dnetlib.apps.oai2ftp.service; import java.time.Duration; import java.time.LocalDateTime; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Set; -import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -14,17 +14,22 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; +import org.dom4j.DocumentHelper; +import org.dom4j.Node; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; -import eu.dnetlib.apps.oai2ftp.model.CollectionStatus; +import eu.dnetlib.apps.oai2ftp.model.CollectionCall; +import eu.dnetlib.apps.oai2ftp.model.CollectionInfo; import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus; import eu.dnetlib.apps.oai2ftp.repository.CollectionLogEntryRepository; -import eu.dnetlib.apps.oai2ftp.utils.ConvertUtils; import eu.dnetlib.apps.oai2ftp.utils.FtpClientFactory; import eu.dnetlib.apps.oai2ftp.utils.FtpClientWrapper; +import eu.dnetlib.apps.oai2ftp.utils.HttpFetcher; +import eu.dnetlib.apps.oai2ftp.utils.SimpleUtils; @Service public class Oai2FtpService { @@ -33,73 +38,125 @@ public class Oai2FtpService { private final ExecutorService jobExecutor = Executors.newFixedThreadPool(100); - private final Map runningJobs = new LinkedHashMap<>(); + private final Map infoMap = new LinkedHashMap<>(); @Autowired private FtpClientFactory ftpClientFactory; @Value("${oai2ftp.conf.execution.expirationTime}") - private long fullStatusExpirationTime; // in hours + private long fullInfoExpirationTime; // in hours @Autowired private CollectionLogEntryRepository collectionLogEntryRepository; - public CollectionStatus startCollection(final String baseUrl, final String format, final String setSpec) { - final String jobId = generateNewJobId(); + public CollectionInfo startCollection(final String baseUrl, + final String format, + final String setSpec, + final LocalDateTime from, + final LocalDateTime until) { + final String jobId = SimpleUtils.generateNewJobId(); final FtpClientWrapper ftp = ftpClientFactory.newClientForJob(jobId); - final CollectionJob job = new CollectionJob(jobId, - baseUrl, - format, - setSpec, - (id, xml) -> ftp.saveFile(ConvertUtils.oaiIdToFilename(id), xml), - (status) -> { + final CollectionInfo info = new CollectionInfo(); + info.setId(jobId); + + info.setOaiBaseUrl(baseUrl); + info.setOaiFormat(format); + info.setOaiSet(setSpec); + info.setOaiFrom(from); + info.setOaiUntil(until); + + info.setStart(LocalDateTime.now()); + info.setEnd(null); + + info.setExecutionStatus(ExecutionStatus.READY); + info.setTotal(0); + info.setMessage(""); + + infoMap.put(jobId, info); + + jobExecutor.execute(() -> { + try { + info.setExecutionStatus(ExecutionStatus.RUNNING); + oaiCollect(baseUrl, format, setSpec, from, until, ftp, info); + info.setExecutionStatus(ExecutionStatus.COMPLETED); + } catch (final Throwable e) { + info.setExecutionStatus(ExecutionStatus.FAILED); + info.setMessage(e.getMessage()); + } finally { ftp.disconnect(); - collectionLogEntryRepository.save(ConvertUtils.statusToLog(status)); - }); + collectionLogEntryRepository.save(SimpleUtils.infoToLog(info)); + } + }); - runningJobs.put(jobId, job); - - jobExecutor.execute(() -> job.oaiCollect()); - - return job.getStatus(); - }; - - private String generateNewJobId() { - return "job-" + UUID.randomUUID(); + return info; } - public CollectionStatus getStatus(final String jobId) { - final CollectionJob job = runningJobs.get(jobId); - if (job != null) { - return job.getStatus(); + public void oaiCollect(final String baseUrl, + final String format, + final String setSpec, + final LocalDateTime from, + final LocalDateTime until, + final FtpClientWrapper ftp, + final CollectionInfo info) + throws Exception { + + String url = SimpleUtils.oaiFirstUrl(baseUrl, format, setSpec, from, until); + + while (StringUtils.isNotBlank(url)) { + final CollectionCall call = new CollectionCall(); + call.setUrl(url); + info.getCalls().add(call); + + final String xml = HttpFetcher.download(call); + final Document doc = DocumentHelper.parseText(xml); + + final List records = doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']"); + call.setNumberOfRecords(records.size()); + + for (final Node n : records) { + final String id = n.valueOf("/*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']"); + ftp.saveFile(SimpleUtils.oaiIdToFilename(id), n.asXML()); + info.setTotal(info.getTotal() + 1); + } + + final String rtoken = doc.valueOf("//*[local-name()='resumptionToken']").trim(); + + url = SimpleUtils.oaiNextUrl(baseUrl, rtoken); + } + } + + public CollectionInfo getStatus(final String jobId) { + final CollectionInfo info = infoMap.get(jobId); + if (info != null) { + return info; } else { return collectionLogEntryRepository.findById(jobId) - .map(ConvertUtils::logToStatus) + .map(SimpleUtils::logToInfo) .orElse(null); } } @Scheduled(fixedRate = 30, timeUnit = TimeUnit.MINUTES) public void cronCleanJobs() throws Exception { - final Set toDelete = runningJobs.entrySet() + final Set toDelete = infoMap.entrySet() .stream() .filter(e -> { - final ExecutionStatus status = e.getValue().getStatus().getExecutionStatus(); + final ExecutionStatus status = e.getValue().getExecutionStatus(); return status == ExecutionStatus.COMPLETED || status == ExecutionStatus.FAILED; }) .filter(e -> { - final LocalDateTime end = e.getValue().getStatus().getEnd(); + final LocalDateTime end = e.getValue().getEnd(); final long hours = Duration.between(end, LocalDateTime.now()).toHours(); - return Math.abs(hours) > fullStatusExpirationTime; + return Math.abs(hours) > fullInfoExpirationTime; }) .map(e -> e.getKey()) .collect(Collectors.toSet()); log.info("Cleaning expired jobs: " + StringUtils.join(toDelete, ", ")); - toDelete.forEach(runningJobs::remove); + toDelete.forEach(infoMap::remove); } } diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java deleted file mode 100644 index cf33c64..0000000 --- a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/ConvertUtils.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.dnetlib.apps.oai2ftp.utils; - -import org.apache.commons.codec.digest.DigestUtils; - -import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry; -import eu.dnetlib.apps.oai2ftp.model.CollectionStatus; -import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus; - -public class ConvertUtils { - - public static CollectionStatus logToStatus(final CollectionLogEntry log) { - final CollectionStatus status = new CollectionStatus(); - status.setId(log.getId()); - status.setBaseUrl(log.getBaseUrl()); - status.setFormat(log.getFormat()); - status.setSetSpec(log.getSetSpec()); - status.setStart(log.getStart()); - status.setEnd(log.getEnd()); - status.setExecutionStatus(log.isSuccess() ? ExecutionStatus.COMPLETED : ExecutionStatus.FAILED); - status.setTotal(log.getTotal()); - status.setMessage(log.getMessage()); - return status; - } - - public static CollectionLogEntry statusToLog(final CollectionStatus status) { - final CollectionLogEntry log = new CollectionLogEntry(); - log.setId(status.getId()); - log.setBaseUrl(status.getBaseUrl()); - log.setFormat(status.getFormat()); - log.setSetSpec(status.getSetSpec()); - log.setStart(status.getStart()); - log.setEnd(status.getEnd()); - log.setSuccess(status.getExecutionStatus() == ExecutionStatus.COMPLETED); - log.setTotal(status.getTotal()); - log.setNumberOfCalls(status.getCalls().size()); - log.setMessage(status.getMessage()); - return log; - } - - public static String oaiIdToFilename(final String id) { - return DigestUtils.md5Hex(id) + ".xml"; - } -} diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java new file mode 100644 index 0000000..38188e3 --- /dev/null +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/HttpFetcher.java @@ -0,0 +1,36 @@ +package eu.dnetlib.apps.oai2ftp.utils; + +import java.io.IOException; + +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.core5.http.io.entity.EntityUtils; + +import eu.dnetlib.apps.oai2ftp.model.CollectionCall; +import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus; + +public class HttpFetcher { + + public static String download(final CollectionCall call) throws IOException { + + try (final CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + call.setStatus(ExecutionStatus.RUNNING); + return httpClient.execute(new HttpGet(call.getUrl()), response -> { + final int code = response.getCode(); + call.setResponseCode(response.getCode()); + + if (code >= 200 && code < 300 && response.getEntity() != null) { + call.setStatus(ExecutionStatus.COMPLETED); + return EntityUtils.toString(response.getEntity()); + } else { + call.setStatus(ExecutionStatus.FAILED); + throw new IOException("Invalid http response"); + } + }); + } catch (final Throwable e) { + call.setStatus(ExecutionStatus.FAILED); + throw new IOException(e); + } + } +} diff --git a/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java new file mode 100644 index 0000000..d71122c --- /dev/null +++ b/src/main/java/eu/dnetlib/apps/Oai2ftp/utils/SimpleUtils.java @@ -0,0 +1,98 @@ +package eu.dnetlib.apps.oai2ftp.utils; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.UUID; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.lang3.StringUtils; + +import eu.dnetlib.apps.oai2ftp.model.CollectionInfo; +import eu.dnetlib.apps.oai2ftp.model.CollectionLogEntry; +import eu.dnetlib.apps.oai2ftp.model.ExecutionStatus; + +public class SimpleUtils { + + private static final String UTF_8 = StandardCharsets.UTF_8.toString(); + + private static final DateTimeFormatter oaiDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + + public static String generateNewJobId() { + return "job-" + UUID.randomUUID(); + } + + public static String oaiFirstUrl(final String baseUrl, final String format, final String setSpec, final LocalDateTime from, final LocalDateTime until) { + try { + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(format, UTF_8); + + if (setSpec != null && !setSpec.isEmpty()) { + url += "&set=" + URLEncoder.encode(setSpec, UTF_8); + } + if (from != null) { + url += "&from=" + URLEncoder.encode(from.format(oaiDateFormatter), UTF_8); + } + if (until != null) { + url += "&until=" + URLEncoder.encode(until.format(oaiDateFormatter), UTF_8); + } + return url; + } catch (final UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + public static String oaiNextUrl(final String baseUrl, final String rtoken) { + try { + if (StringUtils.isNotBlank(rtoken)) { + return baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(rtoken, UTF_8); + } else { + return null; + } + } catch (final UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + public static CollectionInfo logToInfo(final CollectionLogEntry log) { + final CollectionInfo info = new CollectionInfo(); + info.setId(log.getId()); + info.setOaiBaseUrl(log.getOaiBaseUrl()); + info.setOaiFormat(log.getOaiFormat()); + info.setOaiSet(log.getOaiSet()); + info.setOaiFrom(log.getOaiFrom()); + info.setOaiUntil(log.getOaiUntil()); + info.setFtpServer(log.getFtpServer()); + info.setFtpDir(log.getFtpDir()); + info.setStart(log.getStart()); + info.setEnd(log.getEnd()); + info.setExecutionStatus(log.isSuccess() ? ExecutionStatus.COMPLETED : ExecutionStatus.FAILED); + info.setTotal(log.getTotal()); + info.setMessage(log.getMessage()); + return info; + } + + public static CollectionLogEntry infoToLog(final CollectionInfo info) { + final CollectionLogEntry log = new CollectionLogEntry(); + log.setId(info.getId()); + log.setOaiBaseUrl(info.getOaiBaseUrl()); + log.setOaiFormat(info.getOaiFormat()); + log.setOaiSet(info.getOaiSet()); + log.setOaiFrom(info.getOaiFrom()); + log.setOaiUntil(info.getOaiUntil()); + log.setFtpServer(info.getFtpServer()); + log.setFtpDir(info.getFtpDir()); + log.setStart(info.getStart()); + log.setEnd(info.getEnd()); + log.setSuccess(info.getExecutionStatus() == ExecutionStatus.COMPLETED); + log.setTotal(info.getTotal()); + log.setNumberOfCalls(info.getCalls().size()); + log.setMessage(info.getMessage()); + return log; + } + + public static String oaiIdToFilename(final String id) { + return DigestUtils.md5Hex(id) + ".xml"; + } +}