From 777536ce9180dc7fe6e4c5351d312fac5bd45aef Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Jul 2021 11:23:40 +0200 Subject: [PATCH] [aggregation] string values used as regular expressions in the OAI collection classes are defined in a single point as constants, to be reused across the code (PR#122) --- .../dhp/collection/plugin/oai/OaiCollectorPlugin.java | 7 +++++-- .../eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 67fd352a3..9918e4abe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; public class OaiCollectorPlugin implements CollectorPlugin { + public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}"; + public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"; + private static final String FORMAT_PARAM = "format"; private static final String OAI_SET_PARAM = "set"; private static final Object OAI_FROM_DATE_PARAM = "fromDate"; @@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { throw new CollectorException("Param 'mdFormat' is null or empty"); } - if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); } - if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index c044e02db..75dd746ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -107,10 +107,12 @@ public class OaiIterator implements Iterator { if (set != null && !set.isEmpty()) { url += "&set=" + URLEncoder.encode(set, "UTF-8"); } - if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) + || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); } - if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) + || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); } log.info("Start harvesting using url: " + url);