1
0
Fork 0

[aggregation] string values used as regular expressions in the OAI collection classes are defined in a single point as constants, to be reused across the code (PR#122)

This commit is contained in:
Claudio Atzori 2021-07-07 11:23:40 +02:00
parent bc014023c8
commit 777536ce91
2 changed files with 9 additions and 4 deletions

View File

@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
public class OaiCollectorPlugin implements CollectorPlugin {
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
private static final String FORMAT_PARAM = "format";
private static final String OAI_SET_PARAM = "set";
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
throw new CollectorException("Param 'mdFormat' is null or empty");
}
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
}
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
}

View File

@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8");
}
if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
}
if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
}
log.info("Start harvesting using url: " + url);