forked from D-Net/dnet-hadoop
[aggregation] string values used as regular expressions in the OAI collection classes are defined in a single point as constants, to be reused across the code (PR#122)
This commit is contained in:
parent
bc014023c8
commit
777536ce91
|
@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
|||
|
||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
|
||||
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
|
||||
|
||||
private static final String FORMAT_PARAM = "format";
|
||||
private static final String OAI_SET_PARAM = "set";
|
||||
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
||||
|
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
|||
throw new CollectorException("Param 'mdFormat' is null or empty");
|
||||
}
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||
}
|
||||
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||
}
|
||||
|
||||
|
|
|
@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
|
|||
if (set != null && !set.isEmpty()) {
|
||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||
}
|
||||
if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
||||
}
|
||||
if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
||||
}
|
||||
log.info("Start harvesting using url: " + url);
|
||||
|
|
Loading…
Reference in New Issue