forked from D-Net/dnet-hadoop
[aggregation] String values used as regular expressions in the OAI collection classes are now defined in one place as constants, so they can be reused across the code (PR#122)
This commit is contained in:
parent
bc014023c8
commit
777536ce91
|
@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
|
||||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
|
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
|
||||||
|
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
|
||||||
|
|
||||||
private static final String FORMAT_PARAM = "format";
|
private static final String FORMAT_PARAM = "format";
|
||||||
private static final String OAI_SET_PARAM = "set";
|
private static final String OAI_SET_PARAM = "set";
|
||||||
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
||||||
|
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
throw new CollectorException("Param 'mdFormat' is null or empty");
|
throw new CollectorException("Param 'mdFormat' is null or empty");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
||||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
||||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
|
||||||
if (set != null && !set.isEmpty()) {
|
if (set != null && !set.isEmpty()) {
|
||||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||||
}
|
}
|
||||||
if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||||
|
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
||||||
}
|
}
|
||||||
if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||||
|
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
||||||
}
|
}
|
||||||
log.info("Start harvesting using url: " + url);
|
log.info("Start harvesting using url: " + url);
|
||||||
|
|
Loading…
Reference in New Issue