IdentifierFactory considers only DOIs matching a given regex
This commit is contained in:
parent
86d6fbe95b
commit
ea2a0ea949
|
@ -12,12 +12,10 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import com.clearspring.analytics.util.Lists;
|
import com.clearspring.analytics.util.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
public static final String DOI_URL_PREFIX = "^http(s?):\\/\\/(dx\\.)?doi\\.org\\/";
|
public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
|
||||||
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
||||||
public static final String NONE = "none";
|
public static final String NONE = "none";
|
||||||
|
|
||||||
|
@ -231,7 +229,7 @@ public class CleaningFunctions {
|
||||||
|
|
||||||
// TODO add cleaning for more PID types as needed
|
// TODO add cleaning for more PID types as needed
|
||||||
case "doi":
|
case "doi":
|
||||||
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX, ""));
|
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, ""));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return pid;
|
return pid;
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
@ -21,6 +22,12 @@ public class IdentifierFactory implements Serializable {
|
||||||
public static final String ID_PREFIX_SEPARATOR = "|";
|
public static final String ID_PREFIX_SEPARATOR = "|";
|
||||||
public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
|
public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
|
||||||
+ "[a-zA-Z0-9]{32}$";
|
+ "[a-zA-Z0-9]{32}$";
|
||||||
|
|
||||||
|
public final static String DOI_REGEX = "(^10\\.[0-9]{4,9}\\/[-._;()\\/:a-zA-Z0-9]+$)|" +
|
||||||
|
"(^10\\.1002\\/[^\\s]+$)|" +
|
||||||
|
"(^10\\.1021\\/[a-zA-Z0-9_][a-zA-Z0-9_][0-9]++$)|" +
|
||||||
|
"(^10\\.1207\\/[a-zA-Z0-9_]+\\&[0-9]+_[0-9]+$)";
|
||||||
|
|
||||||
public static final int ID_PREFIX_LEN = 12;
|
public static final int ID_PREFIX_LEN = 12;
|
||||||
public static final String NONE = "none";
|
public static final String NONE = "none";
|
||||||
|
|
||||||
|
@ -48,10 +55,21 @@ public class IdentifierFactory implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static boolean pidFilter(StructuredProperty s) {
|
protected static boolean pidFilter(StructuredProperty s) {
|
||||||
return Objects.nonNull(s.getQualifier()) &&
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
PidType.isValid(s.getQualifier().getClassid()) &&
|
StringUtils.isBlank(StringUtils.trim(s.getValue()))) {
|
||||||
StringUtils.isNotBlank(StringUtils.trim(s.getValue())) &&
|
return false;
|
||||||
!NONE.equals(StringUtils.trim(StringUtils.lowerCase(s.getValue())));
|
}
|
||||||
|
try {
|
||||||
|
switch (PidType.valueOf(s.getQualifier().getClassid())) {
|
||||||
|
case doi:
|
||||||
|
final String doi = StringUtils.trim(StringUtils.lowerCase(s.getValue()));
|
||||||
|
return doi.matches(DOI_REGEX);
|
||||||
|
default:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String verifyIdSyntax(String s) {
|
private static String verifyIdSyntax(String s) {
|
||||||
|
|
Loading…
Reference in New Issue