[Cleaning] updated cleaning rule for DOIs

pull/90/head
Claudio Atzori 3 years ago
parent f667e94a31
commit 34d653de41

@@ -15,7 +15,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class CleaningFunctions {
/**
 * Matches, at the start of a string, an {@code http://} or {@code https://} scheme followed by
 * one of the hosts {@code doi.org}, {@code dx.doi.org} or {@code handle.test.datacite.org} and a
 * trailing {@code /}.
 */
public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
/** Matches the literal text {@code 10.} anchored at the start of a string. */
public static final String DOI_PREFIX_REGEX = "^10\\.";
/**
 * Captures four groups of four characters each (digits; the last group also accepts {@code x}).
 * Consecutive groups must be separated by at least one of the characters {@code - – — − =}, and
 * arbitrary other characters are tolerated before and between the groups.
 */
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
// NOTE(review): presumably the length of the canonical dddd-dddd-dddd-dddd form
// (16 characters + 3 separators); its usage is not visible here -- confirm.
public static final int ORCID_LEN = 19;
@@ -308,7 +308,7 @@ public class CleaningFunctions {
// TODO add cleaning for more PID types as needed
case "doi":
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, ""));
pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
break;
}
return pid;

Loading…
Cancel
Save