diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java index d0f5a3b27e..c0c451b88c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java @@ -7,7 +7,7 @@ import java.util.regex.Pattern; // https://researchguides.stevens.edu/c.php?g=442331&p=6577176 public class PmidCleaningRule { - public static final Pattern PATTERN = Pattern.compile("[1-9]{1,8}"); + public static final Pattern PATTERN = Pattern.compile("0*(\\d{1,8})"); public static String clean(String pmid) { String s = pmid @@ -17,7 +17,7 @@ public class PmidCleaningRule { final Matcher m = PATTERN.matcher(s); if (m.find()) { - return m.group(); + return m.group(1); } return ""; } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java index 9562adf7e0..295eac85f7 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java @@ -9,10 +9,16 @@ class PmidCleaningRuleTest { @Test void testCleaning() { + // leading zeros are removed assertEquals("1234", PmidCleaningRule.clean("01234")); + // tolerant to spaces in the middle assertEquals("1234567", PmidCleaningRule.clean("0123 4567")); + // stop parsing at first not numerical char assertEquals("123", PmidCleaningRule.clean("0123x4567")); + // invalid id leading to empty result assertEquals("", PmidCleaningRule.clean("abc")); + // valid id with zeroes in the number + assertEquals("20794075", PmidCleaningRule.clean("20794075")); } }