Fix cleaning of Pmid where parsing of numbers stopped at the first non-leading '0' character

This commit is contained in:
Giambattista Bloisi 2023-10-06 12:35:54 +02:00 committed by Claudio Atzori
parent 6856ab28ab
commit 2f3cf6d0e7
2 changed files with 8 additions and 2 deletions

View File

@ -7,7 +7,7 @@ import java.util.regex.Pattern;
// https://researchguides.stevens.edu/c.php?g=442331&p=6577176 // https://researchguides.stevens.edu/c.php?g=442331&p=6577176
public class PmidCleaningRule { public class PmidCleaningRule {
public static final Pattern PATTERN = Pattern.compile("[1-9]{1,8}"); public static final Pattern PATTERN = Pattern.compile("0*(\\d{1,8})");
public static String clean(String pmid) { public static String clean(String pmid) {
String s = pmid String s = pmid
@ -17,7 +17,7 @@ public class PmidCleaningRule {
final Matcher m = PATTERN.matcher(s); final Matcher m = PATTERN.matcher(s);
if (m.find()) { if (m.find()) {
return m.group(); return m.group(1);
} }
return ""; return "";
} }

View File

@ -9,10 +9,16 @@ class PmidCleaningRuleTest {
@Test @Test
void testCleaning() { void testCleaning() {
// leading zeros are removed
assertEquals("1234", PmidCleaningRule.clean("01234")); assertEquals("1234", PmidCleaningRule.clean("01234"));
// tolerant to spaces in the middle
assertEquals("1234567", PmidCleaningRule.clean("0123 4567")); assertEquals("1234567", PmidCleaningRule.clean("0123 4567"));
// stop parsing at the first non-numeric char
assertEquals("123", PmidCleaningRule.clean("0123x4567")); assertEquals("123", PmidCleaningRule.clean("0123x4567"));
// invalid id leading to empty result
assertEquals("", PmidCleaningRule.clean("abc")); assertEquals("", PmidCleaningRule.clean("abc"));
// valid id with zeroes in the number
assertEquals("20794075", PmidCleaningRule.clean("20794075"));
} }
} }