diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 1909ddcca..ccd42225a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -16,6 +16,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; +/** +* PacePerson tries to derive information from the fullname string of an author. +* Such informations are Names, Surnames an Fullname split into terms. It provides also an additional field for +* the original data. +* The calculation of the names and the surnames is not always possible. When it is impossible to assert which are the +* names and the surnames, the lists are empty. +* */ public class PacePerson { private static final String UTF8 = "UTF-8"; @@ -26,10 +33,19 @@ public class PacePerson { private static Set particles = null; + /** + * Capitalizes a string + * + * @param s the string to capitalize + * @return the input string with capital letter + * */ public static final String capitalize(final String s) { return WordUtils.capitalize(s.toLowerCase(), ' ', '-'); } + /** + * Adds a dot to a string with length equals to 1 + * */ public static final String dotAbbreviations(final String s) { return s.length() == 1 ? s + "." : s; } @@ -46,6 +62,12 @@ public class PacePerson { return h; } + /** + * The constructor of the class. It fills the fields of the class basing on the input fullname. + * + * @param s the input string (fullname of the author) + * @param aggressive set the string normalization type + * */ public PacePerson(String s, final boolean aggressive) { original = s; s = Normalizer.normalize(s, Normalizer.Form.NFD); @@ -64,6 +86,7 @@ public class PacePerson { // s = s.replaceAll("[\\W&&[^,-]]", ""); } + //if the string contains a comma, it can derive surname and name by splitting on it if (s.contains(",")) { final String[] arr = s.split(","); if (arr.length == 1) { @@ -74,21 +97,23 @@ public class PacePerson { fullname.addAll(surname); fullname.addAll(name); } - } else { + } else { //otherwise, it should rely on CAPS terms and short terms fullname = splitTerms(s); int lastInitialPosition = fullname.size(); boolean hasSurnameInUpperCase = false; + //computes lastInitialPosition and hasSurnameInUpperCase for (int i = 0; i < fullname.size(); i++) { final String term = fullname.get(i); if (term.length() == 1) { - lastInitialPosition = i; + lastInitialPosition = i; //first word in the name longer than 1 (to avoid name with dots) } else if (term.equals(term.toUpperCase())) { - hasSurnameInUpperCase = true; + hasSurnameInUpperCase = true; //if one of the words is CAPS } } + //manages particular cases of fullnames if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini name = fullname.subList(0, lastInitialPosition + 1); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java new file mode 100644 index 000000000..7ee60a0aa --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -0,0 +1,26 @@ +package eu.dnetlib.dhp.common; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class PacePersonTest { + + @Test + public void pacePersonTest1(){ + + PacePerson p = new PacePerson("Artini, Michele", false); + assertEquals("Artini",p.getSurnameString()); + assertEquals("Michele", p.getNameString()); + assertEquals("Artini, Michele", p.getNormalisedFullname()); + } + + @Test + public void pacePersonTest2(){ + PacePerson p = new PacePerson("Michele G. Artini", false); + assertEquals("Artini, Michele G.", p.getNormalisedFullname()); + assertEquals("Michele G", p.getNameString()); + assertEquals("Artini", p.getSurnameString()); + } + +}