From fa8c5bcd390d7283f10b03e36fc4e67d17592839 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 11 Jun 2020 12:19:32 +0200 Subject: [PATCH 1/2] javadoc for the PacePerson class and implementation of a unit test --- .../eu/dnetlib/dhp/common/PacePerson.java | 31 +++++++++++++++++-- .../eu/dnetlib/dhp/common/PacePersonTest.java | 26 ++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 1909ddcca6..ccd42225ae 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -16,6 +16,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; +/** +* PacePerson tries to derive information from the fullname string of an author. +* This information consists of the names, the surnames and the fullname split into terms. It also provides an additional field for +* the original data. +* The calculation of the names and the surnames is not always possible. When it is impossible to determine which terms are the +* names and which are the surnames, the lists are empty. +* */ public class PacePerson { private static final String UTF8 = "UTF-8"; @@ -26,10 +33,19 @@ public class PacePerson { private static Set particles = null; + /** + * Lower-cases a string and then capitalizes its words, using space and hyphen as word delimiters + * + * @param s the string to capitalize + * @return the capitalized string + * */ public static final String capitalize(final String s) { return WordUtils.capitalize(s.toLowerCase(), ' ', '-'); } + /** + * Appends a dot to a string whose length is equal to 1; any other string is returned unchanged + * */ public static final String dotAbbreviations(final String s) { return s.length() == 1 ? s + "." : s; } @@ -46,6 +62,12 @@ public class PacePerson { return h; } + /** + * The constructor of the class.
It fills the fields of the class based on the input fullname. + * + * @param s the input string (fullname of the author) + * @param aggressive selects the string normalization type + * */ public PacePerson(String s, final boolean aggressive) { original = s; s = Normalizer.normalize(s, Normalizer.Form.NFD); @@ -64,6 +86,7 @@ public class PacePerson { // s = s.replaceAll("[\\W&&[^,-]]", ""); } + //if the string contains a comma, surname and name can be derived by splitting on it if (s.contains(",")) { final String[] arr = s.split(","); if (arr.length == 1) { @@ -74,21 +97,23 @@ public class PacePerson { fullname.addAll(surname); fullname.addAll(name); } - } else { + } else { //otherwise, it has to rely on upper-case terms and single-character terms fullname = splitTerms(s); int lastInitialPosition = fullname.size(); boolean hasSurnameInUpperCase = false; + //computes lastInitialPosition and hasSurnameInUpperCase for (int i = 0; i < fullname.size(); i++) { final String term = fullname.get(i); if (term.length() == 1) { - lastInitialPosition = i; + lastInitialPosition = i; //index of the last term of length 1 found so far (i.e. an initial) } else if (term.equals(term.toUpperCase())) { - hasSurnameInUpperCase = true; + hasSurnameInUpperCase = true; //at least one term longer than 1 is entirely upper case } } + //handles the particular cases of fullnames if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G.
Artini name = fullname.subList(0, lastInitialPosition + 1); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java new file mode 100644 index 0000000000..7ee60a0aa4 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -0,0 +1,26 @@ +package eu.dnetlib.dhp.common; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class PacePersonTest { + + @Test + public void pacePersonTest1(){ + + PacePerson p = new PacePerson("Artini, Michele", false); + assertEquals("Artini",p.getSurnameString()); + assertEquals("Michele", p.getNameString()); + assertEquals("Artini, Michele", p.getNormalisedFullname()); + } + + @Test + public void pacePersonTest2(){ + PacePerson p = new PacePerson("Michele G. Artini", false); + assertEquals("Artini, Michele G.", p.getNormalisedFullname()); + assertEquals("Michele G", p.getNameString()); + assertEquals("Artini", p.getSurnameString()); + } + +} From e79943965b23e36936889613dc2ba8b7691f1740 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 11 Jun 2020 12:49:31 +0200 Subject: [PATCH 2/2] Fixes #5604: field oamandatepublications in XML --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index d950a816d9..21ffd69920 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -896,6 +896,9 @@ public class XmlRecordFactory implements Serializable { if 
(p.getContracttype() != null) { metadata.add(XmlSerializationUtils.mapQualifier("contracttype", p.getContracttype())); } + if (p.getOamandatepublications() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("oamandatepublications", p.getOamandatepublications().getValue())); + } if (p.getEcsc39() != null) { metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue())); }