diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 45e3f84b1..183f68904 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -1,13 +1,14 @@ package eu.dnetlib.dhp.schema.oaf.utils; +import java.io.Serializable; +import java.util.Objects; + +import org.apache.commons.lang.StringUtils; + import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; -import org.apache.commons.lang.StringUtils; - -import java.io.Serializable; -import java.util.Objects; /** * Factory class for OpenAIRE identifiers in the Graph @@ -16,7 +17,8 @@ public class IdentifierFactory implements Serializable { public static final String ID_SEPARATOR = "::"; public static final String ID_PREFIX_SEPARATOR = "|"; - public final static String ID_REGEX = "^[0-9][0-9]\\"+ID_PREFIX_SEPARATOR+".{12}"+ID_SEPARATOR+"[a-zA-Z0-9]{32}$"; + public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR + + "[a-zA-Z0-9]{32}$"; public static final int ID_PREFIX_LEN = 12; public static String createIdentifier(T entity) { @@ -26,18 +28,18 @@ public class IdentifierFactory implements Serializable { } return entity - .getPid() - .stream() - .filter(s -> Objects.nonNull(s.getQualifier())) - .filter(s -> PidType.isValid(s.getQualifier().getClassid())) - .min(new PidComparator<>(entity)) - .map(s -> idFromPid(entity, s)) - .map(IdentifierFactory::verifyIdSyntax) - .orElseGet(entity::getId); + .getPid() + .stream() + .filter(s -> Objects.nonNull(s.getQualifier())) + .filter(s -> PidType.isValid(s.getQualifier().getClassid())) + .min(new PidComparator<>(entity)) + .map(s -> idFromPid(entity, s)) + .map(IdentifierFactory::verifyIdSyntax) + .orElseGet(entity::getId); } private static String verifyIdSyntax(String s) { - if(StringUtils.isBlank(s) || !s.matches(ID_REGEX)) { + if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) { throw new RuntimeException(String.format("malformed id: '%s'", s)); } else { return s; @@ -46,16 +48,16 @@ public class IdentifierFactory implements Serializable { private static String idFromPid(T entity, StructuredProperty s) { return new StringBuilder() - .append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR)) - .append(ID_PREFIX_SEPARATOR) - .append(createPrefix(s.getQualifier().getClassid())) - .append(ID_SEPARATOR) - .append(DHPUtils.md5(normalizePidValue(s.getValue()))) - .toString(); + .append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR)) + .append(ID_PREFIX_SEPARATOR) + .append(createPrefix(s.getQualifier().getClassid())) + .append(ID_SEPARATOR) + .append(DHPUtils.md5(normalizePidValue(s.getValue()))) + .toString(); } private static String normalizePidValue(String value) { - //TODO more aggressive cleaning? keep only alphanum and punctuation? + // TODO more aggressive cleaning? keep only alphanum and punctuation? return value.toLowerCase().replaceAll(" ", ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java index 87af06ec2..0e6a694de 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java @@ -1,17 +1,18 @@ + package eu.dnetlib.dhp.schema.oaf.utils; import org.apache.commons.lang3.EnumUtils; public enum PidType { - // Result - doi, pmid, pmc, handle, arXiv, NCID, GBIF, nct, pdb, + // Result + doi, pmid, pmc, handle, arXiv, NCID, GBIF, nct, pdb, - // Organization + // Organization GRID, mag_id, urn; - public static boolean isValid(String type) { - return EnumUtils.isValidEnum(PidType.class, type); - } - + public static boolean isValid(String type) { + return EnumUtils.isValidEnum(PidType.class, type); + } + } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 2d34c58c8..d458c613e 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -1,43 +1,47 @@ + package eu.dnetlib.dhp.schema.oaf.utils; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.utils.DHPUtils; -import org.apache.commons.io.IOUtils; - -import org.junit.jupiter.api.Test; - -import java.io.IOException; - import static org.junit.jupiter.api.Assertions.*; +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.utils.DHPUtils; + public class IdentifierFactoryTest { - private static ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - @Test - public void testCreateIdentifierForPublication() throws IOException { + @Test + public void testCreateIdentifierForPublication() throws IOException { - verifyIdentifier("publication_doi.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013")); - verifyIdentifier("publication_pmc.json", "50|pmc_________::" + DHPUtils.md5("21459329")); - verifyIdentifier("publication_urn.json", "50|urn_________::" + DHPUtils.md5("urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2")); + verifyIdentifier("publication_doi.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013")); + verifyIdentifier("publication_pmc.json", "50|pmc_________::" + DHPUtils.md5("21459329")); + verifyIdentifier( + "publication_urn.json", + "50|urn_________::" + DHPUtils.md5("urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2")); - final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; - verifyIdentifier("publication_3.json", defaultID); - verifyIdentifier("publication_4.json", defaultID); - verifyIdentifier("publication_5.json", defaultID); - } + final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; + verifyIdentifier("publication_3.json", defaultID); + verifyIdentifier("publication_4.json", defaultID); + verifyIdentifier("publication_5.json", defaultID); + } - protected void verifyIdentifier(String filename, String expectedID) throws IOException { - final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); - final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); + protected void verifyIdentifier(String filename, String expectedID) throws IOException { + final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); + final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); - String id = IdentifierFactory.createIdentifier(pub); - - assertNotNull(id); - assertEquals(expectedID, id); - } + String id = IdentifierFactory.createIdentifier(pub); + assertNotNull(id); + assertEquals(expectedID, id); + } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java index aacb228db..101d46d35 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java @@ -10,7 +10,6 @@ import java.util.Objects; * - private String description to store the description of the programme */ - public class H2020Programme implements Serializable { private String code; private String description; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 73c5613ea..443c18230 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -1,14 +1,14 @@ package eu.dnetlib.dhp.schema.oaf; -import eu.dnetlib.dhp.schema.common.LicenseComparator; - import java.io.Serializable; import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.LicenseComparator; + public class Result extends OafEntity implements Serializable { private List measures; @@ -247,7 +247,8 @@ public class Result extends OafEntity implements Serializable { instance = mergeLists(instance, r.getInstance()); - if (r.getBestaccessright() != null && new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0) + if (r.getBestaccessright() != null + && new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0) bestaccessright = r.getBestaccessright(); if (r.getResulttype() != null && compareTrust(this, r) < 0) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 2cf023fb9..7f0ca983f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -177,14 +177,12 @@ public class PrepareProgramme { prepareClassification(h2020Programmes); - h2020Programmes.map(csvProgramme -> OBJECT_MAPPER.writeValueAsString(csvProgramme)) - .saveAsTextFile(outputPath); - - + h2020Programmes + .map(csvProgramme -> OBJECT_MAPPER.writeValueAsString(csvProgramme)) + .saveAsTextFile(outputPath); } - private static void prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map(value -> new Tuple2<>(value.getCode(), value.getTitle())) @@ -255,7 +253,7 @@ public class PrepareProgramme { } h2020Programmes.foreach(csvProgramme -> { if (!csvProgramme.getCode().endsWith(".") && !csvProgramme.getCode().contains("Euratom") - && !csvProgramme.getCode().equals("H2020-EC")) + && !csvProgramme.getCode().equals("H2020-EC")) csvProgramme.setClassification(map.get(csvProgramme.getCode() + ".")); else csvProgramme.setClassification(map.get(csvProgramme.getCode())); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java index c51c10876..06f8c2fef 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java @@ -10,7 +10,6 @@ public class ProjectSubset implements Serializable { private String code; - public String getCode() { return code; }