From d4c3476152df16fc38aa18e9563d6db9e506f4f1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 May 2021 11:08:54 +0200 Subject: [PATCH] mapping datasource.journal only when an issn is available, null otherwise --- .../dhp/schema/oaf/utils/OafMapperUtils.java | 368 ++++++++++++++++++ .../schema/oaf/utils/OafMapperUtilsTest.java | 69 ++++ .../dhp/schema/oaf/utils/dataset_1.json | 1 + .../dhp/schema/oaf/utils/dataset_2.json | 1 + .../dhp/schema/oaf/utils/publication_1.json | 1 + .../dhp/schema/oaf/utils/publication_2.json | 1 + .../raw/MigrateDbEntitiesApplication.java | 11 - 7 files changed, 441 insertions(+), 11 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java new file mode 100644 index 000000000..c2dae2550 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -0,0 +1,368 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; + +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import eu.dnetlib.dhp.schema.common.AccessRightComparator; +import 
eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; + +public class OafMapperUtils { + + public static Oaf merge(final Oaf left, final Oaf right) { + if (ModelSupport.isSubClass(left, OafEntity.class)) { + return mergeEntities((OafEntity) left, (OafEntity) right); + } else if (ModelSupport.isSubClass(left, Relation.class)) { + ((Relation) left).mergeFrom((Relation) right); + } else { + throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + } + return left; + } + + public static OafEntity mergeEntities(OafEntity left, OafEntity right) { + if (ModelSupport.isSubClass(left, Result.class)) { + return mergeResults((Result) left, (Result) right); + } else if (ModelSupport.isSubClass(left, Datasource.class)) { + ((Datasource) left).mergeFrom((Datasource) right); + } else if (ModelSupport.isSubClass(left, Organization.class)) { + ((Organization) left).mergeFrom((Organization) right); + } else if (ModelSupport.isSubClass(left, Project.class)) { + ((Project) left).mergeFrom((Project) right); + } else { + throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + } + return left; + } + + public static Result mergeResults(Result left, Result right) { + if (new ResultTypeComparator().compare(left, right) < 0) { + left.mergeFrom(right); + return left; + } else { + right.mergeFrom(left); + return right; + } + } + + public static KeyValue keyValue(final String k, final String v) { + final KeyValue kv = new KeyValue(); + kv.setKey(k); + kv.setValue(v); + return kv; + } + + public static List listKeyValues(final String... 
s) { + if (s.length % 2 > 0) { + throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + } + + final List list = new ArrayList<>(); + for (int i = 0; i < s.length; i += 2) { + list.add(keyValue(s[i], s[i + 1])); + } + return list; + } + + public static Field field(final T value, final DataInfo info) { + if (value == null || StringUtils.isBlank(value.toString())) { + return null; + } + + final Field field = new Field<>(); + field.setValue(value); + field.setDataInfo(info); + return field; + } + + public static List> listFields(final DataInfo info, final String... values) { + return Arrays + .stream(values) + .map(v -> field(v, info)) + .filter(Objects::nonNull) + .filter(distinctByKey(f -> f.getValue())) + .collect(Collectors.toList()); + } + + public static List> listFields(final DataInfo info, final List values) { + return values + .stream() + .map(v -> field(v, info)) + .filter(Objects::nonNull) + .filter(distinctByKey(f -> f.getValue())) + .collect(Collectors.toList()); + } + + public static Qualifier unknown(final String schemeid, final String schemename) { + return qualifier("UNKNOWN", "Unknown", schemeid, schemename); + } + + public static AccessRight accessRight( + final String classid, + final String classname, + final String schemeid, + final String schemename) { + return accessRight(classid, classname, schemeid, schemename, null); + } + + public static AccessRight accessRight( + final String classid, + final String classname, + final String schemeid, + final String schemename, + final OpenAccessRoute openAccessRoute) { + final AccessRight accessRight = new AccessRight(); + accessRight.setClassid(classid); + accessRight.setClassname(classname); + accessRight.setSchemeid(schemeid); + accessRight.setSchemename(schemename); + accessRight.setOpenAccessRoute(openAccessRoute); + return accessRight; + } + + public static Qualifier qualifier( + final String classid, + final String classname, + final String schemeid, + final String schemename) 
{ + final Qualifier q = new Qualifier(); + q.setClassid(classid); + q.setClassname(classname); + q.setSchemeid(schemeid); + q.setSchemename(schemename); + return q; + } + + public static Qualifier qualifier(final Qualifier qualifier) { + final Qualifier q = new Qualifier(); + q.setClassid(qualifier.getClassid()); + q.setClassname(qualifier.getClassname()); + q.setSchemeid(qualifier.getSchemeid()); + q.setSchemename(qualifier.getSchemename()); + return q; + } + + public static StructuredProperty structuredProperty( + final String value, + final String classid, + final String classname, + final String schemeid, + final String schemename, + final DataInfo dataInfo) { + + return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); + } + + public static StructuredProperty structuredProperty( + final String value, + final Qualifier qualifier, + final DataInfo dataInfo) { + if (value == null) { + return null; + } + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(value); + sp.setQualifier(qualifier); + sp.setDataInfo(dataInfo); + return sp; + } + + public static ExtraInfo extraInfo( + final String name, + final String value, + final String typology, + final String provenance, + final String trust) { + final ExtraInfo info = new ExtraInfo(); + info.setName(name); + info.setValue(value); + info.setTypology(typology); + info.setProvenance(provenance); + info.setTrust(trust); + return info; + } + + public static OAIProvenance oaiIProvenance( + final String identifier, + final String baseURL, + final String metadataNamespace, + final Boolean altered, + final String datestamp, + final String harvestDate) { + + final OriginDescription desc = new OriginDescription(); + desc.setIdentifier(identifier); + desc.setBaseURL(baseURL); + desc.setMetadataNamespace(metadataNamespace); + desc.setAltered(altered); + desc.setDatestamp(datestamp); + desc.setHarvestDate(harvestDate); + + final OAIProvenance p = new OAIProvenance(); + 
p.setOriginDescription(desc); + + return p; + } + + public static Journal journal( + final String name, + final String issnPrinted, + final String issnOnline, + final String issnLinking, + final DataInfo dataInfo) { + + return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal( + name, + issnPrinted, + issnOnline, + issnLinking, + null, + null, + null, + null, + null, + null, + null, + dataInfo) : null; + } + + public static Journal journal( + final String name, + final String issnPrinted, + final String issnOnline, + final String issnLinking, + final String ep, + final String iss, + final String sp, + final String vol, + final String edition, + final String conferenceplace, + final String conferencedate, + final DataInfo dataInfo) { + + if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) { + final Journal j = new Journal(); + j.setName(name); + j.setIssnPrinted(issnPrinted); + j.setIssnOnline(issnOnline); + j.setIssnLinking(issnLinking); + j.setEp(ep); + j.setIss(iss); + j.setSp(sp); + j.setVol(vol); + j.setEdition(edition); + j.setConferenceplace(conferenceplace); + j.setConferencedate(conferencedate); + j.setDataInfo(dataInfo); + return j; + } else { + return null; + } + } + + private static boolean hasIssn(String issnPrinted, String issnOnline, String issnLinking) { + return StringUtils.isNotBlank(issnPrinted) + || StringUtils.isNotBlank(issnOnline) + || StringUtils.isNotBlank(issnLinking); + } + + public static DataInfo dataInfo( + final Boolean deletedbyinference, + final String inferenceprovenance, + final Boolean inferred, + final Boolean invisible, + final Qualifier provenanceaction, + final String trust) { + final DataInfo d = new DataInfo(); + d.setDeletedbyinference(deletedbyinference); + d.setInferenceprovenance(inferenceprovenance); + d.setInferred(inferred); + d.setInvisible(invisible); + d.setProvenanceaction(provenanceaction); + d.setTrust(trust); + return d; + } + + public static String createOpenaireId( + final 
int prefix, + final String originalId, + final boolean to_md5) { + if (StringUtils.isBlank(originalId)) { + return null; + } else if (to_md5) { + final String nsPrefix = StringUtils.substringBefore(originalId, "::"); + final String rest = StringUtils.substringAfter(originalId, "::"); + return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest)); + } else { + return String.format("%s|%s", prefix, originalId); + } + } + + public static String createOpenaireId( + final String type, + final String originalId, + final boolean to_md5) { + switch (type) { + case "datasource": + return createOpenaireId(10, originalId, to_md5); + case "organization": + return createOpenaireId(20, originalId, to_md5); + case "person": + return createOpenaireId(30, originalId, to_md5); + case "project": + return createOpenaireId(40, originalId, to_md5); + default: + return createOpenaireId(50, originalId, to_md5); + } + } + + public static String asString(final Object o) { + return o == null ? "" : o.toString(); + } + + public static Predicate distinctByKey( + final Function keyExtractor) { + final Map seen = new ConcurrentHashMap<>(); + return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null; + } + + public static Qualifier createBestAccessRights(final List instanceList) { + return getBestAccessRights(instanceList); + } + + protected static Qualifier getBestAccessRights(final List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new AccessRightComparator<>()); + + final Qualifier rights = min.isPresent() ? 
qualifier(min.get()) : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java new file mode 100644 index 000000000..597f7e79b --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class OafMapperUtilsTest { + + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + @Test + public void testMergePubs() throws IOException { + Publication p1 = read("publication_1.json", Publication.class); + Publication p2 = read("publication_2.json", Publication.class); + Dataset d1 = read("dataset_1.json", Dataset.class); + Dataset d2 = read("dataset_2.json", Dataset.class); + + 
assertEquals(p1.getCollectedfrom().size(), 1); + assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); + assertEquals(d2.getCollectedfrom().size(), 1); + assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + + assertTrue( + OafMapperUtils + .mergeResults(p1, d2) + .getResulttype() + .getClassid() + .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + + assertEquals(p2.getCollectedfrom().size(), 1); + assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + assertEquals(d1.getCollectedfrom().size(), 1); + assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + + assertTrue( + OafMapperUtils + .mergeResults(p2, d1) + .getResulttype() + .getClassid() + .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + } + + protected HashSet cfId(List collectedfrom) { + return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + } + + protected T read(String filename, Class clazz) throws IOException { + final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); + return OBJECT_MAPPER.readValue(json, clazz); + } + +} diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json new file mode 100644 index 000000000..e38c4d1cc --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : 
"Crossref"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json new file mode 100644 index 000000000..52e4e126a --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json new file mode 100644 index 000000000..704c5ad4d --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json new file mode 100644 index 000000000..a1744e84e --- 
/dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 4d18f7cad..9b6400a32 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -592,17 +592,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } - private Journal prepareJournal(final ResultSet rs, final DataInfo info) throws SQLException { - if (Objects.isNull(rs)) { - return null; - } else { - - return journal( - rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"), - rs.getString("issnLinking"), info); - } - } - public List processOrgOrgMergeRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO