From d9c9482a5be86738782808f7cbccbae49f0e341d Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Mon, 6 Feb 2023 13:45:21 +0100
Subject: [PATCH] WIP: refactoring the internal graph data model and its utilities

---
 .../dhp/schema/oaf/common/ModelSupport.java   |   2 +-
 .../dhp/schema/oaf/utils/OafMapperUtils.java  |  11 +
 .../dhp/blacklist/ReadBlacklistFromDB.java    |   4 +-
 .../dhp/blacklist/BlacklistRelationTest.java  |   5 +-
 .../broker/oa/PrepareRelatedDatasetsJob.java  |   1 -
 .../broker/oa/PrepareRelatedProjectsJob.java  |   1 -
 .../oa/PrepareRelatedPublicationsJob.java     |   1 -
 .../broker/oa/PrepareRelatedSoftwaresJob.java |   1 -
 .../dhp/broker/oa/util/BrokerConstants.java   |   2 +-
 .../dhp/broker/oa/util/ConversionUtils.java   |  57 ++--
 .../broker/oa/util/ConversionUtilsTest.java   |   1 -
 dhp-workflows/dhp-distcp/pom.xml              |  13 -
 .../dhp/distcp/oozie_app/config-default.xml   |  18 --
 .../dnetlib/dhp/distcp/oozie_app/workflow.xml |  46 ---
 .../oa/graph/clean/CleanGraphSparkJob.java    |   4 +-
 .../dhp/oa/graph/clean/CleaningRuleMap.java   |   1 -
 .../country/GetDatasourceFromCountry.java     |   3 +-
 .../oa/graph/hive/GraphHiveImporterJob.java   |   2 +-
 .../graph/hive/GraphHiveTableImporterJob.java |   2 +-
 .../raw/AbstractMdRecordToOafMapper.java      | 190 +++-------
 .../raw/DispatchEntitiesApplication.java      |   3 +-
 .../raw/GenerateEntitiesApplication.java      |   9 +-
 .../oa/graph/raw/MergeClaimsApplication.java  |   2 +-
 .../raw/MigrateDbEntitiesApplication.java     | 273 ++++++++----------
 .../dhp/oa/graph/raw/OafToOafMapper.java      |  99 +++----
 .../dhp/oa/graph/raw/OdfToOafMapper.java      | 132 ++++-----
 .../raw/common/VerifyNsPrefixPredicate.java   |   8 +-
 .../hostedbymap/SparkProduceHostedByMap.scala |   2 +-
 .../raw/CopyHdfsOafSparkApplication.scala     |   2 +-
 .../resolution/SparkResolveEntities.scala     |   5 +-
 .../sx/graph/SparkConvertRDDtoDataset.scala   |   9 +-
 .../dhp/sx/graph/SparkCreateInputGraph.scala  |   3 +-
 .../dhp/sx/graph/SparkCreateScholix.scala     |   5 +-
 .../oa/graph/GraphHiveImporterJobTest.java    |   2 +-
 .../clean/GraphCleaningFunctionsTest.java     |   8 +-
 .../group/GroupEntitiesSparkJobTest.java      |   6 +-
 .../raw/GenerateEntitiesApplicationTest.java  |   6 +-
 .../dnetlib/dhp/oa/graph/raw/MappersTest.java |  57 ++--
 .../raw/MigrateDbEntitiesApplicationTest.java |  75 +++--
 .../common/VerifyNsPrefixPredicateTest.java   |  10 +-
 .../resolution/ResolveEntitiesTest.scala      |  15 +-
 .../CreateRelatedEntitiesJob_phase1.java      |  35 +--
 .../CreateRelatedEntitiesJob_phase2.java      |  19 +-
 .../dhp/oa/provision/PrepareRelationsJob.java |   2 -
 .../dhp/oa/provision/SortableRelation.java    |   4 +-
 .../dhp/oa/provision/model/JoinedEntity.java  |   4 +-
 .../model/ProvisionModelSupport.java          |   2 +-
 .../dhp/oa/provision/model/RelatedEntity.java |   6 +-
 .../dhp/oa/provision/model/XmlInstance.java   |   2 -
 .../oa/provision/utils/TemplateFactory.java   |  20 +-
 .../oa/provision/utils/XmlRecordFactory.java  | 216 +++++++-------
 .../utils/XmlSerializationUtils.java          |  16 +-
 dhp-workflows/pom.xml                         |   1 -
 53 files changed, 587 insertions(+), 836 deletions(-)
 delete mode 100644 dhp-workflows/dhp-distcp/pom.xml
 delete mode 100644 dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml
 delete mode 100644 dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
index 8a86a293d..5d03dfb4a 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
@@ -320,7 +320,7 @@ public class ModelSupport {
 		return String.format("%s.%s", dbName, tableName);
 	}
 
-	public static String tableIdentifier(String dbName, Class clazz) {
+	public static String tableIdentifier(String dbName, Class clazz) {
 
 		checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index 78372b323..acdc305bc 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -415,6 +415,17 @@ public class OafMapperUtils {
 			source, target, relType, subRelType, relClass, provenance, null, null);
 	}
 
+	public static Relation getRelation(final String source,
+		final String target,
+		final String relType,
+		final String subRelType,
+		final String relClass,
+		final List provenance,
+		final List properties) {
+		return getRelation(
+			source, target, relType, subRelType, relClass, provenance, null, properties);
+	}
+
 	public static Relation getRelation(final String source,
 		final String target,
 		final String relType,
diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java
index 380991526..eb87f28e2 100644
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java
@@ -13,6 +13,8 @@ import java.util.List;
 import java.util.function.Consumer;
 import java.util.function.Function;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -25,8 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.DbClient;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.common.RelationInverse;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 
 public class ReadBlacklistFromDB implements Closeable {
diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java
index 160658e5b..b2e3f1453 100644
--- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java
+++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java
@@ -4,12 +4,11 @@ package eu.dnetlib.dhp.blacklist;
 import java.util.Arrays;
 import java.util.List;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.common.RelationInverse;
-
 public class BlacklistRelationTest {
 
 	@Test
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
index 260d85b10..ad8a21164 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
@@ -68,7 +68,6 @@ public class PrepareRelatedDatasetsJob {
 
 		final Dataset rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference())
 			.filter((FilterFunction) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 			.filter((FilterFunction) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
 			.filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getSource()))
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
index 5ff469cd0..73fcc9d51 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
@@ -70,7 +70,6 @@ public class PrepareRelatedProjectsJob {
 
 		final Dataset rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference())
 			.filter((FilterFunction) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
 			.filter((FilterFunction) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
 			.filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getSource()))
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
index 9e9261731..9b0fa69f7 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
@@ -69,7 +69,6 @@ public class PrepareRelatedPublicationsJob {
 
 		final Dataset rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference())
 			.filter((FilterFunction) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 			.filter((FilterFunction) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
 			.filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getSource()))
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
index a1bb12c56..2637bee37 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
@@ -73,7 +73,6 @@ public class PrepareRelatedSoftwaresJob {
 		final Dataset rels;
 		rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference())
 			.filter((FilterFunction) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 			.filter((FilterFunction) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
 			.filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getSource()))
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java
index 790ca4e61..b8c138294 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java
@@ -8,7 +8,7 @@ import java.util.Set;
 import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 
 public class BrokerConstants {
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
index 4bf4d3341..b02f1dbf5 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
@@ -1,12 +1,7 @@
 
 package eu.dnetlib.dhp.broker.oa.util;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
+import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -105,16 +100,16 @@ public class ConversionUtils {
 
 		res.setOpenaireId(cleanOpenaireId(result.getId()));
 		res.setOriginalId(first(result.getOriginalId()));
-		res.setTypology(classId(result.getResulttype()));
+		res.setTypology(result.getResulttype());
 		res.setTitles(structPropList(result.getTitle()));
-		res.setAbstracts(fieldList(result.getDescription()));
+		res.setAbstracts(result.getDescription());
 		res.setLanguage(classId(result.getLanguage()));
 		res.setSubjects(subjectList(result.getSubject()));
 		res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
-		res.setPublicationdate(fieldValue(result.getDateofacceptance()));
-		res.setPublisher(fieldValue(result.getPublisher()));
-		res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
-		res.setContributor(fieldList(result.getContributor()));
+		res.setPublicationdate(result.getDateofacceptance());
+		res.setPublisher(result.getPublisher().getName());
+		res.setEmbargoenddate(result.getEmbargoenddate());
+		res.setContributor(result.getContributor());
 		res
 			.setJournal(
 				result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
@@ -210,10 +205,9 @@ public class ConversionUtils {
 
 		final OaBrokerProject res = new OaBrokerProject();
 		res.setOpenaireId(cleanOpenaireId(p.getId()));
-		res.setTitle(fieldValue(p.getTitle()));
-		res.setAcronym(fieldValue(p.getAcronym()));
-		res.setCode(fieldValue(p.getCode()));
-
+		res.setTitle(p.getTitle());
+		res.setAcronym(p.getAcronym());
+		res.setCode(p.getCode());
 		final String ftree = fieldValue(p.getFundingtree());
 		if (StringUtils.isNotBlank(ftree)) {
 			try {
@@ -238,7 +232,7 @@ public class ConversionUtils {
 		res.setOpenaireId(cleanOpenaireId(sw.getId()));
 		res.setName(structPropValue(sw.getTitle()));
 		res.setDescription(fieldValue(sw.getDescription()));
-		res.setRepository(fieldValue(sw.getCodeRepositoryUrl()));
+		res.setRepository(sw.getCodeRepositoryUrl());
 		res.setLandingPage(fieldValue(sw.getDocumentationUrl()));
 
 		return res;
@@ -250,7 +244,7 @@ public class ConversionUtils {
 		}
 
 		final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
-		res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
+		res.setName(StringUtils.defaultIfBlank(ds.getOfficialname(), ds.getEnglishname()));
 		res.setOpenaireId(cleanOpenaireId(ds.getId()));
 		res.setType(classId(ds.getDatasourcetype()));
 		return res;
@@ -264,13 +258,14 @@ public class ConversionUtils {
 		return kv != null ? kv.getValue() : null;
 	}
 
-	private static String fieldValue(final Field f) {
-		return f != null ? f.getValue() : null;
-	}
-
-	private static String fieldValue(final List> fl) {
-		return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
-			: null;
+	private static String fieldValue(final List fl) {
+		return Optional
+			.ofNullable(fl)
+			.map(f -> fl.stream()
+				.filter(StringUtils::isNotBlank)
+				.findFirst()
+				.orElse(null))
+			.orElse(null);
 	}
 
 	private static String classId(final Qualifier q) {
@@ -283,18 +278,6 @@ public class ConversionUtils {
 			: null;
 	}
 
-	private static List fieldList(final List> fl) {
-		return fl != null
-			? fl
-				.stream()
-				.map(Field::getValue)
-				.map(s -> StringUtils.abbreviate(s, BrokerConstants.MAX_STRING_SIZE))
-				.filter(StringUtils::isNotBlank)
-				.limit(BrokerConstants.MAX_LIST_SIZE)
-				.collect(Collectors.toList())
-			: new ArrayList<>();
-	}
-
 	private static List structPropList(final List props) {
 		return props != null
 			? props
diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
index fc630df05..a31f4141b 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
@@ -27,7 +27,6 @@ class ConversionUtilsTest {
 		qf.setClassid("test");
 		qf.setClassname("test");
 		qf.setSchemeid("test");
-		qf.setSchemename("test");
 
 		final StructuredProperty sp1 = new StructuredProperty();
 		sp1.setValue("1");
diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml
deleted file mode 100644
index c3d3a7375..000000000
--- a/dhp-workflows/dhp-distcp/pom.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <artifactId>dhp-workflows</artifactId>
-        <groupId>eu.dnetlib.dhp</groupId>
-        <version>1.2.5-SNAPSHOT</version>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-
-    <artifactId>dhp-distcp</artifactId>
-
-
-</project>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml b/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml
deleted file mode 100644
index 905fb9984..000000000
--- a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml
+++ /dev/null
@@ -1,18 +0,0 @@
-<configuration>
-    <property>
-        <name>jobTracker</name>
-        <value>yarnRM</value>
-    </property>
-    <property>
-        <name>nameNode</name>
-        <value>hdfs://nameservice1</value>
-    </property>
-    <property>
-        <name>sourceNN</name>
-        <value>webhdfs://namenode2.hadoop.dm.openaire.eu:50071</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml b/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml
deleted file mode 100644
index 91b97332b..000000000
--- a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<workflow-app name="distcp" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>sourceNN</name>
-            <description>the source name node</description>
-        </property>
-        <property>
-            <name>sourcePath</name>
-            <description>the source path</description>
-        </property>
-        <property>
-            <name>targetPath</name>
-            <description>the target path</description>
-        </property>
-        <property>
-            <name>hbase_dump_distcp_memory_mb</name>
-            <value>6144</value>
-            <description>memory for distcp action copying InfoSpace dump from remote cluster</description>
-        </property>
-        <property>
-            <name>hbase_dump_distcp_num_maps</name>
-            <value>1</value>
-            <description>maximum number of simultaneous copies of InfoSpace dump from remote location</description>
-        </property>
-    </parameters>
-
-    <start to="distcp"/>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <action name="distcp">
-        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <arg>-Dmapreduce.map.memory.mb=${hbase_dump_distcp_memory_mb}</arg>
-            <arg>-pb</arg>
-            <arg>-m ${hbase_dump_distcp_num_maps}</arg>
-            <arg>${sourceNN}/${sourcePath}</arg>
-            <arg>${nameNode}/${targetPath}</arg>
-        </distcp>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <end name="End"/>
-</workflow-app>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
index 2e2ea567a..29c8bb680 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
@@ -22,7 +22,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@@ -61,7 +61,7 @@ public class CleanGraphSparkJob {
 		String graphTableClassName = parser.get("graphTableClassName");
 		log.info("graphTableClassName: {}", graphTableClassName);
 
-		Class entityClazz = (Class) Class.forName(graphTableClassName);
+		Class entityClazz = (Class) Class.forName(graphTableClassName);
 
 		final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java
index 3d501bb27..0b833fc7f 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java
@@ -74,7 +74,6 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer
 				) value -> OBJECT_MAPPER.readValue(value, Relation.class), Encoders.bean(Relation.class))
 			.filter(
-				(FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) &&
-					!rel.getDataInfo().getDeletedbyinference());
+				(FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
 
 		organization
 			.joinWith(relation, organization.col("id").equalTo(relation.col("target")))
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveImporterJob.java
index 0f74c6343..5fe207a97 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveImporterJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveImporterJob.java
@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 
 import java.util.Optional;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -17,7 +18,6 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 
 public class GraphHiveImporterJob {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
index 76e1d57a1..c5057afd4 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
@@ -2,7 +2,7 @@
 package eu.dnetlib.dhp.oa.graph.hive;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
-import static eu.dnetlib.dhp.schema.common.ModelSupport.tableIdentifier;
+import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.tableIdentifier;
 
 import java.util.Optional;
 
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
index 7aa40cb8a..a1a480725 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@@ -10,11 +10,11 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
 
-import java.net.MalformedURLException;
-import java.net.URL;
 import java.util.*;
 import java.util.stream.Collectors;
 
+import eu.dnetlib.dhp.schema.oaf.Entity;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.validator.routines.UrlValidator;
 import org.dom4j.*;
@@ -26,11 +26,9 @@ import com.google.common.collect.Sets;
 
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 
 public abstract class AbstractMdRecordToOafMapper {
@@ -49,9 +47,9 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected static final Qualifier ORCID_PID_TYPE = qualifier(
 		ModelConstants.ORCID_PENDING,
 		ModelConstants.ORCID_CLASSNAME,
-		DNET_PID_TYPES, DNET_PID_TYPES);
+		DNET_PID_TYPES);
 	protected static final Qualifier MAG_PID_TYPE = qualifier(
-		"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
+		"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES);
 
 	protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
@@ -122,7 +120,7 @@ public abstract class AbstractMdRecordToOafMapper {
 			return Lists.newArrayList();
 		}
 
-		final DataInfo info = prepareDataInfo(doc, invisible);
+		final EntityDataInfo info = prepareDataInfo(doc, invisible);
 		final long lastUpdateTimestamp = new Date().getTime();
 
 		final List instances = prepareInstances(doc, info, collectedFrom, hostedBy);
@@ -171,10 +169,10 @@ public abstract class AbstractMdRecordToOafMapper {
 		final String type,
 		final List instances,
 		final KeyValue collectedFrom,
-		final DataInfo info,
+		final EntityDataInfo info,
 		final long lastUpdateTimestamp) {
 
-		final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
+		final Entity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
 
 		final Set originalId = Sets.newHashSet(entity.getOriginalId());
 		originalId.add(entity.getId());
@@ -202,11 +200,11 @@
 		return oafs;
 	}
 
-	private OafEntity createEntity(final Document doc,
+	private Entity createEntity(final Document doc,
 		final String type,
 		final List instances,
 		final KeyValue collectedFrom,
-		final DataInfo info,
+		final EntityDataInfo info,
 		final long lastUpdateTimestamp) {
 		switch (type.toLowerCase()) {
 			case "publication":
@@ -217,37 +215,36 @@
 			case "dataset":
 				final Dataset d = new Dataset();
 				populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
-				d.setStoragedate(prepareDatasetStorageDate(doc, info));
-				d.setDevice(prepareDatasetDevice(doc, info));
-				d.setSize(prepareDatasetSize(doc, info));
-				d.setVersion(prepareDatasetVersion(doc, info));
-				d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
-				d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
-				d.setGeolocation(prepareDatasetGeoLocations(doc, info));
+				d.setStoragedate(prepareDatasetStorageDate(doc));
+				d.setDevice(prepareDatasetDevice(doc));
+				d.setSize(prepareDatasetSize(doc));
+				d.setVersion(prepareDatasetVersion(doc));
+				d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc));
+				d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc));
+				d.setGeolocation(prepareDatasetGeoLocations(doc));
 				return d;
 			case "software":
 				final Software s = new Software();
 				populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
-				s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
-				s.setLicense(prepareSoftwareLicenses(doc, info));
-				s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
-				s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
+				s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc));
+				s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc));
+				s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc));
 				return s;
 			case "":
 			case "otherresearchproducts":
 			default:
 				final OtherResearchProduct o = new OtherResearchProduct();
 				populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
-				o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
-				o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
-				o.setTool(prepareOtherResearchProductTools(doc, info));
+				o.setContactperson(prepareOtherResearchProductContactPersons(doc));
+				o.setContactgroup(prepareOtherResearchProductContactGroups(doc));
+				o.setTool(prepareOtherResearchProductTools(doc));
 				return o;
 		}
 	}
 
 	private List addProjectRels(
 		final Document doc,
-		final OafEntity entity) {
+		final Entity entity) {
 
 		final List res = new ArrayList<>();
@@ -277,7 +274,7 @@
 		return res;
 	}
 
-	private List addRelations(Document doc, OafEntity entity) {
+	private List addRelations(Document doc, Entity entity) {
 
 		final List rels = Lists.newArrayList();
 
@@ -322,14 +319,14 @@
 
 	protected abstract List addOtherResultRels(
 		final Document doc,
-		final OafEntity entity);
+		final Entity entity);
 
 	private void populateResultFields(
 		final Result r,
 		final Document doc,
 		final List instances,
 		final KeyValue collectedFrom,
-		final DataInfo info,
+		final EntityDataInfo info,
 		final long lastUpdateTimestamp) {
 		r.setDataInfo(info);
 		r.setLastupdatetimestamp(lastUpdateTimestamp);
@@ -345,24 +342,24 @@
 		r.setLanguage(prepareLanguages(doc));
 		r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES
 		r.setSubject(prepareSubjects(doc, info));
-		r.setTitle(prepareTitles(doc, info));
-		r.setRelevantdate(prepareRelevantDates(doc, info));
-		r.setDescription(prepareDescriptions(doc, info));
-		r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info));
-		r.setPublisher(preparePublisher(doc, info));
-		r.setEmbargoenddate(prepareField(doc, "//oaf:embargoenddate", info));
-		r.setSource(prepareSources(doc, info));
-		r.setFulltext(prepareListFields(doc, "//oaf:fulltext", info));
-		r.setFormat(prepareFormats(doc, info));
-		r.setContributor(prepareContributors(doc, info));
-		r.setResourcetype(prepareResourceType(doc, info));
-		r.setCoverage(prepareCoverages(doc, info));
+		r.setTitle(prepareTitles(doc));
+		r.setRelevantdate(prepareRelevantDates(doc));
+		r.setDescription(prepareDescriptions(doc));
+		r.setDateofacceptance(doc.valueOf("//oaf:dateAccepted"));
+		r.setPublisher(preparePublisher(doc));
+		r.setEmbargoenddate(doc.valueOf("//oaf:embargoenddate"));
+		r.setSource(prepareSources(doc));
+		r.setFulltext(prepareListString(doc, "//oaf:fulltext"));
+		r.setFormat(prepareFormats(doc));
+		r.setContributor(prepareContributors(doc));
+		r.setResourcetype(prepareResourceType(doc));
+		r.setCoverage(prepareCoverages(doc));
 		r.setContext(prepareContexts(doc, info));
 		r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
 		r
-			.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
+			.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
 		r
-			.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
+			.setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
 
 		r.setInstance(instances);
 		r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
@@ -404,7 +401,7 @@
 		return Lists.newArrayList(set);
 	}
 
-	protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
+	protected abstract Qualifier prepareResourceType(Document doc);
 
 	protected abstract List prepareInstances(
 		Document doc,
@@ -412,21 +409,21 @@
 		KeyValue collectedfrom,
 		KeyValue hostedby);
 
-	protected abstract List> prepareSources(Document doc, DataInfo info);
+	protected abstract List prepareSources(Document doc);
 
-	protected abstract List prepareRelevantDates(Document doc, DataInfo info);
+	protected abstract List prepareRelevantDates(Document doc);
 
-	protected abstract List> prepareCoverages(Document doc, DataInfo info);
+	protected abstract List prepareCoverages(Document doc);
 
-	protected abstract List> prepareContributors(Document doc, DataInfo info);
+	protected abstract List prepareContributors(Document doc);
 
-	protected abstract List> prepareFormats(Document doc, DataInfo info);
+	protected abstract List prepareFormats(Document doc);
 
-	protected abstract Field preparePublisher(Document doc, DataInfo info);
+	protected abstract Publisher preparePublisher(Document doc);
 
-	protected abstract List> prepareDescriptions(Document doc, DataInfo info);
+	protected abstract List prepareDescriptions(Document doc);
 
-	protected abstract List prepareTitles(Document doc, DataInfo info);
+	protected abstract List prepareTitles(Document doc);
 
 	protected abstract List prepareSubjects(Document doc, DataInfo info);
 
@@ -434,41 +431,31 @@
 
 	protected abstract List prepareAuthors(Document doc, DataInfo info);
 
-	protected abstract List> prepareOtherResearchProductTools(
-		Document doc,
-		DataInfo info);
+	protected abstract List prepareOtherResearchProductTools(Document doc);
 
-	protected abstract List> prepareOtherResearchProductContactGroups(
-		Document doc,
-		DataInfo info);
+	protected abstract List prepareOtherResearchProductContactGroups(Document doc);
 
-	protected abstract List> prepareOtherResearchProductContactPersons(
-		Document doc,
-		DataInfo info);
+	protected abstract List prepareOtherResearchProductContactPersons(Document doc);
 
-	protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
+	protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc);
 
-	protected abstract Field prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info);
+	protected abstract String prepareSoftwareCodeRepositoryUrl(Document doc);
 
-	protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info);
+	protected abstract List prepareSoftwareDocumentationUrls(Document doc);
 
-	protected abstract List> prepareSoftwareDocumentationUrls(
-		Document doc,
-		DataInfo info);
+	protected abstract List prepareDatasetGeoLocations(Document doc);
 
-	protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info);
+	protected abstract String prepareDatasetMetadataVersionNumber(Document doc);
 
-	protected abstract Field prepareDatasetMetadataVersionNumber(Document doc, DataInfo info);
+	protected abstract String prepareDatasetLastMetadataUpdate(Document doc);
 
-	protected abstract Field prepareDatasetLastMetadataUpdate(Document doc, DataInfo info);
+	protected abstract String prepareDatasetVersion(Document doc);
 
-	protected abstract Field prepareDatasetVersion(Document doc, DataInfo info);
+	protected abstract String prepareDatasetSize(Document doc);
 
-	protected abstract Field prepareDatasetSize(Document doc, DataInfo info);
+	protected abstract String prepareDatasetDevice(Document doc);
 
-	protected abstract Field prepareDatasetDevice(Document doc, DataInfo info);
-
-	protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info);
+	protected abstract String prepareDatasetStorageDate(Document doc);
 
 	private Journal prepareJournal(final Document doc, final DataInfo info) {
 		final Node n = doc.selectSingleNode("//oaf:journal");
@@ -514,7 +501,6 @@
 		accessRight.setClassid(qualifier.getClassid());
 		accessRight.setClassname(qualifier.getClassname());
 		accessRight.setSchemeid(qualifier.getSchemeid());
-		accessRight.setSchemename(qualifier.getSchemename());
 
 		// TODO set the OAStatus
 
@@ -541,7 +527,7 @@
 			final Node n = (Node) o;
 			final String classId = n.valueOf(xpathClassId).trim();
 			if (vocs.termExists(schemeId, classId)) {
-				res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info));
+				res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId)));
 			}
 		}
 		return res;
@@ -550,28 +536,11 @@
 	protected List prepareListStructProps(
 		final Node node,
 		final String xpath,
-		final Qualifier qualifier,
-		final DataInfo info) {
+		final Qualifier qualifier) {
 		final List res = new ArrayList<>();
 		for (final Object o : node.selectNodes(xpath)) {
 			final Node n = (Node) o;
-			res.add(structuredProperty(n.getText(), qualifier, info));
-		}
-		return res;
-	}
-
-	protected List prepareListStructProps(
-		final Node node,
-		final String xpath,
-		final DataInfo info) {
-		final List res = new ArrayList<>();
-		for (final Object o : node.selectNodes(xpath)) {
-			final Node n = (Node) o;
-			res
-				.add(
-					structuredProperty(
-						n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
-						n.valueOf("@schemename"), info));
+			res.add(structuredProperty(n.getText(), qualifier));
 		}
 		return res;
 	}
@@ -583,11 +552,10 @@
 		final List res = new ArrayList<>();
 		for (final Object o : node.selectNodes(xpath)) {
 			final Node n = (Node) o;
+			Qualifier qualifier = qualifier(n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"));
 			res
 				.add(
-					subject(
-						n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
-						n.valueOf("@schemename"), info));
+					subject(n.getText(), qualifier, info));
 		}
 		return res;
 	}
@@ -609,37 +577,31 @@
 		return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
 	}
 
-	protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
+	protected EntityDataInfo prepareDataInfo(final Document doc, final boolean invisible) {
 		final Node n = doc.selectSingleNode("//oaf:datainfo");
 
 		if (n == null) {
-			return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
+			return dataInfo(false, false, 0.9f, null, false, REPOSITORY_PROVENANCE_ACTIONS);
 		}
 
 		final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
 		final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
 		final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
-		final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename");
 
 		final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference"));
 		final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance");
 		final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
-		final String trust = n.valueOf("./oaf:trust");
+		final Float trust = Float.parseFloat(n.valueOf("./oaf:trust"));
 
-		return dataInfo(
-			deletedbyinference, inferenceprovenance, inferred, invisible,
-			qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
+		final Qualifier pAction = qualifier(paClassId, paClassName, paSchemeId);
+
+		return dataInfo(invisible, deletedbyinference, trust, inferenceprovenance, inferred, pAction);
 	}
 
-	protected Field prepareField(final Node node, final String xpath, final DataInfo info) {
-		return field(node.valueOf(xpath), info);
-	}
-
-	protected List> prepareListFields(
+	protected List prepareListFields(
 		final Node node,
-		final String xpath,
-		final DataInfo info) {
-		return listFields(info, prepareListString(node, xpath));
+		final String xpath) {
+		return prepareListString(node, xpath);
 	}
 
 	protected List prepareListString(final Node node, final String xpath) {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/DispatchEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/DispatchEntitiesApplication.java
index 1aab78afe..7774416d1 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/DispatchEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/DispatchEntitiesApplication.java
@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.util.Optional;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -18,7 +19,7 @@ import org.slf4j.LoggerFactory;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
+
 import eu.dnetlib.dhp.schema.oaf.*;
 
 public class DispatchEntitiesApplication {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
index 5f9d98073..de1364025 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
@@ -9,6 +9,8 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.Text;
@@ -16,11 +18,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
-import org.dom4j.DocumentException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -29,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@@ -139,7 +136,7 @@ public class GenerateEntitiesApplication {
 				save(
 					inputRdd
 						.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
-						.reduceByKey(OafMapperUtils::merge)
+						.reduceByKey(MergeUtils::merge)
 						.map(Tuple2::_2),
 					targetPath);
 				break;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java
index ee1b6a5da..a727e67f0 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java
@@ -10,6 +10,7 @@ import java.util.Optional;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -26,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import scala.Tuple2;
 
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
index c69a7a6ff..40783989a 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@@ -15,6 +15,7 @@ import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
+import eu.dnetlib.dhp.schema.oaf.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
@@ -28,23 +29,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction; import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Context; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -52,12 +37,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); - private static final DataInfo DATA_INFO_CLAIM = dataInfo( - false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), - "0.9"); + private static final EntityDataInfo ENTITY_DATA_INFO_CLAIM = dataInfo( + false, false, 0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS)); + + private static final DataInfo REL_DATA_INFO_CLAIM = dataInfo( + 0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS)); private static final List COLLECTED_FROM_CLAIM = listKeyValues( - createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); + createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); + + private final static List PROVENANCE_CLAIM = getProvenance(COLLECTED_FROM_CLAIM, ENTITY_DATA_INFO_CLAIM); public static final String SOURCE_TYPE = "source_type"; public static final String TARGET_TYPE = "target_type"; @@ -207,7 +196,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i public List processService(final ResultSet rs) { try { - final DataInfo info = prepareDataInfo(rs); + final EntityDataInfo info = prepareDataInfo(rs); final Datasource ds = new Datasource(); @@ -220,46 +209,45 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i .filter(StringUtils::isNotBlank) .collect(Collectors.toList())); ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom"))); - ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info)); + ds.setPid(prepareListOfStructProps(rs.getArray("pid"))); ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setDateoftransformation(null); // Value not returned by the SQL query ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB - ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype"))); ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); - 
ds.setOfficialname(field(rs.getString("officialname"), info)); - ds.setEnglishname(field(rs.getString("englishname"), info)); - ds.setWebsiteurl(field(rs.getString("websiteurl"), info)); - ds.setLogourl(field(rs.getString("logourl"), info)); - ds.setContactemail(field(rs.getString("contactemail"), info)); - ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info)); - ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info)); - ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info)); - ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info)); - ds.setDescription(field(rs.getString("description"), info)); - ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); - ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info)); - ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); - ds.setOdpolicies(field(rs.getString("odpolicies"), info)); - ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); + ds.setOfficialname(rs.getString("officialname")); + ds.setEnglishname(rs.getString("englishname")); + ds.setWebsiteurl(rs.getString("websiteurl")); + ds.setLogourl(rs.getString("logourl")); + ds.setContactemail(rs.getString("contactemail")); + ds.setNamespaceprefix(rs.getString("namespaceprefix")); + ds.setLatitude(Double.toString(rs.getDouble("latitude"))); + ds.setLongitude(Double.toString(rs.getDouble("longitude"))); + ds.setDateofvalidation(asString(rs.getDate("dateofvalidation"))); + ds.setDescription(rs.getString("description")); + ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"))); + ds.setOdnumberofitems(Double.toString(rs.getInt("odnumberofitems"))); + ds.setOdnumberofitemsdate(asString(rs.getDate("odnumberofitemsdate"))); + ds.setOdpolicies(rs.getString("odpolicies")); + ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"))); ds.setLanguages(listValues(rs.getArray("languages"))); - ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); - ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); - ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); - ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); - ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); - ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); - ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); - ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); - ds.setVersioning(field(rs.getBoolean("versioning"), info)); + ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"))); + ds.setReleasestartdate(asString(rs.getDate("releasestartdate"))); + ds.setReleaseenddate(asString(rs.getDate("releaseenddate"))); + ds.setMissionstatementurl(rs.getString("missionstatementurl")); + ds.setDatabaseaccesstype(rs.getString("databaseaccesstype")); + ds.setDatauploadtype(rs.getString("datauploadtype")); + ds.setDatabaseaccessrestriction(rs.getString("databaseaccessrestriction")); + ds.setDatauploadrestriction(rs.getString("datauploadrestriction")); + ds.setVersioning(rs.getBoolean("versioning")); ds.setVersioncontrol(rs.getBoolean("versioncontrol")); - ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); + ds.setCitationguidelineurl(rs.getString("citationguidelineurl")); - ds.setPidsystems(field(rs.getString("pidsystems"), info)); - 
ds.setCertificates(field(rs.getString("certificates"), info)); + ds.setPidsystems(rs.getString("pidsystems")); + ds.setCertificates(rs.getString("certificates")); ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array ds .setJournal( @@ -306,7 +294,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i public List processProject(final ResultSet rs) { try { - final DataInfo info = prepareDataInfo(rs); + final EntityDataInfo info = prepareDataInfo(rs); final Project p = new Project(); @@ -321,32 +309,31 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i p.setDateofcollection(asString(rs.getDate("dateofcollection"))); p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); p.setExtraInfo(new ArrayList<>()); // Values not present in the DB - p.setOaiprovenance(null); // Values not present in the DB - p.setWebsiteurl(field(rs.getString("websiteurl"), info)); - p.setCode(field(rs.getString("code"), info)); - p.setAcronym(field(rs.getString("acronym"), info)); - p.setTitle(field(rs.getString("title"), info)); - p.setStartdate(field(asString(rs.getDate("startdate")), info)); - p.setEnddate(field(asString(rs.getDate("enddate")), info)); - p.setCallidentifier(field(rs.getString("callidentifier"), info)); - p.setKeywords(field(rs.getString("keywords"), info)); - p.setDuration(field(Integer.toString(rs.getInt("duration")), info)); - p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info)); + p.setWebsiteurl(rs.getString("websiteurl")); + p.setCode(rs.getString("code")); + p.setAcronym(rs.getString("acronym")); + p.setTitle(rs.getString("title")); + p.setStartdate(asString(rs.getDate("startdate"))); + p.setEnddate(asString(rs.getDate("enddate"))); + p.setCallidentifier(rs.getString("callidentifier")); + p.setKeywords(rs.getString("keywords")); + p.setDuration(Integer.toString(rs.getInt("duration"))); + p.setEcsc39(Boolean.toString(rs.getBoolean("ecsc39"))); p - .setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); - p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); - p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); - p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info)); + .setOamandatepublications(Boolean.toString(rs.getBoolean("oamandatepublications"))); + p.setEcarticle29_3(Boolean.toString(rs.getBoolean("ecarticle29_3"))); + p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"))); + p.setFundingtree(prepareListFields(rs.getArray("fundingtree"))); p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype"))); - p.setOptional1(field(rs.getString("optional1"), info)); - p.setOptional2(field(rs.getString("optional2"), info)); - p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info)); - p.setContactfullname(field(rs.getString("contactfullname"), info)); - p.setContactfax(field(rs.getString("contactfax"), info)); - p.setContactphone(field(rs.getString("contactphone"), info)); - p.setContactemail(field(rs.getString("contactemail"), info)); - p.setSummary(field(rs.getString("summary"), info)); - p.setCurrency(field(rs.getString("currency"), info)); + p.setOptional1(rs.getString("optional1")); + p.setOptional2(rs.getString("optional2")); + p.setJsonextrainfo(rs.getString("jsonextrainfo")); + p.setContactfullname(rs.getString("contactfullname")); + p.setContactfax(rs.getString("contactfax")); + p.setContactphone(rs.getString("contactphone")); + 
p.setContactemail(rs.getString("contactemail")); + p.setSummary(rs.getString("summary")); + p.setCurrency(rs.getString("currency")); p.setTotalcost(new Float(rs.getDouble("totalcost"))); p.setFundedamount(new Float(rs.getDouble("fundedamount"))); p.setDataInfo(info); @@ -361,7 +348,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i public List processOrganization(final ResultSet rs) { try { - final DataInfo info = prepareDataInfo(rs); + final EntityDataInfo info = prepareDataInfo(rs); final Organization o = new Organization(); @@ -372,31 +359,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i listKeyValues( createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"))); - o.setPid(prepareListOfStructProps(rs.getArray("pid"), info)); + o.setPid(prepareListOfStructProps(rs.getArray("pid"))); o.setDateofcollection(asString(rs.getDate("dateofcollection"))); o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); o.setExtraInfo(new ArrayList<>()); // Values not present in the DB - o.setOaiprovenance(null); // Values not present in the DB - o.setLegalshortname(field(rs.getString("legalshortname"), info)); - o.setLegalname(field(rs.getString("legalname"), info)); - o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"), info)); - o.setWebsiteurl(field(rs.getString("websiteurl"), info)); - o.setLogourl(field(rs.getString("logourl"), info)); - o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); - o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info)); - o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info)); + o.setLegalshortname(rs.getString("legalshortname")); + o.setLegalname(rs.getString("legalname")); + o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"))); + o.setWebsiteurl(rs.getString("websiteurl")); + o.setLogourl(rs.getString("logourl")); + o.setEclegalbody(Boolean.toString(rs.getBoolean("eclegalbody"))); + o.setEclegalperson(Boolean.toString(rs.getBoolean("eclegalperson"))); + o.setEcnonprofit(Boolean.toString(rs.getBoolean("ecnonprofit"))); o - .setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info)); - o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info)); + .setEcresearchorganization(Boolean.toString(rs.getBoolean("ecresearchorganization"))); + o.setEchighereducation(Boolean.toString(rs.getBoolean("echighereducation"))); o .setEcinternationalorganizationeurinterests( - field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info)); + Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests"))); o .setEcinternationalorganization( - field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info)); - o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); - o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); - o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); + Boolean.toString(rs.getBoolean("ecinternationalorganization"))); + o.setEcenterprise(Boolean.toString(rs.getBoolean("ecenterprise"))); + o.setEcsmevalidated(Boolean.toString(rs.getBoolean("ecsmevalidated"))); + o.setEcnutscode(Boolean.toString(rs.getBoolean("ecnutscode"))); o.setCountry(prepareQualifierSplitting(rs.getString("country"))); o.setDataInfo(info); 
o.setLastupdatetimestamp(lastUpdateTimestamp);
@@ -409,21 +395,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List processServiceOrganization(final ResultSet rs) {
try {
- final DataInfo info = prepareDataInfo(rs);
+ final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("service"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
+ final List<Provenance> provenance = getProvenance(collectedFrom, info);
+
final Relation r1 = OafMapperUtils
.getRelation(
- dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, collectedFrom, info,
- lastUpdateTimestamp);
+ dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, provenance);
final Relation r2 = OafMapperUtils
.getRelation(
- orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info,
- lastUpdateTimestamp);
+ orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
@@ -433,12 +419,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List processProjectOrganization(final ResultSet rs) {
try {
- final DataInfo info = prepareDataInfo(rs);
+ final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
+ final List<Provenance> provenance = getProvenance(collectedFrom, info);
+
final List<KeyValue> properties = Lists
.newArrayList(
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
@@ -446,13 +434,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Relation r1 = OafMapperUtils
.getRelation(
- projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, collectedFrom, info,
- lastUpdateTimestamp, null, properties);
+ projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, provenance, properties);
final Relation r2 = OafMapperUtils
.getRelation(
- orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, collectedFrom, info,
- lastUpdateTimestamp, null, properties);
+ orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
@@ -469,21 +455,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
if (targetType.equals("dataset")) {
r = new Dataset();
- r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
+ r.setResulttype(DATASET_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("software")) {
r = new Software();
- r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
+ r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("other")) {
r = new OtherResearchProduct();
- r.setResulttype(ORP_DEFAULT_RESULTTYPE);
+ r.setResulttype(ORP_DEFAULT_RESULTTYPE.getClassid());
} else {
r = new Publication();
- r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
+ r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE.getClassid());
}
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
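// A usage sketch (not part of the patch): with provenance and key/value
// properties both first-class on Relation, a participation link like the ones
// built above needs no dataInfo/lastupdatetimestamp plumbing. Signature as per
// the getRelation overload this patch introduces in OafMapperUtils:
static Relation participation(final String projectId, final String orgId,
    final List<Provenance> provenance, final List<KeyValue> properties) {
    return OafMapperUtils
        .getRelation(
            projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT,
            provenance, properties);
}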
- r.setContext(prepareContext(rs.getString("source_id"), DATA_INFO_CLAIM));
- r.setDataInfo(DATA_INFO_CLAIM);
+ r.setContext(prepareContext(rs.getString("source_id"), ENTITY_DATA_INFO_CLAIM));
+ r.setDataInfo(ENTITY_DATA_INFO_CLAIM);
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
return Arrays.asList(r);
@@ -493,8 +479,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
- Relation r1 = prepareRelation(sourceId, targetId, validationDate);
- Relation r2 = prepareRelation(targetId, sourceId, validationDate);
+ Relation r1 = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
+ Relation r2 = prepareRelation(targetId, sourceId, PROVENANCE_CLAIM, validationDate);
final String semantics = rs.getString("semantics");
@@ -529,17 +515,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
}
}
- private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) {
+ private Relation prepareRelation(final String sourceId, final String targetId, final List<Provenance> provenance, final String validationDate) {
final Relation r = new Relation();
if (StringUtils.isNotBlank(validationDate)) {
r.setValidated(true);
r.setValidationDate(validationDate);
}
- r.setCollectedfrom(COLLECTED_FROM_CLAIM);
+ r.setProvenance(provenance);
r.setSource(sourceId);
r.setTarget(targetId);
- r.setDataInfo(DATA_INFO_CLAIM);
- r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}
@@ -558,16 +542,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(context);
}
- private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
+ private EntityDataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
- final double trust = rs.getDouble("trust");
+ final float trust = (float) rs.getDouble("trust");
- return dataInfo(
- deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION,
- String.format("%.3f", trust));
+ return dataInfo(false, deletedbyinference, trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
+ }
+
+ private DataInfo prepareRelDataInfo(final ResultSet rs) throws SQLException {
+ final String inferenceprovenance = rs.getString("inferenceprovenance");
+ final Boolean inferred = rs.getBoolean("inferred");
+ final float trust = (float) rs.getDouble("trust");
+
+ return dataInfo(trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
}
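// A sketch (not part of the patch) of the getProvenance helper that bridges the
// two dataInfo flavours above and the relation factories: assuming Provenance is
// the (collectedfrom, dataInfo) pair its accessors elsewhere in this patch
// suggest, one entry is emitted per collectedfrom source. Assumed imports:
// java.util.List, java.util.stream.Collectors.
static List<Provenance> toProvenance(final List<KeyValue> collectedfrom, final DataInfo dataInfo) {
    return collectedfrom
        .stream()
        .map(cf -> {
            final Provenance p = new Provenance();
            p.setCollectedfrom(cf); // the source the relation was gathered from
            p.setDataInfo(dataInfo); // shared trust / inference metadata
            return p;
        })
        .collect(Collectors.toList());
}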
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
@@ -604,15 +594,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
}
- private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
+ private List<String> prepareListFields(final Array array) {
try {
- return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
+ return array != null ? listValues(array) : new ArrayList<>();
} catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e);
}
}
- private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
+ private StructuredProperty prepareStructProp(final String s) {
if (StringUtils.isBlank(s)) {
return null;
}
@@ -621,19 +611,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String value = parts[0];
final String[] arr = parts[1].split("@@@");
if (arr.length == 2) {
- return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
+ return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]));
}
}
return null;
}
private List<StructuredProperty> prepareListOfStructProps(
- final Array array,
- final DataInfo dataInfo) throws SQLException {
+ final Array array) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
- final StructuredProperty sp = prepareStructProp(s, dataInfo);
+ final StructuredProperty sp = prepareStructProp(s);
if (sp != null) {
res.add(sp);
}
@@ -666,12 +655,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
- final Relation r1 = OafMapperUtils
- .getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, collectedFrom, info, lastUpdateTimestamp);
+ final List<Provenance> provenance = getProvenance(collectedFrom, info);
- final Relation r2 = OafMapperUtils
- .getRelation(
- orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, collectedFrom, info, lastUpdateTimestamp);
+ final Relation r1 = getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance);
+
+ final Relation r2 = getRelation(orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, provenance);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
@@ -688,12 +676,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
- return Arrays
- .asList(
- OafMapperUtils
- .getRelation(
- orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, rs.getString("type"), collectedFrom, info,
- lastUpdateTimestamp));
+ final List<Provenance> provenance = getProvenance(collectedFrom, info);
+
+ final String relClass = rs.getString("type");
+ return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, relClass, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}
@@ -710,12 +696,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
- return Arrays
- .asList(
- OafMapperUtils
- .getRelation(
- orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, collectedFrom, info,
- lastUpdateTimestamp));
+ final List<Provenance> provenance = getProvenance(collectedFrom, info);
+
+ return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index
30f3935f5..3170c2568 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -67,9 +67,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String cleanedId = pid .replaceAll("http://orcid.org/", "") .replaceAll("https://orcid.org/", ""); - author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + author.getPid().add(authorPid(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { - author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); + author.getPid().add(authorPid(pid, MAG_PID_TYPE, info)); } } @@ -89,39 +89,36 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info); + protected List prepareTitles(final Document doc) { + return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER); } @Override - protected List> prepareDescriptions(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//dc:description", info) + protected List prepareDescriptions(final Document doc) { + return prepareListFields(doc, "//dc:description") .stream() - .map(d -> { - d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH)); - return d; - }) + .map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH)) .collect(Collectors.toList()); } @Override - protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//dc:publisher", info); + protected Publisher preparePublisher(final Document doc) { + return publisher(doc.valueOf("//dc:publisher")); } @Override - protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//dc:format", info); + protected List prepareFormats(final Document doc) { + return prepareListFields(doc, "//dc:format"); } @Override - protected List> prepareContributors(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//dc:contributor", info); + protected List prepareContributors(final Document doc) { + return prepareListFields(doc, "//dc:contributor"); } @Override - protected List> prepareCoverages(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//dc:coverage", info); + protected List prepareCoverages(final Document doc) { + return prepareListFields(doc, "//dc:coverage"); } @Override @@ -147,16 +144,16 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList())); instance.setPid(pid); - instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); + instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted")); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance .setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); - instance.setLicense(field(doc.valueOf("//oaf:license"), info)); + instance.setLicense(license(doc.valueOf("//oaf:license"))); instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); instance - .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount")); instance - 
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency")); final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); final List url = nodes @@ -183,110 +180,90 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List> prepareSources(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//dc:source", info); + protected List prepareSources(final Document doc) { + return prepareListFields(doc, "//dc:source"); } @Override - protected List prepareRelevantDates(final Document doc, final DataInfo info) { + protected List prepareRelevantDates(final Document doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } // SOFTWARES @Override - protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { + protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, - final DataInfo info) { + protected String prepareSoftwareCodeRepositoryUrl( + final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected List prepareSoftwareLicenses( - final Document doc, - final DataInfo info) { - return new ArrayList<>(); // NOT PRESENT IN OAF - } - - @Override - protected List> prepareSoftwareDocumentationUrls( - final Document doc, - final DataInfo info) { + protected List prepareSoftwareDocumentationUrls(final Document doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } // DATASETS @Override - protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { + protected List prepareDatasetGeoLocations(final Document doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetMetadataVersionNumber( - final Document doc, - final DataInfo info) { + protected String prepareDatasetMetadataVersionNumber(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetLastMetadataUpdate( - final Document doc, - final DataInfo info) { + protected String prepareDatasetLastMetadataUpdate(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { + protected String prepareDatasetVersion(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetSize(final Document doc, final DataInfo info) { + protected String prepareDatasetSize(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetDevice(final Document doc, final DataInfo info) { + protected String prepareDatasetDevice(final Document doc) { return null; // NOT PRESENT IN OAF } @Override - protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { + protected String prepareDatasetStorageDate(final Document doc) { return null; // NOT PRESENT IN OAF } // OTHER PRODUCTS @Override - protected List> prepareOtherResearchProductTools( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductTools(final Document doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override - protected List> prepareOtherResearchProductContactGroups( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductContactGroups(final Document 
doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override - protected List> prepareOtherResearchProductContactPersons( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductContactPersons(final Document doc) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List addOtherResultRels( final Document doc, - final OafEntity entity) { + final Entity entity) { final String docId = entity.getId(); final List res = new ArrayList<>(); @@ -313,7 +290,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { + protected Qualifier prepareResourceType(final Document doc) { return null; // NOT PRESENT IN OAF } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 39c77bd37..b3e9fd442 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -5,15 +5,13 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; import java.net.URLDecoder; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.common.RelationInverse; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.validator.routines.UrlValidator; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.Node; @@ -22,12 +20,10 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.common.RelationInverse; + import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @@ -44,7 +40,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareTitles(final Document doc, final DataInfo info) { + protected List prepareTitles(final Document doc) { final List title = Lists.newArrayList(); final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']"; @@ -57,9 +53,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { title .add( structuredProperty( - titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info)); + titleValue, titleType, titleType, DNET_DATACITE_TITLE)); } else { - title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info)); + title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER)); } } @@ -97,7 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); } - author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info)); author.setPid(preparePids(n, 
info)); author.setRank(pos++); res.add(author); @@ -106,8 +101,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return res; } - private List preparePids(final Node n, final DataInfo info) { - final List res = new ArrayList<>(); + private List preparePids(final Node n, final DataInfo info) { + final List res = new ArrayList<>(); for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) { final String id = ((Node) o).getText(); @@ -120,9 +115,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.toLowerCase().startsWith(ORCID)) { final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); - res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + res.add(authorPid(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { - res.add(structuredProperty(id, MAG_PID_TYPE, info)); + res.add(authorPid(id, MAG_PID_TYPE, info)); } } return res; @@ -151,16 +146,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList())); instance.setPid(pid); - instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); + instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted")); final String distributionlocation = doc.valueOf("//oaf:distributionlocation"); instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null); instance .setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); - instance.setLicense(field(doc.valueOf("//oaf:license"), info)); + instance.setLicense(license(doc.valueOf("//oaf:license"))); instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); - instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + instance.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount")); instance - .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency")); final Set url = new HashSet<>(); for (final Object o : doc @@ -218,12 +213,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List> prepareSources(final Document doc, final DataInfo info) { + protected List prepareSources(final Document doc) { return new ArrayList<>(); // Not present in ODF ??? } @Override - protected List prepareRelevantDates(final Document doc, final DataInfo info) { + protected List prepareRelevantDates(final Document doc) { final List res = new ArrayList<>(); for (final Object o : doc.selectNodes("//*[local-name()='date']")) { final String dateType = ((Node) o).valueOf("@dateType"); @@ -235,42 +230,40 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .add( structuredProperty( - ((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE, DNET_DATACITE_DATE, - info)); + ((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE)); } else { res .add( structuredProperty( - ((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE, DNET_DATACITE_DATE, - info)); + ((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE)); } } return res; } @Override - protected List> prepareCoverages(final Document doc, final DataInfo info) { + protected List prepareCoverages(final Document doc) { return new ArrayList<>(); // Not present in ODF ??? 
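// An illustrative sketch: author identifiers are now built through the
// authorPid(...) factory used in preparePids above instead of generic
// structuredProperty(...) values. Hand-rolled equivalent, assuming an AuthorPid
// bean with plain setters (the exact class name is an assumption):
static AuthorPid orcidOf(final String rawId, final DataInfo info) {
    final AuthorPid pid = new AuthorPid();
    pid.setValue(rawId.replace("http://orcid.org/", "").replace("https://orcid.org/", ""));
    pid.setQualifier(ORCID_PID_TYPE); // dnet:pid_types qualifier for ORCID
    pid.setDataInfo(info); // pid-level provenance is kept, unlike plain fields
    return pid;
}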
} @Override - protected List> prepareContributors(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()='contributorName']", info); + protected List prepareContributors(final Document doc) { + return prepareListFields(doc, "//*[local-name()='contributorName']"); } @Override - protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()='format']", info); + protected List prepareFormats(final Document doc) { + return prepareListFields(doc, "//*[local-name()='format']"); } @Override - protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()='publisher']", info); + protected Publisher preparePublisher(final Document doc) { + return publisher(doc.valueOf("//*[local-name()='publisher']")); } @Override - protected List> prepareDescriptions(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info); + protected List prepareDescriptions(final Document doc) { + return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']"); } @Override @@ -284,65 +277,46 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List> prepareOtherResearchProductTools( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductTools(final Document doc) { return new ArrayList<>(); // Not present in ODF ??? } @Override - protected List> prepareOtherResearchProductContactGroups( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductContactGroups(final Document doc) { return prepareListFields( doc, - "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']", - info); + "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']"); } @Override - protected List> prepareOtherResearchProductContactPersons( - final Document doc, - final DataInfo info) { + protected List prepareOtherResearchProductContactPersons( + final Document doc) { return prepareListFields( doc, - "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']", - info); + "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']"); } @Override - protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { + protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) { return prepareQualifier(doc, "//*[local-name()='format']", DNET_PROGRAMMING_LANGUAGES); } @Override - protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, - final DataInfo info) { + protected String prepareSoftwareCodeRepositoryUrl(final Document doc) { return null; // Not present in ODF ??? } @Override - protected List prepareSoftwareLicenses( - final Document doc, - final DataInfo info) { - return new ArrayList<>(); // Not present in ODF ??? 
- } - - @Override - protected List> prepareSoftwareDocumentationUrls( - final Document doc, - final DataInfo info) { + protected List prepareSoftwareDocumentationUrls(final Document doc) { return prepareListFields( doc, - "//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", - info); + "//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']"); } // DATASETS @Override - protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { + protected List prepareDatasetGeoLocations(final Document doc) { final List res = new ArrayList<>(); for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { @@ -356,43 +330,39 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected Field prepareDatasetMetadataVersionNumber( - final Document doc, - final DataInfo info) { + protected String prepareDatasetMetadataVersionNumber(final Document doc) { return null; // Not present in ODF ??? } @Override - protected Field prepareDatasetLastMetadataUpdate( - final Document doc, - final DataInfo info) { - return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info); + protected String prepareDatasetLastMetadataUpdate(final Document doc) { + return doc.valueOf("//*[local-name()='date' and ./@dateType='Updated']"); } @Override - protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()='version']", info); + protected String prepareDatasetVersion(final Document doc) { + return doc.valueOf("//*[local-name()='version']"); } @Override - protected Field prepareDatasetSize(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()='size']", info); + protected String prepareDatasetSize(final Document doc) { + return doc.valueOf("//*[local-name()='size']"); } @Override - protected Field prepareDatasetDevice(final Document doc, final DataInfo info) { + protected String prepareDatasetDevice(final Document doc) { return null; // Not present in ODF ??? 
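// A sketch of what prepareListFields plausibly reduces to now that Field<String>
// is gone: the selected dom4j nodes flattened to their text values (the
// blank-filtering shown here is an assumption, not taken from the patch):
static List<String> textValues(final Document doc, final String xpath) {
    final List<String> res = new ArrayList<>();
    for (final Object o : doc.selectNodes(xpath)) {
        final String s = ((Node) o).getText().trim();
        if (StringUtils.isNotBlank(s)) {
            res.add(s);
        }
    }
    return res;
}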
} @Override - protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info); + protected String prepareDatasetStorageDate(final Document doc) { + return doc.valueOf("//*[local-name()='date' and ./@dateType='Issued']"); } @Override protected List addOtherResultRels( final Document doc, - final OafEntity entity) { + final Entity entity) { final String docId = entity.getId(); @@ -429,7 +399,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } protected List getRelations(final String reltype, final String entityId, final String otherId, - final OafEntity entity) { + final Entity entity) { final List res = new ArrayList<>(); RelationInverse rel = ModelSupport.findRelation(reltype); if (rel != null) { @@ -447,7 +417,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { + protected Qualifier prepareResourceType(final Document doc) { return prepareQualifier( doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java index 1e99d298d..438dbfb34 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java @@ -12,7 +12,7 @@ import com.google.common.base.Splitter; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Entity; import eu.dnetlib.dhp.schema.oaf.Relation; /** @@ -38,9 +38,9 @@ public class VerifyNsPrefixPredicate implements Predicate { @Override public boolean test(final Oaf oaf) { if (oaf instanceof Datasource) { - return testValue(((Datasource) oaf).getNamespaceprefix().getValue()); - } else if (oaf instanceof OafEntity) { - return testValue(((OafEntity) oaf).getId()); + return testValue(((Datasource) oaf).getNamespaceprefix()); + } else if (oaf instanceof Entity) { + return testValue(((Entity) oaf).getId()); } else if (oaf instanceof Relation) { return testValue(((Relation) oaf).getSource()) && testValue(((Relation) oaf).getTarget()); } else { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 9e3a451e8..efd114830 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -117,7 +117,7 @@ object SparkProduceHostedByMap { return getHostedByItemType( dats.getId, - dats.getOfficialname.getValue, + dats.getOfficialname, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 533948289..4d992e16b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -3,8 +3,8 @@ package eu.dnetlib.dhp.oa.graph.raw import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.schema.oaf.common.ModelSupport import eu.dnetlib.dhp.utils.DHPUtils import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index f5a13e72b..1bf3df5b1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,7 +2,8 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.common.EntityType +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} @@ -124,7 +125,7 @@ object SparkResolveEntities { if (b == null) a._2 else { - a._2.mergeFrom(b._2) + MergeUtils.mergeResult(a._2, b._2) a._2 } }) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 362cb2028..fb5b33152 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -114,7 +114,6 @@ object SparkConvertRDDtoDataset { val rddRelation = spark.sparkContext .textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference) .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => filterRelations(r)) //filter OpenCitations relations @@ -142,13 +141,13 @@ object SparkConvertRDDtoDataset { if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) false else { - if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0) + if (r.getProvenance == null || r.getProvenance.isEmpty) false - else if (r.getCollectedfrom.size() > 1) + else if (r.getProvenance.size() > 1) true else if ( - r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase( - r.getCollectedfrom.get(0).getValue + r.getProvenance.size() == 1 && r.getProvenance.get(0) != null && "OpenCitations".equalsIgnoreCase( + r.getProvenance.get(0).getCollectedfrom.getValue ) ) false diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index 9d57e5869..c3f9db848 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf @@ -131,7 +132,7 @@ object SparkCreateInputGraph { ds.groupByKey(_.getId) .reduceGroups { (x, y) => - x.mergeFrom(y) + MergeUtils.mergeResult(x, y) x } .map(_._2) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index fd06e7dea..ca401ec6c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -51,10 +51,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read .load(relationPath) .as[Relation] - .filter(r => - (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase - .contains("merge") - ) + .filter(r => !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index afaac04ea..7e6533b7f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import eu.dnetlib.dhp.schema.oaf.common.ModelSupport; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.spark.SparkConf; @@ -17,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; public class GraphHiveImporterJobTest { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index fc7c6e5f1..3e9bce675 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -70,8 +70,8 @@ public class GraphCleaningFunctionsTest { assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); - 
assertEquals("iis", r_out.getDataInfo().getProvenanceaction().getClassid()); - assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname()); + assertTrue(r_out.getProvenance().stream().anyMatch(p -> "iis".equals(p.getDataInfo().getProvenanceaction().getClassid()))); + assertTrue(r_out.getProvenance().stream().anyMatch(p -> "Inferred by OpenAIRE".equals(p.getDataInfo().getProvenanceaction().getClassname()))); } } @@ -141,7 +141,7 @@ public class GraphCleaningFunctionsTest { assertNotNull(p_out); assertNotNull(p_out.getPublisher()); - assertNull(p_out.getPublisher().getValue()); + assertNull(p_out.getPublisher().getName()); assertEquals("und", p_out.getLanguage().getClassid()); assertEquals("Undetermined", p_out.getLanguage().getClassname()); @@ -216,7 +216,7 @@ public class GraphCleaningFunctionsTest { assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid()); assertNull(p_out.getPublisher()); - assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); + assertEquals("1970-10-07", p_cleaned.getDateofacceptance()); assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid()); assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 3bd1c13de..c0b06eccd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -9,6 +9,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import eu.dnetlib.dhp.schema.oaf.common.ModelSupport; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -26,7 +27,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.utils.DHPUtils; @@ -130,13 +130,13 @@ public class GroupEntitiesSparkJobTest { assertEquals( 2, output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .map((MapFunction) r -> r.getResulttype(), Encoders.STRING()) .filter((FilterFunction) s -> s.equals("publication")) .count()); assertEquals( 1, output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .map((MapFunction) r -> r.getResulttype(), Encoders.STRING()) .filter((FilterFunction) s -> s.equals("dataset")) .count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 53b3f8432..e8a6c049a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import 
java.util.List; +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; import org.apache.commons.io.IOUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; @@ -20,7 +21,6 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -72,9 +72,9 @@ class GenerateEntitiesApplicationTest { protected void verifyMerge(Result publication, Result dataset, Class clazz, String resultType) { - final Result merge = OafMapperUtils.mergeResults(publication, dataset); + final Result merge = MergeUtils.mergeResults(publication, dataset); assertTrue(clazz.isAssignableFrom(merge.getClass())); - assertEquals(resultType, merge.getResulttype().getClassid()); + assertEquals(resultType, merge.getResulttype()); } protected Result getResult(String xmlFileName, Class clazz) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index d08545388..b461814ee 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -26,7 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -92,7 +91,6 @@ class MappersTest { assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); - assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Votsi,Nefta", author.get().getFullname()); assertEquals("Votsi", author.get().getSurname()); assertEquals("Nefta", author.get().getName()); @@ -124,7 +122,7 @@ class MappersTest { assertNotNull(p.getFulltext()); assertEquals(1, p.getFulltext().size()); - assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0).getValue()); + assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0)); // RESULT PROJECT List resultProject = list @@ -171,9 +169,11 @@ class MappersTest { private void verifyRelation(Relation r) { assertValidId(r.getSource()); assertValidId(r.getTarget()); - assertValidId(r.getCollectedfrom().get(0).getKey()); - assertNotNull(r.getDataInfo()); - assertNotNull(r.getDataInfo().getTrust()); + for(Provenance p : r.getProvenance()) { + assertValidId(p.getCollectedfrom().getKey()); + assertNotNull(p.getDataInfo()); + assertNotNull(p.getDataInfo().getTrust()); + } assertTrue(StringUtils.isNotBlank(r.getRelClass())); assertTrue(StringUtils.isNotBlank(r.getRelType())); @@ -221,7 +221,6 @@ class MappersTest { assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); 
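// Usage sketch: pairwise result merging now goes through the static
// MergeUtils.mergeResults(a, b) exercised by verifyMerge above, replacing the
// a.mergeFrom(b) instance call removed from the Scala jobs earlier in this
// patch. Which concrete subtype and resulttype win is decided inside MergeUtils:
// final Result merged = MergeUtils.mergeResults(publication, dataset);
// assertTrue(Result.class.isAssignableFrom(merged.getClass()));
// assertNotNull(merged.getResulttype()); // plain classid string after this refactoring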
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); - assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Votsi,Nefta", author.get().getFullname()); assertEquals("Votsi", author.get().getSurname()); assertEquals("Nefta", author.get().getName()); @@ -326,7 +325,7 @@ class MappersTest { .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - final Optional oPid = author + final Optional oPid = author .get() .getPid() .stream() @@ -337,21 +336,10 @@ class MappersTest { assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); - assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Baracchini, Theo", author.get().getFullname()); assertEquals("Baracchini", author.get().getSurname()); assertEquals("Theo", author.get().getName()); - assertEquals(1, author.get().getAffiliation().size()); - final Optional> opAff = author - .get() - .getAffiliation() - .stream() - .findFirst(); - assertTrue(opAff.isPresent()); - final Field affiliation = opAff.get(); - assertEquals("ISTI-CNR", affiliation.getValue()); - assertTrue(d.getSubject().size() > 0); assertTrue(d.getInstance().size() > 0); assertTrue(d.getContext().size() > 0); @@ -378,10 +366,13 @@ class MappersTest { assertValidId(r1.getTarget()); assertValidId(r2.getSource()); assertValidId(r2.getTarget()); - assertNotNull(r1.getDataInfo()); - assertNotNull(r2.getDataInfo()); - assertNotNull(r1.getDataInfo().getTrust()); - assertNotNull(r2.getDataInfo().getTrust()); + + assertNotNull(r1.getProvenance()); + assertFalse(r1.getProvenance().isEmpty()); + assertNotNull(r1.getProvenance().get(0).getDataInfo()); + assertNotNull(r2.getProvenance().get(0).getDataInfo()); + assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust()); + assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust()); assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r2.getSource(), r1.getTarget()); assertTrue(StringUtils.isNotBlank(r1.getRelClass())); @@ -491,7 +482,6 @@ class MappersTest { assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid()); assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname()); assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid()); - assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); assertValidId(d.getId()); assertEquals(2, d.getOriginalId().size()); @@ -510,7 +500,7 @@ class MappersTest { assertNotNull(d.getDescription()); assertEquals(1, d.getDescription().size()); - assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); + assertTrue(StringUtils.isNotBlank(d.getDescription().get(0))); assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); @@ -524,7 +514,7 @@ class MappersTest { assertEquals(0, d.getPid().size()); assertNotNull(d.getPublisher()); - assertEquals("nct", d.getPublisher().getValue()); + assertEquals("nct", d.getPublisher().getName()); assertTrue(d.getSubject().isEmpty()); assertTrue(d.getContext().isEmpty()); @@ -536,7 +526,7 @@ class 
MappersTest {
assertNotNull(i.getAccessright());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid());
- assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename());
assertEquals("OPEN", i.getAccessright().getClassid());
assertEquals("Open Access", i.getAccessright().getClassname());
@@ -552,11 +542,10 @@ class MappersTest {
assertEquals("0037", i.getInstancetype().getClassid());
assertEquals("Clinical Trial", i.getInstancetype().getClassname());
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid());
- assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename());
assertNull(i.getLicense());
assertNotNull(i.getDateofacceptance());
- assertEquals("2014-11-11", i.getDateofacceptance().getValue());
+ assertEquals("2014-11-11", i.getDateofacceptance());
assertNull(i.getDistributionlocation());
assertNull(i.getProcessingchargeamount());
@@ -571,7 +560,7 @@ class MappersTest {
assertEquals("nct", i.getAlternateIdentifier().get(0).getQualifier().getClassid());
assertEquals("ClinicalTrials.gov Identifier", i.getAlternateIdentifier().get(0).getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemeid());
- assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemename());
assertNotNull(i.getUrl());
assertEquals(2, i.getUrl().size());
@@ -738,13 +727,13 @@ class MappersTest {
assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid()));
assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()));
assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue());
- assertEquals("dataset", p.getResulttype().getClassname());
+ assertEquals("dataset", p.getResulttype());
assertEquals(1, p.getInstance().size());
assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid());
assertValidId(p.getInstance().get(0).getCollectedfrom().getKey());
assertValidId(p.getInstance().get(0).getHostedby().getKey());
assertEquals(
- "http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue());
+ "http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getUrl());
assertEquals(1, p.getInstance().size());
assertNotNull(p.getInstance().get(0).getAlternateIdentifier());
@@ -938,8 +927,8 @@ class MappersTest {
assertTrue(p.getProcessingchargeamount() != null);
assertTrue(p.getProcessingchargecurrency() != null);
- assertEquals("1721.47", p.getProcessingchargeamount().getValue());
- assertEquals("EUR", p.getProcessingchargecurrency().getValue());
+ assertEquals("1721.47", p.getProcessingchargeamount());
+ assertEquals("EUR", p.getProcessingchargecurrency());
}
@Test
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 27304ec06..574fdae2e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -51,8 +51,7
@@ class MigrateDbEntitiesApplicationTest { .thenAnswer( invocation -> OafMapperUtils .qualifier( - invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), - invocation.getArgument(0))); + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -80,12 +79,12 @@ class MigrateDbEntitiesApplicationTest { assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid()); assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid()); - assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue()); - assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue()); - assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue()); + assertEquals(getValueAsString("officialname", fields), ds.getOfficialname()); + assertEquals(getValueAsString("englishname", fields), ds.getEnglishname()); + assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl()); assertEquals(getValueAsString("logourl", fields), ds.getLogourl()); - assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue()); - assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue()); + assertEquals(getValueAsString("contactemail", fields), ds.getContactemail()); + assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix()); assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName()); assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); @@ -100,37 +99,34 @@ class MigrateDbEntitiesApplicationTest { assertEquals("Data Source", ds.getEosctype().getClassid()); assertEquals("Data Source", ds.getEosctype().getClassname()); assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid()); - assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename()); assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid()); assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname()); assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid()); - assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename()); assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid()); assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname()); assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid()); - assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename()); - assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue()); - assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue()); + assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude()); + assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude()); assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation()); - assertEquals(getValueAsString("description", fields), ds.getDescription().getValue()); + assertEquals(getValueAsString("description", fields), ds.getDescription()); // TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects()); - assertEquals("0.0", ds.getOdnumberofitems().getValue()); + assertEquals("0.0", ds.getOdnumberofitems()); 
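// A sketch of the Qualifier shape now asserted throughout this test: schemename
// is gone, so the factory (and the vocabulary mock above) takes three arguments.
// Example values taken from the assertions above:
static Qualifier compatibility() {
    return OafMapperUtils.qualifier("openaire4.0", "openaire4.0", "dnet:datasourceCompatibilityLevel");
}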
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate()); assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies()); assertEquals( getValueAsList("odlanguages", fields), - ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList())); + ds.getOdlanguages().stream().collect(Collectors.toList())); assertEquals(getValueAsList("languages", fields), ds.getLanguages()); assertEquals( getValueAsList("accessinfopackage", fields), - ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList())); + ds.getAccessinfopackage().stream().collect(Collectors.toList())); assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate()); assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate()); assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl()); @@ -143,7 +139,7 @@ class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction()); assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction()); - assertEquals(false, ds.getVersioning().getValue()); + assertEquals(false, ds.getVersioning()); assertEquals(false, ds.getVersioncontrol()); assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl()); @@ -164,13 +160,6 @@ class MigrateDbEntitiesApplicationTest { .collect(Collectors.toCollection(HashSet::new)); assertEquals(1, cpSchemeId.size()); assertTrue(cpSchemeId.contains("eosc:contentpolicies")); - HashSet cpSchemeName = ds - .getContentpolicies() - .stream() - .map(Qualifier::getSchemename) - .collect(Collectors.toCollection(HashSet::new)); - assertEquals(1, cpSchemeName.size()); - assertTrue(cpSchemeName.contains("eosc:contentpolicies")); assertEquals(2, ds.getContentpolicies().size()); assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid()); assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid()); @@ -202,8 +191,8 @@ class MigrateDbEntitiesApplicationTest { final Project p = (Project) list.get(0); assertValidId(p.getId()); assertValidId(p.getCollectedfrom().get(0).getKey()); - assertEquals(getValueAsString("acronym", fields), p.getAcronym().getValue()); - assertEquals(getValueAsString("title", fields), p.getTitle().getValue()); + assertEquals(getValueAsString("acronym", fields), p.getAcronym()); + assertEquals(getValueAsString("title", fields), p.getTitle()); assertEquals(getValueAsString("collectedfromname", fields), p.getCollectedfrom().get(0).getValue()); assertEquals(getValueAsFloat("fundedamount", fields), p.getFundedamount()); assertEquals(getValueAsFloat("totalcost", fields), p.getTotalcost()); @@ -222,13 +211,12 @@ class MigrateDbEntitiesApplicationTest { final Organization o = (Organization) list.get(0); assertValidId(o.getId()); assertValidId(o.getCollectedfrom().get(0).getKey()); - assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname().getValue()); - assertEquals(getValueAsString("legalname", fields), o.getLegalname().getValue()); - assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl().getValue()); + assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname()); + assertEquals(getValueAsString("legalname", fields), o.getLegalname()); + assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl()); assertEquals(getValueAsString("country", fields).split("@@@")[0], 
o.getCountry().getClassid()); assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassname()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); - assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); final List alternativenames = getValueAsList("alternativenames", fields); assertEquals(2, alternativenames.size()); @@ -280,8 +268,12 @@ class MigrateDbEntitiesApplicationTest { assertValidId(r2.getSource()); assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r2.getSource(), r1.getTarget()); - assertValidId(r1.getCollectedfrom().get(0).getKey()); - assertValidId(r2.getCollectedfrom().get(0).getKey()); + assertNotNull(r1.getProvenance()); + assertFalse(r1.getProvenance().isEmpty()); + assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey()); + assertNotNull(r2.getProvenance()); + assertFalse(r2.getProvenance().isEmpty()); + assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey()); assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType()); assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType()); @@ -350,10 +342,17 @@ class MigrateDbEntitiesApplicationTest { assertValidId(r1.getTarget()); assertValidId(r2.getSource()); assertValidId(r2.getTarget()); - assertNotNull(r1.getDataInfo()); - assertNotNull(r2.getDataInfo()); - assertNotNull(r1.getDataInfo().getTrust()); - assertNotNull(r2.getDataInfo().getTrust()); + + assertNotNull(r1.getProvenance()); + assertFalse(r1.getProvenance().isEmpty()); + assertNotNull(r1.getProvenance().get(0).getDataInfo()); + assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust()); + + assertNotNull(r2.getProvenance()); + assertFalse(r2.getProvenance().isEmpty()); + assertNotNull(r2.getProvenance().get(0).getDataInfo()); + assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust()); + assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r2.getSource(), r1.getTarget()); assertTrue(StringUtils.isNotBlank(r1.getRelClass())); @@ -361,8 +360,8 @@ class MigrateDbEntitiesApplicationTest { assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); - assertValidId(r1.getCollectedfrom().get(0).getKey()); - assertValidId(r2.getCollectedfrom().get(0).getKey()); + assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey()); + assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey()); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java index a14fb4ae3..1750b8239 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java @@ -8,7 +8,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -39,22 +38,17 @@ class VerifyNsPrefixPredicateTest { @Test void testTest_ds_true() { - final 
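Reviewer note on the hunks above: Relation no longer exposes collectedfrom or a top-level dataInfo; both now live inside a list of Provenance entries. A minimal sketch of the access pattern these assertions exercise (the Provenance getters are taken from the asserts above; the helper method itself is illustrative only):

    // Illustrative only: walks the new provenance-based Relation shape.
    private static void assertProvenance(final Relation rel) {
        final List<Provenance> provenance = rel.getProvenance();
        assertNotNull(provenance);
        assertFalse(provenance.isEmpty());
        // collectedfrom and dataInfo moved inside each Provenance entry
        assertNotNull(provenance.get(0).getCollectedfrom().getKey());
        assertNotNull(provenance.get(0).getDataInfo().getTrust());
    }
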
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java
index a14fb4ae3..1750b8239 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicateTest.java
@@ -8,7 +8,6 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Field;
 import eu.dnetlib.dhp.schema.oaf.Project;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 
@@ -39,22 +38,17 @@ class VerifyNsPrefixPredicateTest {
 
   @Test
   void testTest_ds_true() {
-    final Field<String> prefix = new Field<>();
-    prefix.setValue("xxxxxx______");
 
     final Datasource ds = new Datasource();
-    ds.setNamespaceprefix(prefix);
+    ds.setNamespaceprefix("xxxxxx______");
 
     assertTrue(predicate.test(ds));
   }
 
   @Test
   void testTest_ds_false() {
-    final Field<String> prefix = new Field<>();
-    prefix.setValue("corda__2020");
-
     final Datasource ds = new Datasource();
-    ds.setNamespaceprefix(prefix);
+    ds.setNamespaceprefix("corda__2020");
 
     assertFalse(predicate.test(ds));
   }
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala
index d415b7fc9..cdb1bbb15 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala
@@ -1,8 +1,8 @@
 package eu.dnetlib.dhp.oa.graph.resolution
 
 import com.fasterxml.jackson.databind.ObjectMapper
-import eu.dnetlib.dhp.schema.common.EntityType
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
+import eu.dnetlib.dhp.schema.oaf.common.EntityType
+import eu.dnetlib.dhp.schema.oaf.utils.{MergeUtils, OafMapperUtils}
 import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty}
 import org.apache.commons.io.FileUtils
 import org.apache.spark.SparkConf
@@ -61,7 +61,7 @@ class ResolveEntitiesTest extends Serializable {
       List(
         OafMapperUtils.subject(
           FAKE_SUBJECT,
-          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
+          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
           null
         )
       ).asJava
@@ -70,8 +70,7 @@ class ResolveEntitiesTest extends Serializable {
       List(
         OafMapperUtils.structuredProperty(
           FAKE_TITLE,
-          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
-          null
+          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema")
         )
       ).asJava
     )
@@ -247,12 +246,12 @@ class ResolveEntitiesTest extends Serializable {
 
   @Test
   def testMerge(): Unit = {
-    val r = new Result
+    var r = new Result
     r.setSubject(
       List(
         OafMapperUtils.subject(
           FAKE_SUBJECT,
-          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
+          OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
           null
         )
       ).asJava
@@ -269,7 +268,7 @@ class ResolveEntitiesTest extends Serializable {
       classOf[Publication]
     )
 
-    r.mergeFrom(p)
+    r = MergeUtils.mergeResult(r, p);
 
     println(mapper.writeValueAsString(r))
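The Scala test above also documents the new merge contract: the mutating r.mergeFrom(p) is gone, and MergeUtils.mergeResult(r, p) returns the merged instance, which the caller must reassign (hence val becoming var). A hedged Java sketch of the same call, assuming mergeResult keeps the (left, right) argument order used in the test; OBJECT_MAPPER and json are placeholders:

    // Sketch: functional merge; the returned object must be captured by the caller.
    Result r = new Result();
    Publication p = OBJECT_MAPPER.readValue(json, Publication.class);
    r = MergeUtils.mergeResult(r, p); // replaces the former r.mergeFrom(p)
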
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
index 48e5945c0..fb1df69ef 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
@@ -3,11 +3,14 @@ package eu.dnetlib.dhp.oa.provision;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.EntityType;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -26,15 +29,6 @@ import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
-import eu.dnetlib.dhp.schema.common.EntityType;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Field;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.Organization;
-import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Result;
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
 import scala.Tuple2;
@@ -79,7 +73,7 @@ public class CreateRelatedEntitiesJob_phase1 {
     log.info("graphTableClassName: {}", graphTableClassName);
 
     @SuppressWarnings("unchecked")
-    final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
+    final Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
 
     final SparkConf conf = new SparkConf();
     conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@@ -91,7 +85,7 @@ public class CreateRelatedEntitiesJob_phase1 {
       });
   }
 
-  private static <E extends OafEntity> void joinRelationEntity(
+  private static <E extends Entity> void joinRelationEntity(
     final SparkSession spark,
     final String inputRelationsPath,
     final String inputEntityPath,
@@ -123,7 +117,7 @@ public class CreateRelatedEntitiesJob_phase1 {
       .parquet(outputPath);
   }
 
-  private static <E extends OafEntity> Dataset<E> readPathEntity(
+  private static <E extends Entity> Dataset<E> readPathEntity(
     final SparkSession spark,
     final String inputEntityPath,
     final Class<E> entityClazz) {
@@ -137,7 +131,7 @@ public class CreateRelatedEntitiesJob_phase1 {
       Encoders.bean(entityClazz));
   }
 
-  public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
+  public static <E extends Entity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
 
     final RelatedEntity re = new RelatedEntity();
     re.setId(entity.getId());
@@ -162,8 +156,8 @@ public class CreateRelatedEntitiesJob_phase1 {
       re.setTitle(title);
     }
 
-    re.setDateofacceptance(getValue(result.getDateofacceptance()));
-    re.setPublisher(getValue(result.getPublisher()));
+    re.setDateofacceptance(result.getDateofacceptance());
+    re.setPublisher(Optional.ofNullable(result.getPublisher()).map(p -> p.getName()).orElse(null));
     re.setResulttype(result.getResulttype());
     if (Objects.nonNull(result.getInstance())) {
       re
@@ -206,24 +200,23 @@ public class CreateRelatedEntitiesJob_phase1 {
         re.setAcronym(getValue(p.getAcronym()));
         re.setContracttype(p.getContracttype());
 
-        final List<Field<String>> f = p.getFundingtree();
+        final List<String> f = p.getFundingtree();
         if (!f.isEmpty()) {
-          re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList()));
+          re.setFundingtree(f);
         }
         break;
     }
     return re;
   }
 
-  private static String getValue(final Field<String> field) {
-    return getFieldValueWithDefault(field, "");
+  private static String getValue(final String s) {
+    return getFieldValueWithDefault(s, "");
   }
 
-  private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) {
+  private static <T> T getFieldValueWithDefault(final T f, final T defaultValue) {
     return Optional
       .ofNullable(f)
       .filter(Objects::nonNull)
-      .map(Field::getValue)
       .orElse(defaultValue);
   }
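With the Field<String> wrappers removed from the model, phase1 now reads plain strings directly; only the publisher keeps a structured shape, exposing its label through getName() as the hunk above shows. A minimal sketch of the simplified mapping (Publisher#getName taken from the + line above):

    // Sketch: plain strings flow straight into RelatedEntity, no .getValue() hop.
    re.setDateofacceptance(result.getDateofacceptance());
    re.setPublisher(
        Optional
            .ofNullable(result.getPublisher())
            .map(p -> p.getName()) // Publisher remains a structured type
            .orElse(null));
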
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
index 85fb4a6b2..aea960171 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
@@ -8,6 +8,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -28,7 +29,6 @@ import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
 import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
 import scala.Tuple2;
@@ -78,7 +78,7 @@ public class CreateRelatedEntitiesJob_phase2 {
     String graphTableClassName = parser.get("graphTableClassName");
     log.info("graphTableClassName: {}", graphTableClassName);
 
-    Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
+    Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
 
     SparkConf conf = new SparkConf();
     conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@@ -94,7 +94,7 @@ public class CreateRelatedEntitiesJob_phase2 {
     });
   }
 
-  private static <E extends OafEntity> void joinEntityWithRelatedEntities(
+  private static <E extends Entity> void joinEntityWithRelatedEntities(
     SparkSession spark,
     String relatedEntitiesPath,
     String entityPath,
@@ -177,7 +177,7 @@ public class CreateRelatedEntitiesJob_phase2 {
 
   }
 
-  private static <E extends OafEntity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities(
+  private static <E extends Entity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities(
     SparkSession spark, String inputRelatedEntitiesPath, Class<E> entityClazz) {
 
     log.info("Reading related entities from: {}", inputRelatedEntitiesPath);
@@ -200,7 +200,7 @@ public class CreateRelatedEntitiesJob_phase2 {
       Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntityWrapper.class)));
   }
 
-  private static <E extends OafEntity> Dataset<Tuple2<String, E>> readPathEntity(
+  private static <E extends Entity> Dataset<Tuple2<String, E>> readPathEntity(
     SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
 
     log.info("Reading Graph table from: {}", inputEntityPath);
@@ -217,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase2 {
       Encoders.tuple(Encoders.STRING(), Encoders.kryo(entityClazz)));
   }
 
-  private static <E extends OafEntity> E pruneOutliers(Class<E> entityClazz, E e) {
+  private static <E extends Entity> E pruneOutliers(Class<E> entityClazz, E e) {
     if (ModelSupport.isSubClass(entityClazz, Result.class)) {
       Result r = (Result) e;
       if (r.getExternalReference() != null) {
@@ -239,14 +239,11 @@ public class CreateRelatedEntitiesJob_phase2 {
       r.setAuthor(authors);
     }
     if (r.getDescription() != null) {
-      List<Field<String>> desc = r
+      List<String> desc = r
         .getDescription()
         .stream()
         .filter(Objects::nonNull)
-        .map(d -> {
-          d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
-          return d;
-        })
+        .map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
        .collect(Collectors.toList());
       r.setDescription(desc);
     }
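Same simplification in phase2: abstracts are now plain strings, so the hard limit is applied with a stateless map instead of mutating Field objects in place. A minimal sketch of the resulting truncation step (null entries are filtered first, so StringUtils.left never sees a null):

    // Sketch: cap each abstract at MAX_ABSTRACT_LENGTH without mutating shared state.
    List<String> desc = r
        .getDescription()
        .stream()
        .filter(Objects::nonNull)
        .map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
        .collect(Collectors.toList());
    r.setDescription(desc);
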
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
index fdf397ad7..512cae826 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
@@ -132,7 +132,6 @@ public class PrepareRelationsJob {
     JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
       .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
-      .filter(rel -> !rel.getDataInfo().getDeletedbyinference())
       .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
 
     JavaRDD<Relation> pruned = pruneRels(
@@ -171,7 +170,6 @@ public class PrepareRelationsJob {
       .map(
         (MapFunction<String, Relation>) s -> OBJECT_MAPPER.readValue(s, Relation.class),
         Encoders.kryo(Relation.class))
-      .filter((FilterFunction<Relation>) rel -> !rel.getDataInfo().getDeletedbyinference())
       .filter((FilterFunction<Relation>) rel -> !relationFilter.contains(rel.getRelClass()))
       .groupByKey(
         (MapFunction<Relation, String>) Relation::getSource,
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SortableRelation.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SortableRelation.java
index 8740b47fc..a9c0d74d2 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SortableRelation.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SortableRelation.java
@@ -43,9 +43,7 @@ public class SortableRelation extends Relation implements Comparable<SortableRelation>
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java
-public class JoinedEntity<E extends OafEntity> implements Serializable {
+public class JoinedEntity<E extends Entity> implements Serializable {
 
   private E entity;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
index d4ee24c14..fbdca8761 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
@@ -7,7 +7,7 @@ import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.oa.provision.RelationList;
 import eu.dnetlib.dhp.oa.provision.SortableRelation;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 
 public class ProvisionModelSupport {
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java
index 5c78d1826..1940da08c 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java
@@ -25,7 +25,7 @@ public class RelatedEntity implements Serializable {
   private String publisher;
   private List<StructuredProperty> pid;
   private String codeRepositoryUrl;
-  private Qualifier resulttype;
+  private String resulttype;
 
   private List<KeyValue> collectedfrom;
   private List<Instance> instances;
@@ -111,11 +111,11 @@ public class RelatedEntity implements Serializable {
     this.codeRepositoryUrl = codeRepositoryUrl;
   }
 
-  public Qualifier getResulttype() {
+  public String getResulttype() {
     return resulttype;
   }
 
-  public void setResulttype(Qualifier resulttype) {
+  public void setResulttype(String resulttype) {
     this.resulttype = resulttype;
   }
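The PrepareRelationsJob hunks above drop the deletedbyinference guard because Relation no longer carries a top-level DataInfo; the remaining pre-filter reduces to the source/target and relClass checks. Assembled from the + side of the hunk (a sketch of the filter chain only, not the full job):

    // Sketch: the relation pre-filter after the refactoring; the former
    // rel.getDataInfo().getDeletedbyinference() guard is gone.
    JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
        .filter(rel -> !(rel.getSource().startsWith("unresolved")
            || rel.getTarget().startsWith("unresolved")))
        .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
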
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java
index 930eab4c3..7afa60630 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java
@@ -19,13 +19,11 @@ public class XmlInstance implements Serializable {
     UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN);
     UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN);
     UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
-    UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES);
 
     UNKNOWN_REVIEW_LEVEL = new Qualifier();
     UNKNOWN_REVIEW_LEVEL.setClassid("0000");
     UNKNOWN_REVIEW_LEVEL.setClassname(ModelConstants.UNKNOWN);
     UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_REVIEW_LEVELS);
-    UNKNOWN_REVIEW_LEVEL.setSchemename(ModelConstants.DNET_REVIEW_LEVELS);
   }
 
   private String url;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
index 87c0261ac..2f0e711c7 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
@@ -1,25 +1,21 @@
 
 package eu.dnetlib.dhp.oa.provision.utils;
 
-import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
-import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
+import com.google.common.collect.Lists;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Entity;
+import org.apache.commons.lang3.StringUtils;
+import org.stringtemplate.v4.ST;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-import javax.swing.text.html.Option;
+import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
+import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
 
-import org.apache.commons.lang3.StringUtils;
-import org.stringtemplate.v4.ST;
-
-import com.google.common.collect.Lists;
-
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
 
 public class TemplateFactory {
@@ -62,7 +58,7 @@ public class TemplateFactory {
   }
 
   public String buildRecord(
-    final OafEntity entity, final String schemaLocation, final String body) {
+    final Entity entity, final String schemaLocation, final String body) {
     return getTemplate(resources.getRecord())
       .add("id", escapeXml(removePrefix(entity.getId())))
       .add("dateofcollection", entity.getDateofcollection())
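The XmlInstance hunk above illustrates the Qualifier slimming applied throughout the patch: the schemename leg is dropped, so a qualifier is fully described by classid, classname and schemeid. A sketch of the reduced setup, mirroring the UNKNOWN_ACCESS_RIGHT block:

    // Sketch: three-legged Qualifier after removing schemename.
    Qualifier unknownAccessRight = new Qualifier();
    unknownAccessRight.setClassid(ModelConstants.UNKNOWN);
    unknownAccessRight.setClassname(ModelConstants.UNKNOWN);
    unknownAccessRight.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
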
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
index 45ba840c9..97fea8467 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@@ -20,6 +20,7 @@ import javax.xml.transform.*;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import eu.dnetlib.dhp.schema.oaf.common.*;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.commons.lang3.tuple.Pair;
@@ -97,7 +98,7 @@ public class XmlRecordFactory implements Serializable {
     final Set<String> contexts = Sets.newHashSet();
 
     // final OafEntity entity = toOafEntity(je.getEntity());
-    final OafEntity entity = je.getEntity();
+    final Entity entity = je.getEntity();
     final TemplateFactory templateFactory = new TemplateFactory();
     try {
@@ -128,7 +129,7 @@ public class XmlRecordFactory implements Serializable {
     }
   }
 
-  private static OafEntity parseOaf(final String json, final String type) {
+  private static Entity parseOaf(final String json, final String type) {
     try {
       switch (EntityType.valueOf(type)) {
         case publication:
@@ -170,7 +171,7 @@ public class XmlRecordFactory implements Serializable {
 
   private List<String> metadata(
     final EntityType type,
-    final OafEntity entity,
+    final Entity entity,
     final Set<String> contexts) {
 
     final List<String> metadata = Lists.newArrayList();
@@ -319,7 +320,7 @@ public class XmlRecordFactory implements Serializable {
             .getContributor()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("contributor", c))
             .collect(Collectors.toList()));
       }
       if (r.getCountry() != null) {
@@ -339,14 +340,14 @@ public class XmlRecordFactory implements Serializable {
             .getCoverage()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("coverage", c))
             .collect(Collectors.toList()));
       }
       if (r.getDateofacceptance() != null) {
         metadata
           .add(
             XmlSerializationUtils
-              .asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
+              .asXmlElement("dateofacceptance", r.getDateofacceptance()));
       }
       if (r.getDescription() != null) {
         metadata
@@ -355,12 +356,12 @@ public class XmlRecordFactory implements Serializable {
             .getDescription()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("description", c))
             .collect(Collectors.toCollection(HashSet::new)));
       }
       if (r.getEmbargoenddate() != null) {
         metadata
-          .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
+          .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate()));
       }
       if (r.getSubject() != null) {
         metadata
@@ -386,7 +387,7 @@ public class XmlRecordFactory implements Serializable {
             .collect(Collectors.toList()));
       }
       if (r.getPublisher() != null) {
-        metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getValue()));
+        metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getName()));
       }
       if (r.getSource() != null) {
         metadata
@@ -395,7 +396,7 @@ public class XmlRecordFactory implements Serializable {
             .getSource()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("source", c))
             .collect(Collectors.toCollection(HashSet::new)));
       }
       if (r.getFormat() != null) {
@@ -405,11 +406,11 @@ public class XmlRecordFactory implements Serializable {
             .getFormat()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("format", c))
             .collect(Collectors.toList()));
       }
       if (r.getResulttype() != null) {
-        metadata.add(XmlSerializationUtils.mapQualifier("resulttype", r.getResulttype()));
+        metadata.add(XmlSerializationUtils.asXmlElement("resulttype", r.getResulttype()));
       }
       if (r.getResourcetype() != null) {
         metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
@@ -418,11 +419,11 @@ public class XmlRecordFactory implements Serializable {
         metadata
           .add(
             XmlSerializationUtils
-              .asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue()));
+              .asXmlElement("processingchargeamount", r.getProcessingchargeamount()));
         metadata
           .add(
             XmlSerializationUtils
-              .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue()));
+              .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency()));
       }
     }
 
@@ -439,29 +440,29 @@ public class XmlRecordFactory implements Serializable {
       case dataset:
         final Dataset d = (Dataset) entity;
         if (d.getDevice() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice()));
         }
         if (d.getLastmetadataupdate() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
+                .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate()));
         }
         if (d.getMetadataversionnumber() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
+                .asXmlElement("metadataversionnumber", d.getMetadataversionnumber()));
         }
         if (d.getSize() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize()));
         }
         if (d.getStoragedate() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate()));
         }
         if (d.getVersion() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion()));
         }
         // TODO d.getGeolocation()
 
@@ -476,7 +477,7 @@ public class XmlRecordFactory implements Serializable {
             .getContactperson()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c))
             .collect(Collectors.toList()));
         }
 
@@ -487,7 +488,7 @@ public class XmlRecordFactory implements Serializable {
             .getContactgroup()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c))
             .collect(Collectors.toList()));
         }
         if (orp.getTool() != null) {
@@ -497,7 +498,7 @@ public class XmlRecordFactory implements Serializable {
             .getTool()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue()))
+            .map(c -> XmlSerializationUtils.asXmlElement("tool", c))
             .collect(Collectors.toList()));
         }
         break;
@@ -511,24 +512,14 @@ public class XmlRecordFactory implements Serializable {
             .getDocumentationUrl()
             .stream()
             .filter(Objects::nonNull)
-            .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue()))
-            .collect(Collectors.toList()));
-        }
-        if (s.getLicense() != null) {
-          metadata
-            .addAll(
-              s
-                .getLicense()
-                .stream()
-                .filter(Objects::nonNull)
-                .map(l -> XmlSerializationUtils.mapStructuredProperty("license", l))
+            .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c))
             .collect(Collectors.toList()));
         }
         if (s.getCodeRepositoryUrl() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
+                .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl()));
         }
         if (s.getProgrammingLanguage() != null) {
           metadata
@@ -560,45 +551,45 @@ public class XmlRecordFactory implements Serializable {
         }
         if (ds.getOfficialname() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname()));
         }
         if (ds.getEnglishname() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname()));
         }
         if (ds.getWebsiteurl() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl()));
         }
         if (ds.getLogourl() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl()));
         }
         if (ds.getContactemail() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail()));
         }
         if (ds.getNamespaceprefix() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
+                .asXmlElement("namespaceprefix", ds.getNamespaceprefix()));
         }
         if (ds.getLatitude() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude()));
         }
         if (ds.getLongitude() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude()));
         }
         if (ds.getDateofvalidation() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
+                .asXmlElement("dateofvalidation", ds.getDateofvalidation()));
         }
         if (ds.getDescription() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription()));
         }
         if (ds.getSubjects() != null) {
           metadata
@@ -614,17 +605,17 @@ public class XmlRecordFactory implements Serializable {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
+                .asXmlElement("odnumberofitems", ds.getOdnumberofitems()));
         }
         if (ds.getOdnumberofitemsdate() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
+                .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate()));
         }
         if (ds.getOdpolicies() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies()));
         }
         if (ds.getOdlanguages() != null) {
           metadata
@@ -633,7 +624,7 @@ public class XmlRecordFactory implements Serializable {
               .getOdlanguages()
               .stream()
               .filter(Objects::nonNull)
-              .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
+              .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c))
              .collect(Collectors.toList()));
         }
         if (ds.getLanguages() != null) {
@@ -653,7 +644,7 @@ public class XmlRecordFactory implements Serializable {
               .getOdcontenttypes()
               .stream()
               .filter(Objects::nonNull)
-              .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue()))
+              .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c))
               .collect(Collectors.toList()));
         }
         if (ds.getAccessinfopackage() != null) {
@@ -662,69 +653,69 @@ public class XmlRecordFactory implements Serializable {
             ds
               .getAccessinfopackage()
               .stream()
-              .map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c.getValue()))
+              .map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c))
              .collect(Collectors.toList()));
         }
         if (ds.getReleaseenddate() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
+                .asXmlElement("releasestartdate", ds.getReleaseenddate()));
         }
         if (ds.getReleaseenddate() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
+                .asXmlElement("releaseenddate", ds.getReleaseenddate()));
         }
         if (ds.getMissionstatementurl() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
+                .asXmlElement("missionstatementurl", ds.getMissionstatementurl()));
         }
         if (ds.getDataprovider() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
+                .asXmlElement("dataprovider", ds.getDataprovider().toString()));
         }
         if (ds.getServiceprovider() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
+                .asXmlElement("serviceprovider", ds.getServiceprovider().toString()));
         }
         if (ds.getDatabaseaccesstype() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
+                .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype()));
         }
         if (ds.getDatauploadtype() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
+                .asXmlElement("datauploadtype", ds.getDatauploadtype()));
         }
         if (ds.getDatabaseaccessrestriction() != null) {
           metadata
             .add(
               XmlSerializationUtils
                 .asXmlElement(
-                  "databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
+                  "databaseaccessrestriction", ds.getDatabaseaccessrestriction()));
         }
         if (ds.getDatauploadrestriction() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
+                .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction()));
         }
         if (ds.getVersioning() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("versioning", ds.getVersioning().getValue().toString()));
+                .asXmlElement("versioning", ds.getVersioning().toString()));
         }
         if (ds.getVersioncontrol() != null) {
           metadata
@@ -736,15 +727,15 @@ public class XmlRecordFactory implements Serializable {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
+                .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl()));
         }
         if (ds.getPidsystems() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems()));
         }
         if (ds.getCertificates() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates()));
         }
         if (ds.getPolicies() != null) {
           metadata
@@ -831,11 +822,11 @@ public class XmlRecordFactory implements Serializable {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("legalshortname", o.getLegalshortname().getValue()));
+                .asXmlElement("legalshortname", o.getLegalshortname()));
         }
         if (o.getLegalname() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname()));
         }
         if (o.getAlternativeNames() != null) {
           metadata
@@ -844,40 +835,40 @@ public class XmlRecordFactory implements Serializable {
              .getAlternativeNames()
              .stream()
              .filter(Objects::nonNull)
-              .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue()))
+              .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c))
              .collect(Collectors.toList()));
         }
         if (o.getWebsiteurl() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl()));
         }
         if (o.getLogourl() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl()));
         }
         if (o.getEclegalbody() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody()));
         }
         if (o.getEclegalperson() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson()));
         }
         if (o.getEcnonprofit() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit()));
         }
         if (o.getEcresearchorganization() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
+                .asXmlElement("ecresearchorganization", o.getEcresearchorganization()));
         }
         if (o.getEchighereducation() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("echighereducation", o.getEchighereducation().getValue()));
+                .asXmlElement("echighereducation", o.getEchighereducation()));
         }
         if (o.getEcinternationalorganizationeurinterests() != null) {
           metadata
@@ -885,28 +876,28 @@ public class XmlRecordFactory implements Serializable {
               XmlSerializationUtils
                 .asXmlElement(
                   "ecinternationalorganizationeurinterests",
-                  o.getEcinternationalorganizationeurinterests().getValue()));
+                  o.getEcinternationalorganizationeurinterests()));
         }
         if (o.getEcinternationalorganization() != null) {
           metadata
             .add(
               XmlSerializationUtils
                 .asXmlElement(
-                  "ecinternationalorganization", o.getEcinternationalorganization().getValue()));
+                  "ecinternationalorganization", o.getEcinternationalorganization()));
         }
         if (o.getEcenterprise() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise()));
         }
         if (o.getEcsmevalidated() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
+                .asXmlElement("ecsmevalidated", o.getEcsmevalidated()));
         }
         if (o.getEcnutscode() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode()));
         }
         if (o.getCountry() != null) {
           metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
@@ -918,39 +909,39 @@ public class XmlRecordFactory implements Serializable {
 
         if (p.getWebsiteurl() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl()));
         }
         if (p.getCode() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode()));
         }
         if (p.getAcronym() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym()));
         }
         if (p.getTitle() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle()));
         }
         if (p.getStartdate() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate()));
         }
         if (p.getEnddate() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate()));
         }
         if (p.getCallidentifier() != null) {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("callidentifier", p.getCallidentifier().getValue()));
+                .asXmlElement("callidentifier", p.getCallidentifier()));
         }
         if (p.getKeywords() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords()));
         }
         if (p.getDuration() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration()));
         }
         if (p.getEcarticle29_3() != null) {
           metadata
-            .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
+            .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3()));
         }
         if (p.getSubjects() != null) {
           metadata
@@ -969,16 +960,16 @@ public class XmlRecordFactory implements Serializable {
           metadata
             .add(
               XmlSerializationUtils
-                .asXmlElement("oamandatepublications", p.getOamandatepublications().getValue()));
+                .asXmlElement("oamandatepublications", p.getOamandatepublications()));
         }
         if (p.getEcsc39() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39()));
         }
         if (p.getSummary() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary()));
         }
         if (p.getCurrency() != null) {
-          metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency().getValue()));
+          metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency()));
         }
         if (p.getTotalcost() != null) {
           metadata
@@ -995,7 +986,6 @@ public class XmlRecordFactory implements Serializable {
               .getFundingtree()
               .stream()
               .filter(Objects::nonNull)
-              .map(ft -> ft.getValue())
               .collect(Collectors.toList()));
         }
 
@@ -1054,9 +1044,6 @@ public class XmlRecordFactory implements Serializable {
       metadata
         .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
     }
-    if (re.getResulttype() != null && re.getResulttype().isBlank()) {
-      metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
-    }
     if (re.getCollectedfrom() != null) {
       metadata
         .addAll(
@@ -1081,13 +1068,13 @@ public class XmlRecordFactory implements Serializable {
     if (isNotBlank(re.getOfficialname())) {
       metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
     }
-    if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) {
+    if (re.getDatasourcetype() != null && StringUtils.isNotBlank(re.getDatasourcetype().getClassid())) {
       metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype()));
     }
-    if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) {
+    if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
       metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
     }
-    if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) {
+    if (re.getOpenairecompatibility() != null && StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
       metadata
         .add(
           XmlSerializationUtils
@@ -1102,7 +1089,7 @@ public class XmlRecordFactory implements Serializable {
       metadata
         .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
     }
-    if (re.getCountry() != null && !re.getCountry().isBlank()) {
+    if (re.getCountry() != null && StringUtils.isNotBlank(re.getCountry().getClassid())) {
       metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
     }
     break;
@@ -1116,7 +1103,7 @@ public class XmlRecordFactory implements Serializable {
     if (isNotBlank(re.getAcronym())) {
       metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
     }
-    if (re.getContracttype() != null && !re.getContracttype().isBlank()) {
+    if (re.getContracttype() != null && StringUtils.isNotBlank(re.getContracttype().getClassid())) {
       metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype()));
     }
     if (re.getFundingtree() != null && contexts != null) {
       metadata
         .addAll(
           re
           .getFundingtree()
           .stream()
           .peek(ft -> fillContextMap(ft, contexts))
-          .map(ft -> getRelFundingTree(ft))
+          .map(XmlRecordFactory::getRelFundingTree)
           .collect(Collectors.toList()));
     }
     break;
@@ -1158,14 +1145,15 @@ public class XmlRecordFactory implements Serializable {
     if (rel.getValidated() == null) {
       rel.setValidated(false);
     }
+    final DataInfo dataInfo = Optional.ofNullable(rel.getProvenance()).map(p -> p.get(0).getDataInfo()).orElse(null);
     return templateFactory
       .getRel(
-        targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(),
+        targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, dataInfo, rel.getValidated(),
         rel.getValidationDate());
   }
 
   private List<String> listChildren(
-    final OafEntity entity,
+    final Entity entity,
     final JoinedEntity je,
     final TemplateFactory templateFactory) {
@@ -1191,7 +1179,7 @@ public class XmlRecordFactory implements Serializable {
     groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
       final List<String> fields = Lists.newArrayList();
 
-      if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
+      if (instance.getAccessright() != null && StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
         fields
           .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
       }
@@ -1232,7 +1220,7 @@ public class XmlRecordFactory implements Serializable {
             instance
               .getInstancetype()
               .stream()
-              .filter(t -> !t.isBlank())
+              .filter(t -> StringUtils.isNotBlank(t.getClassid()))
              .map(t -> XmlSerializationUtils.mapQualifier("instancetype", t))
              .collect(Collectors.toList()));
       }
@@ -1242,7 +1230,7 @@ public class XmlRecordFactory implements Serializable {
             instance
               .getDistributionlocation()
               .stream()
-              .filter(d -> isNotBlank(d))
+              .filter(StringUtils::isNotBlank)
              .map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d))
              .collect(Collectors.toList()));
       }
@@ -1430,10 +1418,10 @@ public class XmlRecordFactory implements Serializable {
       instance.getInstancetype().add(i.getInstancetype());
       instance
         .setProcessingchargeamount(
-          Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null));
+          Optional.ofNullable(i.getProcessingchargeamount()).orElse(null));
       instance
         .setProcessingchargecurrency(
-          Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null));
+          Optional.ofNullable(i.getProcessingchargecurrency()).orElse(null));
       Optional
         .ofNullable(i.getPid())
         .ifPresent(pid -> instance.getPid().addAll(pid));
@@ -1442,17 +1430,17 @@ public class XmlRecordFactory implements Serializable {
         .ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId));
       Optional
         .ofNullable(i.getDateofacceptance())
-        .ifPresent(d -> instance.getDateofacceptance().add(d.getValue()));
+        .ifPresent(d -> instance.getDateofacceptance().add(d));
       Optional
         .ofNullable(i.getLicense())
-        .ifPresent(license -> instance.getLicense().add(license.getValue()));
+        .ifPresent(license -> instance.getLicense().add(license.getUrl()));
       Optional
         .ofNullable(i.getDistributionlocation())
        .ifPresent(dl -> instance.getDistributionlocation().add(dl));
     });
 
     if (instance.getHostedby().size() > 1
-      && instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) {
+      && instance.getHostedby().stream().anyMatch(ModelConstants.UNKNOWN_REPOSITORY::equals)) {
       instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY);
     }
@@ -1463,7 +1451,7 @@ public class XmlRecordFactory implements Serializable {
     return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
   }
 
-  private List<String> listExtraInfo(final OafEntity entity) {
+  private List<String> listExtraInfo(final Entity entity) {
     final List<ExtraInfo> extraInfo = entity.getExtraInfo();
     return extraInfo != null
       ? extraInfo
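Worth flagging in the XmlRecordFactory diff above: a relation's DataInfo is now derived from the first Provenance entry when serialising. The Optional chain guards against a null list, but p.get(0) would still fail on an empty one, so the mapping appears to assume provenance is populated atomically. A sketch of the extraction, as introduced by the + line above:

    // Sketch: DataInfo taken from the first Provenance entry; note the implicit
    // assumption that a non-null provenance list is also non-empty.
    final DataInfo dataInfo = Optional
        .ofNullable(rel.getProvenance())
        .map(p -> p.get(0).getDataInfo())
        .orElse(null);
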
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
index adf7090d2..aa30484ea 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
@@ -10,6 +10,7 @@ import java.util.List;
 import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.schema.oaf.*;
+import org.apache.commons.lang3.StringUtils;
 import scala.Tuple2;
 
 public class XmlSerializationUtils {
@@ -49,7 +50,7 @@ public class XmlSerializationUtils {
 
   public static String mapStructuredProperty(String name, StructuredProperty t) {
     return asXmlElement(
-      name, t.getValue(), t.getQualifier(), t.getDataInfo());
+      name, t.getValue(), t.getQualifier());
   }
 
   public static String mapQualifier(String name, Qualifier q) {
@@ -66,7 +67,7 @@ public class XmlSerializationUtils {
       .replaceAll(XML_10_PATTERN, "");
   }
 
-  public static String parseDataInfo(final DataInfo dataInfo) {
+  public static String parseDataInfo(final EntityDataInfo dataInfo) {
     return new StringBuilder()
       .append("<datainfo>")
       .append(asXmlElement("inferred", dataInfo.getInferred() + ""))
@@ -106,6 +107,12 @@ public class XmlSerializationUtils {
     return asXmlElement(name, value, null, null);
   }
 
+  public static String asXmlElement(
+    final String name, final String value, final Qualifier q) {
+
+    return asXmlElement(name, value, q, null);
+  }
+
   public static String asXmlElement(
     final String name, final String value, final Qualifier q, final DataInfo info) {
     StringBuilder sb = new StringBuilder();
@@ -125,7 +132,7 @@ public class XmlSerializationUtils {
             info.getProvenanceaction() != null
               ? info.getProvenanceaction().getClassid()
               : ""))
-        .append(attr("trust", info.getTrust()));
+        .append(attr("trust", Float.toString(info.getTrust())));
     }
     if (isBlank(value)) {
       sb.append("/>");
@@ -142,14 +149,13 @@ public class XmlSerializationUtils {
   }
 
   public static String getAttributes(final Qualifier q) {
-    if (q == null || q.isBlank())
+    if (q == null || StringUtils.isBlank(q.getClassid()))
       return "";
 
     return new StringBuilder(" ")
       .append(attr("classid", q.getClassid()))
      .append(attr("classname", q.getClassname()))
      .append(attr("schemeid", q.getSchemeid()))
-      .append(attr("schemename", q.getSchemename()))
      .toString();
   }
diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml
index 541d59007..23be7c7c8 100644
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@@ -25,7 +25,6 @@
     <module>dhp-workflow-profiles</module>
     <module>dhp-aggregation</module>
-    <module>dhp-distcp</module>
     <module>dhp-actionmanager</module>
     <module>dhp-graph-mapper</module>
     <module>dhp-dedup-openaire</module>
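Finally, the serializer itself: trust is now a primitive float on DataInfo, hence the explicit Float.toString when emitting the attribute, and qualifier attributes shrink to three with schemename gone. A sketch of the resulting attribute string, using the attr() helper defined in XmlSerializationUtils:

    // Sketch: qualifier attributes after dropping schemename.
    String attrs = new StringBuilder(" ")
        .append(attr("classid", q.getClassid()))
        .append(attr("classname", q.getClassname()))
        .append(attr("schemeid", q.getSchemeid()))
        .toString();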