WIP: refactoring the internal graph data model and its utilities

This commit is contained in:
Claudio Atzori 2023-02-06 13:45:21 +01:00
parent 67735f7e9d
commit d9c9482a5b
53 changed files with 587 additions and 836 deletions

View File

@ -320,7 +320,7 @@ public class ModelSupport {
return String.format("%s.%s", dbName, tableName); return String.format("%s.%s", dbName, tableName);
} }
public static <T extends Entity> String tableIdentifier(String dbName, Class<T> clazz) { public static <T extends Oaf> String tableIdentifier(String dbName, Class<T> clazz) {
checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null"); checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");

View File

@ -415,6 +415,17 @@ public class OafMapperUtils {
source, target, relType, subRelType, relClass, provenance, null, null); source, target, relType, subRelType, relClass, provenance, null, null);
} }
/**
 * Convenience overload of {@code getRelation} that additionally accepts a list of properties.
 * <p>
 * It performs no work of its own: every argument is forwarded unchanged to the eight-argument
 * {@code getRelation} overload, with {@code null} supplied for that overload's seventh argument
 * and {@code properties} passed as the eighth.
 * <p>
 * NOTE(review): the declaration of the eight-argument overload is not fully visible from here, so the
 * meaning of the seventh argument (always {@code null} in this overload) should be confirmed there.
 *
 * @param source     forwarded as the first argument (presumably the identifier of the relation source)
 * @param target     forwarded as the second argument (presumably the identifier of the relation target)
 * @param relType    forwarded as the third argument
 * @param subRelType forwarded as the fourth argument
 * @param relClass   forwarded as the fifth argument
 * @param provenance forwarded as the sixth argument
 * @param properties forwarded as the eighth argument
 * @return whatever the eight-argument overload returns for these arguments
 */
public static Relation getRelation(final String source,
final String target,
final String relType,
final String subRelType,
final String relClass,
final List<Provenance> provenance,
final List<KeyValue> properties) {
// The literal null fills the seventh slot of the eight-argument overload; properties goes last.
return getRelation(
source, target, relType, subRelType, relClass, provenance, null, properties);
}
public static Relation getRelation(final String source, public static Relation getRelation(final String source,
final String target, final String target,
final String relType, final String relType,

View File

@ -13,6 +13,8 @@ import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function; import java.util.function.Function;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -25,8 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
public class ReadBlacklistFromDB implements Closeable { public class ReadBlacklistFromDB implements Closeable {

View File

@ -4,12 +4,11 @@ package eu.dnetlib.dhp.blacklist;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
public class BlacklistRelationTest { public class BlacklistRelationTest {
@Test @Test

View File

@ -68,7 +68,6 @@ public class PrepareRelatedDatasetsJob {
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass())) .filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -70,7 +70,6 @@ public class PrepareRelatedProjectsJob {
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)) .filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -69,7 +69,6 @@ public class PrepareRelatedPublicationsJob {
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass())) .filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -73,7 +73,6 @@ public class PrepareRelatedSoftwaresJob {
final Dataset<Relation> rels; final Dataset<Relation> rels;
rels = ClusterUtils rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)) .filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -8,7 +8,7 @@ import java.util.Set;
import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class BrokerConstants { public class BrokerConstants {

View File

@ -1,12 +1,7 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList; import java.util.*;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -105,16 +100,16 @@ public class ConversionUtils {
res.setOpenaireId(cleanOpenaireId(result.getId())); res.setOpenaireId(cleanOpenaireId(result.getId()));
res.setOriginalId(first(result.getOriginalId())); res.setOriginalId(first(result.getOriginalId()));
res.setTypology(classId(result.getResulttype())); res.setTypology(result.getResulttype());
res.setTitles(structPropList(result.getTitle())); res.setTitles(structPropList(result.getTitle()));
res.setAbstracts(fieldList(result.getDescription())); res.setAbstracts(result.getDescription());
res.setLanguage(classId(result.getLanguage())); res.setLanguage(classId(result.getLanguage()));
res.setSubjects(subjectList(result.getSubject())); res.setSubjects(subjectList(result.getSubject()));
res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor)); res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
res.setPublicationdate(fieldValue(result.getDateofacceptance())); res.setPublicationdate(result.getDateofacceptance());
res.setPublisher(fieldValue(result.getPublisher())); res.setPublisher(result.getPublisher().getName());
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate())); res.setEmbargoenddate(result.getEmbargoenddate());
res.setContributor(fieldList(result.getContributor())); res.setContributor(result.getContributor());
res res
.setJournal( .setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null); result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
@ -210,10 +205,9 @@ public class ConversionUtils {
final OaBrokerProject res = new OaBrokerProject(); final OaBrokerProject res = new OaBrokerProject();
res.setOpenaireId(cleanOpenaireId(p.getId())); res.setOpenaireId(cleanOpenaireId(p.getId()));
res.setTitle(fieldValue(p.getTitle())); res.setTitle(p.getTitle());
res.setAcronym(fieldValue(p.getAcronym())); res.setAcronym(p.getAcronym());
res.setCode(fieldValue(p.getCode())); res.setCode(p.getCode());
final String ftree = fieldValue(p.getFundingtree()); final String ftree = fieldValue(p.getFundingtree());
if (StringUtils.isNotBlank(ftree)) { if (StringUtils.isNotBlank(ftree)) {
try { try {
@ -238,7 +232,7 @@ public class ConversionUtils {
res.setOpenaireId(cleanOpenaireId(sw.getId())); res.setOpenaireId(cleanOpenaireId(sw.getId()));
res.setName(structPropValue(sw.getTitle())); res.setName(structPropValue(sw.getTitle()));
res.setDescription(fieldValue(sw.getDescription())); res.setDescription(fieldValue(sw.getDescription()));
res.setRepository(fieldValue(sw.getCodeRepositoryUrl())); res.setRepository(sw.getCodeRepositoryUrl());
res.setLandingPage(fieldValue(sw.getDocumentationUrl())); res.setLandingPage(fieldValue(sw.getDocumentationUrl()));
return res; return res;
@ -250,7 +244,7 @@ public class ConversionUtils {
} }
final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource(); final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname()))); res.setName(StringUtils.defaultIfBlank(ds.getOfficialname(), ds.getEnglishname()));
res.setOpenaireId(cleanOpenaireId(ds.getId())); res.setOpenaireId(cleanOpenaireId(ds.getId()));
res.setType(classId(ds.getDatasourcetype())); res.setType(classId(ds.getDatasourcetype()));
return res; return res;
@ -264,13 +258,14 @@ public class ConversionUtils {
return kv != null ? kv.getValue() : null; return kv != null ? kv.getValue() : null;
} }
private static String fieldValue(final Field<String> f) { private static String fieldValue(final List<String> fl) {
return f != null ? f.getValue() : null; return Optional
} .ofNullable(fl)
.map(f -> fl.stream()
private static String fieldValue(final List<Field<String>> fl) { .filter(StringUtils::isNotBlank)
return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) .findFirst()
: null; .orElse(null))
.orElse(null);
} }
private static String classId(final Qualifier q) { private static String classId(final Qualifier q) {
@ -283,18 +278,6 @@ public class ConversionUtils {
: null; : null;
} }
/**
 * Flattens a list of string fields into a bounded list of plain strings.
 * <p>
 * Each field value is abbreviated with {@code StringUtils.abbreviate} to
 * {@code BrokerConstants.MAX_STRING_SIZE}, blank results are discarded, and at most
 * {@code BrokerConstants.MAX_LIST_SIZE} of the remaining values are kept, in encounter order.
 *
 * @param fl the fields to convert; may be {@code null}
 * @return the converted values, or a new empty {@link ArrayList} when {@code fl} is {@code null}
 */
private static List<String> fieldList(final List<Field<String>> fl) {
	// A missing input list is reported as an empty (freshly allocated) list rather than null.
	if (fl == null) {
		return new ArrayList<>();
	}

	return fl
		.stream()
		.map(Field::getValue)
		// Abbreviate before the blank check, so the filter sees the shortened value.
		.map(value -> StringUtils.abbreviate(value, BrokerConstants.MAX_STRING_SIZE))
		.filter(StringUtils::isNotBlank)
		.limit(BrokerConstants.MAX_LIST_SIZE)
		.collect(Collectors.toList());
}
private static List<String> structPropList(final List<StructuredProperty> props) { private static List<String> structPropList(final List<StructuredProperty> props) {
return props != null return props != null
? props ? props

View File

@ -27,7 +27,6 @@ class ConversionUtilsTest {
qf.setClassid("test"); qf.setClassid("test");
qf.setClassname("test"); qf.setClassname("test");
qf.setSchemeid("test"); qf.setSchemeid("test");
qf.setSchemename("test");
final StructuredProperty sp1 = new StructuredProperty(); final StructuredProperty sp1 = new StructuredProperty();
sp1.setValue("1"); sp1.setValue("1");

View File

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven descriptor of the dhp-distcp module.
  The module declares only its own artifactId; it names dhp-workflows (eu.dnetlib.dhp, 1.2.5-SNAPSHOT)
  as its parent and defines no dependencies, plugins or packaging of its own.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-distcp</artifactId>
</project>

View File

@ -1,18 +0,0 @@
<!--
  Property values for a Hadoop/Oozie job.
  NOTE(review): presumably the default configuration shipped with the distcp workflow; confirm against
  the file location, which is not visible here.
-->
<configuration>
<!-- Value assigned to the jobTracker property. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<!-- HDFS name service assigned to the nameNode property. -->
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- WebHDFS endpoint assigned to the sourceNN property (host name and port are hard-coded here). -->
<property>
<name>sourceNN</name>
<value>webhdfs://namenode2.hadoop.dm.openaire.eu:50071</value>
</property>
<!-- Turns on the oozie.use.system.libpath switch. -->
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,46 +0,0 @@
<!--
  Oozie workflow "distcp": runs a single distcp action that copies ${sourceNN}/${sourcePath}
  to ${nameNode}/${targetPath}. Success leads to the End node; any failure leads to the Kill node,
  which reports the error message of the last failed node.
  Note: nameNode is used by the action but is not declared among the parameters below, so it has
  to be supplied by the job configuration.
-->
<workflow-app name="distcp" xmlns="uri:oozie:workflow:0.5">
<parameters>
<!-- sourceNN, sourcePath and targetPath have no default value and must be provided by the caller. -->
<property>
<name>sourceNN</name>
<description>the source name node</description>
</property>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>targetPath</name>
<description>the target path</description>
</property>
<!-- Tuning parameters with defaults: 6144 MB of map memory and a single map task. -->
<property>
<name>hbase_dump_distcp_memory_mb</name>
<value>6144</value>
<description>memory for distcp action copying InfoSpace dump from remote cluster</description>
</property>
<property>
<name>hbase_dump_distcp_num_maps</name>
<value>1</value>
<description>maximum number of simultaneous copies of InfoSpace dump from remote location</description>
</property>
</parameters>
<start to="distcp"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="distcp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<!-- Sets mapreduce.map.memory.mb for the copy job from the memory parameter. -->
<arg>-Dmapreduce.map.memory.mb=${hbase_dump_distcp_memory_mb}</arg>
<!-- Per the Hadoop DistCp guide, -pb asks distcp to preserve the block size of copied files. -->
<arg>-pb</arg>
<!-- NOTE(review): the -m flag and its value share one arg element; confirm distcp parses this as intended. -->
<arg>-m ${hbase_dump_distcp_num_maps}</arg>
<!-- Source URI followed by destination URI. -->
<arg>${sourceNN}/${sourcePath}</arg>
<arg>${nameNode}/${targetPath}</arg>
</distcp>
<ok to="End" />
<error to="Kill" />
</action>
<end name="End"/>
</workflow-app>

View File

@ -22,7 +22,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -61,7 +61,7 @@ public class CleanGraphSparkJob {
String graphTableClassName = parser.get("graphTableClassName"); String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName); log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName); Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService); final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);

View File

@ -74,7 +74,6 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
final Country c = o; final Country c = o;
if (StringUtils.isBlank(c.getSchemeid())) { if (StringUtils.isBlank(c.getSchemeid())) {
c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
} }
cleanQualifier(vocabularies, c); cleanQualifier(vocabularies, c);
} }

View File

@ -91,8 +91,7 @@ public class GetDatasourceFromCountry implements Serializable {
(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class), (MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
Encoders.bean(Relation.class)) Encoders.bean(Relation.class))
.filter( .filter(
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && (FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
!rel.getDataInfo().getDeletedbyinference());
organization organization
.joinWith(relation, organization.col("id").equalTo(relation.col("target"))) .joinWith(relation, organization.col("id").equalTo(relation.col("target")))

View File

@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
@ -17,7 +18,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport;
public class GraphHiveImporterJob { public class GraphHiveImporterJob {

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.oa.graph.hive; package eu.dnetlib.dhp.oa.graph.hive;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.tableIdentifier; import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.tableIdentifier;
import java.util.Optional; import java.util.Optional;

View File

@ -10,11 +10,11 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator; import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.*; import org.dom4j.*;
@ -26,11 +26,9 @@ import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
public abstract class AbstractMdRecordToOafMapper { public abstract class AbstractMdRecordToOafMapper {
@ -49,9 +47,9 @@ public abstract class AbstractMdRecordToOafMapper {
protected static final Qualifier ORCID_PID_TYPE = qualifier( protected static final Qualifier ORCID_PID_TYPE = qualifier(
ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING,
ModelConstants.ORCID_CLASSNAME, ModelConstants.ORCID_CLASSNAME,
DNET_PID_TYPES, DNET_PID_TYPES); DNET_PID_TYPES);
protected static final Qualifier MAG_PID_TYPE = qualifier( protected static final Qualifier MAG_PID_TYPE = qualifier(
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES);
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999"; protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
@ -122,7 +120,7 @@ public abstract class AbstractMdRecordToOafMapper {
return Lists.newArrayList(); return Lists.newArrayList();
} }
final DataInfo info = prepareDataInfo(doc, invisible); final EntityDataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime(); final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy); final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
@ -171,10 +169,10 @@ public abstract class AbstractMdRecordToOafMapper {
final String type, final String type,
final List<Instance> instances, final List<Instance> instances,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final EntityDataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); final Entity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final Set<String> originalId = Sets.newHashSet(entity.getOriginalId()); final Set<String> originalId = Sets.newHashSet(entity.getOriginalId());
originalId.add(entity.getId()); originalId.add(entity.getId());
@ -202,11 +200,11 @@ public abstract class AbstractMdRecordToOafMapper {
return oafs; return oafs;
} }
private OafEntity createEntity(final Document doc, private Entity createEntity(final Document doc,
final String type, final String type,
final List<Instance> instances, final List<Instance> instances,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final EntityDataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
switch (type.toLowerCase()) { switch (type.toLowerCase()) {
case "publication": case "publication":
@ -217,37 +215,36 @@ public abstract class AbstractMdRecordToOafMapper {
case "dataset": case "dataset":
final Dataset d = new Dataset(); final Dataset d = new Dataset();
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setStoragedate(prepareDatasetStorageDate(doc));
d.setDevice(prepareDatasetDevice(doc, info)); d.setDevice(prepareDatasetDevice(doc));
d.setSize(prepareDatasetSize(doc, info)); d.setSize(prepareDatasetSize(doc));
d.setVersion(prepareDatasetVersion(doc, info)); d.setVersion(prepareDatasetVersion(doc));
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc));
d.setGeolocation(prepareDatasetGeoLocations(doc, info)); d.setGeolocation(prepareDatasetGeoLocations(doc));
return d; return d;
case "software": case "software":
final Software s = new Software(); final Software s = new Software();
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc));
s.setLicense(prepareSoftwareLicenses(doc, info)); s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
return s; return s;
case "": case "":
case "otherresearchproducts": case "otherresearchproducts":
default: default:
final OtherResearchProduct o = new OtherResearchProduct(); final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactperson(prepareOtherResearchProductContactPersons(doc));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc));
o.setTool(prepareOtherResearchProductTools(doc, info)); o.setTool(prepareOtherResearchProductTools(doc));
return o; return o;
} }
} }
private List<Oaf> addProjectRels( private List<Oaf> addProjectRels(
final Document doc, final Document doc,
final OafEntity entity) { final Entity entity) {
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
@ -277,7 +274,7 @@ public abstract class AbstractMdRecordToOafMapper {
return res; return res;
} }
private List<Oaf> addRelations(Document doc, OafEntity entity) { private List<Oaf> addRelations(Document doc, Entity entity) {
final List<Oaf> rels = Lists.newArrayList(); final List<Oaf> rels = Lists.newArrayList();
@ -322,14 +319,14 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<Oaf> addOtherResultRels( protected abstract List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final OafEntity entity); final Entity entity);
private void populateResultFields( private void populateResultFields(
final Result r, final Result r,
final Document doc, final Document doc,
final List<Instance> instances, final List<Instance> instances,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final EntityDataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
r.setDataInfo(info); r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
@ -345,24 +342,24 @@ public abstract class AbstractMdRecordToOafMapper {
r.setLanguage(prepareLanguages(doc)); r.setLanguage(prepareLanguages(doc));
r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setSubject(prepareSubjects(doc, info)); r.setSubject(prepareSubjects(doc, info));
r.setTitle(prepareTitles(doc, info)); r.setTitle(prepareTitles(doc));
r.setRelevantdate(prepareRelevantDates(doc, info)); r.setRelevantdate(prepareRelevantDates(doc));
r.setDescription(prepareDescriptions(doc, info)); r.setDescription(prepareDescriptions(doc));
r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info)); r.setDateofacceptance(doc.valueOf( "//oaf:dateAccepted"));
r.setPublisher(preparePublisher(doc, info)); r.setPublisher(preparePublisher(doc));
r.setEmbargoenddate(prepareField(doc, "//oaf:embargoenddate", info)); r.setEmbargoenddate(doc.valueOf("//oaf:embargoenddate"));
r.setSource(prepareSources(doc, info)); r.setSource(prepareSources(doc));
r.setFulltext(prepareListFields(doc, "//oaf:fulltext", info)); r.setFulltext(prepareListString(doc, "//oaf:fulltext"));
r.setFormat(prepareFormats(doc, info)); r.setFormat(prepareFormats(doc));
r.setContributor(prepareContributors(doc, info)); r.setContributor(prepareContributors(doc));
r.setResourcetype(prepareResourceType(doc, info)); r.setResourcetype(prepareResourceType(doc));
r.setCoverage(prepareCoverages(doc, info)); r.setCoverage(prepareCoverages(doc));
r.setContext(prepareContexts(doc, info)); r.setContext(prepareContexts(doc, info));
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r r
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); .setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
r r
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); .setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
r.setInstance(instances); r.setInstance(instances);
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
@ -404,7 +401,7 @@ public abstract class AbstractMdRecordToOafMapper {
return Lists.newArrayList(set); return Lists.newArrayList(set);
} }
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract Qualifier prepareResourceType(Document doc);
protected abstract List<Instance> prepareInstances( protected abstract List<Instance> prepareInstances(
Document doc, Document doc,
@ -412,21 +409,21 @@ public abstract class AbstractMdRecordToOafMapper {
KeyValue collectedfrom, KeyValue collectedfrom,
KeyValue hostedby); KeyValue hostedby);
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info); protected abstract List<String> prepareSources(Document doc);
protected abstract List<StructuredProperty> prepareRelevantDates(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareRelevantDates(Document doc);
protected abstract List<Field<String>> prepareCoverages(Document doc, DataInfo info); protected abstract List<String> prepareCoverages(Document doc);
protected abstract List<Field<String>> prepareContributors(Document doc, DataInfo info); protected abstract List<String> prepareContributors(Document doc);
protected abstract List<Field<String>> prepareFormats(Document doc, DataInfo info); protected abstract List<String> prepareFormats(Document doc);
protected abstract Field<String> preparePublisher(Document doc, DataInfo info); protected abstract Publisher preparePublisher(Document doc);
protected abstract List<Field<String>> prepareDescriptions(Document doc, DataInfo info); protected abstract List<String> prepareDescriptions(Document doc);
protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareTitles(Document doc);
protected abstract List<Subject> prepareSubjects(Document doc, DataInfo info); protected abstract List<Subject> prepareSubjects(Document doc, DataInfo info);
@ -434,41 +431,31 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info); protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductTools( protected abstract List<String> prepareOtherResearchProductTools(Document doc);
Document doc,
DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups( protected abstract List<String> prepareOtherResearchProductContactGroups(Document doc);
Document doc,
DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons( protected abstract List<String> prepareOtherResearchProductContactPersons(Document doc);
Document doc,
DataInfo info);
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc);
protected abstract Field<String> prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info); protected abstract String prepareSoftwareCodeRepositoryUrl(Document doc);
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List<String> prepareSoftwareDocumentationUrls(Document doc);
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls( protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc);
Document doc,
DataInfo info);
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info); protected abstract String prepareDatasetMetadataVersionNumber(Document doc);
protected abstract Field<String> prepareDatasetMetadataVersionNumber(Document doc, DataInfo info); protected abstract String prepareDatasetLastMetadataUpdate(Document doc);
protected abstract Field<String> prepareDatasetLastMetadataUpdate(Document doc, DataInfo info); protected abstract String prepareDatasetVersion(Document doc);
protected abstract Field<String> prepareDatasetVersion(Document doc, DataInfo info); protected abstract String prepareDatasetSize(Document doc);
protected abstract Field<String> prepareDatasetSize(Document doc, DataInfo info); protected abstract String prepareDatasetDevice(Document doc);
protected abstract Field<String> prepareDatasetDevice(Document doc, DataInfo info); protected abstract String prepareDatasetStorageDate(Document doc);
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
private Journal prepareJournal(final Document doc, final DataInfo info) { private Journal prepareJournal(final Document doc, final DataInfo info) {
final Node n = doc.selectSingleNode("//oaf:journal"); final Node n = doc.selectSingleNode("//oaf:journal");
@ -514,7 +501,6 @@ public abstract class AbstractMdRecordToOafMapper {
accessRight.setClassid(qualifier.getClassid()); accessRight.setClassid(qualifier.getClassid());
accessRight.setClassname(qualifier.getClassname()); accessRight.setClassname(qualifier.getClassname());
accessRight.setSchemeid(qualifier.getSchemeid()); accessRight.setSchemeid(qualifier.getSchemeid());
accessRight.setSchemename(qualifier.getSchemename());
// TODO set the OAStatus // TODO set the OAStatus
@ -541,7 +527,7 @@ public abstract class AbstractMdRecordToOafMapper {
final Node n = (Node) o; final Node n = (Node) o;
final String classId = n.valueOf(xpathClassId).trim(); final String classId = n.valueOf(xpathClassId).trim();
if (vocs.termExists(schemeId, classId)) { if (vocs.termExists(schemeId, classId)) {
res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info)); res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId)));
} }
} }
return res; return res;
@ -550,28 +536,11 @@ public abstract class AbstractMdRecordToOafMapper {
protected List<StructuredProperty> prepareListStructProps( protected List<StructuredProperty> prepareListStructProps(
final Node node, final Node node,
final String xpath, final String xpath,
final Qualifier qualifier, final Qualifier qualifier) {
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
res.add(structuredProperty(n.getText(), qualifier, info)); res.add(structuredProperty(n.getText(), qualifier));
}
return res;
}
protected List<StructuredProperty> prepareListStructProps(
final Node node,
final String xpath,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
res
.add(
structuredProperty(
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@schemename"), info));
} }
return res; return res;
} }
@ -583,11 +552,10 @@ public abstract class AbstractMdRecordToOafMapper {
final List<Subject> res = new ArrayList<>(); final List<Subject> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
Qualifier qualifier = qualifier(n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"));
res res
.add( .add(
subject( subject(n.getText(), qualifier, info));
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@schemename"), info));
} }
return res; return res;
} }
@ -609,37 +577,31 @@ public abstract class AbstractMdRecordToOafMapper {
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
} }
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) { protected EntityDataInfo prepareDataInfo(final Document doc, final boolean invisible) {
final Node n = doc.selectSingleNode("//oaf:datainfo"); final Node n = doc.selectSingleNode("//oaf:datainfo");
if (n == null) { if (n == null) {
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); return dataInfo(false, false, 0.9f, null, false, REPOSITORY_PROVENANCE_ACTIONS);
} }
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid"); final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename");
final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference")); final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference"));
final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance"); final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance");
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
final String trust = n.valueOf("./oaf:trust"); final Float trust = Float.parseFloat(n.valueOf("./oaf:trust"));
return dataInfo( final Qualifier pAction = qualifier(paClassId, paClassName, paSchemeId);
deletedbyinference, inferenceprovenance, inferred, invisible,
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); return dataInfo(invisible, deletedbyinference, trust, inferenceprovenance, inferred, pAction);
} }
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) { protected List<String> prepareListFields(
return field(node.valueOf(xpath), info);
}
protected List<Field<String>> prepareListFields(
final Node node, final Node node,
final String xpath, final String xpath) {
final DataInfo info) { return prepareListString(node, xpath);
return listFields(info, prepareListString(node, xpath));
} }
protected List<String> prepareListString(final Node node, final String xpath) { protected List<String> prepareListString(final Node node, final String xpath) {

View File

@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -18,7 +19,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
public class DispatchEntitiesApplication { public class DispatchEntitiesApplication {

View File

@ -9,6 +9,8 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -16,11 +18,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -29,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@ -139,7 +136,7 @@ public class GenerateEntitiesApplication {
save( save(
inputRdd inputRdd
.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
.reduceByKey(OafMapperUtils::merge) .reduceByKey(MergeUtils::merge)
.map(Tuple2::_2), .map(Tuple2::_2),
targetPath); targetPath);
break; break;

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
@ -26,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2; import scala.Tuple2;

View File

@ -15,6 +15,7 @@ import java.util.function.Function;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -28,23 +29,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction; import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction;
import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate; import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@ -52,13 +37,17 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
private static final DataInfo DATA_INFO_CLAIM = dataInfo( private static final EntityDataInfo ENTITY_DATA_INFO_CLAIM = dataInfo(
false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), false, false, 0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS));
"0.9");
private static final DataInfo REL_DATA_INFO_CLAIM = dataInfo(
0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS));
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues( private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
private final static List<Provenance> PROVENANCE_CLAIM = getProvenance(COLLECTED_FROM_CLAIM, ENTITY_DATA_INFO_CLAIM);
public static final String SOURCE_TYPE = "source_type"; public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type"; public static final String TARGET_TYPE = "target_type";
@ -207,7 +196,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processService(final ResultSet rs) { public List<Oaf> processService(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final EntityDataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
@ -220,46 +209,45 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
.filter(StringUtils::isNotBlank) .filter(StringUtils::isNotBlank)
.collect(Collectors.toList())); .collect(Collectors.toList()));
ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom"))); ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info)); ds.setPid(prepareListOfStructProps(rs.getArray("pid")));
ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query ds.setDateoftransformation(null); // Value not returned by the SQL query
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
ds.setOaiprovenance(null); // Values not present in the DB
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype"))); ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype"))); ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setOfficialname(rs.getString("officialname"));
ds.setEnglishname(field(rs.getString("englishname"), info)); ds.setEnglishname(rs.getString("englishname"));
ds.setWebsiteurl(field(rs.getString("websiteurl"), info)); ds.setWebsiteurl(rs.getString("websiteurl"));
ds.setLogourl(field(rs.getString("logourl"), info)); ds.setLogourl(rs.getString("logourl"));
ds.setContactemail(field(rs.getString("contactemail"), info)); ds.setContactemail(rs.getString("contactemail"));
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info)); ds.setNamespaceprefix(rs.getString("namespaceprefix"));
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info)); ds.setLatitude(Double.toString(rs.getDouble("latitude")));
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info)); ds.setLongitude(Double.toString(rs.getDouble("longitude")));
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info)); ds.setDateofvalidation(asString(rs.getDate("dateofvalidation")));
ds.setDescription(field(rs.getString("description"), info)); ds.setDescription(rs.getString("description"));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects")));
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info)); ds.setOdnumberofitems(Double.toString(rs.getInt("odnumberofitems")));
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); ds.setOdnumberofitemsdate(asString(rs.getDate("odnumberofitemsdate")));
ds.setOdpolicies(field(rs.getString("odpolicies"), info)); ds.setOdpolicies(rs.getString("odpolicies"));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages")));
ds.setLanguages(listValues(rs.getArray("languages"))); ds.setLanguages(listValues(rs.getArray("languages")));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage")));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); ds.setReleasestartdate(asString(rs.getDate("releasestartdate")));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); ds.setReleaseenddate(asString(rs.getDate("releaseenddate")));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); ds.setMissionstatementurl(rs.getString("missionstatementurl"));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); ds.setDatabaseaccesstype(rs.getString("databaseaccesstype"));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); ds.setDatauploadtype(rs.getString("datauploadtype"));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); ds.setDatabaseaccessrestriction(rs.getString("databaseaccessrestriction"));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); ds.setDatauploadrestriction(rs.getString("datauploadrestriction"));
ds.setVersioning(field(rs.getBoolean("versioning"), info)); ds.setVersioning(rs.getBoolean("versioning"));
ds.setVersioncontrol(rs.getBoolean("versioncontrol")); ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); ds.setCitationguidelineurl(rs.getString("citationguidelineurl"));
ds.setPidsystems(field(rs.getString("pidsystems"), info)); ds.setPidsystems(rs.getString("pidsystems"));
ds.setCertificates(field(rs.getString("certificates"), info)); ds.setCertificates(rs.getString("certificates"));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds ds
.setJournal( .setJournal(
@ -306,7 +294,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processProject(final ResultSet rs) { public List<Oaf> processProject(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final EntityDataInfo info = prepareDataInfo(rs);
final Project p = new Project(); final Project p = new Project();
@ -321,32 +309,31 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
p.setDateofcollection(asString(rs.getDate("dateofcollection"))); p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
p.setOaiprovenance(null); // Values not present in the DB p.setWebsiteurl(rs.getString("websiteurl"));
p.setWebsiteurl(field(rs.getString("websiteurl"), info)); p.setCode(rs.getString("code"));
p.setCode(field(rs.getString("code"), info)); p.setAcronym(rs.getString("acronym"));
p.setAcronym(field(rs.getString("acronym"), info)); p.setTitle(rs.getString("title"));
p.setTitle(field(rs.getString("title"), info)); p.setStartdate(asString(rs.getDate("startdate")));
p.setStartdate(field(asString(rs.getDate("startdate")), info)); p.setEnddate(asString(rs.getDate("enddate")));
p.setEnddate(field(asString(rs.getDate("enddate")), info)); p.setCallidentifier(rs.getString("callidentifier"));
p.setCallidentifier(field(rs.getString("callidentifier"), info)); p.setKeywords(rs.getString("keywords"));
p.setKeywords(field(rs.getString("keywords"), info)); p.setDuration(Integer.toString(rs.getInt("duration")));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info)); p.setEcsc39(Boolean.toString(rs.getBoolean("ecsc39")));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p p
.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); .setOamandatepublications(Boolean.toString(rs.getBoolean("oamandatepublications")));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); p.setEcarticle29_3(Boolean.toString(rs.getBoolean("ecarticle29_3")));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); p.setSubjects(prepareListOfStructProps(rs.getArray("subjects")));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info)); p.setFundingtree(prepareListFields(rs.getArray("fundingtree")));
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype"))); p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(field(rs.getString("optional1"), info)); p.setOptional1(rs.getString("optional1"));
p.setOptional2(field(rs.getString("optional2"), info)); p.setOptional2(rs.getString("optional2"));
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info)); p.setJsonextrainfo(rs.getString("jsonextrainfo"));
p.setContactfullname(field(rs.getString("contactfullname"), info)); p.setContactfullname(rs.getString("contactfullname"));
p.setContactfax(field(rs.getString("contactfax"), info)); p.setContactfax(rs.getString("contactfax"));
p.setContactphone(field(rs.getString("contactphone"), info)); p.setContactphone(rs.getString("contactphone"));
p.setContactemail(field(rs.getString("contactemail"), info)); p.setContactemail(rs.getString("contactemail"));
p.setSummary(field(rs.getString("summary"), info)); p.setSummary(rs.getString("summary"));
p.setCurrency(field(rs.getString("currency"), info)); p.setCurrency(rs.getString("currency"));
p.setTotalcost(new Float(rs.getDouble("totalcost"))); p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount"))); p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info); p.setDataInfo(info);
@ -361,7 +348,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processOrganization(final ResultSet rs) { public List<Oaf> processOrganization(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final EntityDataInfo info = prepareDataInfo(rs);
final Organization o = new Organization(); final Organization o = new Organization();
@ -372,31 +359,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
listKeyValues( listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"))); rs.getString("collectedfromname")));
o.setPid(prepareListOfStructProps(rs.getArray("pid"), info)); o.setPid(prepareListOfStructProps(rs.getArray("pid")));
o.setDateofcollection(asString(rs.getDate("dateofcollection"))); o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
o.setOaiprovenance(null); // Values not present in the DB o.setLegalshortname(rs.getString("legalshortname"));
o.setLegalshortname(field(rs.getString("legalshortname"), info)); o.setLegalname(rs.getString("legalname"));
o.setLegalname(field(rs.getString("legalname"), info)); o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames")));
o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"), info)); o.setWebsiteurl(rs.getString("websiteurl"));
o.setWebsiteurl(field(rs.getString("websiteurl"), info)); o.setLogourl(rs.getString("logourl"));
o.setLogourl(field(rs.getString("logourl"), info)); o.setEclegalbody(Boolean.toString(rs.getBoolean("eclegalbody")));
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); o.setEclegalperson(Boolean.toString(rs.getBoolean("eclegalperson")));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info)); o.setEcnonprofit(Boolean.toString(rs.getBoolean("ecnonprofit")));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o o
.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info)); .setEcresearchorganization(Boolean.toString(rs.getBoolean("ecresearchorganization")));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info)); o.setEchighereducation(Boolean.toString(rs.getBoolean("echighereducation")));
o o
.setEcinternationalorganizationeurinterests( .setEcinternationalorganizationeurinterests(
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info)); Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")));
o o
.setEcinternationalorganization( .setEcinternationalorganization(
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info)); Boolean.toString(rs.getBoolean("ecinternationalorganization")));
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); o.setEcenterprise(Boolean.toString(rs.getBoolean("ecenterprise")));
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); o.setEcsmevalidated(Boolean.toString(rs.getBoolean("ecsmevalidated")));
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); o.setEcnutscode(Boolean.toString(rs.getBoolean("ecnutscode")));
o.setCountry(prepareQualifierSplitting(rs.getString("country"))); o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info); o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp); o.setLastupdatetimestamp(lastUpdateTimestamp);
@ -409,21 +395,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processServiceOrganization(final ResultSet rs) { public List<Oaf> processServiceOrganization(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true); final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("service"), true); final String dsId = createOpenaireId(10, rs.getString("service"), true);
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = OafMapperUtils final Relation r1 = OafMapperUtils
.getRelation( .getRelation(
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, collectedFrom, info, dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, provenance);
lastUpdateTimestamp);
final Relation r2 = OafMapperUtils final Relation r2 = OafMapperUtils
.getRelation( .getRelation(
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info, orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance);
lastUpdateTimestamp);
return Arrays.asList(r1, r2); return Arrays.asList(r1, r2);
} catch (final Exception e) { } catch (final Exception e) {
@ -433,12 +419,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processProjectOrganization(final ResultSet rs) { public List<Oaf> processProjectOrganization(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true); final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true); final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final List<KeyValue> properties = Lists final List<KeyValue> properties = Lists
.newArrayList( .newArrayList(
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))), keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
@ -446,13 +434,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Relation r1 = OafMapperUtils final Relation r1 = OafMapperUtils
.getRelation( .getRelation(
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, collectedFrom, info, projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, provenance, properties);
lastUpdateTimestamp, null, properties);
final Relation r2 = OafMapperUtils final Relation r2 = OafMapperUtils
.getRelation( .getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, collectedFrom, info, orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties);
lastUpdateTimestamp, null, properties);
return Arrays.asList(r1, r2); return Arrays.asList(r1, r2);
} catch (final Exception e) { } catch (final Exception e) {
@ -469,21 +455,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
if (targetType.equals("dataset")) { if (targetType.equals("dataset")) {
r = new Dataset(); r = new Dataset();
r.setResulttype(DATASET_DEFAULT_RESULTTYPE); r.setResulttype(DATASET_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("software")) { } else if (targetType.equals("software")) {
r = new Software(); r = new Software();
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("other")) { } else if (targetType.equals("other")) {
r = new OtherResearchProduct(); r = new OtherResearchProduct();
r.setResulttype(ORP_DEFAULT_RESULTTYPE); r.setResulttype(ORP_DEFAULT_RESULTTYPE.getClassid());
} else { } else {
r = new Publication(); r = new Publication();
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE.getClassid());
} }
r.setId(createOpenaireId(50, rs.getString("target_id"), false)); r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), DATA_INFO_CLAIM)); r.setContext(prepareContext(rs.getString("source_id"), ENTITY_DATA_INFO_CLAIM));
r.setDataInfo(DATA_INFO_CLAIM); r.setDataInfo(ENTITY_DATA_INFO_CLAIM);
r.setCollectedfrom(COLLECTED_FROM_CLAIM); r.setCollectedfrom(COLLECTED_FROM_CLAIM);
return Arrays.asList(r); return Arrays.asList(r);
@ -493,8 +479,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false); final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false); final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
Relation r1 = prepareRelation(sourceId, targetId, validationDate); Relation r1 = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, validationDate); Relation r2 = prepareRelation(targetId, sourceId, PROVENANCE_CLAIM, validationDate);
final String semantics = rs.getString("semantics"); final String semantics = rs.getString("semantics");
@ -529,17 +515,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
} }
} }
private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) { private Relation prepareRelation(final String sourceId, final String targetId, final List<Provenance> provenance, final String validationDate) {
final Relation r = new Relation(); final Relation r = new Relation();
if (StringUtils.isNotBlank(validationDate)) { if (StringUtils.isNotBlank(validationDate)) {
r.setValidated(true); r.setValidated(true);
r.setValidationDate(validationDate); r.setValidationDate(validationDate);
} }
r.setCollectedfrom(COLLECTED_FROM_CLAIM); r.setProvenance(provenance);
r.setSource(sourceId); r.setSource(sourceId);
r.setTarget(targetId); r.setTarget(targetId);
r.setDataInfo(DATA_INFO_CLAIM);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r; return r;
} }
@ -558,16 +542,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(context); return Arrays.asList(context);
} }
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException { private EntityDataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference"); final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance"); final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred"); final Boolean inferred = rs.getBoolean("inferred");
final double trust = rs.getDouble("trust"); final float trust = (float) rs.getDouble("trust");
return dataInfo( return dataInfo(false, deletedbyinference, trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, }
String.format("%.3f", trust));
private DataInfo prepareRelDataInfo(final ResultSet rs) throws SQLException {
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final float trust = (float) rs.getDouble("trust");
return dataInfo(trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
} }
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException { private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
@ -604,15 +594,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null; return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
} }
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) { private List<String> prepareListFields(final Array array) {
try { try {
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>(); return array != null ? listValues(array) : new ArrayList<>();
} catch (final SQLException e) { } catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e); throw new RuntimeException("Invalid SQL array", e);
} }
} }
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) { private StructuredProperty prepareStructProp(final String s) {
if (StringUtils.isBlank(s)) { if (StringUtils.isBlank(s)) {
return null; return null;
} }
@ -621,19 +611,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String value = parts[0]; final String value = parts[0];
final String[] arr = parts[1].split("@@@"); final String[] arr = parts[1].split("@@@");
if (arr.length == 2) { if (arr.length == 2) {
return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo); return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]));
} }
} }
return null; return null;
} }
private List<StructuredProperty> prepareListOfStructProps( private List<StructuredProperty> prepareListOfStructProps(
final Array array, final Array array) throws SQLException {
final DataInfo dataInfo) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
if (array != null) { if (array != null) {
for (final String s : (String[]) array.getArray()) { for (final String s : (String[]) array.getArray()) {
final StructuredProperty sp = prepareStructProp(s, dataInfo); final StructuredProperty sp = prepareStructProp(s);
if (sp != null) { if (sp != null) {
res.add(sp); res.add(sp);
} }
@ -666,12 +655,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = OafMapperUtils final List<Provenance> provenance = getProvenance(collectedFrom, info);
.getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, collectedFrom, info, lastUpdateTimestamp);
final Relation r2 = OafMapperUtils final Relation r1 = getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance);
.getRelation(
orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, collectedFrom, info, lastUpdateTimestamp); final Relation r2 = getRelation(orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, provenance);
return Arrays.asList(r1, r2); return Arrays.asList(r1, r2);
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
@ -688,12 +676,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
return Arrays final List<Provenance> provenance = getProvenance(collectedFrom, info);
.asList(
OafMapperUtils final String relClass = rs.getString("type");
.getRelation( return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, relClass, provenance));
orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, rs.getString("type"), collectedFrom, info,
lastUpdateTimestamp));
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
@ -710,12 +696,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
return Arrays final List<Provenance> provenance = getProvenance(collectedFrom, info);
.asList(
OafMapperUtils return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, provenance));
.getRelation(
orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, collectedFrom, info,
lastUpdateTimestamp));
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }

View File

@ -67,9 +67,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String cleanedId = pid final String cleanedId = pid
.replaceAll("http://orcid.org/", "") .replaceAll("http://orcid.org/", "")
.replaceAll("https://orcid.org/", ""); .replaceAll("https://orcid.org/", "");
author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); author.getPid().add(authorPid(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) { } else if (type.startsWith("MAGID")) {
author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); author.getPid().add(authorPid(pid, MAG_PID_TYPE, info));
} }
} }
@ -89,39 +89,36 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc) {
return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info); return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER);
} }
@Override @Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) { protected List<String> prepareDescriptions(final Document doc) {
return prepareListFields(doc, "//dc:description", info) return prepareListFields(doc, "//dc:description")
.stream() .stream()
.map(d -> { .map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
return d;
})
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@Override @Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) { protected Publisher preparePublisher(final Document doc) {
return prepareField(doc, "//dc:publisher", info); return publisher(doc.valueOf("//dc:publisher"));
} }
@Override @Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) { protected List<String> prepareFormats(final Document doc) {
return prepareListFields(doc, "//dc:format", info); return prepareListFields(doc, "//dc:format");
} }
@Override @Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) { protected List<String> prepareContributors(final Document doc) {
return prepareListFields(doc, "//dc:contributor", info); return prepareListFields(doc, "//dc:contributor");
} }
@Override @Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) { protected List<String> prepareCoverages(final Document doc) {
return prepareListFields(doc, "//dc:coverage", info); return prepareListFields(doc, "//dc:coverage");
} }
@Override @Override
@ -147,16 +144,16 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList())); alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList()));
instance.setPid(pid); instance.setPid(pid);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted"));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance instance
.setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); .setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setLicense(license(doc.valueOf("//oaf:license")));
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
instance instance
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); .setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
instance instance
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); .setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
final List<String> url = nodes final List<String> url = nodes
@ -183,110 +180,90 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) { protected List<String> prepareSources(final Document doc) {
return prepareListFields(doc, "//dc:source", info); return prepareListFields(doc, "//dc:source");
} }
@Override @Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareRelevantDates(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
// SOFTWARES // SOFTWARES
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected String prepareSoftwareCodeRepositoryUrl(
final Document doc, final Document doc) {
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<String> prepareSoftwareDocumentationUrls(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
// DATASETS // DATASETS
@Override @Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected String prepareDatasetMetadataVersionNumber(final Document doc) {
final Document doc,
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected String prepareDatasetLastMetadataUpdate(final Document doc) {
final Document doc,
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) { protected String prepareDatasetVersion(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) { protected String prepareDatasetSize(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) { protected String prepareDatasetDevice(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) { protected String prepareDatasetStorageDate(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
// OTHER PRODUCTS // OTHER PRODUCTS
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<String> prepareOtherResearchProductTools(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<String> prepareOtherResearchProductContactGroups(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<String> prepareOtherResearchProductContactPersons(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final OafEntity entity) { final Entity entity) {
final String docId = entity.getId(); final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
@ -313,7 +290,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { protected Qualifier prepareResourceType(final Document doc) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }

View File

@ -5,15 +5,13 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element; import org.dom4j.Element;
import org.dom4j.Node; import org.dom4j.Node;
@ -22,12 +20,10 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -44,7 +40,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc) {
final List<StructuredProperty> title = Lists.newArrayList(); final List<StructuredProperty> title = Lists.newArrayList();
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']"; final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
@ -57,9 +53,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
title title
.add( .add(
structuredProperty( structuredProperty(
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info)); titleValue, titleType, titleType, DNET_DATACITE_TITLE));
} else { } else {
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info)); title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER));
} }
} }
@ -97,7 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
} }
author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
author.setPid(preparePids(n, info)); author.setPid(preparePids(n, info));
author.setRank(pos++); author.setRank(pos++);
res.add(author); res.add(author);
@ -106,8 +101,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
return res; return res;
} }
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) { private List<AuthorPid> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<AuthorPid> res = new ArrayList<>();
for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) { for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) {
final String id = ((Node) o).getText(); final String id = ((Node) o).getText();
@ -120,9 +115,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
if (type.toLowerCase().startsWith(ORCID)) { if (type.toLowerCase().startsWith(ORCID)) {
final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", "");
res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); res.add(authorPid(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) { } else if (type.startsWith("MAGID")) {
res.add(structuredProperty(id, MAG_PID_TYPE, info)); res.add(authorPid(id, MAG_PID_TYPE, info));
} }
} }
return res; return res;
@ -151,16 +146,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList())); alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList()));
instance.setPid(pid); instance.setPid(pid);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted"));
final String distributionlocation = doc.valueOf("//oaf:distributionlocation"); final String distributionlocation = doc.valueOf("//oaf:distributionlocation");
instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null); instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null);
instance instance
.setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); .setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setLicense(license(doc.valueOf("//oaf:license")));
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
instance instance
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); .setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
final Set<String> url = new HashSet<>(); final Set<String> url = new HashSet<>();
for (final Object o : doc for (final Object o : doc
@ -218,12 +213,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) { protected List<String> prepareSources(final Document doc) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareRelevantDates(final Document doc) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='date']")) { for (final Object o : doc.selectNodes("//*[local-name()='date']")) {
final String dateType = ((Node) o).valueOf("@dateType"); final String dateType = ((Node) o).valueOf("@dateType");
@ -235,42 +230,40 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res res
.add( .add(
structuredProperty( structuredProperty(
((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE, DNET_DATACITE_DATE, ((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE));
info));
} else { } else {
res res
.add( .add(
structuredProperty( structuredProperty(
((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE, DNET_DATACITE_DATE, ((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE));
info));
} }
} }
return res; return res;
} }
@Override @Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) { protected List<String> prepareCoverages(final Document doc) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) { protected List<String> prepareContributors(final Document doc) {
return prepareListFields(doc, "//*[local-name()='contributorName']", info); return prepareListFields(doc, "//*[local-name()='contributorName']");
} }
@Override @Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) { protected List<String> prepareFormats(final Document doc) {
return prepareListFields(doc, "//*[local-name()='format']", info); return prepareListFields(doc, "//*[local-name()='format']");
} }
@Override @Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) { protected Publisher preparePublisher(final Document doc) {
return prepareField(doc, "//*[local-name()='publisher']", info); return publisher(doc.valueOf("//*[local-name()='publisher']"));
} }
@Override @Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) { protected List<String> prepareDescriptions(final Document doc) {
return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info); return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']");
} }
@Override @Override
@ -284,65 +277,46 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<String> prepareOtherResearchProductTools(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<String> prepareOtherResearchProductContactGroups(final Document doc) {
final Document doc,
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']", "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']");
info);
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<String> prepareOtherResearchProductContactPersons(
final Document doc, final Document doc) {
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']", "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']");
info);
} }
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) {
return prepareQualifier(doc, "//*[local-name()='format']", DNET_PROGRAMMING_LANGUAGES); return prepareQualifier(doc, "//*[local-name()='format']", DNET_PROGRAMMING_LANGUAGES);
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected String prepareSoftwareCodeRepositoryUrl(final Document doc) {
final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<String> prepareSoftwareDocumentationUrls(final Document doc) {
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ???
}
@Override
protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc,
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", "//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']");
info);
} }
// DATASETS // DATASETS
@Override @Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc) {
final List<GeoLocation> res = new ArrayList<>(); final List<GeoLocation> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
@ -356,43 +330,39 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected String prepareDatasetMetadataVersionNumber(final Document doc) {
final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected String prepareDatasetLastMetadataUpdate(final Document doc) {
final Document doc, return doc.valueOf("//*[local-name()='date' and ./@dateType='Updated']");
final DataInfo info) {
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info);
} }
@Override @Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) { protected String prepareDatasetVersion(final Document doc) {
return prepareField(doc, "//*[local-name()='version']", info); return doc.valueOf("//*[local-name()='version']");
} }
@Override @Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) { protected String prepareDatasetSize(final Document doc) {
return prepareField(doc, "//*[local-name()='size']", info); return doc.valueOf("//*[local-name()='size']");
} }
@Override @Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) { protected String prepareDatasetDevice(final Document doc) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) { protected String prepareDatasetStorageDate(final Document doc) {
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info); return doc.valueOf("//*[local-name()='date' and ./@dateType='Issued']");
} }
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final OafEntity entity) { final Entity entity) {
final String docId = entity.getId(); final String docId = entity.getId();
@ -429,7 +399,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
protected List<Oaf> getRelations(final String reltype, final String entityId, final String otherId, protected List<Oaf> getRelations(final String reltype, final String entityId, final String otherId,
final OafEntity entity) { final Entity entity) {
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
RelationInverse rel = ModelSupport.findRelation(reltype); RelationInverse rel = ModelSupport.findRelation(reltype);
if (rel != null) { if (rel != null) {
@ -447,7 +417,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
} }
@Override @Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { protected Qualifier prepareResourceType(final Document doc) {
return prepareQualifier( return prepareQualifier(
doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE); doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE);
} }

View File

@ -12,7 +12,7 @@ import com.google.common.base.Splitter;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
/** /**
@ -38,9 +38,9 @@ public class VerifyNsPrefixPredicate implements Predicate<Oaf> {
@Override @Override
public boolean test(final Oaf oaf) { public boolean test(final Oaf oaf) {
if (oaf instanceof Datasource) { if (oaf instanceof Datasource) {
return testValue(((Datasource) oaf).getNamespaceprefix().getValue()); return testValue(((Datasource) oaf).getNamespaceprefix());
} else if (oaf instanceof OafEntity) { } else if (oaf instanceof Entity) {
return testValue(((OafEntity) oaf).getId()); return testValue(((Entity) oaf).getId());
} else if (oaf instanceof Relation) { } else if (oaf instanceof Relation) {
return testValue(((Relation) oaf).getSource()) && testValue(((Relation) oaf).getTarget()); return testValue(((Relation) oaf).getSource()) && testValue(((Relation) oaf).getTarget());
} else { } else {

View File

@ -117,7 +117,7 @@ object SparkProduceHostedByMap {
return getHostedByItemType( return getHostedByItemType(
dats.getId, dats.getId,
dats.getOfficialname.getValue, dats.getOfficialname,
dats.getJournal.getIssnPrinted, dats.getJournal.getIssnPrinted,
dats.getJournal.getIssnOnline, dats.getJournal.getIssnOnline,
dats.getJournal.getIssnLinking, dats.getJournal.getIssnLinking,

View File

@ -3,8 +3,8 @@ package eu.dnetlib.dhp.oa.graph.raw
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.common.HdfsSupport
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.schema.oaf.Oaf
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport
import eu.dnetlib.dhp.utils.DHPUtils import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.{SparkConf, SparkContext}

View File

@ -2,7 +2,8 @@ package eu.dnetlib.dhp.oa.graph.resolution
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.common.EntityType
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import org.apache.commons.io.IOUtils import org.apache.commons.io.IOUtils
import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.{FileSystem, Path}
@ -124,7 +125,7 @@ object SparkResolveEntities {
if (b == null) if (b == null)
a._2 a._2
else { else {
a._2.mergeFrom(b._2) MergeUtils.mergeResult(a._2, b._2)
a._2 a._2
} }
}) })

View File

@ -114,7 +114,6 @@ object SparkConvertRDDtoDataset {
val rddRelation = spark.sparkContext val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation") .textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation])) .map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
.filter(r => filterRelations(r)) .filter(r => filterRelations(r))
//filter OpenCitations relations //filter OpenCitations relations
@ -142,13 +141,13 @@ object SparkConvertRDDtoDataset {
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
false false
else { else {
if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0) if (r.getProvenance == null || r.getProvenance.isEmpty)
false false
else if (r.getCollectedfrom.size() > 1) else if (r.getProvenance.size() > 1)
true true
else if ( else if (
r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase( r.getProvenance.size() == 1 && r.getProvenance.get(0) != null && "OpenCitations".equalsIgnoreCase(
r.getCollectedfrom.get(0).getValue r.getProvenance.get(0).getCollectedfrom.getValue
) )
) )
false false

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.sx.graph package eu.dnetlib.dhp.sx.graph
import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import org.apache.commons.io.IOUtils import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf import org.apache.spark.SparkConf
@ -131,7 +132,7 @@ object SparkCreateInputGraph {
ds.groupByKey(_.getId) ds.groupByKey(_.getId)
.reduceGroups { (x, y) => .reduceGroups { (x, y) =>
x.mergeFrom(y) MergeUtils.mergeResult(x, y)
x x
} }
.map(_._2) .map(_._2)

View File

@ -51,10 +51,7 @@ object SparkCreateScholix {
val relationDS: Dataset[(String, Relation)] = spark.read val relationDS: Dataset[(String, Relation)] = spark.read
.load(relationPath) .load(relationPath)
.as[Relation] .as[Relation]
.filter(r => .filter(r => !r.getRelClass.toLowerCase.contains("merge"))
(r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase
.contains("merge")
)
.map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder))
val summaryDS: Dataset[(String, ScholixSummary)] = spark.read val summaryDS: Dataset[(String, ScholixSummary)] = spark.read

View File

@ -5,6 +5,7 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.RandomStringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -17,7 +18,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob; import eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
public class GraphHiveImporterJobTest { public class GraphHiveImporterJobTest {

View File

@ -70,8 +70,8 @@ public class GraphCleaningFunctionsTest {
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass()));
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType()));
assertEquals("iis", r_out.getDataInfo().getProvenanceaction().getClassid()); assertTrue(r_out.getProvenance().stream().anyMatch(p -> "iis".equals(p.getDataInfo().getProvenanceaction().getClassid())));
assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname()); assertTrue(r_out.getProvenance().stream().anyMatch(p -> "Inferred by OpenAIRE".equals(p.getDataInfo().getProvenanceaction().getClassname())));
} }
} }
@ -141,7 +141,7 @@ public class GraphCleaningFunctionsTest {
assertNotNull(p_out); assertNotNull(p_out);
assertNotNull(p_out.getPublisher()); assertNotNull(p_out.getPublisher());
assertNull(p_out.getPublisher().getValue()); assertNull(p_out.getPublisher().getName());
assertEquals("und", p_out.getLanguage().getClassid()); assertEquals("und", p_out.getLanguage().getClassid());
assertEquals("Undetermined", p_out.getLanguage().getClassname()); assertEquals("Undetermined", p_out.getLanguage().getClassname());
@ -216,7 +216,7 @@ public class GraphCleaningFunctionsTest {
assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid()); assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid());
assertNull(p_out.getPublisher()); assertNull(p_out.getPublisher());
assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); assertEquals("1970-10-07", p_cleaned.getDateofacceptance());
assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid()); assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid());
assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname()); assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname());

View File

@ -9,6 +9,7 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -26,7 +27,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -130,13 +130,13 @@ public class GroupEntitiesSparkJobTest {
assertEquals( assertEquals(
2, 2,
output output
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING()) .map((MapFunction<Result, String>) r -> r.getResulttype(), Encoders.STRING())
.filter((FilterFunction<String>) s -> s.equals("publication")) .filter((FilterFunction<String>) s -> s.equals("publication"))
.count()); .count());
assertEquals( assertEquals(
1, 1,
output output
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING()) .map((MapFunction<Result, String>) r -> r.getResulttype(), Encoders.STRING())
.filter((FilterFunction<String>) s -> s.equals("dataset")) .filter((FilterFunction<String>) s -> s.equals("dataset"))
.count()); .count());
} }

View File

@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -20,7 +21,6 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -72,9 +72,9 @@ class GenerateEntitiesApplicationTest {
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz, protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
String resultType) { String resultType) {
final Result merge = OafMapperUtils.mergeResults(publication, dataset); final Result merge = MergeUtils.mergeResults(publication, dataset);
assertTrue(clazz.isAssignableFrom(merge.getClass())); assertTrue(clazz.isAssignableFrom(merge.getClass()));
assertEquals(resultType, merge.getResulttype().getClassid()); assertEquals(resultType, merge.getResulttype());
} }
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz) protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)

View File

@ -26,7 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -92,7 +91,6 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname()); assertEquals("Votsi,Nefta", author.get().getFullname());
assertEquals("Votsi", author.get().getSurname()); assertEquals("Votsi", author.get().getSurname());
assertEquals("Nefta", author.get().getName()); assertEquals("Nefta", author.get().getName());
@ -124,7 +122,7 @@ class MappersTest {
assertNotNull(p.getFulltext()); assertNotNull(p.getFulltext());
assertEquals(1, p.getFulltext().size()); assertEquals(1, p.getFulltext().size());
assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0).getValue()); assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0));
// RESULT PROJECT // RESULT PROJECT
List<Relation> resultProject = list List<Relation> resultProject = list
@ -171,9 +169,11 @@ class MappersTest {
private void verifyRelation(Relation r) { private void verifyRelation(Relation r) {
assertValidId(r.getSource()); assertValidId(r.getSource());
assertValidId(r.getTarget()); assertValidId(r.getTarget());
assertValidId(r.getCollectedfrom().get(0).getKey()); for(Provenance p : r.getProvenance()) {
assertNotNull(r.getDataInfo()); assertValidId(p.getCollectedfrom().getKey());
assertNotNull(r.getDataInfo().getTrust()); assertNotNull(p.getDataInfo());
assertNotNull(p.getDataInfo().getTrust());
}
assertTrue(StringUtils.isNotBlank(r.getRelClass())); assertTrue(StringUtils.isNotBlank(r.getRelClass()));
assertTrue(StringUtils.isNotBlank(r.getRelType())); assertTrue(StringUtils.isNotBlank(r.getRelType()));
@ -221,7 +221,6 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname()); assertEquals("Votsi,Nefta", author.get().getFullname());
assertEquals("Votsi", author.get().getSurname()); assertEquals("Votsi", author.get().getSurname());
assertEquals("Nefta", author.get().getName()); assertEquals("Nefta", author.get().getName());
@ -326,7 +325,7 @@ class MappersTest {
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst(); .findFirst();
assertTrue(author.isPresent()); assertTrue(author.isPresent());
final Optional<StructuredProperty> oPid = author final Optional<AuthorPid> oPid = author
.get() .get()
.getPid() .getPid()
.stream() .stream()
@ -337,21 +336,10 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Baracchini, Theo", author.get().getFullname()); assertEquals("Baracchini, Theo", author.get().getFullname());
assertEquals("Baracchini", author.get().getSurname()); assertEquals("Baracchini", author.get().getSurname());
assertEquals("Theo", author.get().getName()); assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size());
final Optional<Field<String>> opAff = author
.get()
.getAffiliation()
.stream()
.findFirst();
assertTrue(opAff.isPresent());
final Field<String> affiliation = opAff.get();
assertEquals("ISTI-CNR", affiliation.getValue());
assertTrue(d.getSubject().size() > 0); assertTrue(d.getSubject().size() > 0);
assertTrue(d.getInstance().size() > 0); assertTrue(d.getInstance().size() > 0);
assertTrue(d.getContext().size() > 0); assertTrue(d.getContext().size() > 0);
@ -378,10 +366,13 @@ class MappersTest {
assertValidId(r1.getTarget()); assertValidId(r1.getTarget());
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertValidId(r2.getTarget()); assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo()); assertNotNull(r1.getProvenance());
assertNotNull(r1.getDataInfo().getTrust()); assertFalse(r1.getProvenance().isEmpty());
assertNotNull(r2.getDataInfo().getTrust()); assertNotNull(r1.getProvenance().get(0).getDataInfo());
assertNotNull(r2.getProvenance().get(0).getDataInfo());
assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust());
assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget()); assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
@ -491,7 +482,6 @@ class MappersTest {
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid()); assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid());
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname()); assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid()); assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename());
assertValidId(d.getId()); assertValidId(d.getId());
assertEquals(2, d.getOriginalId().size()); assertEquals(2, d.getOriginalId().size());
@ -510,7 +500,7 @@ class MappersTest {
assertNotNull(d.getDescription()); assertNotNull(d.getDescription());
assertEquals(1, d.getDescription().size()); assertEquals(1, d.getDescription().size());
assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0)));
assertEquals(1, d.getAuthor().size()); assertEquals(1, d.getAuthor().size());
assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname());
@ -524,7 +514,7 @@ class MappersTest {
assertEquals(0, d.getPid().size()); assertEquals(0, d.getPid().size());
assertNotNull(d.getPublisher()); assertNotNull(d.getPublisher());
assertEquals("nct", d.getPublisher().getValue()); assertEquals("nct", d.getPublisher().getName());
assertTrue(d.getSubject().isEmpty()); assertTrue(d.getSubject().isEmpty());
assertTrue(d.getContext().isEmpty()); assertTrue(d.getContext().isEmpty());
@ -536,7 +526,7 @@ class MappersTest {
assertNotNull(i.getAccessright()); assertNotNull(i.getAccessright());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid()); assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename()); assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright());
assertEquals("OPEN", i.getAccessright().getClassid()); assertEquals("OPEN", i.getAccessright().getClassid());
assertEquals("Open Access", i.getAccessright().getClassname()); assertEquals("Open Access", i.getAccessright().getClassname());
@ -552,11 +542,10 @@ class MappersTest {
assertEquals("0037", i.getInstancetype().getClassid()); assertEquals("0037", i.getInstancetype().getClassid());
assertEquals("Clinical Trial", i.getInstancetype().getClassname()); assertEquals("Clinical Trial", i.getInstancetype().getClassname());
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid()); assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid());
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename());
assertNull(i.getLicense()); assertNull(i.getLicense());
assertNotNull(i.getDateofacceptance()); assertNotNull(i.getDateofacceptance());
assertEquals("2014-11-11", i.getDateofacceptance().getValue()); assertEquals("2014-11-11", i.getDateofacceptance());
assertNull(i.getDistributionlocation()); assertNull(i.getDistributionlocation());
assertNull(i.getProcessingchargeamount()); assertNull(i.getProcessingchargeamount());
@ -571,7 +560,7 @@ class MappersTest {
assertEquals("nct", i.getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("nct", i.getAlternateIdentifier().get(0).getQualifier().getClassid());
assertEquals("ClinicalTrials.gov Identifier", i.getAlternateIdentifier().get(0).getQualifier().getClassname()); assertEquals("ClinicalTrials.gov Identifier", i.getAlternateIdentifier().get(0).getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemename()); assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier());
assertNotNull(i.getUrl()); assertNotNull(i.getUrl());
assertEquals(2, i.getUrl().size()); assertEquals(2, i.getUrl().size());
@ -738,13 +727,13 @@ class MappersTest {
assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid()));
assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()));
assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue());
assertEquals("dataset", p.getResulttype().getClassname()); assertEquals("dataset", p.getResulttype());
assertEquals(1, p.getInstance().size()); assertEquals(1, p.getInstance().size());
assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid()); assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid());
assertValidId(p.getInstance().get(0).getCollectedfrom().getKey()); assertValidId(p.getInstance().get(0).getCollectedfrom().getKey());
assertValidId(p.getInstance().get(0).getHostedby().getKey()); assertValidId(p.getInstance().get(0).getHostedby().getKey());
assertEquals( assertEquals(
"http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue()); "http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getUrl());
assertEquals(1, p.getInstance().size()); assertEquals(1, p.getInstance().size());
assertNotNull(p.getInstance().get(0).getAlternateIdentifier()); assertNotNull(p.getInstance().get(0).getAlternateIdentifier());
@ -938,8 +927,8 @@ class MappersTest {
assertTrue(p.getProcessingchargeamount() != null); assertTrue(p.getProcessingchargeamount() != null);
assertTrue(p.getProcessingchargecurrency() != null); assertTrue(p.getProcessingchargecurrency() != null);
assertEquals("1721.47", p.getProcessingchargeamount().getValue()); assertEquals("1721.47", p.getProcessingchargeamount());
assertEquals("EUR", p.getProcessingchargecurrency().getValue()); assertEquals("EUR", p.getProcessingchargecurrency());
} }
@Test @Test

View File

@ -51,8 +51,7 @@ class MigrateDbEntitiesApplicationTest {
.thenAnswer( .thenAnswer(
invocation -> OafMapperUtils invocation -> OafMapperUtils
.qualifier( .qualifier(
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0)));
invocation.getArgument(0)));
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
@ -80,12 +79,12 @@ class MigrateDbEntitiesApplicationTest {
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid()); assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid()); assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue()); assertEquals(getValueAsString("officialname", fields), ds.getOfficialname());
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue()); assertEquals(getValueAsString("englishname", fields), ds.getEnglishname());
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue()); assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl());
assertEquals(getValueAsString("logourl", fields), ds.getLogourl()); assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue()); assertEquals(getValueAsString("contactemail", fields), ds.getContactemail());
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue()); assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix());
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName()); assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
@ -100,37 +99,34 @@ class MigrateDbEntitiesApplicationTest {
assertEquals("Data Source", ds.getEosctype().getClassid()); assertEquals("Data Source", ds.getEosctype().getClassid());
assertEquals("Data Source", ds.getEosctype().getClassname()); assertEquals("Data Source", ds.getEosctype().getClassname());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid()); assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid()); assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname()); assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid()); assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid()); assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname()); assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid()); assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue()); assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude());
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue()); assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude());
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation()); assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue()); assertEquals(getValueAsString("description", fields), ds.getDescription());
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects()); // TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
assertEquals("0.0", ds.getOdnumberofitems().getValue()); assertEquals("0.0", ds.getOdnumberofitems());
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate()); assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies()); assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
assertEquals( assertEquals(
getValueAsList("odlanguages", fields), getValueAsList("odlanguages", fields),
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList())); ds.getOdlanguages().stream().collect(Collectors.toList()));
assertEquals(getValueAsList("languages", fields), ds.getLanguages()); assertEquals(getValueAsList("languages", fields), ds.getLanguages());
assertEquals( assertEquals(
getValueAsList("accessinfopackage", fields), getValueAsList("accessinfopackage", fields),
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList())); ds.getAccessinfopackage().stream().collect(Collectors.toList()));
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate()); assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate()); assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl()); assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
@ -143,7 +139,7 @@ class MigrateDbEntitiesApplicationTest {
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction()); assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction()); assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
assertEquals(false, ds.getVersioning().getValue()); assertEquals(false, ds.getVersioning());
assertEquals(false, ds.getVersioncontrol()); assertEquals(false, ds.getVersioncontrol());
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl()); assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
@ -164,13 +160,6 @@ class MigrateDbEntitiesApplicationTest {
.collect(Collectors.toCollection(HashSet::new)); .collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeId.size()); assertEquals(1, cpSchemeId.size());
assertTrue(cpSchemeId.contains("eosc:contentpolicies")); assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
HashSet<String> cpSchemeName = ds
.getContentpolicies()
.stream()
.map(Qualifier::getSchemename)
.collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeName.size());
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
assertEquals(2, ds.getContentpolicies().size()); assertEquals(2, ds.getContentpolicies().size());
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid()); assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid()); assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
@ -202,8 +191,8 @@ class MigrateDbEntitiesApplicationTest {
final Project p = (Project) list.get(0); final Project p = (Project) list.get(0);
assertValidId(p.getId()); assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals(getValueAsString("acronym", fields), p.getAcronym().getValue()); assertEquals(getValueAsString("acronym", fields), p.getAcronym());
assertEquals(getValueAsString("title", fields), p.getTitle().getValue()); assertEquals(getValueAsString("title", fields), p.getTitle());
assertEquals(getValueAsString("collectedfromname", fields), p.getCollectedfrom().get(0).getValue()); assertEquals(getValueAsString("collectedfromname", fields), p.getCollectedfrom().get(0).getValue());
assertEquals(getValueAsFloat("fundedamount", fields), p.getFundedamount()); assertEquals(getValueAsFloat("fundedamount", fields), p.getFundedamount());
assertEquals(getValueAsFloat("totalcost", fields), p.getTotalcost()); assertEquals(getValueAsFloat("totalcost", fields), p.getTotalcost());
@ -222,13 +211,12 @@ class MigrateDbEntitiesApplicationTest {
final Organization o = (Organization) list.get(0); final Organization o = (Organization) list.get(0);
assertValidId(o.getId()); assertValidId(o.getId());
assertValidId(o.getCollectedfrom().get(0).getKey()); assertValidId(o.getCollectedfrom().get(0).getKey());
assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname().getValue()); assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname());
assertEquals(getValueAsString("legalname", fields), o.getLegalname().getValue()); assertEquals(getValueAsString("legalname", fields), o.getLegalname());
assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl().getValue()); assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl());
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassid()); assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassid());
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassname()); assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassname());
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
final List<String> alternativenames = getValueAsList("alternativenames", fields); final List<String> alternativenames = getValueAsList("alternativenames", fields);
assertEquals(2, alternativenames.size()); assertEquals(2, alternativenames.size());
@ -280,8 +268,12 @@ class MigrateDbEntitiesApplicationTest {
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget()); assertEquals(r2.getSource(), r1.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey()); assertNotNull(r1.getProvenance());
assertValidId(r2.getCollectedfrom().get(0).getKey()); assertFalse(r1.getProvenance().isEmpty());
assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType()); assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType()); assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType());
@ -350,10 +342,17 @@ class MigrateDbEntitiesApplicationTest {
assertValidId(r1.getTarget()); assertValidId(r1.getTarget());
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertValidId(r2.getTarget()); assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo()); assertNotNull(r1.getProvenance());
assertNotNull(r1.getDataInfo().getTrust()); assertFalse(r1.getProvenance().isEmpty());
assertNotNull(r2.getDataInfo().getTrust()); assertNotNull(r1.getProvenance().get(0).getDataInfo());
assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertNotNull(r2.getProvenance().get(0).getDataInfo());
assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget()); assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
@ -361,8 +360,8 @@ class MigrateDbEntitiesApplicationTest {
assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType()));
assertValidId(r1.getCollectedfrom().get(0).getKey()); assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey()); assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
} }
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException { private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {

View File

@ -8,7 +8,6 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
@ -39,22 +38,17 @@ class VerifyNsPrefixPredicateTest {
@Test @Test
void testTest_ds_true() { void testTest_ds_true() {
final Field<String> prefix = new Field<>();
prefix.setValue("xxxxxx______");
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
ds.setNamespaceprefix(prefix); ds.setNamespaceprefix("xxxxxx______");
assertTrue(predicate.test(ds)); assertTrue(predicate.test(ds));
} }
@Test @Test
void testTest_ds_false() { void testTest_ds_false() {
final Field<String> prefix = new Field<>();
prefix.setValue("corda__2020");
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
ds.setNamespaceprefix(prefix); ds.setNamespaceprefix("corda__2020");
assertFalse(predicate.test(ds)); assertFalse(predicate.test(ds));
} }

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.oa.graph.resolution package eu.dnetlib.dhp.oa.graph.resolution
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.common.EntityType
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils import eu.dnetlib.dhp.schema.oaf.utils.{MergeUtils, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty} import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty}
import org.apache.commons.io.FileUtils import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf import org.apache.spark.SparkConf
@ -61,7 +61,7 @@ class ResolveEntitiesTest extends Serializable {
List( List(
OafMapperUtils.subject( OafMapperUtils.subject(
FAKE_SUBJECT, FAKE_SUBJECT,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
null null
) )
).asJava ).asJava
@ -70,8 +70,7 @@ class ResolveEntitiesTest extends Serializable {
List( List(
OafMapperUtils.structuredProperty( OafMapperUtils.structuredProperty(
FAKE_TITLE, FAKE_TITLE,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), OafMapperUtils.qualifier("fos", "fosCS", "fossSchema")
null
) )
).asJava ).asJava
) )
@ -247,12 +246,12 @@ class ResolveEntitiesTest extends Serializable {
@Test @Test
def testMerge(): Unit = { def testMerge(): Unit = {
val r = new Result var r = new Result
r.setSubject( r.setSubject(
List( List(
OafMapperUtils.subject( OafMapperUtils.subject(
FAKE_SUBJECT, FAKE_SUBJECT,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
null null
) )
).asJava ).asJava
@ -269,7 +268,7 @@ class ResolveEntitiesTest extends Serializable {
classOf[Publication] classOf[Publication]
) )
r.mergeFrom(p) r = MergeUtils.mergeResult(r, p);
println(mapper.writeValueAsString(r)) println(mapper.writeValueAsString(r))

View File

@ -3,11 +3,14 @@ package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.EntityType;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -26,15 +29,6 @@ import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
import scala.Tuple2; import scala.Tuple2;
@ -79,7 +73,7 @@ public class CreateRelatedEntitiesJob_phase1 {
log.info("graphTableClassName: {}", graphTableClassName); log.info("graphTableClassName: {}", graphTableClassName);
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName); final Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
final SparkConf conf = new SparkConf(); final SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@ -91,7 +85,7 @@ public class CreateRelatedEntitiesJob_phase1 {
}); });
} }
private static <E extends OafEntity> void joinRelationEntity( private static <E extends Entity> void joinRelationEntity(
final SparkSession spark, final SparkSession spark,
final String inputRelationsPath, final String inputRelationsPath,
final String inputEntityPath, final String inputEntityPath,
@ -123,7 +117,7 @@ public class CreateRelatedEntitiesJob_phase1 {
.parquet(outputPath); .parquet(outputPath);
} }
private static <E extends OafEntity> Dataset<E> readPathEntity( private static <E extends Entity> Dataset<E> readPathEntity(
final SparkSession spark, final SparkSession spark,
final String inputEntityPath, final String inputEntityPath,
final Class<E> entityClazz) { final Class<E> entityClazz) {
@ -137,7 +131,7 @@ public class CreateRelatedEntitiesJob_phase1 {
Encoders.bean(entityClazz)); Encoders.bean(entityClazz));
} }
public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) { public static <E extends Entity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
final RelatedEntity re = new RelatedEntity(); final RelatedEntity re = new RelatedEntity();
re.setId(entity.getId()); re.setId(entity.getId());
@ -162,8 +156,8 @@ public class CreateRelatedEntitiesJob_phase1 {
re.setTitle(title); re.setTitle(title);
} }
re.setDateofacceptance(getValue(result.getDateofacceptance())); re.setDateofacceptance(result.getDateofacceptance());
re.setPublisher(getValue(result.getPublisher())); re.setPublisher(Optional.ofNullable(result.getPublisher()).map(p -> p.getName()).orElse(null));
re.setResulttype(result.getResulttype()); re.setResulttype(result.getResulttype());
if (Objects.nonNull(result.getInstance())) { if (Objects.nonNull(result.getInstance())) {
re re
@ -206,24 +200,23 @@ public class CreateRelatedEntitiesJob_phase1 {
re.setAcronym(getValue(p.getAcronym())); re.setAcronym(getValue(p.getAcronym()));
re.setContracttype(p.getContracttype()); re.setContracttype(p.getContracttype());
// Copy the funding tree only when the project actually carries one: the list may be
// absent (null), in which case calling isEmpty() directly would throw.
final List<String> f = p.getFundingtree();
if (Objects.nonNull(f) && !f.isEmpty()) {
    re.setFundingtree(f);
}
break; break;
} }
return re; return re;
} }
private static String getValue(final Field<String> field) { private static String getValue(final String s) {
return getFieldValueWithDefault(field, ""); return getFieldValueWithDefault(s, "");
} }
private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) { private static <T> T getFieldValueWithDefault(final T f, final T defaultValue) {
return Optional return Optional
.ofNullable(f) .ofNullable(f)
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(Field::getValue)
.orElse(defaultValue); .orElse(defaultValue);
} }

View File

@ -8,6 +8,7 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -28,7 +29,6 @@ import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
import scala.Tuple2; import scala.Tuple2;
@ -78,7 +78,7 @@ public class CreateRelatedEntitiesJob_phase2 {
String graphTableClassName = parser.get("graphTableClassName"); String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName); log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName); Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@ -94,7 +94,7 @@ public class CreateRelatedEntitiesJob_phase2 {
}); });
} }
private static <E extends OafEntity> void joinEntityWithRelatedEntities( private static <E extends Entity> void joinEntityWithRelatedEntities(
SparkSession spark, SparkSession spark,
String relatedEntitiesPath, String relatedEntitiesPath,
String entityPath, String entityPath,
@ -177,7 +177,7 @@ public class CreateRelatedEntitiesJob_phase2 {
} }
private static <E extends OafEntity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities( private static <E extends Entity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities(
SparkSession spark, String inputRelatedEntitiesPath, Class<E> entityClazz) { SparkSession spark, String inputRelatedEntitiesPath, Class<E> entityClazz) {
log.info("Reading related entities from: {}", inputRelatedEntitiesPath); log.info("Reading related entities from: {}", inputRelatedEntitiesPath);
@ -200,7 +200,7 @@ public class CreateRelatedEntitiesJob_phase2 {
Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntityWrapper.class))); Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntityWrapper.class)));
} }
private static <E extends OafEntity> Dataset<Tuple2<String, E>> readPathEntity( private static <E extends Entity> Dataset<Tuple2<String, E>> readPathEntity(
SparkSession spark, String inputEntityPath, Class<E> entityClazz) { SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
log.info("Reading Graph table from: {}", inputEntityPath); log.info("Reading Graph table from: {}", inputEntityPath);
@ -217,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase2 {
Encoders.tuple(Encoders.STRING(), Encoders.kryo(entityClazz))); Encoders.tuple(Encoders.STRING(), Encoders.kryo(entityClazz)));
} }
private static <E extends OafEntity> E pruneOutliers(Class<E> entityClazz, E e) { private static <E extends Entity> E pruneOutliers(Class<E> entityClazz, E e) {
if (ModelSupport.isSubClass(entityClazz, Result.class)) { if (ModelSupport.isSubClass(entityClazz, Result.class)) {
Result r = (Result) e; Result r = (Result) e;
if (r.getExternalReference() != null) { if (r.getExternalReference() != null) {
@ -239,14 +239,11 @@ public class CreateRelatedEntitiesJob_phase2 {
r.setAuthor(authors); r.setAuthor(authors);
} }
if (r.getDescription() != null) { if (r.getDescription() != null) {
List<Field<String>> desc = r List<String> desc = r
.getDescription() .getDescription()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(d -> { .map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
return d;
})
.collect(Collectors.toList()); .collect(Collectors.toList());
r.setDescription(desc); r.setDescription(desc);
} }

View File

@ -132,7 +132,6 @@ public class PrepareRelationsJob {
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath) JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved"))) .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
.filter(rel -> !rel.getDataInfo().getDeletedbyinference())
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
JavaRDD<Relation> pruned = pruneRels( JavaRDD<Relation> pruned = pruneRels(
@ -171,7 +170,6 @@ public class PrepareRelationsJob {
.map( .map(
(MapFunction<String, Relation>) s -> OBJECT_MAPPER.readValue(s, Relation.class), (MapFunction<String, Relation>) s -> OBJECT_MAPPER.readValue(s, Relation.class),
Encoders.kryo(Relation.class)) Encoders.kryo(Relation.class))
.filter((FilterFunction<Relation>) rel -> !rel.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) rel -> !relationFilter.contains(rel.getRelClass())) .filter((FilterFunction<Relation>) rel -> !relationFilter.contains(rel.getRelClass()))
.groupByKey( .groupByKey(
(MapFunction<Relation, String>) Relation::getSource, (MapFunction<Relation, String>) Relation::getSource,

View File

@ -43,9 +43,7 @@ public class SortableRelation extends Relation implements Comparable<SortableRel
sr.setRelType(r.getRelType()); sr.setRelType(r.getRelType());
sr.setSubRelType(r.getSubRelType()); sr.setSubRelType(r.getSubRelType());
sr.setRelClass(r.getRelClass()); sr.setRelClass(r.getRelClass());
sr.setDataInfo(r.getDataInfo()); sr.setProvenance(r.getProvenance());
sr.setCollectedfrom(r.getCollectedfrom());
sr.setLastupdatetimestamp(r.getLastupdatetimestamp());
sr.setProperties(r.getProperties()); sr.setProperties(r.getProperties());
sr.setValidated(r.getValidated()); sr.setValidated(r.getValidated());
sr.setValidationDate(r.getValidationDate()); sr.setValidationDate(r.getValidationDate());

View File

@ -5,9 +5,9 @@ import java.io.Serializable;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Entity;
public class JoinedEntity<E extends OafEntity> implements Serializable { public class JoinedEntity<E extends Entity> implements Serializable {
private E entity; private E entity;

View File

@ -7,7 +7,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.provision.RelationList; import eu.dnetlib.dhp.oa.provision.RelationList;
import eu.dnetlib.dhp.oa.provision.SortableRelation; import eu.dnetlib.dhp.oa.provision.SortableRelation;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class ProvisionModelSupport { public class ProvisionModelSupport {

View File

@ -25,7 +25,7 @@ public class RelatedEntity implements Serializable {
private String publisher; private String publisher;
private List<StructuredProperty> pid; private List<StructuredProperty> pid;
private String codeRepositoryUrl; private String codeRepositoryUrl;
private Qualifier resulttype; private String resulttype;
private List<KeyValue> collectedfrom; private List<KeyValue> collectedfrom;
private List<Instance> instances; private List<Instance> instances;
@ -111,11 +111,11 @@ public class RelatedEntity implements Serializable {
this.codeRepositoryUrl = codeRepositoryUrl; this.codeRepositoryUrl = codeRepositoryUrl;
} }
public Qualifier getResulttype() { public String getResulttype() {
return resulttype; return resulttype;
} }
public void setResulttype(Qualifier resulttype) { public void setResulttype(String resulttype) {
this.resulttype = resulttype; this.resulttype = resulttype;
} }

View File

@ -19,13 +19,11 @@ public class XmlInstance implements Serializable {
UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN); UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN);
UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN); UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN);
UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES); UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES);
// Fallback qualifier for instances without a review level. The scheme id is set on
// UNKNOWN_REVIEW_LEVEL itself: assigning it to UNKNOWN_ACCESS_RIGHT would overwrite the
// access-mode scheme configured above and leave this qualifier without any scheme id.
UNKNOWN_REVIEW_LEVEL = new Qualifier();
UNKNOWN_REVIEW_LEVEL.setClassid("0000");
UNKNOWN_REVIEW_LEVEL.setClassname(ModelConstants.UNKNOWN);
UNKNOWN_REVIEW_LEVEL.setSchemeid(ModelConstants.DNET_REVIEW_LEVELS);
UNKNOWN_REVIEW_LEVEL.setSchemename(ModelConstants.DNET_REVIEW_LEVELS);
} }
private String url; private String url;

View File

@ -1,25 +1,21 @@
package eu.dnetlib.dhp.oa.provision.utils; package eu.dnetlib.dhp.oa.provision.utils;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import com.google.common.collect.Lists;
import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Entity;
import org.apache.commons.lang3.StringUtils;
import org.stringtemplate.v4.ST;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.swing.text.html.Option; import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
import org.apache.commons.lang3.StringUtils;
import org.stringtemplate.v4.ST;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory { public class TemplateFactory {
@ -62,7 +58,7 @@ public class TemplateFactory {
} }
public String buildRecord( public String buildRecord(
final OafEntity entity, final String schemaLocation, final String body) { final Entity entity, final String schemaLocation, final String body) {
return getTemplate(resources.getRecord()) return getTemplate(resources.getRecord())
.add("id", escapeXml(removePrefix(entity.getId()))) .add("id", escapeXml(removePrefix(entity.getId())))
.add("dateofcollection", entity.getDateofcollection()) .add("dateofcollection", entity.getDateofcollection())

View File

@ -20,6 +20,7 @@ import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource; import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.schema.oaf.common.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
@ -97,7 +98,7 @@ public class XmlRecordFactory implements Serializable {
final Set<String> contexts = Sets.newHashSet(); final Set<String> contexts = Sets.newHashSet();
// final OafEntity entity = toOafEntity(je.getEntity()); // final OafEntity entity = toOafEntity(je.getEntity());
final OafEntity entity = je.getEntity(); final Entity entity = je.getEntity();
final TemplateFactory templateFactory = new TemplateFactory(); final TemplateFactory templateFactory = new TemplateFactory();
try { try {
@ -128,7 +129,7 @@ public class XmlRecordFactory implements Serializable {
} }
} }
private static OafEntity parseOaf(final String json, final String type) { private static Entity parseOaf(final String json, final String type) {
try { try {
switch (EntityType.valueOf(type)) { switch (EntityType.valueOf(type)) {
case publication: case publication:
@ -170,7 +171,7 @@ public class XmlRecordFactory implements Serializable {
private List<String> metadata( private List<String> metadata(
final EntityType type, final EntityType type,
final OafEntity entity, final Entity entity,
final Set<String> contexts) { final Set<String> contexts) {
final List<String> metadata = Lists.newArrayList(); final List<String> metadata = Lists.newArrayList();
@ -319,7 +320,7 @@ public class XmlRecordFactory implements Serializable {
.getContributor() .getContributor()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("contributor", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (r.getCountry() != null) { if (r.getCountry() != null) {
@ -339,14 +340,14 @@ public class XmlRecordFactory implements Serializable {
.getCoverage() .getCoverage()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("coverage", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (r.getDateofacceptance() != null) { if (r.getDateofacceptance() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); .asXmlElement("dateofacceptance", r.getDateofacceptance()));
} }
if (r.getDescription() != null) { if (r.getDescription() != null) {
metadata metadata
@ -355,12 +356,12 @@ public class XmlRecordFactory implements Serializable {
.getDescription() .getDescription()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("description", c))
.collect(Collectors.toCollection(HashSet::new))); .collect(Collectors.toCollection(HashSet::new)));
} }
if (r.getEmbargoenddate() != null) { if (r.getEmbargoenddate() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate()));
} }
if (r.getSubject() != null) { if (r.getSubject() != null) {
metadata metadata
@ -386,7 +387,7 @@ public class XmlRecordFactory implements Serializable {
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (r.getPublisher() != null) { if (r.getPublisher() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getName()));
} }
if (r.getSource() != null) { if (r.getSource() != null) {
metadata metadata
@ -395,7 +396,7 @@ public class XmlRecordFactory implements Serializable {
.getSource() .getSource()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("source", c))
.collect(Collectors.toCollection(HashSet::new))); .collect(Collectors.toCollection(HashSet::new)));
} }
if (r.getFormat() != null) { if (r.getFormat() != null) {
@ -405,11 +406,11 @@ public class XmlRecordFactory implements Serializable {
.getFormat() .getFormat()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("format", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (r.getResulttype() != null) { if (r.getResulttype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", r.getResulttype())); metadata.add(XmlSerializationUtils.asXmlElement("resulttype", r.getResulttype()));
} }
if (r.getResourcetype() != null) { if (r.getResourcetype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype())); metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
@ -418,11 +419,11 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue())); .asXmlElement("processingchargeamount", r.getProcessingchargeamount()));
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue())); .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency()));
} }
} }
@ -439,29 +440,29 @@ public class XmlRecordFactory implements Serializable {
case dataset: case dataset:
final Dataset d = (Dataset) entity; final Dataset d = (Dataset) entity;
if (d.getDevice() != null) { if (d.getDevice() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice()));
} }
if (d.getLastmetadataupdate() != null) { if (d.getLastmetadataupdate() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate()));
} }
if (d.getMetadataversionnumber() != null) { if (d.getMetadataversionnumber() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); .asXmlElement("metadataversionnumber", d.getMetadataversionnumber()));
} }
if (d.getSize() != null) { if (d.getSize() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize()));
} }
if (d.getStoragedate() != null) { if (d.getStoragedate() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate()));
} }
if (d.getVersion() != null) { if (d.getVersion() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion()));
} }
// TODO d.getGeolocation() // TODO d.getGeolocation()
@ -476,7 +477,7 @@ public class XmlRecordFactory implements Serializable {
.getContactperson() .getContactperson()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -487,7 +488,7 @@ public class XmlRecordFactory implements Serializable {
.getContactgroup() .getContactgroup()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (orp.getTool() != null) { if (orp.getTool() != null) {
@ -497,7 +498,7 @@ public class XmlRecordFactory implements Serializable {
.getTool() .getTool()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("tool", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
break; break;
@ -511,24 +512,14 @@ public class XmlRecordFactory implements Serializable {
.getDocumentationUrl() .getDocumentationUrl()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c))
.collect(Collectors.toList()));
}
if (s.getLicense() != null) {
metadata
.addAll(
s
.getLicense()
.stream()
.filter(Objects::nonNull)
.map(l -> XmlSerializationUtils.mapStructuredProperty("license", l))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (s.getCodeRepositoryUrl() != null) { if (s.getCodeRepositoryUrl() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl()));
} }
if (s.getProgrammingLanguage() != null) { if (s.getProgrammingLanguage() != null) {
metadata metadata
@ -560,45 +551,45 @@ public class XmlRecordFactory implements Serializable {
} }
if (ds.getOfficialname() != null) { if (ds.getOfficialname() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname()));
} }
if (ds.getEnglishname() != null) { if (ds.getEnglishname() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname()));
} }
if (ds.getWebsiteurl() != null) { if (ds.getWebsiteurl() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl()));
} }
if (ds.getLogourl() != null) { if (ds.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl()));
} }
if (ds.getContactemail() != null) { if (ds.getContactemail() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail()));
} }
if (ds.getNamespaceprefix() != null) { if (ds.getNamespaceprefix() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); .asXmlElement("namespaceprefix", ds.getNamespaceprefix()));
} }
if (ds.getLatitude() != null) { if (ds.getLatitude() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude()));
} }
if (ds.getLongitude() != null) { if (ds.getLongitude() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude()));
} }
if (ds.getDateofvalidation() != null) { if (ds.getDateofvalidation() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); .asXmlElement("dateofvalidation", ds.getDateofvalidation()));
} }
if (ds.getDescription() != null) { if (ds.getDescription() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription()));
} }
if (ds.getSubjects() != null) { if (ds.getSubjects() != null) {
metadata metadata
@ -614,17 +605,17 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); .asXmlElement("odnumberofitems", ds.getOdnumberofitems()));
} }
if (ds.getOdnumberofitemsdate() != null) { if (ds.getOdnumberofitemsdate() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate()));
} }
if (ds.getOdpolicies() != null) { if (ds.getOdpolicies() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies()));
} }
if (ds.getOdlanguages() != null) { if (ds.getOdlanguages() != null) {
metadata metadata
@ -633,7 +624,7 @@ public class XmlRecordFactory implements Serializable {
.getOdlanguages() .getOdlanguages()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getLanguages() != null) { if (ds.getLanguages() != null) {
@ -653,7 +644,7 @@ public class XmlRecordFactory implements Serializable {
.getOdcontenttypes() .getOdcontenttypes()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getAccessinfopackage() != null) { if (ds.getAccessinfopackage() != null) {
@ -662,69 +653,69 @@ public class XmlRecordFactory implements Serializable {
ds ds
.getAccessinfopackage() .getAccessinfopackage()
.stream() .stream()
.map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getReleaseenddate() != null) { if (ds.getReleaseenddate() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); .asXmlElement("releasestartdate", ds.getReleaseenddate()));
} }
if (ds.getReleaseenddate() != null) { if (ds.getReleaseenddate() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); .asXmlElement("releaseenddate", ds.getReleaseenddate()));
} }
if (ds.getMissionstatementurl() != null) { if (ds.getMissionstatementurl() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); .asXmlElement("missionstatementurl", ds.getMissionstatementurl()));
} }
if (ds.getDataprovider() != null) { if (ds.getDataprovider() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); .asXmlElement("dataprovider", ds.getDataprovider().toString()));
} }
if (ds.getServiceprovider() != null) { if (ds.getServiceprovider() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString())); .asXmlElement("serviceprovider", ds.getServiceprovider().toString()));
} }
if (ds.getDatabaseaccesstype() != null) { if (ds.getDatabaseaccesstype() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype()));
} }
if (ds.getDatauploadtype() != null) { if (ds.getDatauploadtype() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); .asXmlElement("datauploadtype", ds.getDatauploadtype()));
} }
if (ds.getDatabaseaccessrestriction() != null) { if (ds.getDatabaseaccessrestriction() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement( .asXmlElement(
"databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue())); "databaseaccessrestriction", ds.getDatabaseaccessrestriction()));
} }
if (ds.getDatauploadrestriction() != null) { if (ds.getDatauploadrestriction() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction()));
} }
if (ds.getVersioning() != null) { if (ds.getVersioning() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("versioning", ds.getVersioning().getValue().toString())); .asXmlElement("versioning", ds.getVersioning().toString()));
} }
if (ds.getVersioncontrol() != null) { if (ds.getVersioncontrol() != null) {
metadata metadata
@ -736,15 +727,15 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl()));
} }
if (ds.getPidsystems() != null) { if (ds.getPidsystems() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems()));
} }
if (ds.getCertificates() != null) { if (ds.getCertificates() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates()));
} }
if (ds.getPolicies() != null) { if (ds.getPolicies() != null) {
metadata metadata
@ -831,11 +822,11 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("legalshortname", o.getLegalshortname().getValue())); .asXmlElement("legalshortname", o.getLegalshortname()));
} }
if (o.getLegalname() != null) { if (o.getLegalname() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname()));
} }
if (o.getAlternativeNames() != null) { if (o.getAlternativeNames() != null) {
metadata metadata
@ -844,40 +835,40 @@ public class XmlRecordFactory implements Serializable {
.getAlternativeNames() .getAlternativeNames()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (o.getWebsiteurl() != null) { if (o.getWebsiteurl() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl()));
} }
if (o.getLogourl() != null) { if (o.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl()));
} }
if (o.getEclegalbody() != null) { if (o.getEclegalbody() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody()));
} }
if (o.getEclegalperson() != null) { if (o.getEclegalperson() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson()));
} }
if (o.getEcnonprofit() != null) { if (o.getEcnonprofit() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit()));
} }
if (o.getEcresearchorganization() != null) { if (o.getEcresearchorganization() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); .asXmlElement("ecresearchorganization", o.getEcresearchorganization()));
} }
if (o.getEchighereducation() != null) { if (o.getEchighereducation() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("echighereducation", o.getEchighereducation().getValue())); .asXmlElement("echighereducation", o.getEchighereducation()));
} }
if (o.getEcinternationalorganizationeurinterests() != null) { if (o.getEcinternationalorganizationeurinterests() != null) {
metadata metadata
@ -885,28 +876,28 @@ public class XmlRecordFactory implements Serializable {
XmlSerializationUtils XmlSerializationUtils
.asXmlElement( .asXmlElement(
"ecinternationalorganizationeurinterests", "ecinternationalorganizationeurinterests",
o.getEcinternationalorganizationeurinterests().getValue())); o.getEcinternationalorganizationeurinterests()));
} }
if (o.getEcinternationalorganization() != null) { if (o.getEcinternationalorganization() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement( .asXmlElement(
"ecinternationalorganization", o.getEcinternationalorganization().getValue())); "ecinternationalorganization", o.getEcinternationalorganization()));
} }
if (o.getEcenterprise() != null) { if (o.getEcenterprise() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise()));
} }
if (o.getEcsmevalidated() != null) { if (o.getEcsmevalidated() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); .asXmlElement("ecsmevalidated", o.getEcsmevalidated()));
} }
if (o.getEcnutscode() != null) { if (o.getEcnutscode() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode()));
} }
if (o.getCountry() != null) { if (o.getCountry() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
@ -918,39 +909,39 @@ public class XmlRecordFactory implements Serializable {
if (p.getWebsiteurl() != null) { if (p.getWebsiteurl() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl()));
} }
if (p.getCode() != null) { if (p.getCode() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode()));
} }
if (p.getAcronym() != null) { if (p.getAcronym() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym()));
} }
if (p.getTitle() != null) { if (p.getTitle() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle()));
} }
if (p.getStartdate() != null) { if (p.getStartdate() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate()));
} }
if (p.getEnddate() != null) { if (p.getEnddate() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate()));
} }
if (p.getCallidentifier() != null) { if (p.getCallidentifier() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("callidentifier", p.getCallidentifier().getValue())); .asXmlElement("callidentifier", p.getCallidentifier()));
} }
if (p.getKeywords() != null) { if (p.getKeywords() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords()));
} }
if (p.getDuration() != null) { if (p.getDuration() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration()));
} }
if (p.getEcarticle29_3() != null) { if (p.getEcarticle29_3() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3()));
} }
if (p.getSubjects() != null) { if (p.getSubjects() != null) {
metadata metadata
@ -969,16 +960,16 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("oamandatepublications", p.getOamandatepublications().getValue())); .asXmlElement("oamandatepublications", p.getOamandatepublications()));
} }
if (p.getEcsc39() != null) { if (p.getEcsc39() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39()));
} }
if (p.getSummary() != null) { if (p.getSummary() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary()));
} }
if (p.getCurrency() != null) { if (p.getCurrency() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency().getValue())); metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency()));
} }
if (p.getTotalcost() != null) { if (p.getTotalcost() != null) {
metadata metadata
@ -995,7 +986,6 @@ public class XmlRecordFactory implements Serializable {
.getFundingtree() .getFundingtree()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(ft -> ft.getValue())
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -1054,9 +1044,6 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
} }
if (re.getResulttype() != null && re.getResulttype().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) { if (re.getCollectedfrom() != null) {
metadata metadata
.addAll( .addAll(
@ -1081,13 +1068,13 @@ public class XmlRecordFactory implements Serializable {
if (isNotBlank(re.getOfficialname())) { if (isNotBlank(re.getOfficialname())) {
metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
} }
if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { if (re.getDatasourcetype() != null && StringUtils.isNotBlank(re.getDatasourcetype().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype()));
} }
if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
} }
if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { if (re.getOpenairecompatibility() != null && StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
@ -1102,7 +1089,7 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
} }
if (re.getCountry() != null && !re.getCountry().isBlank()) { if (re.getCountry() != null && StringUtils.isNotBlank(re.getCountry().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
} }
break; break;
@ -1116,7 +1103,7 @@ public class XmlRecordFactory implements Serializable {
if (isNotBlank(re.getAcronym())) { if (isNotBlank(re.getAcronym())) {
metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
} }
if (re.getContracttype() != null && !re.getContracttype().isBlank()) { if (re.getContracttype() != null && StringUtils.isNotBlank(re.getContracttype().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype())); metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype()));
} }
if (re.getFundingtree() != null && contexts != null) { if (re.getFundingtree() != null && contexts != null) {
@ -1126,7 +1113,7 @@ public class XmlRecordFactory implements Serializable {
.getFundingtree() .getFundingtree()
.stream() .stream()
.peek(ft -> fillContextMap(ft, contexts)) .peek(ft -> fillContextMap(ft, contexts))
.map(ft -> getRelFundingTree(ft)) .map(XmlRecordFactory::getRelFundingTree)
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
break; break;
@ -1158,14 +1145,15 @@ public class XmlRecordFactory implements Serializable {
if (rel.getValidated() == null) { if (rel.getValidated() == null) {
rel.setValidated(false); rel.setValidated(false);
} }
final DataInfo dataInfo = Optional.ofNullable(rel.getProvenance()).map(p -> p.get(0).getDataInfo()).orElse(null);
return templateFactory return templateFactory
.getRel( .getRel(
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, dataInfo, rel.getValidated(),
rel.getValidationDate()); rel.getValidationDate());
} }
private List<String> listChildren( private List<String> listChildren(
final OafEntity entity, final Entity entity,
final JoinedEntity je, final JoinedEntity je,
final TemplateFactory templateFactory) { final TemplateFactory templateFactory) {
@ -1191,7 +1179,7 @@ public class XmlRecordFactory implements Serializable {
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> { groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
final List<String> fields = Lists.newArrayList(); final List<String> fields = Lists.newArrayList();
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { if (instance.getAccessright() != null && StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
fields fields
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
} }
@ -1232,7 +1220,7 @@ public class XmlRecordFactory implements Serializable {
instance instance
.getInstancetype() .getInstancetype()
.stream() .stream()
.filter(t -> !t.isBlank()) .filter(t -> StringUtils.isNotBlank(t.getClassid()))
.map(t -> XmlSerializationUtils.mapQualifier("instancetype", t)) .map(t -> XmlSerializationUtils.mapQualifier("instancetype", t))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -1242,7 +1230,7 @@ public class XmlRecordFactory implements Serializable {
instance instance
.getDistributionlocation() .getDistributionlocation()
.stream() .stream()
.filter(d -> isNotBlank(d)) .filter(StringUtils::isNotBlank)
.map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d)) .map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -1430,10 +1418,10 @@ public class XmlRecordFactory implements Serializable {
instance.getInstancetype().add(i.getInstancetype()); instance.getInstancetype().add(i.getInstancetype());
instance instance
.setProcessingchargeamount( .setProcessingchargeamount(
Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); Optional.ofNullable(i.getProcessingchargeamount()).orElse(null));
instance instance
.setProcessingchargecurrency( .setProcessingchargecurrency(
Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); Optional.ofNullable(i.getProcessingchargecurrency()).orElse(null));
Optional Optional
.ofNullable(i.getPid()) .ofNullable(i.getPid())
.ifPresent(pid -> instance.getPid().addAll(pid)); .ifPresent(pid -> instance.getPid().addAll(pid));
@ -1442,17 +1430,17 @@ public class XmlRecordFactory implements Serializable {
.ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId)); .ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId));
Optional Optional
.ofNullable(i.getDateofacceptance()) .ofNullable(i.getDateofacceptance())
.ifPresent(d -> instance.getDateofacceptance().add(d.getValue())); .ifPresent(d -> instance.getDateofacceptance().add(d));
Optional Optional
.ofNullable(i.getLicense()) .ofNullable(i.getLicense())
.ifPresent(license -> instance.getLicense().add(license.getValue())); .ifPresent(license -> instance.getLicense().add(license.getUrl()));
Optional Optional
.ofNullable(i.getDistributionlocation()) .ofNullable(i.getDistributionlocation())
.ifPresent(dl -> instance.getDistributionlocation().add(dl)); .ifPresent(dl -> instance.getDistributionlocation().add(dl));
}); });
if (instance.getHostedby().size() > 1 if (instance.getHostedby().size() > 1
&& instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) { && instance.getHostedby().stream().anyMatch(ModelConstants.UNKNOWN_REPOSITORY::equals)) {
instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY); instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY);
} }
@ -1463,7 +1451,7 @@ public class XmlRecordFactory implements Serializable {
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
} }
private List<String> listExtraInfo(final OafEntity entity) { private List<String> listExtraInfo(final Entity entity) {
final List<ExtraInfo> extraInfo = entity.getExtraInfo(); final List<ExtraInfo> extraInfo = entity.getExtraInfo();
return extraInfo != null return extraInfo != null
? extraInfo ? extraInfo

View File

@ -10,6 +10,7 @@ import java.util.List;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import scala.Tuple2; import scala.Tuple2;
public class XmlSerializationUtils { public class XmlSerializationUtils {
@ -49,7 +50,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) { public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement( return asXmlElement(
name, t.getValue(), t.getQualifier(), t.getDataInfo()); name, t.getValue(), t.getQualifier());
} }
public static String mapQualifier(String name, Qualifier q) { public static String mapQualifier(String name, Qualifier q) {
@ -66,7 +67,7 @@ public class XmlSerializationUtils {
.replaceAll(XML_10_PATTERN, ""); .replaceAll(XML_10_PATTERN, "");
} }
public static String parseDataInfo(final DataInfo dataInfo) { public static String parseDataInfo(final EntityDataInfo dataInfo) {
return new StringBuilder() return new StringBuilder()
.append("<datainfo>") .append("<datainfo>")
.append(asXmlElement("inferred", dataInfo.getInferred() + "")) .append(asXmlElement("inferred", dataInfo.getInferred() + ""))
@ -106,6 +107,12 @@ public class XmlSerializationUtils {
return asXmlElement(name, value, null, null); return asXmlElement(name, value, null, null);
} }
public static String asXmlElement(
final String name, final String value, final Qualifier q) {
return asXmlElement(name, value, q, null);
}
public static String asXmlElement( public static String asXmlElement(
final String name, final String value, final Qualifier q, final DataInfo info) { final String name, final String value, final Qualifier q, final DataInfo info) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -125,7 +132,7 @@ public class XmlSerializationUtils {
info.getProvenanceaction() != null info.getProvenanceaction() != null
? info.getProvenanceaction().getClassid() ? info.getProvenanceaction().getClassid()
: "")) : ""))
.append(attr("trust", info.getTrust())); .append(attr("trust", Float.toString(info.getTrust())));
} }
if (isBlank(value)) { if (isBlank(value)) {
sb.append("/>"); sb.append("/>");
@ -142,14 +149,13 @@ public class XmlSerializationUtils {
} }
public static String getAttributes(final Qualifier q) { public static String getAttributes(final Qualifier q) {
if (q == null || q.isBlank()) if (q == null || StringUtils.isBlank(q.getClassid()))
return ""; return "";
return new StringBuilder(" ") return new StringBuilder(" ")
.append(attr("classid", q.getClassid())) .append(attr("classid", q.getClassid()))
.append(attr("classname", q.getClassname())) .append(attr("classname", q.getClassname()))
.append(attr("schemeid", q.getSchemeid())) .append(attr("schemeid", q.getSchemeid()))
.append(attr("schemename", q.getSchemename()))
.toString(); .toString();
} }

View File

@ -25,7 +25,6 @@
<modules> <modules>
<module>dhp-workflow-profiles</module> <module>dhp-workflow-profiles</module>
<module>dhp-aggregation</module> <module>dhp-aggregation</module>
<module>dhp-distcp</module>
<module>dhp-actionmanager</module> <module>dhp-actionmanager</module>
<module>dhp-graph-mapper</module> <module>dhp-graph-mapper</module>
<module>dhp-dedup-openaire</module> <module>dhp-dedup-openaire</module>