WIP: refactoring the internal graph data model and its utilities

This commit is contained in:
Claudio Atzori 2023-02-06 13:45:21 +01:00
parent 67735f7e9d
commit d9c9482a5b
53 changed files with 587 additions and 836 deletions

View File

@ -320,7 +320,7 @@ public class ModelSupport {
return String.format("%s.%s", dbName, tableName);
}
public static <T extends Entity> String tableIdentifier(String dbName, Class<T> clazz) {
public static <T extends Oaf> String tableIdentifier(String dbName, Class<T> clazz) {
checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");

View File

@ -415,6 +415,17 @@ public class OafMapperUtils {
source, target, relType, subRelType, relClass, provenance, null, null);
}
/**
 * Builds a {@link Relation} between the given source and target identifiers, delegating to the
 * full-argument overload with a null validation date.
 *
 * @param source     identifier of the relation source entity
 * @param target     identifier of the relation target entity
 * @param relType    the relation type
 * @param subRelType the relation subtype
 * @param relClass   the relation class
 * @param provenance the provenance records associated with the relation
 * @param properties additional key/value properties attached to the relation
 * @return the assembled {@link Relation}
 */
public static Relation getRelation(final String source,
final String target,
final String relType,
final String subRelType,
final String relClass,
final List<Provenance> provenance,
final List<KeyValue> properties) {
// delegate with a null 8th argument (validation date not provided by this overload)
return getRelation(
source, target, relType, subRelType, relClass, provenance, null, properties);
}
public static Relation getRelation(final String source,
final String target,
final String relType,

View File

@ -13,6 +13,8 @@ import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -25,8 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class ReadBlacklistFromDB implements Closeable {

View File

@ -4,12 +4,11 @@ package eu.dnetlib.dhp.blacklist;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
public class BlacklistRelationTest {
@Test

View File

@ -68,7 +68,6 @@ public class PrepareRelatedDatasetsJob {
final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -70,7 +70,6 @@ public class PrepareRelatedProjectsJob {
final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -69,7 +69,6 @@ public class PrepareRelatedPublicationsJob {
final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -73,7 +73,6 @@ public class PrepareRelatedSoftwaresJob {
final Dataset<Relation> rels;
rels = ClusterUtils
.loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))

View File

@ -8,7 +8,7 @@ import java.util.Set;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class BrokerConstants {

View File

@ -1,12 +1,7 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -105,16 +100,16 @@ public class ConversionUtils {
res.setOpenaireId(cleanOpenaireId(result.getId()));
res.setOriginalId(first(result.getOriginalId()));
res.setTypology(classId(result.getResulttype()));
res.setTypology(result.getResulttype());
res.setTitles(structPropList(result.getTitle()));
res.setAbstracts(fieldList(result.getDescription()));
res.setAbstracts(result.getDescription());
res.setLanguage(classId(result.getLanguage()));
res.setSubjects(subjectList(result.getSubject()));
res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
res.setPublicationdate(fieldValue(result.getDateofacceptance()));
res.setPublisher(fieldValue(result.getPublisher()));
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
res.setContributor(fieldList(result.getContributor()));
res.setPublicationdate(result.getDateofacceptance());
res.setPublisher(result.getPublisher().getName());
res.setEmbargoenddate(result.getEmbargoenddate());
res.setContributor(result.getContributor());
res
.setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
@ -210,10 +205,9 @@ public class ConversionUtils {
final OaBrokerProject res = new OaBrokerProject();
res.setOpenaireId(cleanOpenaireId(p.getId()));
res.setTitle(fieldValue(p.getTitle()));
res.setAcronym(fieldValue(p.getAcronym()));
res.setCode(fieldValue(p.getCode()));
res.setTitle(p.getTitle());
res.setAcronym(p.getAcronym());
res.setCode(p.getCode());
final String ftree = fieldValue(p.getFundingtree());
if (StringUtils.isNotBlank(ftree)) {
try {
@ -238,7 +232,7 @@ public class ConversionUtils {
res.setOpenaireId(cleanOpenaireId(sw.getId()));
res.setName(structPropValue(sw.getTitle()));
res.setDescription(fieldValue(sw.getDescription()));
res.setRepository(fieldValue(sw.getCodeRepositoryUrl()));
res.setRepository(sw.getCodeRepositoryUrl());
res.setLandingPage(fieldValue(sw.getDocumentationUrl()));
return res;
@ -250,7 +244,7 @@ public class ConversionUtils {
}
final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
res.setName(StringUtils.defaultIfBlank(ds.getOfficialname(), ds.getEnglishname()));
res.setOpenaireId(cleanOpenaireId(ds.getId()));
res.setType(classId(ds.getDatasourcetype()));
return res;
@ -264,13 +258,14 @@ public class ConversionUtils {
return kv != null ? kv.getValue() : null;
}
/**
 * Null-safe accessor: unwraps the value of the given {@link Field}, or returns null when the
 * field itself is null.
 */
private static String fieldValue(final Field<String> f) {
return f != null ? f.getValue() : null;
}
private static String fieldValue(final List<Field<String>> fl) {
return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
: null;
/**
 * Returns the first non-blank value from the given list, or {@code null} when the list is null
 * or contains no non-blank entry.
 *
 * @param fl the candidate values, may be null
 * @return the first non-blank value, or null
 */
private static String fieldValue(final List<String> fl) {
	// Simplified from an Optional.ofNullable(fl).map(f -> fl.stream()...) chain whose lambda
	// ignored its own parameter (capturing fl instead of using f) and which stacked a redundant
	// outer orElse(null) on top of the stream's own orElse(null). A plain null guard plus the
	// stream expresses the same contract directly.
	if (fl == null) {
		return null;
	}
	return fl
		.stream()
		.filter(StringUtils::isNotBlank)
		.findFirst()
		.orElse(null);
}
private static String classId(final Qualifier q) {
@ -283,18 +278,6 @@ public class ConversionUtils {
: null;
}
private static List<String> fieldList(final List<Field<String>> fl) {
return fl != null
? fl
.stream()
.map(Field::getValue)
.map(s -> StringUtils.abbreviate(s, BrokerConstants.MAX_STRING_SIZE))
.filter(StringUtils::isNotBlank)
.limit(BrokerConstants.MAX_LIST_SIZE)
.collect(Collectors.toList())
: new ArrayList<>();
}
private static List<String> structPropList(final List<StructuredProperty> props) {
return props != null
? props

View File

@ -27,7 +27,6 @@ class ConversionUtilsTest {
qf.setClassid("test");
qf.setClassname("test");
qf.setSchemeid("test");
qf.setSchemename("test");
final StructuredProperty sp1 = new StructuredProperty();
sp1.setValue("1");

View File

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Minimal Maven module descriptor: declares the dhp-distcp artifact, inheriting all build
     configuration from the dhp-workflows parent POM. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-distcp</artifactId>
</project>

View File

@ -1,18 +0,0 @@
<!-- Default Oozie workflow properties: Hadoop endpoints and the remote source name node used
     by the distcp workflow. -->
<configuration>
<!-- YARN ResourceManager used to schedule the workflow actions -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<!-- HDFS name node of the local (destination) cluster -->
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- WebHDFS endpoint of the remote (source) cluster to copy from -->
<property>
<name>sourceNN</name>
<value>webhdfs://namenode2.hadoop.dm.openaire.eu:50071</value>
</property>
<!-- make the Oozie sharelib available to the workflow actions -->
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,46 +0,0 @@
<!-- Oozie workflow with a single distcp action: copies ${sourcePath} from the remote name node
     ${sourceNN} to ${targetPath} on the local cluster's ${nameNode}. -->
<workflow-app name="distcp" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourceNN</name>
<description>the source name node</description>
</property>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>targetPath</name>
<description>the target path</description>
</property>
<!-- map-task memory for the copy; default 6144 MB -->
<property>
<name>hbase_dump_distcp_memory_mb</name>
<value>6144</value>
<description>memory for distcp action copying InfoSpace dump from remote cluster</description>
</property>
<!-- parallelism of the copy; default 1 simultaneous map -->
<property>
<name>hbase_dump_distcp_num_maps</name>
<value>1</value>
<description>maximum number of simultaneous copies of InfoSpace dump from remote location</description>
</property>
</parameters>
<start to="distcp"/>
<!-- terminal failure node: surfaces the last error message from the failing action -->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="distcp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>-Dmapreduce.map.memory.mb=${hbase_dump_distcp_memory_mb}</arg>
<!-- -pb: preserve block size of the copied files -->
<arg>-pb</arg>
<arg>-m ${hbase_dump_distcp_num_maps}</arg>
<arg>${sourceNN}/${sourcePath}</arg>
<arg>${nameNode}/${targetPath}</arg>
</distcp>
<ok to="End" />
<error to="Kill" />
</action>
<end name="End"/>
</workflow-app>

View File

@ -22,7 +22,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -61,7 +61,7 @@ public class CleanGraphSparkJob {
String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);

View File

@ -74,7 +74,6 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
final Country c = o;
if (StringUtils.isBlank(c.getSchemeid())) {
c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
}
cleanQualifier(vocabularies, c);
}

View File

@ -91,8 +91,7 @@ public class GetDatasourceFromCountry implements Serializable {
(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
Encoders.bean(Relation.class))
.filter(
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) &&
!rel.getDataInfo().getDeletedbyinference());
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
organization
.joinWith(relation, organization.col("id").equalTo(relation.col("target")))

View File

@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
@ -17,7 +18,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport;
public class GraphHiveImporterJob {

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.oa.graph.hive;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.tableIdentifier;
import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.tableIdentifier;
import java.util.Optional;

View File

@ -10,11 +10,11 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.*;
@ -26,11 +26,9 @@ import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
public abstract class AbstractMdRecordToOafMapper {
@ -49,9 +47,9 @@ public abstract class AbstractMdRecordToOafMapper {
protected static final Qualifier ORCID_PID_TYPE = qualifier(
ModelConstants.ORCID_PENDING,
ModelConstants.ORCID_CLASSNAME,
DNET_PID_TYPES, DNET_PID_TYPES);
DNET_PID_TYPES);
protected static final Qualifier MAG_PID_TYPE = qualifier(
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES);
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
@ -122,7 +120,7 @@ public abstract class AbstractMdRecordToOafMapper {
return Lists.newArrayList();
}
final DataInfo info = prepareDataInfo(doc, invisible);
final EntityDataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
@ -171,10 +169,10 @@ public abstract class AbstractMdRecordToOafMapper {
final String type,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
final EntityDataInfo info,
final long lastUpdateTimestamp) {
final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final Entity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final Set<String> originalId = Sets.newHashSet(entity.getOriginalId());
originalId.add(entity.getId());
@ -202,11 +200,11 @@ public abstract class AbstractMdRecordToOafMapper {
return oafs;
}
private OafEntity createEntity(final Document doc,
private Entity createEntity(final Document doc,
final String type,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
final EntityDataInfo info,
final long lastUpdateTimestamp) {
switch (type.toLowerCase()) {
case "publication":
@ -217,37 +215,36 @@ public abstract class AbstractMdRecordToOafMapper {
case "dataset":
final Dataset d = new Dataset();
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info));
d.setSize(prepareDatasetSize(doc, info));
d.setVersion(prepareDatasetVersion(doc, info));
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
d.setGeolocation(prepareDatasetGeoLocations(doc, info));
d.setStoragedate(prepareDatasetStorageDate(doc));
d.setDevice(prepareDatasetDevice(doc));
d.setSize(prepareDatasetSize(doc));
d.setVersion(prepareDatasetVersion(doc));
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc));
d.setGeolocation(prepareDatasetGeoLocations(doc));
return d;
case "software":
final Software s = new Software();
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc));
return s;
case "":
case "otherresearchproducts":
default:
final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
o.setTool(prepareOtherResearchProductTools(doc, info));
o.setContactperson(prepareOtherResearchProductContactPersons(doc));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc));
o.setTool(prepareOtherResearchProductTools(doc));
return o;
}
}
private List<Oaf> addProjectRels(
final Document doc,
final OafEntity entity) {
final Entity entity) {
final List<Oaf> res = new ArrayList<>();
@ -277,7 +274,7 @@ public abstract class AbstractMdRecordToOafMapper {
return res;
}
private List<Oaf> addRelations(Document doc, OafEntity entity) {
private List<Oaf> addRelations(Document doc, Entity entity) {
final List<Oaf> rels = Lists.newArrayList();
@ -322,14 +319,14 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<Oaf> addOtherResultRels(
final Document doc,
final OafEntity entity);
final Entity entity);
private void populateResultFields(
final Result r,
final Document doc,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
final EntityDataInfo info,
final long lastUpdateTimestamp) {
r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp);
@ -345,24 +342,24 @@ public abstract class AbstractMdRecordToOafMapper {
r.setLanguage(prepareLanguages(doc));
r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setSubject(prepareSubjects(doc, info));
r.setTitle(prepareTitles(doc, info));
r.setRelevantdate(prepareRelevantDates(doc, info));
r.setDescription(prepareDescriptions(doc, info));
r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info));
r.setPublisher(preparePublisher(doc, info));
r.setEmbargoenddate(prepareField(doc, "//oaf:embargoenddate", info));
r.setSource(prepareSources(doc, info));
r.setFulltext(prepareListFields(doc, "//oaf:fulltext", info));
r.setFormat(prepareFormats(doc, info));
r.setContributor(prepareContributors(doc, info));
r.setResourcetype(prepareResourceType(doc, info));
r.setCoverage(prepareCoverages(doc, info));
r.setTitle(prepareTitles(doc));
r.setRelevantdate(prepareRelevantDates(doc));
r.setDescription(prepareDescriptions(doc));
r.setDateofacceptance(doc.valueOf( "//oaf:dateAccepted"));
r.setPublisher(preparePublisher(doc));
r.setEmbargoenddate(doc.valueOf("//oaf:embargoenddate"));
r.setSource(prepareSources(doc));
r.setFulltext(prepareListString(doc, "//oaf:fulltext"));
r.setFormat(prepareFormats(doc));
r.setContributor(prepareContributors(doc));
r.setResourcetype(prepareResourceType(doc));
r.setCoverage(prepareCoverages(doc));
r.setContext(prepareContexts(doc, info));
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
r
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
.setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
r.setInstance(instances);
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
@ -404,7 +401,7 @@ public abstract class AbstractMdRecordToOafMapper {
return Lists.newArrayList(set);
}
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
protected abstract Qualifier prepareResourceType(Document doc);
protected abstract List<Instance> prepareInstances(
Document doc,
@ -412,21 +409,21 @@ public abstract class AbstractMdRecordToOafMapper {
KeyValue collectedfrom,
KeyValue hostedby);
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
protected abstract List<String> prepareSources(Document doc);
protected abstract List<StructuredProperty> prepareRelevantDates(Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareRelevantDates(Document doc);
protected abstract List<Field<String>> prepareCoverages(Document doc, DataInfo info);
protected abstract List<String> prepareCoverages(Document doc);
protected abstract List<Field<String>> prepareContributors(Document doc, DataInfo info);
protected abstract List<String> prepareContributors(Document doc);
protected abstract List<Field<String>> prepareFormats(Document doc, DataInfo info);
protected abstract List<String> prepareFormats(Document doc);
protected abstract Field<String> preparePublisher(Document doc, DataInfo info);
protected abstract Publisher preparePublisher(Document doc);
protected abstract List<Field<String>> prepareDescriptions(Document doc, DataInfo info);
protected abstract List<String> prepareDescriptions(Document doc);
protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareTitles(Document doc);
protected abstract List<Subject> prepareSubjects(Document doc, DataInfo info);
@ -434,41 +431,31 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductTools(
Document doc,
DataInfo info);
protected abstract List<String> prepareOtherResearchProductTools(Document doc);
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
Document doc,
DataInfo info);
protected abstract List<String> prepareOtherResearchProductContactGroups(Document doc);
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
Document doc,
DataInfo info);
protected abstract List<String> prepareOtherResearchProductContactPersons(Document doc);
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc);
protected abstract Field<String> prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info);
protected abstract String prepareSoftwareCodeRepositoryUrl(Document doc);
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
protected abstract List<String> prepareSoftwareDocumentationUrls(Document doc);
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
Document doc,
DataInfo info);
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc);
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
protected abstract String prepareDatasetMetadataVersionNumber(Document doc);
protected abstract Field<String> prepareDatasetMetadataVersionNumber(Document doc, DataInfo info);
protected abstract String prepareDatasetLastMetadataUpdate(Document doc);
protected abstract Field<String> prepareDatasetLastMetadataUpdate(Document doc, DataInfo info);
protected abstract String prepareDatasetVersion(Document doc);
protected abstract Field<String> prepareDatasetVersion(Document doc, DataInfo info);
protected abstract String prepareDatasetSize(Document doc);
protected abstract Field<String> prepareDatasetSize(Document doc, DataInfo info);
protected abstract String prepareDatasetDevice(Document doc);
protected abstract Field<String> prepareDatasetDevice(Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
protected abstract String prepareDatasetStorageDate(Document doc);
private Journal prepareJournal(final Document doc, final DataInfo info) {
final Node n = doc.selectSingleNode("//oaf:journal");
@ -514,7 +501,6 @@ public abstract class AbstractMdRecordToOafMapper {
accessRight.setClassid(qualifier.getClassid());
accessRight.setClassname(qualifier.getClassname());
accessRight.setSchemeid(qualifier.getSchemeid());
accessRight.setSchemename(qualifier.getSchemename());
// TODO set the OAStatus
@ -541,7 +527,7 @@ public abstract class AbstractMdRecordToOafMapper {
final Node n = (Node) o;
final String classId = n.valueOf(xpathClassId).trim();
if (vocs.termExists(schemeId, classId)) {
res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info));
res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId)));
}
}
return res;
@ -550,28 +536,11 @@ public abstract class AbstractMdRecordToOafMapper {
protected List<StructuredProperty> prepareListStructProps(
final Node node,
final String xpath,
final Qualifier qualifier,
final DataInfo info) {
final Qualifier qualifier) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
res.add(structuredProperty(n.getText(), qualifier, info));
}
return res;
}
protected List<StructuredProperty> prepareListStructProps(
final Node node,
final String xpath,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
res
.add(
structuredProperty(
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@schemename"), info));
res.add(structuredProperty(n.getText(), qualifier));
}
return res;
}
@ -583,11 +552,10 @@ public abstract class AbstractMdRecordToOafMapper {
final List<Subject> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
Qualifier qualifier = qualifier(n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"));
res
.add(
subject(
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@schemename"), info));
subject(n.getText(), qualifier, info));
}
return res;
}
@ -609,37 +577,31 @@ public abstract class AbstractMdRecordToOafMapper {
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
}
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
protected EntityDataInfo prepareDataInfo(final Document doc, final boolean invisible) {
final Node n = doc.selectSingleNode("//oaf:datainfo");
if (n == null) {
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
return dataInfo(false, false, 0.9f, null, false, REPOSITORY_PROVENANCE_ACTIONS);
}
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename");
final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference"));
final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance");
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
final String trust = n.valueOf("./oaf:trust");
final Float trust = Float.parseFloat(n.valueOf("./oaf:trust"));
return dataInfo(
deletedbyinference, inferenceprovenance, inferred, invisible,
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
final Qualifier pAction = qualifier(paClassId, paClassName, paSchemeId);
return dataInfo(invisible, deletedbyinference, trust, inferenceprovenance, inferred, pAction);
}
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
return field(node.valueOf(xpath), info);
}
protected List<Field<String>> prepareListFields(
protected List<String> prepareListFields(
final Node node,
final String xpath,
final DataInfo info) {
return listFields(info, prepareListString(node, xpath));
final String xpath) {
return prepareListString(node, xpath);
}
protected List<String> prepareListString(final Node node, final String xpath) {

View File

@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -18,7 +19,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
public class DispatchEntitiesApplication {

View File

@ -9,6 +9,8 @@ import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
@ -16,11 +18,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -29,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@ -139,7 +136,7 @@ public class GenerateEntitiesApplication {
save(
inputRdd
.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
.reduceByKey(OafMapperUtils::merge)
.reduceByKey(MergeUtils::merge)
.map(Tuple2::_2),
targetPath);
break;

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
@ -26,7 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;

View File

@ -15,6 +15,7 @@ import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@ -28,23 +29,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction;
import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@ -52,12 +37,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
private static final DataInfo DATA_INFO_CLAIM = dataInfo(
false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
"0.9");
private static final EntityDataInfo ENTITY_DATA_INFO_CLAIM = dataInfo(
false, false, 0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS));
private static final DataInfo REL_DATA_INFO_CLAIM = dataInfo(
0.9f, null, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS));
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
private final static List<Provenance> PROVENANCE_CLAIM = getProvenance(COLLECTED_FROM_CLAIM, ENTITY_DATA_INFO_CLAIM);
public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type";
@ -207,7 +196,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processService(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final EntityDataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource();
@ -220,46 +209,45 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
.filter(StringUtils::isNotBlank)
.collect(Collectors.toList()));
ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
ds.setPid(prepareListOfStructProps(rs.getArray("pid")));
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
ds.setOaiprovenance(null); // Values not present in the DB
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(field(rs.getString("officialname"), info));
ds.setEnglishname(field(rs.getString("englishname"), info));
ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
ds.setLogourl(field(rs.getString("logourl"), info));
ds.setContactemail(field(rs.getString("contactemail"), info));
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
ds.setDescription(field(rs.getString("description"), info));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setOfficialname(rs.getString("officialname"));
ds.setEnglishname(rs.getString("englishname"));
ds.setWebsiteurl(rs.getString("websiteurl"));
ds.setLogourl(rs.getString("logourl"));
ds.setContactemail(rs.getString("contactemail"));
ds.setNamespaceprefix(rs.getString("namespaceprefix"));
ds.setLatitude(Double.toString(rs.getDouble("latitude")));
ds.setLongitude(Double.toString(rs.getDouble("longitude")));
ds.setDateofvalidation(asString(rs.getDate("dateofvalidation")));
ds.setDescription(rs.getString("description"));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects")));
ds.setOdnumberofitems(Double.toString(rs.getInt("odnumberofitems")));
ds.setOdnumberofitemsdate(asString(rs.getDate("odnumberofitemsdate")));
ds.setOdpolicies(rs.getString("odpolicies"));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages")));
ds.setLanguages(listValues(rs.getArray("languages")));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
ds.setVersioning(field(rs.getBoolean("versioning"), info));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage")));
ds.setReleasestartdate(asString(rs.getDate("releasestartdate")));
ds.setReleaseenddate(asString(rs.getDate("releaseenddate")));
ds.setMissionstatementurl(rs.getString("missionstatementurl"));
ds.setDatabaseaccesstype(rs.getString("databaseaccesstype"));
ds.setDatauploadtype(rs.getString("datauploadtype"));
ds.setDatabaseaccessrestriction(rs.getString("databaseaccessrestriction"));
ds.setDatauploadrestriction(rs.getString("datauploadrestriction"));
ds.setVersioning(rs.getBoolean("versioning"));
ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
ds.setCitationguidelineurl(rs.getString("citationguidelineurl"));
ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setCertificates(field(rs.getString("certificates"), info));
ds.setPidsystems(rs.getString("pidsystems"));
ds.setCertificates(rs.getString("certificates"));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds
.setJournal(
@ -306,7 +294,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processProject(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final EntityDataInfo info = prepareDataInfo(rs);
final Project p = new Project();
@ -321,32 +309,31 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
p.setOaiprovenance(null); // Values not present in the DB
p.setWebsiteurl(field(rs.getString("websiteurl"), info));
p.setCode(field(rs.getString("code"), info));
p.setAcronym(field(rs.getString("acronym"), info));
p.setTitle(field(rs.getString("title"), info));
p.setStartdate(field(asString(rs.getDate("startdate")), info));
p.setEnddate(field(asString(rs.getDate("enddate")), info));
p.setCallidentifier(field(rs.getString("callidentifier"), info));
p.setKeywords(field(rs.getString("keywords"), info));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setWebsiteurl(rs.getString("websiteurl"));
p.setCode(rs.getString("code"));
p.setAcronym(rs.getString("acronym"));
p.setTitle(rs.getString("title"));
p.setStartdate(asString(rs.getDate("startdate")));
p.setEnddate(asString(rs.getDate("enddate")));
p.setCallidentifier(rs.getString("callidentifier"));
p.setKeywords(rs.getString("keywords"));
p.setDuration(Integer.toString(rs.getInt("duration")));
p.setEcsc39(Boolean.toString(rs.getBoolean("ecsc39")));
p
.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
.setOamandatepublications(Boolean.toString(rs.getBoolean("oamandatepublications")));
p.setEcarticle29_3(Boolean.toString(rs.getBoolean("ecarticle29_3")));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects")));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree")));
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(field(rs.getString("optional1"), info));
p.setOptional2(field(rs.getString("optional2"), info));
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
p.setContactfullname(field(rs.getString("contactfullname"), info));
p.setContactfax(field(rs.getString("contactfax"), info));
p.setContactphone(field(rs.getString("contactphone"), info));
p.setContactemail(field(rs.getString("contactemail"), info));
p.setSummary(field(rs.getString("summary"), info));
p.setCurrency(field(rs.getString("currency"), info));
p.setOptional1(rs.getString("optional1"));
p.setOptional2(rs.getString("optional2"));
p.setJsonextrainfo(rs.getString("jsonextrainfo"));
p.setContactfullname(rs.getString("contactfullname"));
p.setContactfax(rs.getString("contactfax"));
p.setContactphone(rs.getString("contactphone"));
p.setContactemail(rs.getString("contactemail"));
p.setSummary(rs.getString("summary"));
p.setCurrency(rs.getString("currency"));
p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info);
@ -361,7 +348,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final EntityDataInfo info = prepareDataInfo(rs);
final Organization o = new Organization();
@ -372,31 +359,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
o.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
o.setPid(prepareListOfStructProps(rs.getArray("pid")));
o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
o.setOaiprovenance(null); // Values not present in the DB
o.setLegalshortname(field(rs.getString("legalshortname"), info));
o.setLegalname(field(rs.getString("legalname"), info));
o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames"), info));
o.setWebsiteurl(field(rs.getString("websiteurl"), info));
o.setLogourl(field(rs.getString("logourl"), info));
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o.setLegalshortname(rs.getString("legalshortname"));
o.setLegalname(rs.getString("legalname"));
o.setAlternativeNames(prepareListFields(rs.getArray("alternativenames")));
o.setWebsiteurl(rs.getString("websiteurl"));
o.setLogourl(rs.getString("logourl"));
o.setEclegalbody(Boolean.toString(rs.getBoolean("eclegalbody")));
o.setEclegalperson(Boolean.toString(rs.getBoolean("eclegalperson")));
o.setEcnonprofit(Boolean.toString(rs.getBoolean("ecnonprofit")));
o
.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
.setEcresearchorganization(Boolean.toString(rs.getBoolean("ecresearchorganization")));
o.setEchighereducation(Boolean.toString(rs.getBoolean("echighereducation")));
o
.setEcinternationalorganizationeurinterests(
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")));
o
.setEcinternationalorganization(
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
Boolean.toString(rs.getBoolean("ecinternationalorganization")));
o.setEcenterprise(Boolean.toString(rs.getBoolean("ecenterprise")));
o.setEcsmevalidated(Boolean.toString(rs.getBoolean("ecsmevalidated")));
o.setEcnutscode(Boolean.toString(rs.getBoolean("ecnutscode")));
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp);
@ -409,21 +395,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processServiceOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("service"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = OafMapperUtils
.getRelation(
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, collectedFrom, info,
lastUpdateTimestamp);
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, provenance);
final Relation r2 = OafMapperUtils
.getRelation(
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info,
lastUpdateTimestamp);
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
@ -433,12 +419,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public List<Oaf> processProjectOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final DataInfo info = prepareRelDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final List<KeyValue> properties = Lists
.newArrayList(
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
@ -446,13 +434,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Relation r1 = OafMapperUtils
.getRelation(
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, collectedFrom, info,
lastUpdateTimestamp, null, properties);
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, provenance, properties);
final Relation r2 = OafMapperUtils
.getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, collectedFrom, info,
lastUpdateTimestamp, null, properties);
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
@ -469,21 +455,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
if (targetType.equals("dataset")) {
r = new Dataset();
r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
r.setResulttype(DATASET_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("software")) {
r = new Software();
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE.getClassid());
} else if (targetType.equals("other")) {
r = new OtherResearchProduct();
r.setResulttype(ORP_DEFAULT_RESULTTYPE);
r.setResulttype(ORP_DEFAULT_RESULTTYPE.getClassid());
} else {
r = new Publication();
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE.getClassid());
}
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), DATA_INFO_CLAIM));
r.setDataInfo(DATA_INFO_CLAIM);
r.setContext(prepareContext(rs.getString("source_id"), ENTITY_DATA_INFO_CLAIM));
r.setDataInfo(ENTITY_DATA_INFO_CLAIM);
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
return Arrays.asList(r);
@ -493,8 +479,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
Relation r1 = prepareRelation(sourceId, targetId, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, validationDate);
Relation r1 = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, PROVENANCE_CLAIM, validationDate);
final String semantics = rs.getString("semantics");
@ -529,17 +515,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
}
}
private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) {
private Relation prepareRelation(final String sourceId, final String targetId, final List<Provenance> provenance, final String validationDate) {
final Relation r = new Relation();
if (StringUtils.isNotBlank(validationDate)) {
r.setValidated(true);
r.setValidationDate(validationDate);
}
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
r.setProvenance(provenance);
r.setSource(sourceId);
r.setTarget(targetId);
r.setDataInfo(DATA_INFO_CLAIM);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}
@ -558,16 +542,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(context);
}
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
private EntityDataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final double trust = rs.getDouble("trust");
final float trust = (float) rs.getDouble("trust");
return dataInfo(
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION,
String.format("%.3f", trust));
return dataInfo(false, deletedbyinference, trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
}
private DataInfo prepareRelDataInfo(final ResultSet rs) throws SQLException {
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final float trust = (float) rs.getDouble("trust");
return dataInfo(trust, inferenceprovenance, inferred, ENTITYREGISTRY_PROVENANCE_ACTION);
}
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
@ -604,15 +594,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
}
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
private List<String> prepareListFields(final Array array) {
try {
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
return array != null ? listValues(array) : new ArrayList<>();
} catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e);
}
}
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
private StructuredProperty prepareStructProp(final String s) {
if (StringUtils.isBlank(s)) {
return null;
}
@ -621,19 +611,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String value = parts[0];
final String[] arr = parts[1].split("@@@");
if (arr.length == 2) {
return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]));
}
}
return null;
}
private List<StructuredProperty> prepareListOfStructProps(
final Array array,
final DataInfo dataInfo) throws SQLException {
final Array array) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
final StructuredProperty sp = prepareStructProp(s, dataInfo);
final StructuredProperty sp = prepareStructProp(s);
if (sp != null) {
res.add(sp);
}
@ -666,12 +655,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = OafMapperUtils
.getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, collectedFrom, info, lastUpdateTimestamp);
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r2 = OafMapperUtils
.getRelation(
orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, collectedFrom, info, lastUpdateTimestamp);
final Relation r1 = getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance);
final Relation r2 = getRelation(orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, provenance);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
@ -688,12 +676,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
return Arrays
.asList(
OafMapperUtils
.getRelation(
orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, rs.getString("type"), collectedFrom, info,
lastUpdateTimestamp));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final String relClass = rs.getString("type");
return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, relClass, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}
@ -710,12 +696,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
return Arrays
.asList(
OafMapperUtils
.getRelation(
orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, collectedFrom, info,
lastUpdateTimestamp));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}

View File

@ -67,9 +67,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String cleanedId = pid
.replaceAll("http://orcid.org/", "")
.replaceAll("https://orcid.org/", "");
author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
author.getPid().add(authorPid(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) {
author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
author.getPid().add(authorPid(pid, MAG_PID_TYPE, info));
}
}
@ -89,39 +89,36 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info);
protected List<StructuredProperty> prepareTitles(final Document doc) {
return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER);
}
@Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:description", info)
protected List<String> prepareDescriptions(final Document doc) {
return prepareListFields(doc, "//dc:description")
.stream()
.map(d -> {
d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
return d;
})
.map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
.collect(Collectors.toList());
}
@Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
return prepareField(doc, "//dc:publisher", info);
protected Publisher preparePublisher(final Document doc) {
return publisher(doc.valueOf("//dc:publisher"));
}
@Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:format", info);
protected List<String> prepareFormats(final Document doc) {
return prepareListFields(doc, "//dc:format");
}
@Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:contributor", info);
protected List<String> prepareContributors(final Document doc) {
return prepareListFields(doc, "//dc:contributor");
}
@Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:coverage", info);
protected List<String> prepareCoverages(final Document doc) {
return prepareListFields(doc, "//dc:coverage");
}
@Override
@ -147,16 +144,16 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList()));
instance.setPid(pid);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted"));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance
.setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
instance.setLicense(license(doc.valueOf("//oaf:license")));
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
instance
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
instance
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
.setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
final List<String> url = nodes
@ -183,110 +180,90 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:source", info);
protected List<String> prepareSources(final Document doc) {
return prepareListFields(doc, "//dc:source");
}
@Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
protected List<StructuredProperty> prepareRelevantDates(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
// SOFTWARES
@Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc,
final DataInfo info) {
protected String prepareSoftwareCodeRepositoryUrl(
final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc,
final DataInfo info) {
protected List<String> prepareSoftwareDocumentationUrls(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
// DATASETS
@Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc,
final DataInfo info) {
protected String prepareDatasetMetadataVersionNumber(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc,
final DataInfo info) {
protected String prepareDatasetLastMetadataUpdate(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
protected String prepareDatasetVersion(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
protected String prepareDatasetSize(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
protected String prepareDatasetDevice(final Document doc) {
return null; // NOT PRESENT IN OAF
}
@Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
protected String prepareDatasetStorageDate(final Document doc) {
return null; // NOT PRESENT IN OAF
}
// OTHER PRODUCTS
@Override
protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductTools(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductContactGroups(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductContactPersons(final Document doc) {
return new ArrayList<>(); // NOT PRESENT IN OAF
}
@Override
protected List<Oaf> addOtherResultRels(
final Document doc,
final OafEntity entity) {
final Entity entity) {
final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>();
@ -313,7 +290,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
protected Qualifier prepareResourceType(final Document doc) {
return null; // NOT PRESENT IN OAF
}

View File

@ -5,15 +5,13 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationInverse;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
@ -22,12 +20,10 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -44,7 +40,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
protected List<StructuredProperty> prepareTitles(final Document doc) {
final List<StructuredProperty> title = Lists.newArrayList();
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
@ -57,9 +53,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
title
.add(
structuredProperty(
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
titleValue, titleType, titleType, DNET_DATACITE_TITLE));
} else {
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER));
}
}
@ -97,7 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
}
author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
author.setPid(preparePids(n, info));
author.setRank(pos++);
res.add(author);
@ -106,8 +101,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
return res;
}
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
private List<AuthorPid> preparePids(final Node n, final DataInfo info) {
final List<AuthorPid> res = new ArrayList<>();
for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) {
final String id = ((Node) o).getText();
@ -120,9 +115,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
if (type.toLowerCase().startsWith(ORCID)) {
final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", "");
res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
res.add(authorPid(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) {
res.add(structuredProperty(id, MAG_PID_TYPE, info));
res.add(authorPid(id, MAG_PID_TYPE, info));
}
}
return res;
@ -151,16 +146,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList()));
instance.setPid(pid);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDateofacceptance(doc.valueOf("//oaf:dateAccepted"));
final String distributionlocation = doc.valueOf("//oaf:distributionlocation");
instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null);
instance
.setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
instance.setLicense(license(doc.valueOf("//oaf:license")));
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
instance.setProcessingchargeamount(doc.valueOf("//oaf:processingchargeamount"));
instance
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
.setProcessingchargecurrency(doc.valueOf("//oaf:processingchargeamount/@currency"));
final Set<String> url = new HashSet<>();
for (final Object o : doc
@ -218,12 +213,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
protected List<String> prepareSources(final Document doc) {
return new ArrayList<>(); // Not present in ODF ???
}
@Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
protected List<StructuredProperty> prepareRelevantDates(final Document doc) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='date']")) {
final String dateType = ((Node) o).valueOf("@dateType");
@ -235,42 +230,40 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res
.add(
structuredProperty(
((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE, DNET_DATACITE_DATE,
info));
((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE));
} else {
res
.add(
structuredProperty(
((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE, DNET_DATACITE_DATE,
info));
((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE));
}
}
return res;
}
@Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
protected List<String> prepareCoverages(final Document doc) {
return new ArrayList<>(); // Not present in ODF ???
}
@Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//*[local-name()='contributorName']", info);
protected List<String> prepareContributors(final Document doc) {
return prepareListFields(doc, "//*[local-name()='contributorName']");
}
@Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//*[local-name()='format']", info);
protected List<String> prepareFormats(final Document doc) {
return prepareListFields(doc, "//*[local-name()='format']");
}
@Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
return prepareField(doc, "//*[local-name()='publisher']", info);
protected Publisher preparePublisher(final Document doc) {
return publisher(doc.valueOf("//*[local-name()='publisher']"));
}
@Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info);
protected List<String> prepareDescriptions(final Document doc) {
return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']");
}
@Override
@ -284,65 +277,46 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductTools(final Document doc) {
return new ArrayList<>(); // Not present in ODF ???
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductContactGroups(final Document doc) {
return prepareListFields(
doc,
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']",
info);
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']");
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc,
final DataInfo info) {
protected List<String> prepareOtherResearchProductContactPersons(
final Document doc) {
return prepareListFields(
doc,
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']",
info);
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']");
}
@Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc) {
return prepareQualifier(doc, "//*[local-name()='format']", DNET_PROGRAMMING_LANGUAGES);
}
@Override
protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc,
final DataInfo info) {
protected String prepareSoftwareCodeRepositoryUrl(final Document doc) {
return null; // Not present in ODF ???
}
@Override
protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ???
}
@Override
protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc,
final DataInfo info) {
protected List<String> prepareSoftwareDocumentationUrls(final Document doc) {
return prepareListFields(
doc,
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info);
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']");
}
// DATASETS
@Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc) {
final List<GeoLocation> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
@ -356,43 +330,39 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc,
final DataInfo info) {
protected String prepareDatasetMetadataVersionNumber(final Document doc) {
return null; // Not present in ODF ???
}
@Override
protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc,
final DataInfo info) {
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info);
protected String prepareDatasetLastMetadataUpdate(final Document doc) {
return doc.valueOf("//*[local-name()='date' and ./@dateType='Updated']");
}
@Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
return prepareField(doc, "//*[local-name()='version']", info);
protected String prepareDatasetVersion(final Document doc) {
return doc.valueOf("//*[local-name()='version']");
}
@Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
return prepareField(doc, "//*[local-name()='size']", info);
protected String prepareDatasetSize(final Document doc) {
return doc.valueOf("//*[local-name()='size']");
}
@Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
protected String prepareDatasetDevice(final Document doc) {
return null; // Not present in ODF ???
}
@Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info);
protected String prepareDatasetStorageDate(final Document doc) {
return doc.valueOf("//*[local-name()='date' and ./@dateType='Issued']");
}
@Override
protected List<Oaf> addOtherResultRels(
final Document doc,
final OafEntity entity) {
final Entity entity) {
final String docId = entity.getId();
@ -429,7 +399,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
protected List<Oaf> getRelations(final String reltype, final String entityId, final String otherId,
final OafEntity entity) {
final Entity entity) {
final List<Oaf> res = new ArrayList<>();
RelationInverse rel = ModelSupport.findRelation(reltype);
if (rel != null) {
@ -447,7 +417,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
}
@Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
protected Qualifier prepareResourceType(final Document doc) {
return prepareQualifier(
doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE);
}

View File

@ -12,7 +12,7 @@ import com.google.common.base.Splitter;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
@ -38,9 +38,9 @@ public class VerifyNsPrefixPredicate implements Predicate<Oaf> {
@Override
public boolean test(final Oaf oaf) {
if (oaf instanceof Datasource) {
return testValue(((Datasource) oaf).getNamespaceprefix().getValue());
} else if (oaf instanceof OafEntity) {
return testValue(((OafEntity) oaf).getId());
return testValue(((Datasource) oaf).getNamespaceprefix());
} else if (oaf instanceof Entity) {
return testValue(((Entity) oaf).getId());
} else if (oaf instanceof Relation) {
return testValue(((Relation) oaf).getSource()) && testValue(((Relation) oaf).getTarget());
} else {

View File

@ -117,7 +117,7 @@ object SparkProduceHostedByMap {
return getHostedByItemType(
dats.getId,
dats.getOfficialname.getValue,
dats.getOfficialname,
dats.getJournal.getIssnPrinted,
dats.getJournal.getIssnOnline,
dats.getJournal.getIssnLinking,

View File

@ -3,8 +3,8 @@ package eu.dnetlib.dhp.oa.graph.raw
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.common.HdfsSupport
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.Oaf
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}

View File

@ -2,7 +2,8 @@ package eu.dnetlib.dhp.oa.graph.resolution
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.common.EntityType
import eu.dnetlib.dhp.schema.oaf.common.EntityType
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.fs.{FileSystem, Path}
@ -124,7 +125,7 @@ object SparkResolveEntities {
if (b == null)
a._2
else {
a._2.mergeFrom(b._2)
MergeUtils.mergeResult(a._2, b._2)
a._2
}
})

View File

@ -114,7 +114,6 @@ object SparkConvertRDDtoDataset {
val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
.filter(r => filterRelations(r))
//filter OpenCitations relations
@ -142,13 +141,13 @@ object SparkConvertRDDtoDataset {
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
false
else {
if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0)
if (r.getProvenance == null || r.getProvenance.isEmpty)
false
else if (r.getCollectedfrom.size() > 1)
else if (r.getProvenance.size() > 1)
true
else if (
r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase(
r.getCollectedfrom.get(0).getValue
r.getProvenance.size() == 1 && r.getProvenance.get(0) != null && "OpenCitations".equalsIgnoreCase(
r.getProvenance.get(0).getCollectedfrom.getValue
)
)
false

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.sx.graph
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
@ -131,7 +132,7 @@ object SparkCreateInputGraph {
ds.groupByKey(_.getId)
.reduceGroups { (x, y) =>
x.mergeFrom(y)
MergeUtils.mergeResult(x, y)
x
}
.map(_._2)

View File

@ -51,10 +51,7 @@ object SparkCreateScholix {
val relationDS: Dataset[(String, Relation)] = spark.read
.load(relationPath)
.as[Relation]
.filter(r =>
(r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase
.contains("merge")
)
.filter(r => !r.getRelClass.toLowerCase.contains("merge"))
.map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder))
val summaryDS: Dataset[(String, ScholixSummary)] = spark.read

View File

@ -5,6 +5,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.spark.SparkConf;
@ -17,7 +18,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
public class GraphHiveImporterJobTest {

View File

@ -70,8 +70,8 @@ public class GraphCleaningFunctionsTest {
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass()));
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType()));
assertEquals("iis", r_out.getDataInfo().getProvenanceaction().getClassid());
assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname());
assertTrue(r_out.getProvenance().stream().anyMatch(p -> "iis".equals(p.getDataInfo().getProvenanceaction().getClassid())));
assertTrue(r_out.getProvenance().stream().anyMatch(p -> "Inferred by OpenAIRE".equals(p.getDataInfo().getProvenanceaction().getClassname())));
}
}
@ -141,7 +141,7 @@ public class GraphCleaningFunctionsTest {
assertNotNull(p_out);
assertNotNull(p_out.getPublisher());
assertNull(p_out.getPublisher().getValue());
assertNull(p_out.getPublisher().getName());
assertEquals("und", p_out.getLanguage().getClassid());
assertEquals("Undetermined", p_out.getLanguage().getClassname());
@ -216,7 +216,7 @@ public class GraphCleaningFunctionsTest {
assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid());
assertNull(p_out.getPublisher());
assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue());
assertEquals("1970-10-07", p_cleaned.getDateofacceptance());
assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid());
assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname());

View File

@ -9,6 +9,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -26,7 +27,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.DHPUtils;
@ -130,13 +130,13 @@ public class GroupEntitiesSparkJobTest {
assertEquals(
2,
output
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
.map((MapFunction<Result, String>) r -> r.getResulttype(), Encoders.STRING())
.filter((FilterFunction<String>) s -> s.equals("publication"))
.count());
assertEquals(
1,
output
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
.map((MapFunction<Result, String>) r -> r.getResulttype(), Encoders.STRING())
.filter((FilterFunction<String>) s -> s.equals("dataset"))
.count());
}

View File

@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.BeforeEach;
@ -20,7 +21,6 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -72,9 +72,9 @@ class GenerateEntitiesApplicationTest {
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
String resultType) {
final Result merge = OafMapperUtils.mergeResults(publication, dataset);
final Result merge = MergeUtils.mergeResults(publication, dataset);
assertTrue(clazz.isAssignableFrom(merge.getClass()));
assertEquals(resultType, merge.getResulttype().getClassid());
assertEquals(resultType, merge.getResulttype());
}
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)

View File

@ -26,7 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -92,7 +91,6 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname());
assertEquals("Votsi", author.get().getSurname());
assertEquals("Nefta", author.get().getName());
@ -124,7 +122,7 @@ class MappersTest {
assertNotNull(p.getFulltext());
assertEquals(1, p.getFulltext().size());
assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0).getValue());
assertEquals("https://oneecosystem.pensoft.net/article/13718/", p.getFulltext().get(0));
// RESULT PROJECT
List<Relation> resultProject = list
@ -171,9 +169,11 @@ class MappersTest {
private void verifyRelation(Relation r) {
assertValidId(r.getSource());
assertValidId(r.getTarget());
assertValidId(r.getCollectedfrom().get(0).getKey());
assertNotNull(r.getDataInfo());
assertNotNull(r.getDataInfo().getTrust());
for(Provenance p : r.getProvenance()) {
assertValidId(p.getCollectedfrom().getKey());
assertNotNull(p.getDataInfo());
assertNotNull(p.getDataInfo().getTrust());
}
assertTrue(StringUtils.isNotBlank(r.getRelClass()));
assertTrue(StringUtils.isNotBlank(r.getRelType()));
@ -221,7 +221,6 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname());
assertEquals("Votsi", author.get().getSurname());
assertEquals("Nefta", author.get().getName());
@ -326,7 +325,7 @@ class MappersTest {
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
final Optional<StructuredProperty> oPid = author
final Optional<AuthorPid> oPid = author
.get()
.getPid()
.stream()
@ -337,21 +336,10 @@ class MappersTest {
assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid());
assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Baracchini, Theo", author.get().getFullname());
assertEquals("Baracchini", author.get().getSurname());
assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size());
final Optional<Field<String>> opAff = author
.get()
.getAffiliation()
.stream()
.findFirst();
assertTrue(opAff.isPresent());
final Field<String> affiliation = opAff.get();
assertEquals("ISTI-CNR", affiliation.getValue());
assertTrue(d.getSubject().size() > 0);
assertTrue(d.getInstance().size() > 0);
assertTrue(d.getContext().size() > 0);
@ -378,10 +366,13 @@ class MappersTest {
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertNotNull(r1.getProvenance());
assertFalse(r1.getProvenance().isEmpty());
assertNotNull(r1.getProvenance().get(0).getDataInfo());
assertNotNull(r2.getProvenance().get(0).getDataInfo());
assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust());
assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
@ -491,7 +482,6 @@ class MappersTest {
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid());
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename());
assertValidId(d.getId());
assertEquals(2, d.getOriginalId().size());
@ -510,7 +500,7 @@ class MappersTest {
assertNotNull(d.getDescription());
assertEquals(1, d.getDescription().size());
assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue()));
assertTrue(StringUtils.isNotBlank(d.getDescription().get(0)));
assertEquals(1, d.getAuthor().size());
assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname());
@ -524,7 +514,7 @@ class MappersTest {
assertEquals(0, d.getPid().size());
assertNotNull(d.getPublisher());
assertEquals("nct", d.getPublisher().getValue());
assertEquals("nct", d.getPublisher().getName());
assertTrue(d.getSubject().isEmpty());
assertTrue(d.getContext().isEmpty());
@ -536,7 +526,7 @@ class MappersTest {
assertNotNull(i.getAccessright());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename());
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright());
assertEquals("OPEN", i.getAccessright().getClassid());
assertEquals("Open Access", i.getAccessright().getClassname());
@ -552,11 +542,10 @@ class MappersTest {
assertEquals("0037", i.getInstancetype().getClassid());
assertEquals("Clinical Trial", i.getInstancetype().getClassname());
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid());
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename());
assertNull(i.getLicense());
assertNotNull(i.getDateofacceptance());
assertEquals("2014-11-11", i.getDateofacceptance().getValue());
assertEquals("2014-11-11", i.getDateofacceptance());
assertNull(i.getDistributionlocation());
assertNull(i.getProcessingchargeamount());
@ -571,7 +560,7 @@ class MappersTest {
assertEquals("nct", i.getAlternateIdentifier().get(0).getQualifier().getClassid());
assertEquals("ClinicalTrials.gov Identifier", i.getAlternateIdentifier().get(0).getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemename());
assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier());
assertNotNull(i.getUrl());
assertEquals(2, i.getUrl().size());
@ -738,13 +727,13 @@ class MappersTest {
assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid()));
assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()));
assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue());
assertEquals("dataset", p.getResulttype().getClassname());
assertEquals("dataset", p.getResulttype());
assertEquals(1, p.getInstance().size());
assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid());
assertValidId(p.getInstance().get(0).getCollectedfrom().getKey());
assertValidId(p.getInstance().get(0).getHostedby().getKey());
assertEquals(
"http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue());
"http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getUrl());
assertEquals(1, p.getInstance().size());
assertNotNull(p.getInstance().get(0).getAlternateIdentifier());
@ -938,8 +927,8 @@ class MappersTest {
assertTrue(p.getProcessingchargeamount() != null);
assertTrue(p.getProcessingchargecurrency() != null);
assertEquals("1721.47", p.getProcessingchargeamount().getValue());
assertEquals("EUR", p.getProcessingchargecurrency().getValue());
assertEquals("1721.47", p.getProcessingchargeamount());
assertEquals("EUR", p.getProcessingchargecurrency());
}
@Test

View File

@ -51,8 +51,7 @@ class MigrateDbEntitiesApplicationTest {
.thenAnswer(
invocation -> OafMapperUtils
.qualifier(
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
invocation.getArgument(0)));
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0)));
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
@ -80,12 +79,12 @@ class MigrateDbEntitiesApplicationTest {
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname());
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname());
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl());
assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail());
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix());
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
@ -100,37 +99,34 @@ class MigrateDbEntitiesApplicationTest {
assertEquals("Data Source", ds.getEosctype().getClassid());
assertEquals("Data Source", ds.getEosctype().getClassname());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue());
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue());
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude());
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude());
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue());
assertEquals(getValueAsString("description", fields), ds.getDescription());
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
assertEquals("0.0", ds.getOdnumberofitems().getValue());
assertEquals("0.0", ds.getOdnumberofitems());
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
assertEquals(
getValueAsList("odlanguages", fields),
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
ds.getOdlanguages().stream().collect(Collectors.toList()));
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
assertEquals(
getValueAsList("accessinfopackage", fields),
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
ds.getAccessinfopackage().stream().collect(Collectors.toList()));
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
@ -143,7 +139,7 @@ class MigrateDbEntitiesApplicationTest {
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
assertEquals(false, ds.getVersioning().getValue());
assertEquals(false, ds.getVersioning());
assertEquals(false, ds.getVersioncontrol());
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
@ -164,13 +160,6 @@ class MigrateDbEntitiesApplicationTest {
.collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeId.size());
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
HashSet<String> cpSchemeName = ds
.getContentpolicies()
.stream()
.map(Qualifier::getSchemename)
.collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeName.size());
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
assertEquals(2, ds.getContentpolicies().size());
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
@ -202,8 +191,8 @@ class MigrateDbEntitiesApplicationTest {
final Project p = (Project) list.get(0);
assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals(getValueAsString("acronym", fields), p.getAcronym().getValue());
assertEquals(getValueAsString("title", fields), p.getTitle().getValue());
assertEquals(getValueAsString("acronym", fields), p.getAcronym());
assertEquals(getValueAsString("title", fields), p.getTitle());
assertEquals(getValueAsString("collectedfromname", fields), p.getCollectedfrom().get(0).getValue());
assertEquals(getValueAsFloat("fundedamount", fields), p.getFundedamount());
assertEquals(getValueAsFloat("totalcost", fields), p.getTotalcost());
@ -222,13 +211,12 @@ class MigrateDbEntitiesApplicationTest {
final Organization o = (Organization) list.get(0);
assertValidId(o.getId());
assertValidId(o.getCollectedfrom().get(0).getKey());
assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname().getValue());
assertEquals(getValueAsString("legalname", fields), o.getLegalname().getValue());
assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl().getValue());
assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname());
assertEquals(getValueAsString("legalname", fields), o.getLegalname());
assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl());
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassid());
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassname());
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
final List<String> alternativenames = getValueAsList("alternativenames", fields);
assertEquals(2, alternativenames.size());
@ -280,8 +268,12 @@ class MigrateDbEntitiesApplicationTest {
assertValidId(r2.getSource());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getProvenance());
assertFalse(r1.getProvenance().isEmpty());
assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType());
@ -350,10 +342,17 @@ class MigrateDbEntitiesApplicationTest {
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertNotNull(r1.getProvenance());
assertFalse(r1.getProvenance().isEmpty());
assertNotNull(r1.getProvenance().get(0).getDataInfo());
assertNotNull(r1.getProvenance().get(0).getDataInfo().getTrust());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertNotNull(r2.getProvenance().get(0).getDataInfo());
assertNotNull(r2.getProvenance().get(0).getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
@ -361,8 +360,8 @@ class MigrateDbEntitiesApplicationTest {
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
}
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {

View File

@ -8,7 +8,6 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
@ -39,22 +38,17 @@ class VerifyNsPrefixPredicateTest {
@Test
void testTest_ds_true() {
final Field<String> prefix = new Field<>();
prefix.setValue("xxxxxx______");
final Datasource ds = new Datasource();
ds.setNamespaceprefix(prefix);
ds.setNamespaceprefix("xxxxxx______");
assertTrue(predicate.test(ds));
}
@Test
void testTest_ds_false() {
final Field<String> prefix = new Field<>();
prefix.setValue("corda__2020");
final Datasource ds = new Datasource();
ds.setNamespaceprefix(prefix);
ds.setNamespaceprefix("corda__2020");
assertFalse(predicate.test(ds));
}

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.oa.graph.resolution
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.common.EntityType
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import eu.dnetlib.dhp.schema.oaf.common.EntityType
import eu.dnetlib.dhp.schema.oaf.utils.{MergeUtils, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty}
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
@ -61,7 +61,7 @@ class ResolveEntitiesTest extends Serializable {
List(
OafMapperUtils.subject(
FAKE_SUBJECT,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
null
)
).asJava
@ -70,8 +70,7 @@ class ResolveEntitiesTest extends Serializable {
List(
OafMapperUtils.structuredProperty(
FAKE_TITLE,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
null
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema")
)
).asJava
)
@ -247,12 +246,12 @@ class ResolveEntitiesTest extends Serializable {
@Test
def testMerge(): Unit = {
val r = new Result
var r = new Result
r.setSubject(
List(
OafMapperUtils.subject(
FAKE_SUBJECT,
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema"),
null
)
).asJava
@ -269,7 +268,7 @@ class ResolveEntitiesTest extends Serializable {
classOf[Publication]
)
r.mergeFrom(p)
r = MergeUtils.mergeResult(r, p);
println(mapper.writeValueAsString(r))

View File

@ -3,11 +3,14 @@ package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.EntityType;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -26,15 +29,6 @@ import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
import scala.Tuple2;
@ -79,7 +73,7 @@ public class CreateRelatedEntitiesJob_phase1 {
log.info("graphTableClassName: {}", graphTableClassName);
@SuppressWarnings("unchecked")
final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
final Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
final SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@ -91,7 +85,7 @@ public class CreateRelatedEntitiesJob_phase1 {
});
}
private static <E extends OafEntity> void joinRelationEntity(
private static <E extends Entity> void joinRelationEntity(
final SparkSession spark,
final String inputRelationsPath,
final String inputEntityPath,
@ -123,7 +117,7 @@ public class CreateRelatedEntitiesJob_phase1 {
.parquet(outputPath);
}
private static <E extends OafEntity> Dataset<E> readPathEntity(
private static <E extends Entity> Dataset<E> readPathEntity(
final SparkSession spark,
final String inputEntityPath,
final Class<E> entityClazz) {
@ -137,7 +131,7 @@ public class CreateRelatedEntitiesJob_phase1 {
Encoders.bean(entityClazz));
}
public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
public static <E extends Entity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
final RelatedEntity re = new RelatedEntity();
re.setId(entity.getId());
@ -162,8 +156,8 @@ public class CreateRelatedEntitiesJob_phase1 {
re.setTitle(title);
}
re.setDateofacceptance(getValue(result.getDateofacceptance()));
re.setPublisher(getValue(result.getPublisher()));
re.setDateofacceptance(result.getDateofacceptance());
re.setPublisher(Optional.ofNullable(result.getPublisher()).map(p -> p.getName()).orElse(null));
re.setResulttype(result.getResulttype());
if (Objects.nonNull(result.getInstance())) {
re
@ -206,24 +200,23 @@ public class CreateRelatedEntitiesJob_phase1 {
re.setAcronym(getValue(p.getAcronym()));
re.setContracttype(p.getContracttype());
final List<Field<String>> f = p.getFundingtree();
final List<String> f = p.getFundingtree();
if (!f.isEmpty()) {
re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList()));
re.setFundingtree(f);
}
break;
}
return re;
}
private static String getValue(final Field<String> field) {
return getFieldValueWithDefault(field, "");
private static String getValue(final String s) {
return getFieldValueWithDefault(s, "");
}
private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) {
private static <T> T getFieldValueWithDefault(final T f, final T defaultValue) {
return Optional
.ofNullable(f)
.filter(Objects::nonNull)
.map(Field::getValue)
.orElse(defaultValue);
}

View File

@ -8,6 +8,7 @@ import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -28,7 +29,6 @@ import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
import scala.Tuple2;
@ -78,7 +78,7 @@ public class CreateRelatedEntitiesJob_phase2 {
String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@ -94,7 +94,7 @@ public class CreateRelatedEntitiesJob_phase2 {
});
}
private static <E extends OafEntity> void joinEntityWithRelatedEntities(
private static <E extends Entity> void joinEntityWithRelatedEntities(
SparkSession spark,
String relatedEntitiesPath,
String entityPath,
@ -177,7 +177,7 @@ public class CreateRelatedEntitiesJob_phase2 {
}
private static <E extends OafEntity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities(
private static <E extends Entity> Dataset<Tuple2<String, RelatedEntityWrapper>> readRelatedEntities(
SparkSession spark, String inputRelatedEntitiesPath, Class<E> entityClazz) {
log.info("Reading related entities from: {}", inputRelatedEntitiesPath);
@ -200,7 +200,7 @@ public class CreateRelatedEntitiesJob_phase2 {
Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntityWrapper.class)));
}
private static <E extends OafEntity> Dataset<Tuple2<String, E>> readPathEntity(
private static <E extends Entity> Dataset<Tuple2<String, E>> readPathEntity(
SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
log.info("Reading Graph table from: {}", inputEntityPath);
@ -217,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase2 {
Encoders.tuple(Encoders.STRING(), Encoders.kryo(entityClazz)));
}
private static <E extends OafEntity> E pruneOutliers(Class<E> entityClazz, E e) {
private static <E extends Entity> E pruneOutliers(Class<E> entityClazz, E e) {
if (ModelSupport.isSubClass(entityClazz, Result.class)) {
Result r = (Result) e;
if (r.getExternalReference() != null) {
@ -239,14 +239,11 @@ public class CreateRelatedEntitiesJob_phase2 {
r.setAuthor(authors);
}
if (r.getDescription() != null) {
List<Field<String>> desc = r
List<String> desc = r
.getDescription()
.stream()
.filter(Objects::nonNull)
.map(d -> {
d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
return d;
})
.map(d -> StringUtils.left(d, ModelHardLimits.MAX_ABSTRACT_LENGTH))
.collect(Collectors.toList());
r.setDescription(desc);
}

View File

@ -132,7 +132,6 @@ public class PrepareRelationsJob {
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
.filter(rel -> !rel.getDataInfo().getDeletedbyinference())
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
JavaRDD<Relation> pruned = pruneRels(
@ -171,7 +170,6 @@ public class PrepareRelationsJob {
.map(
(MapFunction<String, Relation>) s -> OBJECT_MAPPER.readValue(s, Relation.class),
Encoders.kryo(Relation.class))
.filter((FilterFunction<Relation>) rel -> !rel.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Relation>) rel -> !relationFilter.contains(rel.getRelClass()))
.groupByKey(
(MapFunction<Relation, String>) Relation::getSource,

View File

@ -43,9 +43,7 @@ public class SortableRelation extends Relation implements Comparable<SortableRel
sr.setRelType(r.getRelType());
sr.setSubRelType(r.getSubRelType());
sr.setRelClass(r.getRelClass());
sr.setDataInfo(r.getDataInfo());
sr.setCollectedfrom(r.getCollectedfrom());
sr.setLastupdatetimestamp(r.getLastupdatetimestamp());
sr.setProvenance(r.getProvenance());
sr.setProperties(r.getProperties());
sr.setValidated(r.getValidated());
sr.setValidationDate(r.getValidationDate());

View File

@ -5,9 +5,9 @@ import java.io.Serializable;
import java.util.LinkedList;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Entity;
public class JoinedEntity<E extends OafEntity> implements Serializable {
public class JoinedEntity<E extends Entity> implements Serializable {
private E entity;

View File

@ -7,7 +7,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.provision.RelationList;
import eu.dnetlib.dhp.oa.provision.SortableRelation;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class ProvisionModelSupport {

View File

@ -25,7 +25,7 @@ public class RelatedEntity implements Serializable {
private String publisher;
private List<StructuredProperty> pid;
private String codeRepositoryUrl;
private Qualifier resulttype;
private String resulttype;
private List<KeyValue> collectedfrom;
private List<Instance> instances;
@ -111,11 +111,11 @@ public class RelatedEntity implements Serializable {
this.codeRepositoryUrl = codeRepositoryUrl;
}
public Qualifier getResulttype() {
public String getResulttype() {
return resulttype;
}
public void setResulttype(Qualifier resulttype) {
public void setResulttype(String resulttype) {
this.resulttype = resulttype;
}

View File

@ -19,13 +19,11 @@ public class XmlInstance implements Serializable {
UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN);
UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN);
UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES);
UNKNOWN_REVIEW_LEVEL = new Qualifier();
UNKNOWN_REVIEW_LEVEL.setClassid("0000");
UNKNOWN_REVIEW_LEVEL.setClassname(ModelConstants.UNKNOWN);
UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_REVIEW_LEVELS);
UNKNOWN_REVIEW_LEVEL.setSchemename(ModelConstants.DNET_REVIEW_LEVELS);
}
private String url;

View File

@ -1,25 +1,21 @@
package eu.dnetlib.dhp.oa.provision.utils;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Entity;
import org.apache.commons.lang3.StringUtils;
import org.stringtemplate.v4.ST;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.swing.text.html.Option;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
import org.apache.commons.lang3.StringUtils;
import org.stringtemplate.v4.ST;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory {
@ -62,7 +58,7 @@ public class TemplateFactory {
}
public String buildRecord(
final OafEntity entity, final String schemaLocation, final String body) {
final Entity entity, final String schemaLocation, final String body) {
return getTemplate(resources.getRecord())
.add("id", escapeXml(removePrefix(entity.getId())))
.add("dateofcollection", entity.getDateofcollection())

View File

@ -20,6 +20,7 @@ import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.schema.oaf.common.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
@ -97,7 +98,7 @@ public class XmlRecordFactory implements Serializable {
final Set<String> contexts = Sets.newHashSet();
// final OafEntity entity = toOafEntity(je.getEntity());
final OafEntity entity = je.getEntity();
final Entity entity = je.getEntity();
final TemplateFactory templateFactory = new TemplateFactory();
try {
@ -128,7 +129,7 @@ public class XmlRecordFactory implements Serializable {
}
}
private static OafEntity parseOaf(final String json, final String type) {
private static Entity parseOaf(final String json, final String type) {
try {
switch (EntityType.valueOf(type)) {
case publication:
@ -170,7 +171,7 @@ public class XmlRecordFactory implements Serializable {
private List<String> metadata(
final EntityType type,
final OafEntity entity,
final Entity entity,
final Set<String> contexts) {
final List<String> metadata = Lists.newArrayList();
@ -319,7 +320,7 @@ public class XmlRecordFactory implements Serializable {
.getContributor()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contributor", c))
.collect(Collectors.toList()));
}
if (r.getCountry() != null) {
@ -339,14 +340,14 @@ public class XmlRecordFactory implements Serializable {
.getCoverage()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("coverage", c))
.collect(Collectors.toList()));
}
if (r.getDateofacceptance() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
.asXmlElement("dateofacceptance", r.getDateofacceptance()));
}
if (r.getDescription() != null) {
metadata
@ -355,12 +356,12 @@ public class XmlRecordFactory implements Serializable {
.getDescription()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("description", c))
.collect(Collectors.toCollection(HashSet::new)));
}
if (r.getEmbargoenddate() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate()));
}
if (r.getSubject() != null) {
metadata
@ -386,7 +387,7 @@ public class XmlRecordFactory implements Serializable {
.collect(Collectors.toList()));
}
if (r.getPublisher() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getName()));
}
if (r.getSource() != null) {
metadata
@ -395,7 +396,7 @@ public class XmlRecordFactory implements Serializable {
.getSource()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("source", c))
.collect(Collectors.toCollection(HashSet::new)));
}
if (r.getFormat() != null) {
@ -405,11 +406,11 @@ public class XmlRecordFactory implements Serializable {
.getFormat()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("format", c))
.collect(Collectors.toList()));
}
if (r.getResulttype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", r.getResulttype()));
metadata.add(XmlSerializationUtils.asXmlElement("resulttype", r.getResulttype()));
}
if (r.getResourcetype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
@ -418,11 +419,11 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue()));
.asXmlElement("processingchargeamount", r.getProcessingchargeamount()));
metadata
.add(
XmlSerializationUtils
.asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue()));
.asXmlElement("processingchargecurrency", r.getProcessingchargecurrency()));
}
}
@ -439,29 +440,29 @@ public class XmlRecordFactory implements Serializable {
case dataset:
final Dataset d = (Dataset) entity;
if (d.getDevice() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice()));
}
if (d.getLastmetadataupdate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate()));
}
if (d.getMetadataversionnumber() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
.asXmlElement("metadataversionnumber", d.getMetadataversionnumber()));
}
if (d.getSize() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize()));
}
if (d.getStoragedate() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate()));
}
if (d.getVersion() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion()));
}
// TODO d.getGeolocation()
@ -476,7 +477,7 @@ public class XmlRecordFactory implements Serializable {
.getContactperson()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contactperson", c))
.collect(Collectors.toList()));
}
@ -487,7 +488,7 @@ public class XmlRecordFactory implements Serializable {
.getContactgroup()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c))
.collect(Collectors.toList()));
}
if (orp.getTool() != null) {
@ -497,7 +498,7 @@ public class XmlRecordFactory implements Serializable {
.getTool()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("tool", c))
.collect(Collectors.toList()));
}
break;
@ -511,24 +512,14 @@ public class XmlRecordFactory implements Serializable {
.getDocumentationUrl()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue()))
.collect(Collectors.toList()));
}
if (s.getLicense() != null) {
metadata
.addAll(
s
.getLicense()
.stream()
.filter(Objects::nonNull)
.map(l -> XmlSerializationUtils.mapStructuredProperty("license", l))
.map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c))
.collect(Collectors.toList()));
}
if (s.getCodeRepositoryUrl() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl()));
}
if (s.getProgrammingLanguage() != null) {
metadata
@ -560,45 +551,45 @@ public class XmlRecordFactory implements Serializable {
}
if (ds.getOfficialname() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname()));
}
if (ds.getEnglishname() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname()));
}
if (ds.getWebsiteurl() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl()));
}
if (ds.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl()));
}
if (ds.getContactemail() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail()));
}
if (ds.getNamespaceprefix() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
.asXmlElement("namespaceprefix", ds.getNamespaceprefix()));
}
if (ds.getLatitude() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude()));
}
if (ds.getLongitude() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude()));
}
if (ds.getDateofvalidation() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
.asXmlElement("dateofvalidation", ds.getDateofvalidation()));
}
if (ds.getDescription() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription()));
}
if (ds.getSubjects() != null) {
metadata
@ -614,17 +605,17 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
.asXmlElement("odnumberofitems", ds.getOdnumberofitems()));
}
if (ds.getOdnumberofitemsdate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate()));
}
if (ds.getOdpolicies() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies()));
}
if (ds.getOdlanguages() != null) {
metadata
@ -633,7 +624,7 @@ public class XmlRecordFactory implements Serializable {
.getOdlanguages()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c))
.collect(Collectors.toList()));
}
if (ds.getLanguages() != null) {
@ -653,7 +644,7 @@ public class XmlRecordFactory implements Serializable {
.getOdcontenttypes()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c))
.collect(Collectors.toList()));
}
if (ds.getAccessinfopackage() != null) {
@ -662,69 +653,69 @@ public class XmlRecordFactory implements Serializable {
ds
.getAccessinfopackage()
.stream()
.map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c))
.collect(Collectors.toList()));
}
if (ds.getReleaseenddate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
.asXmlElement("releasestartdate", ds.getReleaseenddate()));
}
if (ds.getReleaseenddate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
.asXmlElement("releaseenddate", ds.getReleaseenddate()));
}
if (ds.getMissionstatementurl() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
.asXmlElement("missionstatementurl", ds.getMissionstatementurl()));
}
if (ds.getDataprovider() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
.asXmlElement("dataprovider", ds.getDataprovider().toString()));
}
if (ds.getServiceprovider() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
.asXmlElement("serviceprovider", ds.getServiceprovider().toString()));
}
if (ds.getDatabaseaccesstype() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype()));
}
if (ds.getDatauploadtype() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
.asXmlElement("datauploadtype", ds.getDatauploadtype()));
}
if (ds.getDatabaseaccessrestriction() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
"databaseaccessrestriction", ds.getDatabaseaccessrestriction()));
}
if (ds.getDatauploadrestriction() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction()));
}
if (ds.getVersioning() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
.asXmlElement("versioning", ds.getVersioning().toString()));
}
if (ds.getVersioncontrol() != null) {
metadata
@ -736,15 +727,15 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl()));
}
if (ds.getPidsystems() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems()));
}
if (ds.getCertificates() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates()));
}
if (ds.getPolicies() != null) {
metadata
@ -831,11 +822,11 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement("legalshortname", o.getLegalshortname().getValue()));
.asXmlElement("legalshortname", o.getLegalshortname()));
}
if (o.getLegalname() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname()));
}
if (o.getAlternativeNames() != null) {
metadata
@ -844,40 +835,40 @@ public class XmlRecordFactory implements Serializable {
.getAlternativeNames()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c))
.collect(Collectors.toList()));
}
if (o.getWebsiteurl() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl()));
}
if (o.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl()));
}
if (o.getEclegalbody() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody()));
}
if (o.getEclegalperson() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson()));
}
if (o.getEcnonprofit() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit()));
}
if (o.getEcresearchorganization() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
.asXmlElement("ecresearchorganization", o.getEcresearchorganization()));
}
if (o.getEchighereducation() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("echighereducation", o.getEchighereducation().getValue()));
.asXmlElement("echighereducation", o.getEchighereducation()));
}
if (o.getEcinternationalorganizationeurinterests() != null) {
metadata
@ -885,28 +876,28 @@ public class XmlRecordFactory implements Serializable {
XmlSerializationUtils
.asXmlElement(
"ecinternationalorganizationeurinterests",
o.getEcinternationalorganizationeurinterests().getValue()));
o.getEcinternationalorganizationeurinterests()));
}
if (o.getEcinternationalorganization() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"ecinternationalorganization", o.getEcinternationalorganization().getValue()));
"ecinternationalorganization", o.getEcinternationalorganization()));
}
if (o.getEcenterprise() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise()));
}
if (o.getEcsmevalidated() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
.asXmlElement("ecsmevalidated", o.getEcsmevalidated()));
}
if (o.getEcnutscode() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode()));
}
if (o.getCountry() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
@ -918,39 +909,39 @@ public class XmlRecordFactory implements Serializable {
if (p.getWebsiteurl() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl()));
}
if (p.getCode() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode()));
}
if (p.getAcronym() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym()));
}
if (p.getTitle() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle()));
}
if (p.getStartdate() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate()));
}
if (p.getEnddate() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate()));
}
if (p.getCallidentifier() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("callidentifier", p.getCallidentifier().getValue()));
.asXmlElement("callidentifier", p.getCallidentifier()));
}
if (p.getKeywords() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords()));
}
if (p.getDuration() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration()));
}
if (p.getEcarticle29_3() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3()));
}
if (p.getSubjects() != null) {
metadata
@ -969,16 +960,16 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement("oamandatepublications", p.getOamandatepublications().getValue()));
.asXmlElement("oamandatepublications", p.getOamandatepublications()));
}
if (p.getEcsc39() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39()));
}
if (p.getSummary() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary()));
}
if (p.getCurrency() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency()));
}
if (p.getTotalcost() != null) {
metadata
@ -995,7 +986,6 @@ public class XmlRecordFactory implements Serializable {
.getFundingtree()
.stream()
.filter(Objects::nonNull)
.map(ft -> ft.getValue())
.collect(Collectors.toList()));
}
@ -1054,9 +1044,6 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null && re.getResulttype().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) {
metadata
.addAll(
@ -1081,13 +1068,13 @@ public class XmlRecordFactory implements Serializable {
if (isNotBlank(re.getOfficialname())) {
metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) {
if (re.getDatasourcetype() != null && StringUtils.isNotBlank(re.getDatasourcetype().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype()));
}
if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) {
if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
}
if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) {
if (re.getOpenairecompatibility() != null && StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
metadata
.add(
XmlSerializationUtils
@ -1102,7 +1089,7 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null && !re.getCountry().isBlank()) {
if (re.getCountry() != null && StringUtils.isNotBlank(re.getCountry().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
}
break;
@ -1116,7 +1103,7 @@ public class XmlRecordFactory implements Serializable {
if (isNotBlank(re.getAcronym())) {
metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
}
if (re.getContracttype() != null && !re.getContracttype().isBlank()) {
if (re.getContracttype() != null && StringUtils.isNotBlank(re.getContracttype().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype()));
}
if (re.getFundingtree() != null && contexts != null) {
@ -1126,7 +1113,7 @@ public class XmlRecordFactory implements Serializable {
.getFundingtree()
.stream()
.peek(ft -> fillContextMap(ft, contexts))
.map(ft -> getRelFundingTree(ft))
.map(XmlRecordFactory::getRelFundingTree)
.collect(Collectors.toList()));
}
break;
@ -1158,14 +1145,15 @@ public class XmlRecordFactory implements Serializable {
if (rel.getValidated() == null) {
rel.setValidated(false);
}
final DataInfo dataInfo = Optional.ofNullable(rel.getProvenance()).map(p -> p.get(0).getDataInfo()).orElse(null);
return templateFactory
.getRel(
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(),
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, dataInfo, rel.getValidated(),
rel.getValidationDate());
}
private List<String> listChildren(
final OafEntity entity,
final Entity entity,
final JoinedEntity je,
final TemplateFactory templateFactory) {
@ -1191,7 +1179,7 @@ public class XmlRecordFactory implements Serializable {
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
final List<String> fields = Lists.newArrayList();
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
if (instance.getAccessright() != null && StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
fields
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
}
@ -1232,7 +1220,7 @@ public class XmlRecordFactory implements Serializable {
instance
.getInstancetype()
.stream()
.filter(t -> !t.isBlank())
.filter(t -> StringUtils.isNotBlank(t.getClassid()))
.map(t -> XmlSerializationUtils.mapQualifier("instancetype", t))
.collect(Collectors.toList()));
}
@ -1242,7 +1230,7 @@ public class XmlRecordFactory implements Serializable {
instance
.getDistributionlocation()
.stream()
.filter(d -> isNotBlank(d))
.filter(StringUtils::isNotBlank)
.map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d))
.collect(Collectors.toList()));
}
@ -1430,10 +1418,10 @@ public class XmlRecordFactory implements Serializable {
instance.getInstancetype().add(i.getInstancetype());
instance
.setProcessingchargeamount(
Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null));
Optional.ofNullable(i.getProcessingchargeamount()).orElse(null));
instance
.setProcessingchargecurrency(
Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null));
Optional.ofNullable(i.getProcessingchargecurrency()).orElse(null));
Optional
.ofNullable(i.getPid())
.ifPresent(pid -> instance.getPid().addAll(pid));
@ -1442,17 +1430,17 @@ public class XmlRecordFactory implements Serializable {
.ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId));
Optional
.ofNullable(i.getDateofacceptance())
.ifPresent(d -> instance.getDateofacceptance().add(d.getValue()));
.ifPresent(d -> instance.getDateofacceptance().add(d));
Optional
.ofNullable(i.getLicense())
.ifPresent(license -> instance.getLicense().add(license.getValue()));
.ifPresent(license -> instance.getLicense().add(license.getUrl()));
Optional
.ofNullable(i.getDistributionlocation())
.ifPresent(dl -> instance.getDistributionlocation().add(dl));
});
if (instance.getHostedby().size() > 1
&& instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) {
&& instance.getHostedby().stream().anyMatch(ModelConstants.UNKNOWN_REPOSITORY::equals)) {
instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY);
}
@ -1463,7 +1451,7 @@ public class XmlRecordFactory implements Serializable {
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
}
private List<String> listExtraInfo(final OafEntity entity) {
private List<String> listExtraInfo(final Entity entity) {
final List<ExtraInfo> extraInfo = entity.getExtraInfo();
return extraInfo != null
? extraInfo

View File

@ -10,6 +10,7 @@ import java.util.List;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import scala.Tuple2;
public class XmlSerializationUtils {
@ -49,7 +50,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement(
name, t.getValue(), t.getQualifier(), t.getDataInfo());
name, t.getValue(), t.getQualifier());
}
public static String mapQualifier(String name, Qualifier q) {
@ -66,7 +67,7 @@ public class XmlSerializationUtils {
.replaceAll(XML_10_PATTERN, "");
}
public static String parseDataInfo(final DataInfo dataInfo) {
public static String parseDataInfo(final EntityDataInfo dataInfo) {
return new StringBuilder()
.append("<datainfo>")
.append(asXmlElement("inferred", dataInfo.getInferred() + ""))
@ -106,6 +107,12 @@ public class XmlSerializationUtils {
return asXmlElement(name, value, null, null);
}
public static String asXmlElement(
final String name, final String value, final Qualifier q) {
return asXmlElement(name, value, q, null);
}
public static String asXmlElement(
final String name, final String value, final Qualifier q, final DataInfo info) {
StringBuilder sb = new StringBuilder();
@ -125,7 +132,7 @@ public class XmlSerializationUtils {
info.getProvenanceaction() != null
? info.getProvenanceaction().getClassid()
: ""))
.append(attr("trust", info.getTrust()));
.append(attr("trust", Float.toString(info.getTrust())));
}
if (isBlank(value)) {
sb.append("/>");
@ -142,14 +149,13 @@ public class XmlSerializationUtils {
}
public static String getAttributes(final Qualifier q) {
if (q == null || q.isBlank())
if (q == null || StringUtils.isBlank(q.getClassid()))
return "";
return new StringBuilder(" ")
.append(attr("classid", q.getClassid()))
.append(attr("classname", q.getClassname()))
.append(attr("schemeid", q.getSchemeid()))
.append(attr("schemename", q.getSchemename()))
.toString();
}

View File

@ -25,7 +25,6 @@
<modules>
<module>dhp-workflow-profiles</module>
<module>dhp-aggregation</module>
<module>dhp-distcp</module>
<module>dhp-actionmanager</module>
<module>dhp-graph-mapper</module>
<module>dhp-dedup-openaire</module>