included new stats* workflows in parent pom list of modules, code formatting

This commit is contained in:
Claudio Atzori 2024-03-26 10:42:10 +01:00
parent bfba71a95c
commit ef52128c55
24 changed files with 590 additions and 525 deletions

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import java.util.function.BiFunction;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/** OAF model merging support. */ /** OAF model merging support. */
public class MergeAndGet { public class MergeAndGet {

View File

@ -38,7 +38,6 @@ public class BipProjectModel {
return projectId; return projectId;
} }
// each project bip measure has exactly one value, hence one key-value pair // each project bip measure has exactly one value, hence one key-value pair
private Measure createMeasure(String measureId, String measureValue) { private Measure createMeasure(String measureId, String measureValue) {

View File

@ -1,7 +1,26 @@
package eu.dnetlib.dhp.actionmanager.project; package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Arrays;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic; import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
@ -15,25 +34,8 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2; import scala.Tuple2;
import java.util.Arrays;
import java.util.Objects;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/** /**
* Class that makes the ActionSet. To prepare the AS two joins are needed * Class that makes the ActionSet. To prepare the AS two joins are needed
* *

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup;
import java.util.*; import java.util.*;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction;
@ -15,11 +14,11 @@ import org.apache.spark.sql.*;
import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.dedup.model.Identifier;
import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2; import scala.Tuple2;
import scala.Tuple3; import scala.Tuple3;
import scala.collection.JavaConversions; import scala.collection.JavaConversions;

View File

@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.dedup;
import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.col;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -21,6 +20,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
@ -128,8 +128,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
(MapFunction<Relation, String>) r -> String (MapFunction<Relation, String>) r -> String
.join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), .join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()),
Encoders.STRING()) Encoders.STRING())
.reduceGroups((ReduceFunction<Relation>) MergeUtils::mergeRelation .reduceGroups((ReduceFunction<Relation>) MergeUtils::mergeRelation)
)
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC); .map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC);
final String outputRelationPath = graphOutputPath + "/relation"; final String outputRelationPath = graphOutputPath + "/relation";

View File

@ -13,7 +13,6 @@ import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -29,6 +28,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2; import scala.Tuple2;
/** /**

View File

@ -7,7 +7,6 @@ import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
@ -25,6 +24,7 @@ import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2; import scala.Tuple2;
/** /**

View File

@ -98,8 +98,10 @@ public abstract class AbstractMdRecordToOafMapper {
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/"; protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
protected static final Qualifier ORCID_PID_TYPE = qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, DNET_PID_TYPES, DNET_PID_TYPES); protected static final Qualifier ORCID_PID_TYPE = qualifier(
protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, DNET_PID_TYPES, DNET_PID_TYPES);
protected static final Qualifier MAG_PID_TYPE = qualifier(
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999"; protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
@ -149,20 +151,26 @@ public abstract class AbstractMdRecordToOafMapper {
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
try { try {
final Document doc = DocumentHelper final Document doc = DocumentHelper
.parseText(xml .parseText(
xml
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
if (collectedFrom == null) { return Lists.newArrayList(); } if (collectedFrom == null) {
return Lists.newArrayList();
}
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom ? collectedFrom
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
if (hostedBy == null) { return Lists.newArrayList(); } if (hostedBy == null) {
return Lists.newArrayList();
}
final DataInfo entityInfo = prepareDataInfo(doc, this.invisible); final DataInfo entityInfo = prepareDataInfo(doc, this.invisible);
final long lastUpdateTimestamp = new Date().getTime(); final long lastUpdateTimestamp = new Date().getTime();
@ -201,7 +209,9 @@ public abstract class AbstractMdRecordToOafMapper {
final String dsId = doc.valueOf(xpathId); final String dsId = doc.valueOf(xpathId);
final String dsName = doc.valueOf(xpathName); final String dsName = doc.valueOf(xpathName);
if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) {
return null;
}
return keyValue(createOpenaireId(10, dsId, true), dsName); return keyValue(createOpenaireId(10, dsId, true), dsName);
} }
@ -307,13 +317,21 @@ public abstract class AbstractMdRecordToOafMapper {
final String projectId = createOpenaireId(40, originalId, true); final String projectId = createOpenaireId(40, originalId, true);
res res
.add(OafMapperUtils .add(
.getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(), info, entity OafMapperUtils
.getLastupdatetimestamp(), validationdDate, null)); .getRelation(
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(),
info, entity
.getLastupdatetimestamp(),
validationdDate, null));
res res
.add(OafMapperUtils .add(
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info, entity OafMapperUtils
.getLastupdatetimestamp(), validationdDate, null)); .getRelation(
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info,
entity
.getLastupdatetimestamp(),
validationdDate, null));
} }
} }
@ -345,13 +363,21 @@ public abstract class AbstractMdRecordToOafMapper {
if (StringUtils.isNotBlank(targetType)) { if (StringUtils.isNotBlank(targetType)) {
final String targetId = createOpenaireId(targetType, target, true); final String targetId = createOpenaireId(targetType, target, true);
rels rels
.add(OafMapperUtils .add(
.getRelation(entity.getId(), targetId, relType, subRelType, relClass, entity.getCollectedfrom(), info, entity OafMapperUtils
.getLastupdatetimestamp(), validationDate, null)); .getRelation(
entity.getId(), targetId, relType, subRelType, relClass,
entity.getCollectedfrom(), info, entity
.getLastupdatetimestamp(),
validationDate, null));
rels rels
.add(OafMapperUtils .add(
.getRelation(targetId, entity.getId(), relType, subRelType, relClassInverse, entity.getCollectedfrom(), info, entity OafMapperUtils
.getLastupdatetimestamp(), validationDate, null)); .getRelation(
targetId, entity.getId(), relType, subRelType, relClassInverse,
entity.getCollectedfrom(), info, entity
.getLastupdatetimestamp(),
validationDate, null));
} }
} }
} }
@ -384,13 +410,20 @@ public abstract class AbstractMdRecordToOafMapper {
} }
rels rels
.add(OafMapperUtils .add(
.getRelation(resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION, entity.getCollectedfrom(), info, entity OafMapperUtils
.getLastupdatetimestamp(), null, properties)); .getRelation(
resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION,
entity.getCollectedfrom(), info, entity
.getLastupdatetimestamp(),
null, properties));
rels rels
.add(OafMapperUtils .add(
.getRelation(orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF, entity OafMapperUtils
.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null, properties)); .getRelation(
orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF, entity
.getCollectedfrom(),
info, entity.getLastupdatetimestamp(), null, properties));
} }
} }
return rels; return rels;
@ -587,7 +620,9 @@ public abstract class AbstractMdRecordToOafMapper {
final String sp = n.valueOf("@sp"); final String sp = n.valueOf("@sp");
final String vol = n.valueOf("@vol"); final String vol = n.valueOf("@vol");
final String edition = n.valueOf("@edition"); final String edition = n.valueOf("@edition");
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } if (StringUtils.isNotBlank(name)) {
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
}
} }
return null; return null;
} }
@ -596,13 +631,18 @@ public abstract class AbstractMdRecordToOafMapper {
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
if (n != null) { if (n != null) {
final String id = n.valueOf("./*[local-name()='identifier']"); final String id = n.valueOf("./*[local-name()='identifier']");
if (StringUtils.isNotBlank(id)) { return Lists.newArrayList(id); } if (StringUtils.isNotBlank(id)) {
return Lists.newArrayList(id);
}
} }
final List<String> idList = doc final List<String> idList = doc
.selectNodes("normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())"); .selectNodes(
"normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
final Set<String> originalIds = Sets.newHashSet(idList); final Set<String> originalIds = Sets.newHashSet(idList);
if (originalIds.isEmpty()) { throw new IllegalStateException("missing originalID on " + doc.asXML()); } if (originalIds.isEmpty()) {
throw new IllegalStateException("missing originalID on " + doc.asXML());
}
return Lists.newArrayList(originalIds); return Lists.newArrayList(originalIds);
} }
@ -666,8 +706,11 @@ public abstract class AbstractMdRecordToOafMapper {
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
res res
.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n .add(
.valueOf("@schemename"), info)); structuredProperty(
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n
.valueOf("@schemename"),
info));
} }
return res; return res;
} }
@ -680,7 +723,10 @@ public abstract class AbstractMdRecordToOafMapper {
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
res res
.add(subject(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); .add(
subject(
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@schemename"), info));
} }
return res; return res;
} }
@ -688,7 +734,9 @@ public abstract class AbstractMdRecordToOafMapper {
protected OAIProvenance prepareOAIprovenance(final Document doc) { protected OAIProvenance prepareOAIprovenance(final Document doc) {
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
if (n == null) { return null; } if (n == null) {
return null;
}
final String identifier = n.valueOf("./*[local-name()='identifier']"); final String identifier = n.valueOf("./*[local-name()='identifier']");
final String baseURL = n.valueOf("./*[local-name()='baseURL']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']");
@ -703,7 +751,9 @@ public abstract class AbstractMdRecordToOafMapper {
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) { protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
final Node n = doc.selectSingleNode("//oaf:datainfo"); final Node n = doc.selectSingleNode("//oaf:datainfo");
if (n == null) { return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } if (n == null) {
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
}
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
@ -715,11 +765,14 @@ public abstract class AbstractMdRecordToOafMapper {
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
final String trust = n.valueOf("./oaf:trust"); final String trust = n.valueOf("./oaf:trust");
return dataInfo(deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); return dataInfo(
deletedbyinference, inferenceprovenance, inferred, invisible,
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
} }
protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) { protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) {
return listFields(info, prepareListString(node, xpath) return listFields(
info, prepareListString(node, xpath)
.stream() .stream()
.filter(URL_VALIDATOR::isValid) .filter(URL_VALIDATOR::isValid)
.collect(Collectors.toList())); .collect(Collectors.toList()));
@ -749,7 +802,9 @@ public abstract class AbstractMdRecordToOafMapper {
protected Set<String> validateUrl(final Collection<String> url) { protected Set<String> validateUrl(final Collection<String> url) {
if (Objects.isNull(url)) { return new HashSet<>(); } if (Objects.isNull(url)) {
return new HashSet<>();
}
return url return url
.stream() .stream()
.filter(URL_VALIDATOR::isValid) .filter(URL_VALIDATOR::isValid)

View File

@ -1,16 +1,13 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import com.fasterxml.jackson.databind.ObjectMapper; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import java.util.Arrays;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import java.util.List;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import java.util.Objects;
import eu.dnetlib.dhp.schema.common.ModelSupport; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -21,15 +18,20 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
public class GenerateEntitiesApplication extends AbstractMigrationApplication { public class GenerateEntitiesApplication extends AbstractMigrationApplication {
private static final Logger log = LoggerFactory.getLogger(GenerateEntitiesApplication.class); private static final Logger log = LoggerFactory.getLogger(GenerateEntitiesApplication.class);

View File

@ -1,12 +1,13 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import static org.junit.jupiter.api.Assertions.assertEquals;
import eu.dnetlib.dhp.schema.common.ModelConstants; import static org.junit.jupiter.api.Assertions.assertTrue;
import eu.dnetlib.dhp.schema.oaf.*; import static org.mockito.Mockito.lenient;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import java.io.IOException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import java.util.List;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -15,12 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import java.io.IOException; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import java.util.List; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import static org.junit.jupiter.api.Assertions.assertEquals; import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import static org.junit.jupiter.api.Assertions.assertTrue; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import static org.mockito.Mockito.lenient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
class GenerateEntitiesApplicationTest { class GenerateEntitiesApplicationTest {

View File

@ -1137,7 +1137,8 @@ public class XmlRecordFactory implements Serializable {
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("dateofacceptance", i.getDateofacceptance().getValue())); .asXmlElement("dateofacceptance", i.getDateofacceptance().getValue()));
} }
if (i.getInstancetype() != null && StringUtils.isNotBlank(i.getInstancetype().getClassid())) { if (i.getInstancetype() != null
&& StringUtils.isNotBlank(i.getInstancetype().getClassid())) {
instanceFields instanceFields
.add(XmlSerializationUtils.mapQualifier("instancetype", i.getInstancetype())); .add(XmlSerializationUtils.mapQualifier("instancetype", i.getInstancetype()));
} }
@ -1178,7 +1179,8 @@ public class XmlRecordFactory implements Serializable {
if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) { if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
} }
if (re.getOpenairecompatibility() != null && StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) { if (re.getOpenairecompatibility() != null
&& StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
@ -1285,7 +1287,8 @@ public class XmlRecordFactory implements Serializable {
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> { groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
final List<String> fields = Lists.newArrayList(); final List<String> fields = Lists.newArrayList();
if (instance.getAccessright() != null && StringUtils.isNotBlank(instance.getAccessright().getClassid())) { if (instance.getAccessright() != null
&& StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
fields fields
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
} }

View File

@ -7,10 +7,11 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import scala.Tuple2; import scala.Tuple2;
public class XmlSerializationUtils { public class XmlSerializationUtils {

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-hist-snaps</artifactId> <artifactId>dhp-stats-hist-snaps</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-monitor-irish</artifactId> <artifactId>dhp-stats-monitor-irish</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-monitor-update</artifactId> <artifactId>dhp-stats-monitor-update</artifactId>

View File

@ -31,6 +31,10 @@
<module>dhp-enrichment</module> <module>dhp-enrichment</module>
<module>dhp-graph-provision</module> <module>dhp-graph-provision</module>
<module>dhp-blacklist</module> <module>dhp-blacklist</module>
<module>dhp-stats-actionsets</module>
<module>dhp-stats-hist-snaps</module>
<module>dhp-stats-monitor-irish</module>
<module>dhp-stats-monitor-update</module>
<module>dhp-stats-update</module> <module>dhp-stats-update</module>
<module>dhp-stats-promote</module> <module>dhp-stats-promote</module>
<module>dhp-usage-stats-build</module> <module>dhp-usage-stats-build</module>