forked from D-Net/dnet-hadoop
included new stats* workflows in parent pom list of modules, code formatting
This commit is contained in:
parent
bfba71a95c
commit
ef52128c55
|
@ -1,14 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
||||||
|
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
|
|
||||||
import java.util.function.BiFunction;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
|
||||||
|
|
||||||
/** OAF model merging support. */
|
/** OAF model merging support. */
|
||||||
public class MergeAndGet {
|
public class MergeAndGet {
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,6 @@ public class BipProjectModel {
|
||||||
return projectId;
|
return projectId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// each project bip measure has exactly one value, hence one key-value pair
|
// each project bip measure has exactly one value, hence one key-value pair
|
||||||
private Measure createMeasure(String measureId, String measureValue) {
|
private Measure createMeasure(String measureId, String measureValue) {
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,26 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project;
|
package eu.dnetlib.dhp.actionmanager.project;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
|
||||||
|
@ -15,25 +34,8 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class that makes the ActionSet. To prepare the AS two joins are needed
|
* Class that makes the ActionSet. To prepare the AS two joins are needed
|
||||||
*
|
*
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import org.apache.commons.beanutils.BeanUtils;
|
import org.apache.commons.beanutils.BeanUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
@ -15,11 +14,11 @@ import org.apache.spark.sql.*;
|
||||||
import eu.dnetlib.dhp.oa.dedup.model.Identifier;
|
import eu.dnetlib.dhp.oa.dedup.model.Identifier;
|
||||||
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.Tuple3;
|
import scala.Tuple3;
|
||||||
import scala.collection.JavaConversions;
|
import scala.collection.JavaConversions;
|
||||||
|
|
|
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
import static org.apache.spark.sql.functions.col;
|
import static org.apache.spark.sql.functions.col;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -21,6 +20,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -128,8 +128,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
||||||
(MapFunction<Relation, String>) r -> String
|
(MapFunction<Relation, String>) r -> String
|
||||||
.join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()),
|
.join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()),
|
||||||
Encoders.STRING())
|
Encoders.STRING())
|
||||||
.reduceGroups((ReduceFunction<Relation>) MergeUtils::mergeRelation
|
.reduceGroups((ReduceFunction<Relation>) MergeUtils::mergeRelation)
|
||||||
)
|
|
||||||
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC);
|
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC);
|
||||||
|
|
||||||
final String outputRelationPath = graphOutputPath + "/relation";
|
final String outputRelationPath = graphOutputPath + "/relation";
|
||||||
|
|
|
@ -13,7 +13,6 @@ import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -29,6 +28,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -7,7 +7,6 @@ import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -25,6 +24,7 @@ import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -98,8 +98,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
|
protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
|
||||||
|
|
||||||
protected static final Qualifier ORCID_PID_TYPE = qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, DNET_PID_TYPES, DNET_PID_TYPES);
|
protected static final Qualifier ORCID_PID_TYPE = qualifier(
|
||||||
protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
protected static final Qualifier MAG_PID_TYPE = qualifier(
|
||||||
|
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
|
||||||
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
|
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
|
||||||
|
|
||||||
|
@ -149,20 +151,26 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
try {
|
try {
|
||||||
final Document doc = DocumentHelper
|
final Document doc = DocumentHelper
|
||||||
.parseText(xml
|
.parseText(
|
||||||
|
xml
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
||||||
|
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||||
|
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||||
|
|
||||||
if (collectedFrom == null) { return Lists.newArrayList(); }
|
if (collectedFrom == null) {
|
||||||
|
return Lists.newArrayList();
|
||||||
|
}
|
||||||
|
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||||
? collectedFrom
|
? collectedFrom
|
||||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||||
|
|
||||||
if (hostedBy == null) { return Lists.newArrayList(); }
|
if (hostedBy == null) {
|
||||||
|
return Lists.newArrayList();
|
||||||
|
}
|
||||||
|
|
||||||
final DataInfo entityInfo = prepareDataInfo(doc, this.invisible);
|
final DataInfo entityInfo = prepareDataInfo(doc, this.invisible);
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
@ -201,7 +209,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String dsId = doc.valueOf(xpathId);
|
final String dsId = doc.valueOf(xpathId);
|
||||||
final String dsName = doc.valueOf(xpathName);
|
final String dsName = doc.valueOf(xpathName);
|
||||||
|
|
||||||
if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; }
|
if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
||||||
}
|
}
|
||||||
|
@ -307,13 +317,21 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String projectId = createOpenaireId(40, originalId, true);
|
final String projectId = createOpenaireId(40, originalId, true);
|
||||||
|
|
||||||
res
|
res
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(), info, entity
|
OafMapperUtils
|
||||||
.getLastupdatetimestamp(), validationdDate, null));
|
.getRelation(
|
||||||
|
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(),
|
||||||
|
info, entity
|
||||||
|
.getLastupdatetimestamp(),
|
||||||
|
validationdDate, null));
|
||||||
res
|
res
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info, entity
|
OafMapperUtils
|
||||||
.getLastupdatetimestamp(), validationdDate, null));
|
.getRelation(
|
||||||
|
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info,
|
||||||
|
entity
|
||||||
|
.getLastupdatetimestamp(),
|
||||||
|
validationdDate, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,13 +363,21 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
if (StringUtils.isNotBlank(targetType)) {
|
if (StringUtils.isNotBlank(targetType)) {
|
||||||
final String targetId = createOpenaireId(targetType, target, true);
|
final String targetId = createOpenaireId(targetType, target, true);
|
||||||
rels
|
rels
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(entity.getId(), targetId, relType, subRelType, relClass, entity.getCollectedfrom(), info, entity
|
OafMapperUtils
|
||||||
.getLastupdatetimestamp(), validationDate, null));
|
.getRelation(
|
||||||
|
entity.getId(), targetId, relType, subRelType, relClass,
|
||||||
|
entity.getCollectedfrom(), info, entity
|
||||||
|
.getLastupdatetimestamp(),
|
||||||
|
validationDate, null));
|
||||||
rels
|
rels
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(targetId, entity.getId(), relType, subRelType, relClassInverse, entity.getCollectedfrom(), info, entity
|
OafMapperUtils
|
||||||
.getLastupdatetimestamp(), validationDate, null));
|
.getRelation(
|
||||||
|
targetId, entity.getId(), relType, subRelType, relClassInverse,
|
||||||
|
entity.getCollectedfrom(), info, entity
|
||||||
|
.getLastupdatetimestamp(),
|
||||||
|
validationDate, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -384,13 +410,20 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
rels
|
rels
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION, entity.getCollectedfrom(), info, entity
|
OafMapperUtils
|
||||||
.getLastupdatetimestamp(), null, properties));
|
.getRelation(
|
||||||
|
resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION,
|
||||||
|
entity.getCollectedfrom(), info, entity
|
||||||
|
.getLastupdatetimestamp(),
|
||||||
|
null, properties));
|
||||||
rels
|
rels
|
||||||
.add(OafMapperUtils
|
.add(
|
||||||
.getRelation(orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF, entity
|
OafMapperUtils
|
||||||
.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null, properties));
|
.getRelation(
|
||||||
|
orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF, entity
|
||||||
|
.getCollectedfrom(),
|
||||||
|
info, entity.getLastupdatetimestamp(), null, properties));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return rels;
|
return rels;
|
||||||
|
@ -587,7 +620,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String sp = n.valueOf("@sp");
|
final String sp = n.valueOf("@sp");
|
||||||
final String vol = n.valueOf("@vol");
|
final String vol = n.valueOf("@vol");
|
||||||
final String edition = n.valueOf("@edition");
|
final String edition = n.valueOf("@edition");
|
||||||
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); }
|
if (StringUtils.isNotBlank(name)) {
|
||||||
|
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -596,13 +631,18 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
if (n != null) {
|
if (n != null) {
|
||||||
final String id = n.valueOf("./*[local-name()='identifier']");
|
final String id = n.valueOf("./*[local-name()='identifier']");
|
||||||
if (StringUtils.isNotBlank(id)) { return Lists.newArrayList(id); }
|
if (StringUtils.isNotBlank(id)) {
|
||||||
|
return Lists.newArrayList(id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
final List<String> idList = doc
|
final List<String> idList = doc
|
||||||
.selectNodes("normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
|
.selectNodes(
|
||||||
|
"normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
|
||||||
final Set<String> originalIds = Sets.newHashSet(idList);
|
final Set<String> originalIds = Sets.newHashSet(idList);
|
||||||
|
|
||||||
if (originalIds.isEmpty()) { throw new IllegalStateException("missing originalID on " + doc.asXML()); }
|
if (originalIds.isEmpty()) {
|
||||||
|
throw new IllegalStateException("missing originalID on " + doc.asXML());
|
||||||
|
}
|
||||||
return Lists.newArrayList(originalIds);
|
return Lists.newArrayList(originalIds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -666,8 +706,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n
|
.add(
|
||||||
.valueOf("@schemename"), info));
|
structuredProperty(
|
||||||
|
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n
|
||||||
|
.valueOf("@schemename"),
|
||||||
|
info));
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -680,7 +723,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(subject(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info));
|
.add(
|
||||||
|
subject(
|
||||||
|
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
|
||||||
|
n.valueOf("@schemename"), info));
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -688,7 +734,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
|
|
||||||
if (n == null) { return null; }
|
if (n == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
||||||
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
||||||
|
@ -703,7 +751,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
|
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
if (n == null) { return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); }
|
if (n == null) {
|
||||||
|
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
||||||
|
}
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
||||||
|
@ -715,11 +765,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
||||||
final String trust = n.valueOf("./oaf:trust");
|
final String trust = n.valueOf("./oaf:trust");
|
||||||
|
|
||||||
return dataInfo(deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
return dataInfo(
|
||||||
|
deletedbyinference, inferenceprovenance, inferred, invisible,
|
||||||
|
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) {
|
protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) {
|
||||||
return listFields(info, prepareListString(node, xpath)
|
return listFields(
|
||||||
|
info, prepareListString(node, xpath)
|
||||||
.stream()
|
.stream()
|
||||||
.filter(URL_VALIDATOR::isValid)
|
.filter(URL_VALIDATOR::isValid)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
@ -749,7 +802,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected Set<String> validateUrl(final Collection<String> url) {
|
protected Set<String> validateUrl(final Collection<String> url) {
|
||||||
|
|
||||||
if (Objects.isNull(url)) { return new HashSet<>(); }
|
if (Objects.isNull(url)) {
|
||||||
|
return new HashSet<>();
|
||||||
|
}
|
||||||
return url
|
return url
|
||||||
.stream()
|
.stream()
|
||||||
.filter(URL_VALIDATOR::isValid)
|
.filter(URL_VALIDATOR::isValid)
|
||||||
|
|
|
@ -1,16 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import java.util.Arrays;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import java.util.List;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
import java.util.Objects;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import java.util.Optional;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
@ -21,15 +18,20 @@ import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
public class GenerateEntitiesApplication extends AbstractMigrationApplication {
|
public class GenerateEntitiesApplication extends AbstractMigrationApplication {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(GenerateEntitiesApplication.class);
|
private static final Logger log = LoggerFactory.getLogger(GenerateEntitiesApplication.class);
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import static org.mockito.Mockito.lenient;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
@ -15,12 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import java.io.IOException;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import java.util.List;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import static org.mockito.Mockito.lenient;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
class GenerateEntitiesApplicationTest {
|
class GenerateEntitiesApplicationTest {
|
||||||
|
|
|
@ -1137,7 +1137,8 @@ public class XmlRecordFactory implements Serializable {
|
||||||
XmlSerializationUtils
|
XmlSerializationUtils
|
||||||
.asXmlElement("dateofacceptance", i.getDateofacceptance().getValue()));
|
.asXmlElement("dateofacceptance", i.getDateofacceptance().getValue()));
|
||||||
}
|
}
|
||||||
if (i.getInstancetype() != null && StringUtils.isNotBlank(i.getInstancetype().getClassid())) {
|
if (i.getInstancetype() != null
|
||||||
|
&& StringUtils.isNotBlank(i.getInstancetype().getClassid())) {
|
||||||
instanceFields
|
instanceFields
|
||||||
.add(XmlSerializationUtils.mapQualifier("instancetype", i.getInstancetype()));
|
.add(XmlSerializationUtils.mapQualifier("instancetype", i.getInstancetype()));
|
||||||
}
|
}
|
||||||
|
@ -1178,7 +1179,8 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
|
if (re.getDatasourcetypeui() != null && StringUtils.isNotBlank(re.getDatasourcetypeui().getClassid())) {
|
||||||
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
|
||||||
}
|
}
|
||||||
if (re.getOpenairecompatibility() != null && StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
|
if (re.getOpenairecompatibility() != null
|
||||||
|
&& StringUtils.isNotBlank(re.getOpenairecompatibility().getClassid())) {
|
||||||
metadata
|
metadata
|
||||||
.add(
|
.add(
|
||||||
XmlSerializationUtils
|
XmlSerializationUtils
|
||||||
|
@ -1285,7 +1287,8 @@ public class XmlRecordFactory implements Serializable {
|
||||||
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
|
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
|
||||||
final List<String> fields = Lists.newArrayList();
|
final List<String> fields = Lists.newArrayList();
|
||||||
|
|
||||||
if (instance.getAccessright() != null && StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
|
if (instance.getAccessright() != null
|
||||||
|
&& StringUtils.isNotBlank(instance.getAccessright().getClassid())) {
|
||||||
fields
|
fields
|
||||||
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
|
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,11 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class XmlSerializationUtils {
|
public class XmlSerializationUtils {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-stats-hist-snaps</artifactId>
|
<artifactId>dhp-stats-hist-snaps</artifactId>
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-stats-monitor-irish</artifactId>
|
<artifactId>dhp-stats-monitor-irish</artifactId>
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-stats-monitor-update</artifactId>
|
<artifactId>dhp-stats-monitor-update</artifactId>
|
||||||
|
|
|
@ -31,6 +31,10 @@
|
||||||
<module>dhp-enrichment</module>
|
<module>dhp-enrichment</module>
|
||||||
<module>dhp-graph-provision</module>
|
<module>dhp-graph-provision</module>
|
||||||
<module>dhp-blacklist</module>
|
<module>dhp-blacklist</module>
|
||||||
|
<module>dhp-stats-actionsets</module>
|
||||||
|
<module>dhp-stats-hist-snaps</module>
|
||||||
|
<module>dhp-stats-monitor-irish</module>
|
||||||
|
<module>dhp-stats-monitor-update</module>
|
||||||
<module>dhp-stats-update</module>
|
<module>dhp-stats-update</module>
|
||||||
<module>dhp-stats-promote</module>
|
<module>dhp-stats-promote</module>
|
||||||
<module>dhp-usage-stats-build</module>
|
<module>dhp-usage-stats-build</module>
|
||||||
|
|
Loading…
Reference in New Issue