merged from master

This commit is contained in:
Claudio Atzori 2020-11-19 14:34:54 +01:00
commit 3f34757c63
52 changed files with 1644 additions and 1528 deletions

View File

@ -15,12 +15,12 @@
<snapshotRepository> <snapshotRepository>
<id>dnet45-snapshots</id> <id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name> <name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url> <url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout> <layout>default</layout>
</snapshotRepository> </snapshotRepository>
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
</repository> </repository>
</distributionManagement> </distributionManagement>

View File

@ -104,11 +104,6 @@
<artifactId>dnet-pace-core</artifactId> <artifactId>dnet-pace-core</artifactId>
</dependency> </dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@ -1,9 +1,7 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.oa.graph.clean;
import java.util.LinkedHashMap; import java.util.*;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -12,12 +10,19 @@ import org.apache.commons.lang3.StringUtils;
import com.clearspring.analytics.util.Lists; import com.clearspring.analytics.util.Lists;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
public class CleaningFunctions { public class CleaningFunctions {
public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/"; public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
public static final String NONE = "none";
public static final Set<String> PID_BLACKLIST = new HashSet<>();
static {
PID_BLACKLIST.add("none");
PID_BLACKLIST.add("na");
}
public static <T extends Oaf> T fixVocabularyNames(T value) { public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) { if (value instanceof Datasource) {
@ -71,7 +76,7 @@ public class CleaningFunctions {
return value; return value;
} }
public static <T extends Oaf> T fixDefaults(T value) { protected static <T extends Oaf> T fixDefaults(T value) {
if (value instanceof Datasource) { if (value instanceof Datasource) {
// nothing to clean here // nothing to clean here
} else if (value instanceof Project) { } else if (value instanceof Project) {
@ -114,7 +119,7 @@ public class CleaningFunctions {
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
.filter(sp -> NONE.equalsIgnoreCase(sp.getValue())) .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
.filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(CleaningFunctions::normalizePidValue) .map(CleaningFunctions::normalizePidValue)

View File

@ -3,6 +3,10 @@ package eu.dnetlib.dhp.schema.oaf;
public class ModelHardLimits { public class ModelHardLimits {
public static final String LAYOUT = "index";
public static final String INTERPRETATION = "openaire";
public static final String SEPARATOR = "-";
public static final int MAX_EXTERNAL_ENTITIES = 50; public static final int MAX_EXTERNAL_ENTITIES = 50;
public static final int MAX_AUTHORS = 200; public static final int MAX_AUTHORS = 200;
public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000; public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
@ -11,4 +15,8 @@ public class ModelHardLimits {
public static final int MAX_ABSTRACT_LENGTH = 150000; public static final int MAX_ABSTRACT_LENGTH = 150000;
public static final int MAX_INSTANCES = 10; public static final int MAX_INSTANCES = 10;
public static String getCollectionName(String format) {
return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
}
} }

View File

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import java.util.*; import java.util.*;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
@ -13,10 +12,43 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.LicenseComparator;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class OafMapperUtils { public class OafMapperUtils {
public static Oaf merge(final Oaf o1, final Oaf o2) {
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
if (ModelSupport.isSubClass(o1, Result.class)) {
return mergeResults((Result) o1, (Result) o2);
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
((Datasource) o1).mergeFrom((Datasource) o2);
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
((Organization) o1).mergeFrom((Organization) o2);
} else if (ModelSupport.isSubClass(o1, Project.class)) {
((Project) o1).mergeFrom((Project) o2);
} else {
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
}
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
((Relation) o1).mergeFrom((Relation) o2);
} else {
throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
}
return o1;
}
public static Result mergeResults(Result r1, Result r2) {
if (new ResultTypeComparator().compare(r1, r2) < 0) {
r1.mergeFrom(r2);
return r1;
} else {
r2.mergeFrom(r1);
return r2;
}
}
public static KeyValue keyValue(final String k, final String v) { public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue(); final KeyValue kv = new KeyValue();
kv.setKey(k); kv.setKey(k);

View File

@ -3,12 +3,10 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects; import java.util.Objects;
import java.util.Optional;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;

View File

@ -5,6 +5,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.util.List;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
@ -15,9 +16,15 @@ import org.apache.commons.codec.binary.Hex;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import net.minidev.json.JSONArray; import net.minidev.json.JSONArray;
import scala.collection.JavaConverters;
import scala.collection.Seq;
public class DHPUtils { public class DHPUtils {
public static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
}
public static String md5(final String s) { public static String md5(final String s) {
try { try {
final MessageDigest md = MessageDigest.getInstance("MD5"); final MessageDigest md = MessageDigest.getInstance("MD5");

View File

@ -1,4 +1,4 @@
<workflow-app name="create broker events - partial" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="update broker notifications" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property> <property>

View File

@ -6,7 +6,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -20,7 +22,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -68,12 +71,12 @@ public class CleanGraphSparkJob {
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath); cleanGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
}); });
} }
private static <T extends Oaf> void fixGraphTable( private static <T extends Oaf> void cleanGraphTable(
SparkSession spark, SparkSession spark,
VocabularyGroup vocs, VocabularyGroup vocs,
String inputPath, String inputPath,
@ -99,13 +102,15 @@ public class CleanGraphSparkJob {
return spark return spark
.read() .read()
.textFile(inputEntityPath) .textFile(inputEntityPath)
.filter((FilterFunction<String>) s -> isEntityType(s, clazz))
.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
.map( .map(
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz), (MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
Encoders.bean(clazz)); Encoders.bean(clazz));
} }
private static void removeOutputDir(SparkSession spark, String path) { private static <T extends Oaf> boolean isEntityType(final String s, final Class<T> clazz) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); return StringUtils.substringBefore(s, "|").equals(clazz.getName());
} }
} }

View File

@ -0,0 +1,206 @@
package eu.dnetlib.dhp.oa.graph.clean;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
* Groups the graph content by entity identifier to ensure ID uniqueness
*/
public class GroupEntitiesAndRelationsSparkJob {
private static final Logger log = LoggerFactory.getLogger(GroupEntitiesAndRelationsSparkJob.class);
private final static String ID_JPATH = "$.id";
private final static String SOURCE_JPATH = "$.source";
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
GroupEntitiesAndRelationsSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/group_graph_entities_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String graphInputPath = parser.get("graphInputPath");
log.info("graphInputPath: {}", graphInputPath);
String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
groupEntitiesAndRelations(spark, graphInputPath, outputPath);
});
}
private static void groupEntitiesAndRelations(
SparkSession spark,
String inputPath,
String outputPath) {
TypedColumn<Oaf, Oaf> aggregator = new GroupingAggregator().toColumn();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
spark
.read()
.textFile(toSeq(listPaths(inputPath, sc)))
.map((MapFunction<String, Oaf>) s -> parseOaf(s), Encoders.kryo(Oaf.class))
.filter((FilterFunction<Oaf>) oaf -> StringUtils.isNotBlank(ModelSupport.idFn().apply(oaf)))
.groupByKey((MapFunction<Oaf, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
.agg(aggregator)
.map(
(MapFunction<Tuple2<String, Oaf>, String>) t -> t._2().getClass().getName() +
"|" + OBJECT_MAPPER.writeValueAsString(t._2()),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.text(outputPath);
}
public static class GroupingAggregator extends Aggregator<Oaf, Oaf, Oaf> {
@Override
public Oaf zero() {
return null;
}
@Override
public Oaf reduce(Oaf b, Oaf a) {
return mergeAndGet(b, a);
}
private Oaf mergeAndGet(Oaf b, Oaf a) {
if (Objects.nonNull(a) && Objects.nonNull(b)) {
return OafMapperUtils.merge(b, a);
}
return Objects.isNull(a) ? b : a;
}
@Override
public Oaf merge(Oaf b, Oaf a) {
return mergeAndGet(b, a);
}
@Override
public Oaf finish(Oaf j) {
return j;
}
@Override
public Encoder<Oaf> bufferEncoder() {
return Encoders.kryo(Oaf.class);
}
@Override
public Encoder<Oaf> outputEncoder() {
return Encoders.kryo(Oaf.class);
}
}
private static Oaf parseOaf(String s) {
DocumentContext dc = JsonPath
.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
final String id = dc.read(ID_JPATH);
if (StringUtils.isNotBlank(id)) {
String prefix = StringUtils.substringBefore(id, "|");
switch (prefix) {
case "10":
return parse(s, Datasource.class);
case "20":
return parse(s, Organization.class);
case "40":
return parse(s, Project.class);
case "50":
String resultType = dc.read("$.resulttype.classid");
switch (resultType) {
case "publication":
return parse(s, Publication.class);
case "dataset":
return parse(s, eu.dnetlib.dhp.schema.oaf.Dataset.class);
case "software":
return parse(s, Software.class);
case "other":
return parse(s, OtherResearchProduct.class);
default:
throw new IllegalArgumentException(String.format("invalid resultType: '%s'", resultType));
}
default:
throw new IllegalArgumentException(String.format("invalid id prefix: '%s'", prefix));
}
} else {
String source = dc.read(SOURCE_JPATH);
if (StringUtils.isNotBlank(source)) {
return parse(s, Relation.class);
} else {
throw new IllegalArgumentException(String.format("invalid oaf: '%s'", s));
}
}
}
private static <T extends Oaf> Oaf parse(String s, Class<T> clazz) {
try {
return OBJECT_MAPPER.readValue(s, clazz);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private static List<String> listPaths(String inputPath, JavaSparkContext sc) {
return HdfsSupport
.listFiles(inputPath, sc.hadoopConfiguration())
.stream()
.collect(Collectors.toList());
}
}

View File

@ -33,9 +33,9 @@ import scala.Tuple2;
* are picked preferring those from the BETA aggregator rather then from PROD. The identity of a relationship is defined * are picked preferring those from the BETA aggregator rather then from PROD. The identity of a relationship is defined
* by eu.dnetlib.dhp.schema.common.ModelSupport#idFn() * by eu.dnetlib.dhp.schema.common.ModelSupport#idFn()
*/ */
public class MergeGraphSparkJob { public class MergeGraphTableSparkJob {
private static final Logger log = LoggerFactory.getLogger(CleanGraphSparkJob.class); private static final Logger log = LoggerFactory.getLogger(MergeGraphTableSparkJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

View File

@ -258,8 +258,8 @@ public abstract class AbstractMdRecordToOafMapper {
r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setPid(prepareResultPids(doc, info)); r.setPid(prepareResultPids(doc, info));
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection"));
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation|//dri:dateOfTransformation"));
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setOaiprovenance(prepareOAIprovenance(doc)); r.setOaiprovenance(prepareOAIprovenance(doc));
r.setAuthor(prepareAuthors(doc, info)); r.setAuthor(prepareAuthors(doc, info));

View File

@ -4,9 +4,11 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -18,7 +20,6 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -68,7 +69,7 @@ public class GenerateEntitiesApplication {
final SparkConf conf = new SparkConf(); final SparkConf conf = new SparkConf();
runWithSparkSession(conf, isSparkSessionManaged, spark -> { runWithSparkSession(conf, isSparkSessionManaged, spark -> {
removeOutputDir(spark, targetPath); HdfsSupport.remove(targetPath, spark.sparkContext().hadoopConfiguration());
generateEntities(spark, vocs, sourcePaths, targetPath); generateEntities(spark, vocs, sourcePaths, targetPath);
}); });
} }
@ -82,7 +83,7 @@ public class GenerateEntitiesApplication {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
final List<String> existingSourcePaths = Arrays final List<String> existingSourcePaths = Arrays
.stream(sourcePaths.split(",")) .stream(sourcePaths.split(","))
.filter(p -> exists(sc, p)) .filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration()))
.collect(Collectors.toList()); .collect(Collectors.toList());
log.info("Generate entities from files:"); log.info("Generate entities from files:");
@ -103,7 +104,7 @@ public class GenerateEntitiesApplication {
inputRdd inputRdd
.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
.reduceByKey((o1, o2) -> merge(o1, o2)) .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2))
.map(Tuple2::_2) .map(Tuple2::_2)
.map( .map(
oaf -> oaf.getClass().getSimpleName().toLowerCase() oaf -> oaf.getClass().getSimpleName().toLowerCase()
@ -112,38 +113,6 @@ public class GenerateEntitiesApplication {
.saveAsTextFile(targetPath, GzipCodec.class); .saveAsTextFile(targetPath, GzipCodec.class);
} }
private static Oaf merge(final Oaf o1, final Oaf o2) {
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
if (ModelSupport.isSubClass(o1, Result.class)) {
return mergeResults((Result) o1, (Result) o2);
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
((Datasource) o1).mergeFrom((Datasource) o2);
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
((Organization) o1).mergeFrom((Organization) o2);
} else if (ModelSupport.isSubClass(o1, Project.class)) {
((Project) o1).mergeFrom((Project) o2);
} else {
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
}
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
((Relation) o1).mergeFrom((Relation) o2);
} else {
throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
}
return o1;
}
protected static Result mergeResults(Result r1, Result r2) {
if (new ResultTypeComparator().compare(r1, r2) < 0) {
r1.mergeFrom(r2);
return r1;
} else {
r2.mergeFrom(r1);
return r2;
}
}
private static List<Oaf> convertToListOaf( private static List<Oaf> convertToListOaf(
final String id, final String id,
final String s, final String s,
@ -192,17 +161,4 @@ public class GenerateEntitiesApplication {
} }
} }
private static boolean exists(final JavaSparkContext context, final String pathToFile) {
try {
final FileSystem hdfs = FileSystem.get(context.hadoopConfiguration());
final Path path = new Path(pathToFile);
return hdfs.exists(path);
} catch (final IOException e) {
throw new RuntimeException(e);
}
}
private static void removeOutputDir(final SparkSession spark, final String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
} }

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
@ -9,25 +10,20 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
@ -442,26 +438,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try { try {
final String targetType = rs.getString(TARGET_TYPE);
if (rs.getString(SOURCE_TYPE).equals("context")) { if (rs.getString(SOURCE_TYPE).equals("context")) {
final Result r; final Result r;
switch (targetType) { if (rs.getString(TARGET_TYPE).equals("dataset")) {
case "dataset":
r = new Dataset(); r = new Dataset();
break; r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
case "software": } else if (rs.getString(TARGET_TYPE).equals("software")) {
r = new Software(); r = new Software();
break; r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
case "other": } else if (rs.getString(TARGET_TYPE).equals("other")) {
r = new OtherResearchProduct(); r = new OtherResearchProduct();
break; r.setResulttype(ORP_DEFAULT_RESULTTYPE);
case "publication": } else {
default:
r = new Publication(); r = new Publication();
break; r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
} }
r.setId(createOpenaireId(50, rs.getString("target_id"), false)); r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info)); r.setContext(prepareContext(rs.getString("source_id"), info));
@ -471,7 +463,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(r); return Arrays.asList(r);
} else { } else {
final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false); final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false); final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);
final Relation r1 = new Relation(); final Relation r1 = new Relation();
final Relation r2 = new Relation(); final Relation r2 = new Relation();
@ -527,9 +519,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference"); final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance"); final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred"); final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
final double trust = rs.getDouble("trust");
return dataInfo( return dataInfo(
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION,
String.format("%.3f", trust));
} }
private Qualifier prepareQualifierSplitting(final String s) { private Qualifier prepareQualifierSplitting(final String s) {

View File

@ -2,9 +2,7 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -18,9 +16,9 @@ import org.dom4j.Node;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
public class OafToOafMapper extends AbstractMdRecordToOafMapper { public class OafToOafMapper extends AbstractMdRecordToOafMapper {

View File

@ -2,15 +2,9 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -18,9 +12,9 @@ import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {

View File

@ -50,12 +50,36 @@
</property> </property>
</parameters> </parameters>
<start to="fork_clean_graph"/> <start to="group_entities"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<action name="group_entities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>group graph entities and relations</name>
<class>eu.dnetlib.dhp.oa.graph.clean.GroupEntitiesAndRelationsSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--graphInputPath</arg><arg>${graphInputPath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/grouped_entities</arg>
</spark>
<ok to="fork_clean_graph"/>
<error to="Kill"/>
</action>
<fork name="fork_clean_graph"> <fork name="fork_clean_graph">
<path start="clean_publication"/> <path start="clean_publication"/>
<path start="clean_dataset"/> <path start="clean_dataset"/>
@ -84,7 +108,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/publication</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -110,7 +134,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/dataset</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -136,7 +160,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/otherresearchproduct</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -162,7 +186,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/software</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -188,7 +212,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/datasource</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -214,7 +238,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/organization</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -240,7 +264,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/project</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@ -266,7 +290,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/relation</arg> <arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>

View File

@ -0,0 +1,20 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "gin",
"paramLongName": "graphInputPath",
"paramDescription": "the graph root path",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the output merged graph root path",
"paramRequired": true
}
]

View File

@ -2,11 +2,11 @@
<parameters> <parameters>
<property> <property>
<name>betaInputGgraphPath</name> <name>betaInputGraphPath</name>
<description>the beta graph root path</description> <description>the beta graph root path</description>
</property> </property>
<property> <property>
<name>prodInputGgraphPath</name> <name>prodInputGraphPath</name>
<description>the production graph root path</description> <description>the production graph root path</description>
</property> </property>
<property> <property>
@ -76,7 +76,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge publications</name> <name>Merge publications</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -88,8 +88,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/publication</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/publication</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/publication</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/publication</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -103,7 +103,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge datasets</name> <name>Merge datasets</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -115,8 +115,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/dataset</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/dataset</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/dataset</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/dataset</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -130,7 +130,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge otherresearchproducts</name> <name>Merge otherresearchproducts</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -142,8 +142,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/otherresearchproduct</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/otherresearchproduct</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/otherresearchproduct</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -157,7 +157,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge softwares</name> <name>Merge softwares</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -169,8 +169,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/software</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/software</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/software</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/software</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -184,7 +184,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge datasources</name> <name>Merge datasources</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -196,8 +196,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/datasource</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/datasource</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/datasource</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/datasource</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -211,7 +211,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge organizations</name> <name>Merge organizations</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -223,8 +223,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/organization</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/organization</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/organization</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/organization</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -238,7 +238,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge projects</name> <name>Merge projects</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -250,8 +250,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/project</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/project</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/project</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/project</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>
@ -265,7 +265,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Merge relations</name> <name>Merge relations</name>
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class> <class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -277,8 +277,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/relation</arg> <arg>--betaInputPath</arg><arg>${betaInputGraphPath}/relation</arg>
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/relation</arg> <arg>--prodInputPath</arg><arg>${prodInputGraphPath}/relation</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg> <arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
<arg>--priority</arg><arg>${priority}</arg> <arg>--priority</arg><arg>${priority}</arg>

View File

@ -19,7 +19,10 @@ import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

View File

@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
public class MergeGraphSparkJobTest { public class MergeGraphTableSparkJobTest {
private ObjectMapper mapper; private ObjectMapper mapper;
@ -28,7 +28,7 @@ public class MergeGraphSparkJobTest {
public void testMergeDatasources() throws IOException { public void testMergeDatasources() throws IOException {
assertEquals( assertEquals(
"openaire-cris_1.1", "openaire-cris_1.1",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_cris.json"), d("datasource_cris.json"),
d("datasource_UNKNOWN.json")) d("datasource_UNKNOWN.json"))
@ -36,7 +36,7 @@ public class MergeGraphSparkJobTest {
.getClassid()); .getClassid());
assertEquals( assertEquals(
"openaire-cris_1.1", "openaire-cris_1.1",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_UNKNOWN.json"), d("datasource_UNKNOWN.json"),
d("datasource_cris.json")) d("datasource_cris.json"))
@ -44,7 +44,7 @@ public class MergeGraphSparkJobTest {
.getClassid()); .getClassid());
assertEquals( assertEquals(
"driver-openaire2.0", "driver-openaire2.0",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_native.json"), d("datasource_native.json"),
d("datasource_driver-openaire2.0.json")) d("datasource_driver-openaire2.0.json"))
@ -52,7 +52,7 @@ public class MergeGraphSparkJobTest {
.getClassid()); .getClassid());
assertEquals( assertEquals(
"driver-openaire2.0", "driver-openaire2.0",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_driver-openaire2.0.json"), d("datasource_driver-openaire2.0.json"),
d("datasource_native.json")) d("datasource_native.json"))
@ -60,7 +60,7 @@ public class MergeGraphSparkJobTest {
.getClassid()); .getClassid());
assertEquals( assertEquals(
"openaire4.0", "openaire4.0",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_notCompatible.json"), d("datasource_notCompatible.json"),
d("datasource_openaire4.0.json")) d("datasource_openaire4.0.json"))
@ -68,7 +68,7 @@ public class MergeGraphSparkJobTest {
.getClassid()); .getClassid());
assertEquals( assertEquals(
"notCompatible", "notCompatible",
MergeGraphSparkJob MergeGraphTableSparkJob
.mergeDatasource( .mergeDatasource(
d("datasource_notCompatible.json"), d("datasource_notCompatible.json"),
d("datasource_UNKNOWN.json")) d("datasource_UNKNOWN.json"))

View File

@ -70,7 +70,7 @@ public class GenerateEntitiesApplicationTest {
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz, protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
String resultType) { String resultType) {
final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset); final Result merge = OafMapperUtils.mergeResults(publication, dataset);
assertTrue(clazz.isAssignableFrom(merge.getClass())); assertTrue(clazz.isAssignableFrom(merge.getClass()));
assertEquals(resultType, merge.getResulttype().getClassid()); assertEquals(resultType, merge.getResulttype().getClassid());
} }

View File

@ -72,6 +72,8 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertFalse(p.getDataInfo().getInvisible()); assertFalse(p.getDataInfo().getInvisible());
assertTrue(p.getSource().size() == 1); assertTrue(p.getSource().size() == 1);
assertTrue(StringUtils.isNotBlank(p.getDateofcollection()));
assertTrue(StringUtils.isNotBlank(p.getDateoftransformation()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
final Optional<Author> author = p final Optional<Author> author = p
@ -317,7 +319,7 @@ public class MappersTest {
@Test @Test
void testODFRecord() throws IOException { void testODFRecord() throws IOException {
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml); final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
System.out.println("***************"); System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************"); System.out.println("***************");
@ -328,6 +330,22 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
} }
@Test
void testTextGrid() throws IOException {
final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml"));
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
final Dataset p = (Dataset) list.get(0);
assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
System.out.println(p.getTitle().get(0).getValue());
}
private void assertValidId(final String id) { private void assertValidId(final String id) {
assertEquals(49, id.length()); assertEquals(49, id.length());
assertEquals('|', id.charAt(2)); assertEquals('|', id.charAt(2));

View File

@ -28,13 +28,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class MigrateDbEntitiesApplicationTest { public class MigrateDbEntitiesApplicationTest {

View File

@ -31,8 +31,8 @@
}, },
{ {
"field": "trust", "field": "trust",
"type": "string", "type": "double",
"value": "0.9" "value": 0.9
}, },
{ {
"field": "inferenceprovenance", "field": "inferenceprovenance",

View File

@ -114,8 +114,8 @@
}, },
{ {
"field": "trust", "field": "trust",
"type": "string", "type": "double",
"value": "0.9" "value": 0.9
}, },
{ {
"field": "inferenceprovenance", "field": "inferenceprovenance",

View File

@ -7,13 +7,12 @@
<header xmlns="http://namespace.openaire.eu/"> <header xmlns="http://namespace.openaire.eu/">
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier> <dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier> <dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/> <dri:mdFormat/>
<dri:mdFormatInterpretation/> <dri:mdFormatInterpretation/>
<dri:repositoryId/> <dri:repositoryId/>
<dr:objectIdentifier/> <dr:objectIdentifier/>
<dr:dateOfCollection>2020-03-23T00:20:51.392Z</dr:dateOfCollection> <dri:dateOfCollection>2020-03-23T00:20:51.392Z</dri:dateOfCollection>
<dr:dateOfTransformation>2020-03-23T00:26:59.078Z</dr:dateOfTransformation> <dri:dateOfTransformation>2020-03-23T00:26:59.078Z</dri:dateOfTransformation>
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix> <oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
</header> </header>
<metadata xmlns="http://namespace.openaire.eu/"> <metadata xmlns="http://namespace.openaire.eu/">

View File

@ -96,8 +96,8 @@
}, },
{ {
"field": "trust", "field": "trust",
"type": "string", "type": "double",
"value": "0.9" "value": 0.9
}, },
{ {
"field": "inferenceprovenance", "field": "inferenceprovenance",

View File

@ -41,8 +41,8 @@
}, },
{ {
"field": "trust", "field": "trust",
"type": "string", "type": "double",
"value": "0.9" "value": 0.9
}, },
{ {
"field": "inferenceprovenance", "field": "inferenceprovenance",

View File

@ -86,8 +86,8 @@
}, },
{ {
"field": "trust", "field": "trust",
"type": "string", "type": "double",
"value": "0.9" "value": 0.9
}, },
{ {
"field": "inferenceprovenance", "field": "inferenceprovenance",

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<dri:objIdentifier>r3f52792889d::000051aa1f61d77d2c0b340091f8024e</dri:objIdentifier>
<dri:recordIdentifier>textgrid:q9cv.0</dri:recordIdentifier>
<dri:dateOfCollection>2020-11-17T09:34:11.128+01:00</dri:dateOfCollection>
<oaf:datasourceprefix>r3f52792889d</oaf:datasourceprefix>
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:q9cv.0</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2012-01-21T13:35:20Z</datestamp>
<dr:dateOfTransformation>2020-11-17T09:46:21.551+01:00</dr:dateOfTransformation>
</oai:header>
<metadata>
<datacite:resource xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-3"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<datacite:identifier identifierType="Handle">hdl:11858/00-1734-0000-0003-7664-F</datacite:identifier>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>Hoffmann von Fallersleben, August Heinrich</datacite:creatorName>
<datacite:nameIdentifier nameIdentifierScheme="pnd" schemeURI="https://de.dariah.eu/pnd-service">118552589</datacite:nameIdentifier>
</datacite:creator>
</datacite:creators>
<datacite:titles>
<datacite:title titleType="Other">Mailied</datacite:title>
<datacite:title titleType="Other">August Heinrich Hoffmann von Fallersleben: Unpolitische Lieder von Hoffmann von Fallersleben, 1. + 2. Theil, 1. Theil, Hamburg: Hoffmann und Campe, 1841.</datacite:title>
</datacite:titles>
<datacite:publisher>TextGrid</datacite:publisher>
<datacite:publicationYear>2012</datacite:publicationYear>
<datacite:contributors>
<datacite:contributor contributorType="DataManager">
<datacite:contributorName>tvitt@textgrid.de</datacite:contributorName>
</datacite:contributor>
<datacite:contributor contributorType="Other">
<datacite:contributorName>Digitale Bibliothek</datacite:contributorName>
<datacite:nameIdentifier nameIdentifierScheme="textgrid">TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c</datacite:nameIdentifier>
</datacite:contributor>
</datacite:contributors>
<datacite:dates>
<datacite:date dateType="Created">2012-01-21T13:35:20Z</datacite:date>
<datacite:date dateType="Issued">2012-01-21T13:35:20Z</datacite:date>
<datacite:date dateType="Updated">2012-01-21T13:35:20Z</datacite:date>
</datacite:dates>
<datacite:resourceType resourceTypeGeneral="Dataset"/>
<alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URI">textgrid:q9cv.0</datacite:alternateIdentifier>
<alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/hdl:11858/00-1734-0000-0003-7664-F</alternateIdentifier>
</alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="Handle" relationType="IsPartOf">hdl:11858/00-1734-0000-0003-7666-B</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:sizes>
<datacite:size>527 Bytes</datacite:size>
</datacite:sizes>
<datacite:formats>
<datacite:format>text/tg.edition+tg.aggregation+xml</datacite:format>
</datacite:formats>
<datacite:version>0</datacite:version>
<datacite:rightsList>
<datacite:rights rightsURI="http://creativecommons.org/licenses/by/3.0/de/legalcode"> Der annotierte Datenbestand der Digitalen Bibliothek inklusive
Metadaten sowie davon einzeln zugängliche Teile sind eine Abwandlung
des Datenbestandes von www.editura.de durch TextGrid und werden
unter der Lizenz Creative Commons Namensnennung 3.0 Deutschland
Lizenz (by-Nennung TextGrid) veröffentlicht. Die Lizenz bezieht sich
nicht auf die der Annotation zu Grunde liegenden allgemeinfreien
Texte (Siehe auch Punkt 2 der Lizenzbestimmungen).</datacite:rights>
<datacite:rights rightsURI="info:eu-repo/semantics/openAccess"/>
</datacite:rightsList>
<datacite:descriptions>
<datacite:description descriptionType="Abstract"/>
</datacite:descriptions>
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace
xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">Hamburg</datacite:geoLocationPlace>
</datacite:geoLocation>
</datacite:geoLocations>
</datacite:resource>
<oaf:identifier identifierType="handle">hdl:11858/00-1734-0000-0003-7664-F</oaf:identifier>
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
<oaf:refereed>0002</oaf:refereed>
<oaf:dateAccepted>2012-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>http://creativecommons.org/licenses/by/3.0/de/legalcode</oaf:license>
<oaf:language>und</oaf:language>
<oaf:hostedBy id="re3data_____::r3d100011365" name="TextGrid Repository"/>
<oaf:collectedFrom id="re3data_____::r3d100011365" name="TextGrid Repository"/>
</metadata>
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2020-11-17T09:34:11.128+01:00">
<baseURL>https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai</baseURL>
<identifier>textgrid:q9cv.0</identifier>
<datestamp>2012-01-21T13:35:20Z</datestamp>
<metadataNamespace>http://schema.datacite.org/oai/oai-1.0/</metadataNamespace>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk"
classname="sysimport:crosswalk"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>

View File

@ -22,6 +22,12 @@
<dependency> <dependency>
<groupId>com.jayway.jsonpath</groupId> <groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId> <artifactId>json-path</artifactId>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency> </dependency>
<dependency> <dependency>
<groupId>dom4j</groupId> <groupId>dom4j</groupId>
@ -82,9 +88,6 @@
<groupId>org.codehaus.woodstox</groupId> <groupId>org.codehaus.woodstox</groupId>
<artifactId>*</artifactId> <artifactId>*</artifactId>
</exclusion> </exclusion>
<exclusion> <exclusion>
<groupId>com.github.ben-manes.caffeine</groupId> <groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>*</artifactId> <artifactId>*</artifactId>
@ -109,11 +112,10 @@
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>*</artifactId> <artifactId>*</artifactId>
</exclusion> </exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.oa.provision;
public class ProvisionConstants {
public static final String LAYOUT = "index";
public static final String INTERPRETATION = "openaire";
public static final String SEPARATOR = "-";
public static String getCollectionName(String format) {
return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
}
}

View File

@ -14,11 +14,12 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.oa.provision.utils.ZkServers;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class SolrAdminApplication extends SolrApplication implements Closeable { public class SolrAdminApplication implements Closeable {
private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class); private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class);
@ -54,12 +55,12 @@ public class SolrAdminApplication extends SolrApplication implements Closeable {
.orElse(false); .orElse(false);
log.info("commit: {}", commit); log.info("commit: {}", commit);
final ISLookUpService isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
final String zkHost = getZkHost(isLookup); final String zkHost = isLookup.getZkHost();
log.info("zkHost: {}", zkHost); log.info("zkHost: {}", zkHost);
final String collection = format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION; final String collection = ProvisionConstants.getCollectionName(format);
log.info("collection: {}", collection); log.info("collection: {}", collection);
try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) { try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {

View File

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.oa.provision;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public abstract class SolrApplication {
private static final Logger log = LoggerFactory.getLogger(SolrApplication.class);
protected static final String LAYOUT = "index";
protected static final String INTERPRETATION = "openaire";
protected static final String SEPARATOR = "-";
protected static final String DATE_FORMAT = "yyyy-MM-dd'T'hh:mm:ss'Z'";
/**
* Method retrieves from the information system the zookeeper quorum of the Solr server
*
* @param isLookup
* @return the zookeeper quorum of the Solr server
* @throws ISLookUpException
*/
protected static String getZkHost(ISLookUpService isLookup) throws ISLookUpException {
return doLookup(
isLookup,
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()");
}
protected static String doLookup(ISLookUpService isLookup, String xquery) throws ISLookUpException {
log.info(String.format("running xquery: %s", xquery));
final String res = isLookup.getResourceProfileByQuery(xquery);
log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ..."));
return res;
}
}

View File

@ -2,12 +2,11 @@
package eu.dnetlib.dhp.oa.provision; package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -28,13 +27,11 @@ import com.google.common.collect.Maps;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.provision.model.*; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2; import scala.Tuple2;
import scala.collection.JavaConverters;
import scala.collection.Seq;
/** /**
* XmlConverterJob converts the JoinedEntities as XML records * XmlConverterJob converts the JoinedEntities as XML records
@ -43,8 +40,6 @@ public class XmlConverterJob {
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class); private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -129,10 +124,6 @@ public class XmlConverterJob {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
} }
private static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
}
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) { private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
Map<String, LongAccumulator> accumulators = Maps.newHashMap(); Map<String, LongAccumulator> accumulators = Maps.newHashMap();
accumulators accumulators

View File

@ -20,27 +20,42 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.lucidworks.spark.util.SolrSupport; import com.lucidworks.spark.util.SolrSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class XmlIndexingJob extends SolrApplication { public class XmlIndexingJob {
private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class); private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);
private static final Integer DEFAULT_BATCH_SIZE = 1000; private static final Integer DEFAULT_BATCH_SIZE = 1000;
protected static final String DATE_FORMAT = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private String inputPath;
private String format;
private int batchSize;
private String outputPath;
private SparkSession spark;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -60,27 +75,53 @@ public class XmlIndexingJob extends SolrApplication {
final String inputPath = parser.get("inputPath"); final String inputPath = parser.get("inputPath");
log.info("inputPath: {}", inputPath); log.info("inputPath: {}", inputPath);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final String format = parser.get("format"); final String format = parser.get("format");
log.info("format: {}", format); log.info("format: {}", format);
final String outputPath = Optional
.ofNullable(parser.get("outputPath"))
.orElse(null);
log.info("outputPath: {}", outputPath);
final Integer batchSize = parser.getObjectMap().containsKey("batchSize") final Integer batchSize = parser.getObjectMap().containsKey("batchSize")
? Integer.valueOf(parser.get("batchSize")) ? Integer.valueOf(parser.get("batchSize"))
: DEFAULT_BATCH_SIZE; : DEFAULT_BATCH_SIZE;
log.info("batchSize: {}", batchSize); log.info("batchSize: {}", batchSize);
final ISLookUpService isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); final SparkConf conf = new SparkConf();
final String fields = getLayoutSource(isLookup, format); conf.registerKryoClasses(new Class[] {
SerializableSolrInputDocument.class
});
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
new XmlIndexingJob(spark, inputPath, format, batchSize, outputPath).run(isLookup);
});
}
public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize, String outputPath) {
this.spark = spark;
this.inputPath = inputPath;
this.format = format;
this.batchSize = batchSize;
this.outputPath = outputPath;
}
public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
final String fields = isLookup.getLayoutSource(format);
log.info("fields: {}", fields); log.info("fields: {}", fields);
final String xslt = getLayoutTransformer(isLookup); final String xslt = isLookup.getLayoutTransformer();
final String dsId = getDsId(format, isLookup); final String dsId = isLookup.getDsId(format);
log.info("dsId: {}", dsId); log.info("dsId: {}", dsId);
final String zkHost = getZkHost(isLookup); final String zkHost = isLookup.getZkHost();
log.info("zkHost: {}", zkHost); log.info("zkHost: {}", zkHost);
final String version = getRecordDatestamp(); final String version = getRecordDatestamp();
@ -88,24 +129,26 @@ public class XmlIndexingJob extends SolrApplication {
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt); final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
log.info("indexRecordTransformer {}", indexRecordXslt); log.info("indexRecordTransformer {}", indexRecordXslt);
final SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
RDD<SolrInputDocument> docs = sc JavaRDD<SolrInputDocument> docs = sc
.sequenceFile(inputPath, Text.class, Text.class) .sequenceFile(inputPath, Text.class, Text.class)
.map(t -> t._2().toString()) .map(t -> t._2().toString())
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s)) .map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
.map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s)) .map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s));
.rdd();
final String collection = format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION; if (StringUtils.isNotBlank(outputPath)) {
SolrSupport.indexDocs(zkHost, collection, batchSize, docs); spark
}); .createDataset(
docs.map(s -> new SerializableSolrInputDocument(s)).rdd(),
Encoders.kryo(SerializableSolrInputDocument.class))
.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath);
} else {
final String collection = ProvisionConstants.getCollectionName(format);
SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
}
} }
protected static String toIndexRecord(Transformer tr, final String record) { protected static String toIndexRecord(Transformer tr, final String record) {
@ -151,56 +194,4 @@ public class XmlIndexingJob extends SolrApplication {
return new SimpleDateFormat(DATE_FORMAT).format(new Date()); return new SimpleDateFormat(DATE_FORMAT).format(new Date());
} }
/**
* Method retrieves from the information system the list of fields associated to the given MDFormat name
*
* @param isLookup the ISLookup service stub
* @param format the Metadata format name
* @return the string representation of the list of fields to be indexed
* @throws ISLookUpDocumentNotFoundException
* @throws ISLookUpException
*/
private static String getLayoutSource(final ISLookUpService isLookup, final String format)
throws ISLookUpDocumentNotFoundException, ISLookUpException {
return doLookup(
isLookup,
String
.format(
"collection('')//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'MDFormatDSResourceType' and .//NAME='%s']//LAYOUT[@name='%s']",
format, LAYOUT));
}
/**
* Method retrieves from the information system the openaireLayoutToRecordStylesheet
*
* @param isLookup the ISLookup service stub
* @return the string representation of the XSLT contained in the transformation rule profile
* @throws ISLookUpDocumentNotFoundException
* @throws ISLookUpException
*/
private static String getLayoutTransformer(ISLookUpService isLookup) throws ISLookUpException {
return doLookup(
isLookup,
"collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType')"
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/SCRIPT/TITLE/text() = 'openaireLayoutToRecordStylesheet']//CODE/node()");
}
/**
* Method retrieves from the information system the IndexDS profile ID associated to the given MDFormat name
*
* @param format
* @param isLookup
* @return the IndexDS identifier
* @throws ISLookUpException
*/
private static String getDsId(String format, ISLookUpService isLookup) throws ISLookUpException {
return doLookup(
isLookup,
String
.format(
"collection('/db/DRIVER/IndexDSResources/IndexDSResourceType')"
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/METADATA_FORMAT/text() = '%s']//RESOURCE_IDENTIFIER/@value/string()",
format));
}
} }

View File

@ -0,0 +1,23 @@
package eu.dnetlib.dhp.oa.provision.model;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
/**
* Wrapper class needed to make the SolrInputDocument compatible with the Kryo serialization mechanism.
*/
public class SerializableSolrInputDocument extends SolrInputDocument {
public SerializableSolrInputDocument() {
super(new HashMap<>());
}
public SerializableSolrInputDocument(Map<String, SolrInputField> fields) {
super(fields);
}
}

View File

@ -0,0 +1,95 @@
package eu.dnetlib.dhp.oa.provision.utils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.oa.provision.ProvisionConstants;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class ISLookupClient {
private static final Logger log = LoggerFactory.getLogger(ISLookupClient.class);
private ISLookUpService isLookup;
public ISLookupClient(ISLookUpService isLookup) {
this.isLookup = isLookup;
}
/**
* Method retrieves from the information system the list of fields associated to the given MDFormat name
*
* @param format the Metadata format name
* @return the string representation of the list of fields to be indexed
* @throws ISLookUpDocumentNotFoundException
* @throws ISLookUpException
*/
public String getLayoutSource(final String format)
throws ISLookUpDocumentNotFoundException, ISLookUpException {
return doLookup(
String
.format(
"collection('')//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'MDFormatDSResourceType' and .//NAME='%s']//LAYOUT[@name='%s']",
format, ProvisionConstants.LAYOUT));
}
/**
* Method retrieves from the information system the openaireLayoutToRecordStylesheet
*
* @return the string representation of the XSLT contained in the transformation rule profile
* @throws ISLookUpDocumentNotFoundException
* @throws ISLookUpException
*/
public String getLayoutTransformer() throws ISLookUpException {
return doLookup(
"collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType')"
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/SCRIPT/TITLE/text() = 'openaireLayoutToRecordStylesheet']//CODE/node()");
}
/**
* Method retrieves from the information system the IndexDS profile ID associated to the given MDFormat name
*
* @param format
* @return the IndexDS identifier
* @throws ISLookUpException
*/
public String getDsId(String format) throws ISLookUpException {
return doLookup(
String
.format(
"collection('/db/DRIVER/IndexDSResources/IndexDSResourceType')"
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/METADATA_FORMAT/text() = '%s']//RESOURCE_IDENTIFIER/@value/string()",
format));
}
/**
* Method retrieves from the information system the zookeeper quorum of the Solr server
*
* @return the zookeeper quorum of the Solr server
* @throws ISLookUpException
*/
public String getZkHost() throws ISLookUpException {
return doLookup(
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()");
}
private String doLookup(String xquery) throws ISLookUpException {
log.info(String.format("running xquery: %s", xquery));
final String res = getIsLookup().getResourceProfileByQuery(xquery);
log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ..."));
return res;
}
public ISLookUpService getIsLookup() {
return isLookup;
}
public void setIsLookup(ISLookUpService isLookup) {
this.isLookup = isLookup;
}
}

View File

@ -46,11 +46,6 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier"; private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final List<String> dateFormats = Arrays
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
private static final String DEFAULTDNETRESULT = "dnetResult"; private static final String DEFAULTDNETRESULT = "dnetResult";
private static final String TARGETFIELDS = "targetFields"; private static final String TARGETFIELDS = "targetFields";
@ -125,13 +120,12 @@ public class StreamingInputDocumentFactory {
} }
if (!indexDocument.containsKey(INDEX_RECORD_ID)) { if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
indexDocument.clear(); throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
System.err.println("missing indexrecord id:\n" + inputDocument);
} }
return indexDocument; return indexDocument;
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
return new SolrInputDocument(); throw new IllegalStateException(e);
} }
} }

View File

@ -22,5 +22,11 @@
"paramLongName": "batchSize", "paramLongName": "batchSize",
"paramDescription": "size of the batch of documents sent to solr", "paramDescription": "size of the batch of documents sent to solr",
"paramRequired": false "paramRequired": false
},
{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "path on hdfs activating an alternative output for the SolrInputDocuments",
"paramRequired": false
} }
] ]

View File

@ -638,6 +638,7 @@
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--format</arg><arg>${format}</arg> <arg>--format</arg><arg>${format}</arg>
<arg>--batchSize</arg><arg>${batchSize}</arg> <arg>--batchSize</arg><arg>${batchSize}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark> </spark>
<ok to="commit_solr_collection"/> <ok to="commit_solr_collection"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -1,107 +1,18 @@
package eu.dnetlib.dhp.oa.provision; package eu.dnetlib.dhp.oa.provision;
import java.io.File;
import java.nio.file.Path;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.ConfigSetAdminResponse;
import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.client.solrj.response.UpdateResponse; import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import junit.framework.Assert; import junit.framework.Assert;
public class SolrAdminApplicationTest { public class SolrAdminApplicationTest extends SolrTest {
private static final Logger log = LoggerFactory.getLogger(SolrAdminApplicationTest.class);
public static final String DEFAULT_COLLECTION = "testCollection";
public static final String CONFIG_NAME = "testConfig";
private static MiniSolrCloudCluster miniCluster;
private static CloudSolrClient cloudSolrClient;
@TempDir
public static Path tempDir;
@BeforeAll
public static void setup() throws Exception {
// random unassigned HTTP port
final int jettyPort = 0;
final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
// create a MiniSolrCloudCluster instance
miniCluster = new MiniSolrCloudCluster(2, tempDir, jettyConfig);
// Upload Solr configuration directory to ZooKeeper
String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig";
File configDir = new File(solrZKConfigDir);
miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
// override settings in the solrconfig include
System.setProperty("solr.tests.maxBufferedDocs", "100000");
System.setProperty("solr.tests.maxIndexingThreads", "-1");
System.setProperty("solr.tests.ramBufferSizeMB", "100");
// use non-test classes so RandomizedRunner isn't necessary
System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
cloudSolrClient = miniCluster.getSolrClient();
cloudSolrClient.setRequestWriter(new RequestWriter());
cloudSolrClient.setParser(new XMLResponseParser());
cloudSolrClient.setDefaultCollection(DEFAULT_COLLECTION);
cloudSolrClient.connect();
log.info(new ConfigSetAdminRequest.List().process(cloudSolrClient).toString());
log.info(CollectionAdminRequest.ClusterStatus.getClusterStatus().process(cloudSolrClient).toString());
createCollection(cloudSolrClient, DEFAULT_COLLECTION, 2, 1, CONFIG_NAME);
}
@AfterAll
public static void shutDown() throws Exception {
miniCluster.shutdown();
}
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
int replicationFactor, String configName) throws Exception {
ModifiableSolrParams modParams = new ModifiableSolrParams();
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
modParams.set("name", name);
modParams.set("numShards", numShards);
modParams.set("replicationFactor", replicationFactor);
modParams.set("collection.configName", configName);
QueryRequest request = new QueryRequest(modParams);
request.setPath("/admin/collections");
return client.request(request);
}
@Test @Test
public void testPing() throws Exception { public void testPing() throws Exception {
SolrPingResponse pingResponse = cloudSolrClient.ping(); SolrPingResponse pingResponse = miniCluster.getSolrClient().ping();
log.info("pingResponse: '{}'", pingResponse.getStatus()); log.info("pingResponse: '{}'", pingResponse.getStatus());
Assert.assertTrue(pingResponse.getStatus() == 0); Assert.assertTrue(pingResponse.getStatus() == 0);
} }

View File

@ -0,0 +1,109 @@
package eu.dnetlib.dhp.oa.provision;
import java.io.File;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class SolrTest {
protected static final Logger log = LoggerFactory.getLogger(SolrTest.class);
protected static final String FORMAT = "test";
protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire";
protected static final String CONFIG_NAME = "testConfig";
protected static MiniSolrCloudCluster miniCluster;
@TempDir
public static Path workingDir;
@BeforeAll
public static void setup() throws Exception {
// random unassigned HTTP port
final int jettyPort = 0;
final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
log.info(String.format("working directory: %s", workingDir.toString()));
System.setProperty("solr.log.dir", workingDir.resolve("logs").toString());
// create a MiniSolrCloudCluster instance
miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig);
// Upload Solr configuration directory to ZooKeeper
String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig";
File configDir = new File(solrZKConfigDir);
miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
// override settings in the solrconfig include
System.setProperty("solr.tests.maxBufferedDocs", "100000");
System.setProperty("solr.tests.maxIndexingThreads", "-1");
System.setProperty("solr.tests.ramBufferSizeMB", "100");
// use non-test classes so RandomizedRunner isn't necessary
System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
System.setProperty("solr.lock.type", "single");
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
log
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
NamedList<Object> res = createCollection(
miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME);
res.forEach(o -> log.info(o.toString()));
miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION);
log
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
}
@AfterAll
public static void shutDown() throws Exception {
miniCluster.shutdown();
FileUtils.deleteDirectory(workingDir.toFile());
}
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
ModifiableSolrParams modParams = new ModifiableSolrParams();
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
modParams.set("name", name);
modParams.set("numShards", numShards);
modParams.set("replicationFactor", replicationFactor);
modParams.set("collection.configName", configName);
modParams.set("maxShardsPerNode", maxShardsPerNode);
QueryRequest request = new QueryRequest(modParams);
request.setPath("/admin/collections");
return client.request(request);
}
}

View File

@ -0,0 +1,147 @@
package eu.dnetlib.dhp.oa.provision;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.CommonParams;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
public class XmlIndexingJobTest extends SolrTest {
protected static SparkSession spark;
private static final Integer batchSize = 100;
@Mock
private ISLookUpService isLookUpService;
@Mock
private ISLookupClient isLookupClient;
@BeforeEach
public void prepareMocks() throws ISLookUpException, IOException {
isLookupClient.setIsLookup(isLookUpService);
int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort();
Mockito
.when(isLookupClient.getDsId(Mockito.anyString()))
.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort));
Mockito
.when(isLookupClient.getLayoutSource(Mockito.anyString()))
.thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
Mockito
.when(isLookupClient.getLayoutTransformer())
.thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")));
}
@BeforeAll
public static void before() {
SparkConf conf = new SparkConf();
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
conf.registerKryoClasses(new Class[] {
SerializableSolrInputDocument.class
});
conf.setMaster("local[1]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
spark = SparkSession
.builder()
.appName(XmlIndexingJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void tearDown() {
spark.stop();
}
@Test
public void testXmlIndexingJob_onSolr() throws Exception {
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
long nRecord = JavaSparkContext
.fromSparkContext(spark.sparkContext())
.sequenceFile(inputPath, Text.class, Text.class)
.count();
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, null).run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*"));
Assertions
.assertEquals(
nRecord, rsp.getResults().getNumFound(),
"the number of indexed records should be equal to the number of input records");
}
@Test
public void testXmlIndexingJob_saveOnHDFS() throws Exception {
final String ID_XPATH = "//header/*[local-name()='objIdentifier']";
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
final JavaPairRDD<Text, Text> xmlRecords = JavaSparkContext
.fromSparkContext(spark.sparkContext())
.sequenceFile(inputPath, Text.class, Text.class);
long nRecord = xmlRecords.count();
long xmlIdUnique = xmlRecords
.map(t -> t._2().toString())
.map(s -> new SAXReader().read(new StringReader(s)).valueOf(ID_XPATH))
.distinct()
.count();
Assertions.assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records");
final String outputPath = workingDir.resolve("outputPath").toAbsolutePath().toString();
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, outputPath).run(isLookupClient);
final Dataset<SerializableSolrInputDocument> solrDocs = spark
.read()
.load(outputPath)
.as(Encoders.kryo(SerializableSolrInputDocument.class));
long docIdUnique = solrDocs.map((MapFunction<SerializableSolrInputDocument, String>) doc -> {
final SolrInputField id = doc.getField("__indexrecordidentifier");
return id.getFirstValue().toString();
}, Encoders.STRING())
.distinct()
.count();
Assertions.assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output records");
}
}

View File

@ -105,7 +105,7 @@
<FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/> <FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
<FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/> <FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/> <FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/resulttype/@classid)"/> <FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/> <FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/> <FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/> <FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
@ -123,7 +123,8 @@
<FIELD indexable="true" name="relfundername" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@name)"/> <FIELD indexable="true" name="relfundername" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@name)"/>
<FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected from of the related entity. Available for result-result relationships --> <FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected from of the related entity. Available for result-result relationships -->
<FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/> <FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/><!-- COMMON FIELDS --> <FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="pdate" value="//header/*[local-name()='dateOfCollection']"/> <FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="pdate" value="//header/*[local-name()='dateOfCollection']"/>
<FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/> <FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/> <FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>

View File

@ -0,0 +1,31 @@
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
See http://wiki.apache.org/solr/QueryElevationComponent for more info
-->
<elevate>
<!-- Query elevation examples
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>
for use with techproducts example
<query text="ipod">
<doc id="MA147LL/A" /> put the actual ipod at the top
<doc id="IW-02" exclude="true" /> exclude this cable
</query>
-->
</elevate>

View File

@ -83,6 +83,7 @@
<lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" />
<lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a <!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged specific jar file. This will cause a serious error to be logged
if it can't be loaded. if it can't be loaded.
@ -112,7 +113,8 @@
One can force a particular implementation via solr.MMapDirectoryFactory, One can force a particular implementation via solr.MMapDirectoryFactory,
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
solr.RAMDirectoryFactory is memory based and not persistent. solr.RAMDirectoryFactory is memory based, not
persistent, and doesn't work with replication.
--> -->
<directoryFactory name="DirectoryFactory" <directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
@ -204,7 +206,7 @@
More details on the nuances of each LockFactory... More details on the nuances of each LockFactory...
http://wiki.apache.org/lucene-java/AvailableLockFactories http://wiki.apache.org/lucene-java/AvailableLockFactories
--> -->
<lockType>${solr.lock.type:single}</lockType> <lockType>${solr.lock.type:native}</lockType>
<!-- Commit Deletion Policy <!-- Commit Deletion Policy
Custom deletion policies can be specified here. The class must Custom deletion policies can be specified here. The class must
@ -331,6 +333,29 @@
postCommit - fired after every commit or optimize command postCommit - fired after every commit or optimize command
postOptimize - fired after every optimize command postOptimize - fired after every optimize command
--> -->
<!-- The RunExecutableListener executes an external command from a
hook such as postCommit or postOptimize.
exe - the name of the executable to run
dir - dir to use as the current working directory. (default=".")
wait - the calling thread waits until the executable returns.
(default="true")
args - the arguments to pass to the program. (default is none)
env - environment variables to set. (default is none)
-->
<!-- This example shows how RunExecutableListener could be used
with the script based replication...
http://wiki.apache.org/solr/CollectionDistribution
-->
<!--
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">solr/bin/snapshooter</str>
<str name="dir">.</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler> </updateHandler>
@ -366,14 +391,22 @@
Query section - these settings control query time things like caches Query section - these settings control query time things like caches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query> <query>
<!-- Max Boolean Clauses
Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded.
** WARNING **
This option actually modifies a global Lucene property that
will affect all SolrCores. If multiple solrconfig.xml files
disagree on this property, the value at any given moment will
be based on the last SolrCore to be initialized.
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
--> -->
<maxBooleanClauses>1024</maxBooleanClauses> <maxBooleanClauses>1024</maxBooleanClauses>
<!-- Solr Internal Query Caches <!-- Solr Internal Query Caches
There are two implementations of cache available for Solr, There are two implementations of cache available for Solr,
@ -575,8 +608,21 @@
This section contains instructions for how the SolrDispatchFilter This section contains instructions for how the SolrDispatchFilter
should behave when processing requests for this SolrCore. should behave when processing requests for this SolrCore.
handleSelect is a legacy option that affects the behavior of requests
such as /select?qt=XXX
handleSelect="true" will cause the SolrDispatchFilter to process
the request and dispatch the query to a handler specified by the
"qt" param, assuming "/select" isn't already registered.
handleSelect="false" will cause the SolrDispatchFilter to
ignore "/select" requests, resulting in a 404 unless a handler
is explicitly registered with the name "/select"
handleSelect="true" is not recommended for new users, but is the default
for backwards compatibility
--> -->
<requestDispatcher> <requestDispatcher handleSelect="false" >
<!-- Request Parsing <!-- Request Parsing
These settings indicate how Solr Requests may be parsed, and These settings indicate how Solr Requests may be parsed, and
@ -602,14 +648,15 @@
plugins. plugins.
*** WARNING *** *** WARNING ***
Before enabling remote streaming, you should make sure your The settings below authorize Solr to fetch remote files, You
system has authentication enabled. should make sure your system has some authentication before
using enableRemoteStreaming="true"
<requestParsers enableRemoteStreaming="false"
multipartUploadLimitInKB="-1"
formdataUploadLimitInKB="-1"
addHttpRequestToContext="false"/>
--> -->
<requestParsers enableRemoteStreaming="true"
multipartUploadLimitInKB="2048000"
formdataUploadLimitInKB="2048"
addHttpRequestToContext="false"/>
<!-- HTTP Caching <!-- HTTP Caching
@ -673,6 +720,14 @@
Incoming queries will be dispatched to a specific handler by name Incoming queries will be dispatched to a specific handler by name
based on the path specified in the request. based on the path specified in the request.
Legacy behavior: If the request path uses "/select" but no Request
Handler has that name, and if handleSelect="true" has been specified in
the requestDispatcher, then the Request Handler is dispatched based on
the qt parameter. Handlers without a leading '/' are accessed this way
like so: http://host/app/[core/]select?qt=name If no qt is
given, then the requestHandler that declares default="true" will be
used or the one named "standard".
If a Request Handler is declared with startup="lazy", then it will If a Request Handler is declared with startup="lazy", then it will
not be initialized until the first request that uses it. not be initialized until the first request that uses it.
@ -692,13 +747,9 @@
--> -->
<lst name="defaults"> <lst name="defaults">
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="q.op">AND</str>
<int name="rows">10</int> <int name="rows">10</int>
<!-- Default search field <!-- <str name="df">text</str> -->
<str name="df">text</str>
-->
<!-- Change from JSON to XML format (the default prior to Solr 7.0)
<str name="wt">xml</str>
-->
</lst> </lst>
<!-- In addition to defaults, "appends" params can be specified <!-- In addition to defaults, "appends" params can be specified
to identify values which should be appended to the list of to identify values which should be appended to the list of
@ -781,10 +832,18 @@
<initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
<lst name="defaults"> <lst name="defaults">
<str name="df">_text_</str> <str name="df">__all</str>
</lst> </lst>
</initParams> </initParams>
<!-- This enabled schemaless mode
<initParams path="/update/**">
<lst name="defaults">
<str name="update.chain">add-unknown-fields-to-the-schema</str>
</lst>
</initParams>
-->
<!-- Solr Cell Update Request Handler <!-- Solr Cell Update Request Handler
http://wiki.apache.org/solr/ExtractingRequestHandler http://wiki.apache.org/solr/ExtractingRequestHandler
@ -796,10 +855,9 @@
<lst name="defaults"> <lst name="defaults">
<str name="lowernames">true</str> <str name="lowernames">true</str>
<str name="fmap.meta">ignored_</str> <str name="fmap.meta">ignored_</str>
<str name="fmap.content">_text_</str> <str name="fmap.content">__all</str>
</lst> </lst>
</requestHandler> </requestHandler>
<!-- Search Components <!-- Search Components
Search components are registered to SolrCore and used by Search components are registered to SolrCore and used by
@ -861,7 +919,7 @@
<!-- a spellchecker built from a field of the main index --> <!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker"> <lst name="spellchecker">
<str name="name">default</str> <str name="name">default</str>
<str name="field">_text_</str> <str name="field">__all</str>
<str name="classname">solr.DirectSolrSpellChecker</str> <str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein --> <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str> <str name="distanceMeasure">internal</str>
@ -986,6 +1044,7 @@
<searchComponent name="elevator" class="solr.QueryElevationComponent" > <searchComponent name="elevator" class="solr.QueryElevationComponent" >
<!-- pick a fieldType to analyze queries --> <!-- pick a fieldType to analyze queries -->
<str name="queryFieldType">string</str> <str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent> </searchComponent>
<!-- A request handler for demonstrating the elevator component --> <!-- A request handler for demonstrating the elevator component -->
@ -1116,28 +1175,29 @@
<!-- Add unknown fields to the schema <!-- Add unknown fields to the schema
Field type guessing update processors that will An example field type guessing update processor that will
attempt to parse string-typed field values as Booleans, Longs, attempt to parse string-typed field values as Booleans, Longs,
Doubles, or Dates, and then add schema fields with the guessed Doubles, or Dates, and then add schema fields with the guessed
field types. Text content will be indexed as "text_general" as field types.
well as a copy to a plain string version in *_str.
These require that the schema is both managed and mutable, by This requires that the schema is both managed and mutable, by
declaring schemaFactory as ManagedIndexSchemaFactory, with declaring schemaFactory as ManagedIndexSchemaFactory, with
mutable specified as true. mutable specified as true.
See http://wiki.apache.org/solr/GuessingFieldTypes See http://wiki.apache.org/solr/GuessingFieldTypes
--> -->
<updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/> <updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
<updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/> <!-- UUIDUpdateProcessorFactory will generate an id if none is present in the incoming document -->
<updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating"> <processor class="solr.UUIDUpdateProcessorFactory" />
<processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
<processor class="solr.FieldNameMutatingUpdateProcessorFactory">
<str name="pattern">[^\w-\.]</str> <str name="pattern">[^\w-\.]</str>
<str name="replacement">_</str> <str name="replacement">_</str>
</updateProcessor> </processor>
<updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/> <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
<updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/> <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
<updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/> <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
<updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date"> <processor class="solr.ParseDateFieldUpdateProcessorFactory">
<arr name="format"> <arr name="format">
<str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str> <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str> <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
@ -1157,40 +1217,28 @@
<str>yyyy-MM-dd HH:mm</str> <str>yyyy-MM-dd HH:mm</str>
<str>yyyy-MM-dd</str> <str>yyyy-MM-dd</str>
</arr> </arr>
</updateProcessor> </processor>
<updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields"> <processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
<lst name="typeMapping"> <str name="defaultFieldType">strings</str>
<str name="valueClass">java.lang.String</str>
<str name="fieldType">text_general</str>
<lst name="copyField">
<str name="dest">*_str</str>
<int name="maxChars">256</int>
</lst>
<!-- Use as default mapping instead of defaultFieldType -->
<bool name="default">true</bool>
</lst>
<lst name="typeMapping"> <lst name="typeMapping">
<str name="valueClass">java.lang.Boolean</str> <str name="valueClass">java.lang.Boolean</str>
<str name="fieldType">booleans</str> <str name="fieldType">booleans</str>
</lst> </lst>
<lst name="typeMapping"> <lst name="typeMapping">
<str name="valueClass">java.util.Date</str> <str name="valueClass">java.util.Date</str>
<str name="fieldType">pdates</str> <str name="fieldType">tdates</str>
</lst> </lst>
<lst name="typeMapping"> <lst name="typeMapping">
<str name="valueClass">java.lang.Long</str> <str name="valueClass">java.lang.Long</str>
<str name="valueClass">java.lang.Integer</str> <str name="valueClass">java.lang.Integer</str>
<str name="fieldType">plongs</str> <str name="fieldType">tlongs</str>
</lst> </lst>
<lst name="typeMapping"> <lst name="typeMapping">
<str name="valueClass">java.lang.Number</str> <str name="valueClass">java.lang.Number</str>
<str name="fieldType">pdoubles</str> <str name="fieldType">tdoubles</str>
</lst> </lst>
</updateProcessor> </processor>
<!-- The update.autoCreateFields property can be turned to false to disable schemaless mode -->
<updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}"
processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields">
<processor class="solr.LogUpdateProcessorFactory"/> <processor class="solr.LogUpdateProcessorFactory"/>
<processor class="solr.DistributedUpdateProcessorFactory"/> <processor class="solr.DistributedUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/> <processor class="solr.RunUpdateProcessorFactory"/>
@ -1313,7 +1361,7 @@
<!-- Query Parsers <!-- Query Parsers
https://lucene.apache.org/solr/guide/query-syntax-and-parsing.html https://cwiki.apache.org/confluence/display/solr/Query+Syntax+and+Parsing
Multiple QParserPlugins can be registered by name, and then Multiple QParserPlugins can be registered by name, and then
used in either the "defType" param for the QueryComponent (used used in either the "defType" param for the QueryComponent (used

View File

@ -46,7 +46,7 @@
</configuration> </configuration>
</global> </global>
<start to="Step18"/> <start to="Step1"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>

26
pom.xml
View File

@ -50,7 +50,7 @@
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<name>D-Net 45 releases</name> <name>D-Net 45 releases</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
<layout>default</layout> <layout>default</layout>
<snapshots> <snapshots>
<enabled>false</enabled> <enabled>false</enabled>
@ -70,6 +70,26 @@
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
</repository> </repository>
<repository>
<id>dnet45-releases-old</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>dnet45-snapshots-old</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories> </repositories>
<dependencies> <dependencies>
@ -639,12 +659,12 @@
<snapshotRepository> <snapshotRepository>
<id>dnet45-snapshots</id> <id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name> <name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url> <url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout> <layout>default</layout>
</snapshotRepository> </snapshotRepository>
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
</repository> </repository>
</distributionManagement> </distributionManagement>
<reporting> <reporting>