Commit 3f34757c63 ("merged from master"), in a fork of D-Net/dnet-hadoop.
Maven pom.xml

@@ -15,12 +15,12 @@
 		<snapshotRepository>
 			<id>dnet45-snapshots</id>
 			<name>DNet45 Snapshots</name>
-			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
+			<url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
 			<layout>default</layout>
 		</snapshotRepository>
 		<repository>
 			<id>dnet45-releases</id>
-			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+			<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
 		</repository>
 	</distributionManagement>
 
@@ -104,11 +104,6 @@
 			<artifactId>dnet-pace-core</artifactId>
 		</dependency>
 
-		<dependency>
-			<groupId>eu.dnetlib.dhp</groupId>
-			<artifactId>dhp-schemas</artifactId>
-			<version>${project.version}</version>
-		</dependency>
 	</dependencies>
 
 </project>
CleaningFunctions (moved from package eu.dnetlib.dhp.schema.oaf to eu.dnetlib.dhp.oa.graph.clean)

@@ -1,9 +1,7 @@
 
-package eu.dnetlib.dhp.schema.oaf;
+package eu.dnetlib.dhp.oa.graph.clean;
 
-import java.util.LinkedHashMap;
-import java.util.Objects;
-import java.util.Optional;
+import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -12,12 +10,19 @@ import org.apache.commons.lang3.StringUtils;
 import com.clearspring.analytics.util.Lists;
 
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 
 public class CleaningFunctions {
 
 	public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
 	public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
-	public static final String NONE = "none";
+
+	public static final Set<String> PID_BLACKLIST = new HashSet<>();
+
+	static {
+		PID_BLACKLIST.add("none");
+		PID_BLACKLIST.add("na");
+	}
 
 	public static <T extends Oaf> T fixVocabularyNames(T value) {
 		if (value instanceof Datasource) {
@@ -71,7 +76,7 @@ public class CleaningFunctions {
 		return value;
 	}
 
-	public static <T extends Oaf> T fixDefaults(T value) {
+	protected static <T extends Oaf> T fixDefaults(T value) {
 		if (value instanceof Datasource) {
 			// nothing to clean here
 		} else if (value instanceof Project) {
@@ -114,7 +119,7 @@ public class CleaningFunctions {
 					.stream()
 					.filter(Objects::nonNull)
 					.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
-					.filter(sp -> NONE.equalsIgnoreCase(sp.getValue()))
+					.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
 					.filter(sp -> Objects.nonNull(sp.getQualifier()))
 					.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
 					.map(CleaningFunctions::normalizePidValue)
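For orientation, a minimal self-contained sketch of the blacklist semantics introduced above (not part of the commit; the sample PID values are invented): trimmed, lower-cased values found in the blacklist are discarded, everything else is kept.

import java.util.*;
import java.util.stream.Collectors;

public class PidBlacklistSketch {
	public static void main(String[] args) {
		// mirrors CleaningFunctions.PID_BLACKLIST, which contains "none" and "na"
		Set<String> blacklist = new HashSet<>(Arrays.asList("none", "na"));
		// hypothetical PID values
		List<String> pids = Arrays.asList("10.1234/abc", "NONE", " na ", "0000-0002-1825-0097");
		List<String> kept = pids
			.stream()
			.filter(v -> !blacklist.contains(v.trim().toLowerCase()))
			.collect(Collectors.toList());
		System.out.println(kept); // [10.1234/abc, 0000-0002-1825-0097]
	}
}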
eu.dnetlib.dhp.schema.oaf.ModelHardLimits

@@ -3,6 +3,10 @@ package eu.dnetlib.dhp.schema.oaf;
 
 public class ModelHardLimits {
 
+	public static final String LAYOUT = "index";
+	public static final String INTERPRETATION = "openaire";
+	public static final String SEPARATOR = "-";
+
 	public static final int MAX_EXTERNAL_ENTITIES = 50;
 	public static final int MAX_AUTHORS = 200;
 	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
@@ -11,4 +15,8 @@ public class ModelHardLimits {
 	public static final int MAX_ABSTRACT_LENGTH = 150000;
 	public static final int MAX_INSTANCES = 10;
 
+	public static String getCollectionName(String format) {
+		return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
+	}
+
 }
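A short usage sketch for the new helper, assuming the class stays in package eu.dnetlib.dhp.schema.oaf as the hunk header indicates (the "DMF" format name is only an illustrative value):

import eu.dnetlib.dhp.schema.oaf.ModelHardLimits;

public class CollectionNameSketch {
	public static void main(String[] args) {
		// format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION
		System.out.println(ModelHardLimits.getCollectionName("DMF")); // prints DMF-index-openaire
	}
}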
eu.dnetlib.dhp.schema.oaf.OafMapperUtils

@@ -2,7 +2,6 @@
 package eu.dnetlib.dhp.schema.oaf;
 
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
 
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
@@ -13,10 +12,43 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
 
 import eu.dnetlib.dhp.schema.common.LicenseComparator;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.utils.DHPUtils;
 
 public class OafMapperUtils {
 
+	public static Oaf merge(final Oaf o1, final Oaf o2) {
+		if (ModelSupport.isSubClass(o1, OafEntity.class)) {
+			if (ModelSupport.isSubClass(o1, Result.class)) {
+
+				return mergeResults((Result) o1, (Result) o2);
+			} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
+				((Datasource) o1).mergeFrom((Datasource) o2);
+			} else if (ModelSupport.isSubClass(o1, Organization.class)) {
+				((Organization) o1).mergeFrom((Organization) o2);
+			} else if (ModelSupport.isSubClass(o1, Project.class)) {
+				((Project) o1).mergeFrom((Project) o2);
+			} else {
+				throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
+			}
+		} else if (ModelSupport.isSubClass(o1, Relation.class)) {
+			((Relation) o1).mergeFrom((Relation) o2);
+		} else {
+			throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
+		}
+		return o1;
+	}
+
+	public static Result mergeResults(Result r1, Result r2) {
+		if (new ResultTypeComparator().compare(r1, r2) < 0) {
+			r1.mergeFrom(r2);
+			return r1;
+		} else {
+			r2.mergeFrom(r1);
+			return r2;
+		}
+	}
+
 	public static KeyValue keyValue(final String k, final String v) {
 		final KeyValue kv = new KeyValue();
 		kv.setKey(k);
Class in package eu.dnetlib.dhp.schema.oaf.utils (imports only; the class name is not shown in this excerpt)

@@ -3,12 +3,10 @@ package eu.dnetlib.dhp.schema.oaf.utils;
 
 import java.io.Serializable;
 import java.util.Objects;
-import java.util.Optional;
-import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringUtils;
 
-import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
+import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.utils.DHPUtils;
eu.dnetlib.dhp.utils.DHPUtils

@@ -5,6 +5,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
+import java.util.List;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
@@ -15,9 +16,15 @@ import org.apache.commons.codec.binary.Hex;
 import com.jayway.jsonpath.JsonPath;
 
 import net.minidev.json.JSONArray;
+import scala.collection.JavaConverters;
+import scala.collection.Seq;
 
 public class DHPUtils {
 
+	public static Seq<String> toSeq(List<String> list) {
+		return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
+	}
+
 	public static String md5(final String s) {
 		try {
 			final MessageDigest md = MessageDigest.getInstance("MD5");
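A small usage sketch for the new toSeq helper (the paths are placeholders): it bridges a Java List into a Scala Seq, for example to feed Spark APIs that expect one.

import java.util.Arrays;
import java.util.List;

import scala.collection.Seq;

import eu.dnetlib.dhp.utils.DHPUtils;

public class ToSeqSketch {
	public static void main(String[] args) {
		// hypothetical HDFS paths
		List<String> paths = Arrays.asList("/tmp/graph/publication", "/tmp/graph/dataset");
		Seq<String> scalaPaths = DHPUtils.toSeq(paths);
		System.out.println(scalaPaths.size()); // 2
	}
}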
Oozie workflow definition (broker)

@@ -1,4 +1,4 @@
-<workflow-app name="create broker events - partial" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="update broker notifications" xmlns="uri:oozie:workflow:0.5">
 
 	<parameters>
 		<property>
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob

@@ -6,7 +6,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.Optional;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -20,7 +22,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 
@@ -68,12 +71,12 @@ public class CleanGraphSparkJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				removeOutputDir(spark, outputPath);
-				fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
+				HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
+				cleanGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
 			});
 	}
 
-	private static <T extends Oaf> void fixGraphTable(
+	private static <T extends Oaf> void cleanGraphTable(
 		SparkSession spark,
 		VocabularyGroup vocs,
 		String inputPath,
@@ -99,13 +102,15 @@ public class CleanGraphSparkJob {
 		return spark
 			.read()
 			.textFile(inputEntityPath)
+			.filter((FilterFunction<String>) s -> isEntityType(s, clazz))
+			.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
 			.map(
 				(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
 				Encoders.bean(clazz));
 	}
 
-	private static void removeOutputDir(SparkSession spark, String path) {
-		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	private static <T extends Oaf> boolean isEntityType(final String s, final Class<T> clazz) {
+		return StringUtils.substringBefore(s, "|").equals(clazz.getName());
 	}
 
 }
eu.dnetlib.dhp.oa.graph.clean.GroupEntitiesAndRelationsSparkJob (new file)

@@ -0,0 +1,206 @@
+
+package eu.dnetlib.dhp.oa.graph.clean;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.expressions.Aggregator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.Option;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.*;
+import scala.Tuple2;
+
+/**
+ * Groups the graph content by entity identifier to ensure ID uniqueness
+ */
+public class GroupEntitiesAndRelationsSparkJob {
+
+	private static final Logger log = LoggerFactory.getLogger(GroupEntitiesAndRelationsSparkJob.class);
+
+	private final static String ID_JPATH = "$.id";
+
+	private final static String SOURCE_JPATH = "$.source";
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	public static void main(String[] args) throws Exception {
+
+		String jsonConfiguration = IOUtils
+			.toString(
+				GroupEntitiesAndRelationsSparkJob.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/oa/graph/group_graph_entities_parameters.json"));
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		String graphInputPath = parser.get("graphInputPath");
+		log.info("graphInputPath: {}", graphInputPath);
+
+		String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		SparkConf conf = new SparkConf();
+		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+		conf.registerKryoClasses(ModelSupport.getOafModelClasses());
+
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
+				groupEntitiesAndRelations(spark, graphInputPath, outputPath);
+			});
+	}
+
+	private static void groupEntitiesAndRelations(
+		SparkSession spark,
+		String inputPath,
+		String outputPath) {
+
+		TypedColumn<Oaf, Oaf> aggregator = new GroupingAggregator().toColumn();
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		spark
+			.read()
+			.textFile(toSeq(listPaths(inputPath, sc)))
+			.map((MapFunction<String, Oaf>) s -> parseOaf(s), Encoders.kryo(Oaf.class))
+			.filter((FilterFunction<Oaf>) oaf -> StringUtils.isNotBlank(ModelSupport.idFn().apply(oaf)))
+			.groupByKey((MapFunction<Oaf, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
+			.agg(aggregator)
+			.map(
+				(MapFunction<Tuple2<String, Oaf>, String>) t -> t._2().getClass().getName() +
+					"|" + OBJECT_MAPPER.writeValueAsString(t._2()),
+				Encoders.STRING())
+			.write()
+			.option("compression", "gzip")
+			.mode(SaveMode.Overwrite)
+			.text(outputPath);
+	}
+
+	public static class GroupingAggregator extends Aggregator<Oaf, Oaf, Oaf> {
+
+		@Override
+		public Oaf zero() {
+			return null;
+		}
+
+		@Override
+		public Oaf reduce(Oaf b, Oaf a) {
+			return mergeAndGet(b, a);
+		}
+
+		private Oaf mergeAndGet(Oaf b, Oaf a) {
+			if (Objects.nonNull(a) && Objects.nonNull(b)) {
+				return OafMapperUtils.merge(b, a);
+			}
+			return Objects.isNull(a) ? b : a;
+		}
+
+		@Override
+		public Oaf merge(Oaf b, Oaf a) {
+			return mergeAndGet(b, a);
+		}
+
+		@Override
+		public Oaf finish(Oaf j) {
+			return j;
+		}
+
+		@Override
+		public Encoder<Oaf> bufferEncoder() {
+			return Encoders.kryo(Oaf.class);
+		}
+
+		@Override
+		public Encoder<Oaf> outputEncoder() {
+			return Encoders.kryo(Oaf.class);
+		}
+
+	}
+
+	private static Oaf parseOaf(String s) {
+
+		DocumentContext dc = JsonPath
+			.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
+		final String id = dc.read(ID_JPATH);
+		if (StringUtils.isNotBlank(id)) {
+
+			String prefix = StringUtils.substringBefore(id, "|");
+			switch (prefix) {
+				case "10":
+					return parse(s, Datasource.class);
+				case "20":
+					return parse(s, Organization.class);
+				case "40":
+					return parse(s, Project.class);
+				case "50":
+					String resultType = dc.read("$.resulttype.classid");
+					switch (resultType) {
+						case "publication":
+							return parse(s, Publication.class);
+						case "dataset":
+							return parse(s, eu.dnetlib.dhp.schema.oaf.Dataset.class);
+						case "software":
+							return parse(s, Software.class);
+						case "other":
+							return parse(s, OtherResearchProduct.class);
+						default:
+							throw new IllegalArgumentException(String.format("invalid resultType: '%s'", resultType));
+					}
+				default:
+					throw new IllegalArgumentException(String.format("invalid id prefix: '%s'", prefix));
+			}
+		} else {
+			String source = dc.read(SOURCE_JPATH);
+			if (StringUtils.isNotBlank(source)) {
+				return parse(s, Relation.class);
+			} else {
+				throw new IllegalArgumentException(String.format("invalid oaf: '%s'", s));
+			}
+		}
+	}
+
+	private static <T extends Oaf> Oaf parse(String s, Class<T> clazz) {
+		try {
+			return OBJECT_MAPPER.readValue(s, clazz);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	private static List<String> listPaths(String inputPath, JavaSparkContext sc) {
+		return HdfsSupport
+			.listFiles(inputPath, sc.hadoopConfiguration())
+			.stream()
+			.collect(Collectors.toList());
+	}
+
+}
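To illustrate the intermediate record layout used between the two jobs (the JSON payload below is a made-up minimal example): each line written by GroupEntitiesAndRelationsSparkJob is the fully qualified Oaf class name, a '|' separator, and the JSON serialization of the grouped object; CleanGraphSparkJob keeps only the lines whose prefix matches the requested table class and strips the prefix before deserializing.

import org.apache.commons.lang3.StringUtils;

public class GroupedRecordFormatSketch {
	public static void main(String[] args) {
		// hypothetical output line of the grouping job
		String line = "eu.dnetlib.dhp.schema.oaf.Publication|{\"id\":\"50|doi_________::abc\"}";

		// the cleaning job filters by the class-name prefix...
		boolean isPublication = StringUtils
			.substringBefore(line, "|")
			.equals("eu.dnetlib.dhp.schema.oaf.Publication");

		// ...and parses whatever follows the first '|' as JSON
		String json = StringUtils.substringAfter(line, "|");
		System.out.println(isPublication + " -> " + json);
	}
}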
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob (renamed from MergeGraphSparkJob)

@@ -33,9 +33,9 @@ import scala.Tuple2;
 * are picked preferring those from the BETA aggregator rather then from PROD. The identity of a relationship is defined
 * by eu.dnetlib.dhp.schema.common.ModelSupport#idFn()
 */
-public class MergeGraphSparkJob {
+public class MergeGraphTableSparkJob {
 
-	private static final Logger log = LoggerFactory.getLogger(CleanGraphSparkJob.class);
+	private static final Logger log = LoggerFactory.getLogger(MergeGraphTableSparkJob.class);
 
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
AbstractMdRecordToOafMapper

@@ -258,8 +258,8 @@ public abstract class AbstractMdRecordToOafMapper {
 		r.setCollectedfrom(Arrays.asList(collectedFrom));
 		r.setPid(prepareResultPids(doc, info));
-		r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
-		r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
+		r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection"));
+		r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation|//dri:dateOfTransformation"));
 		r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
 		r.setOaiprovenance(prepareOAIprovenance(doc));
 		r.setAuthor(prepareAuthors(doc, info));
eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication

@@ -4,9 +4,11 @@ package eu.dnetlib.dhp.oa.graph.raw;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.io.IOException;
-import java.util.*;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -18,7 +20,6 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -68,7 +69,7 @@ public class GenerateEntitiesApplication {
 
 		final SparkConf conf = new SparkConf();
 		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
-			removeOutputDir(spark, targetPath);
+			HdfsSupport.remove(targetPath, spark.sparkContext().hadoopConfiguration());
 			generateEntities(spark, vocs, sourcePaths, targetPath);
 		});
 	}
@@ -82,7 +83,7 @@ public class GenerateEntitiesApplication {
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 		final List<String> existingSourcePaths = Arrays
 			.stream(sourcePaths.split(","))
-			.filter(p -> exists(sc, p))
+			.filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration()))
 			.collect(Collectors.toList());
 
 		log.info("Generate entities from files:");
@@ -103,7 +104,7 @@ public class GenerateEntitiesApplication {
 
 		inputRdd
 			.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
-			.reduceByKey((o1, o2) -> merge(o1, o2))
+			.reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2))
 			.map(Tuple2::_2)
 			.map(
 				oaf -> oaf.getClass().getSimpleName().toLowerCase()
@@ -112,38 +113,6 @@ public class GenerateEntitiesApplication {
 			.saveAsTextFile(targetPath, GzipCodec.class);
 	}
 
-	private static Oaf merge(final Oaf o1, final Oaf o2) {
-		if (ModelSupport.isSubClass(o1, OafEntity.class)) {
-			if (ModelSupport.isSubClass(o1, Result.class)) {
-
-				return mergeResults((Result) o1, (Result) o2);
-			} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
-				((Datasource) o1).mergeFrom((Datasource) o2);
-			} else if (ModelSupport.isSubClass(o1, Organization.class)) {
-				((Organization) o1).mergeFrom((Organization) o2);
-			} else if (ModelSupport.isSubClass(o1, Project.class)) {
-				((Project) o1).mergeFrom((Project) o2);
-			} else {
-				throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
-			}
-		} else if (ModelSupport.isSubClass(o1, Relation.class)) {
-			((Relation) o1).mergeFrom((Relation) o2);
-		} else {
-			throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
-		}
-		return o1;
-	}
-
-	protected static Result mergeResults(Result r1, Result r2) {
-		if (new ResultTypeComparator().compare(r1, r2) < 0) {
-			r1.mergeFrom(r2);
-			return r1;
-		} else {
-			r2.mergeFrom(r1);
-			return r2;
-		}
-	}
-
 	private static List<Oaf> convertToListOaf(
 		final String id,
 		final String s,
@@ -192,17 +161,4 @@ public class GenerateEntitiesApplication {
 		}
 	}
 
-	private static boolean exists(final JavaSparkContext context, final String pathToFile) {
-		try {
-			final FileSystem hdfs = FileSystem.get(context.hadoopConfiguration());
-			final Path path = new Path(pathToFile);
-			return hdfs.exists(path);
-		} catch (final IOException e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	private static void removeOutputDir(final SparkSession spark, final String path) {
-		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
-	}
 }
eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication

@@ -1,6 +1,7 @@
 
 package eu.dnetlib.dhp.oa.graph.raw;
 
+import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
@@ -9,25 +10,20 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -442,26 +438,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
 				createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
 
 		try {
-			final String targetType = rs.getString(TARGET_TYPE);
 			if (rs.getString(SOURCE_TYPE).equals("context")) {
 				final Result r;
 
-				switch (targetType) {
-				case "dataset":
-					r = new Dataset();
-					break;
-				case "software":
-					r = new Software();
-					break;
-				case "other":
-					r = new OtherResearchProduct();
-					break;
-				case "publication":
-				default:
-					r = new Publication();
-					break;
-				}
+				if (rs.getString(TARGET_TYPE).equals("dataset")) {
+					r = new Dataset();
+					r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
+				} else if (rs.getString(TARGET_TYPE).equals("software")) {
+					r = new Software();
+					r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
+				} else if (rs.getString(TARGET_TYPE).equals("other")) {
+					r = new OtherResearchProduct();
+					r.setResulttype(ORP_DEFAULT_RESULTTYPE);
+				} else {
+					r = new Publication();
+					r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
+				}
 
 				r.setId(createOpenaireId(50, rs.getString("target_id"), false));
 				r.setLastupdatetimestamp(lastUpdateTimestamp);
 				r.setContext(prepareContext(rs.getString("source_id"), info));
@@ -471,7 +463,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
 				return Arrays.asList(r);
 			} else {
 				final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
-				final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
+				final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);
 
 				final Relation r1 = new Relation();
 				final Relation r2 = new Relation();
@@ -527,9 +519,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
 		final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
 		final String inferenceprovenance = rs.getString("inferenceprovenance");
 		final Boolean inferred = rs.getBoolean("inferred");
-		final String trust = rs.getString("trust");
+
+		final double trust = rs.getDouble("trust");
+
 		return dataInfo(
-			deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
+			deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION,
+			String.format("%.3f", trust));
 	}
 
 	private Qualifier prepareQualifierSplitting(final String s) {
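A tiny sketch of the trust handling change above (the 0.9 value is arbitrary): trust is now read as a double and rendered with three decimals when building the DataInfo.

public class TrustFormatSketch {
	public static void main(String[] args) {
		double trust = 0.9; // hypothetical value, as returned by rs.getDouble("trust")
		System.out.println(String.format("%.3f", trust)); // 0.900 (with '.' as decimal separator)
	}
}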
eu.dnetlib.dhp.oa.graph.raw.OafToOafMapper

@@ -2,9 +2,7 @@
 package eu.dnetlib.dhp.oa.graph.raw;
 
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -18,9 +16,9 @@ import org.dom4j.Node;
 import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
 import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 
 public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 
eu.dnetlib.dhp.oa.graph.raw.OdfToOafMapper

@@ -2,15 +2,9 @@
 package eu.dnetlib.dhp.oa.graph.raw;
 
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
 import java.util.stream.Collectors;
 
 import org.apache.commons.lang3.StringUtils;
@@ -18,9 +12,9 @@ import org.dom4j.Document;
 import org.dom4j.Node;
 
 import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctions;
 import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 
 public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 
Oozie workflow definition (graph cleaning)

@@ -50,12 +50,36 @@
 		</property>
 	</parameters>
 
-	<start to="fork_clean_graph"/>
+	<start to="group_entities"/>
 
 	<kill name="Kill">
 		<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>
 
+	<action name="group_entities">
+		<spark xmlns="uri:oozie:spark-action:0.2">
+			<master>yarn</master>
+			<mode>cluster</mode>
+			<name>group graph entities and relations</name>
+			<class>eu.dnetlib.dhp.oa.graph.clean.GroupEntitiesAndRelationsSparkJob</class>
+			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+			<spark-opts>
+				--executor-cores=${sparkExecutorCores}
+				--executor-memory=${sparkExecutorMemory}
+				--driver-memory=${sparkDriverMemory}
+				--conf spark.extraListeners=${spark2ExtraListeners}
+				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				--conf spark.sql.shuffle.partitions=7680
+			</spark-opts>
+			<arg>--graphInputPath</arg><arg>${graphInputPath}</arg>
+			<arg>--outputPath</arg><arg>${workingDir}/grouped_entities</arg>
+		</spark>
+		<ok to="fork_clean_graph"/>
+		<error to="Kill"/>
+	</action>
+
 	<fork name="fork_clean_graph">
 		<path start="clean_publication"/>
 		<path start="clean_dataset"/>
@@ -84,7 +108,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/publication</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -110,7 +134,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/dataset</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -136,7 +160,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/otherresearchproduct</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -162,7 +186,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/software</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -188,7 +212,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/datasource</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -214,7 +238,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/organization</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -240,7 +264,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/project</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
@@ -266,7 +290,7 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--inputPath</arg><arg>${graphInputPath}/relation</arg>
+			<arg>--inputPath</arg><arg>${workingDir}/grouped_entities</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
 			<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
eu/dnetlib/dhp/oa/graph/group_graph_entities_parameters.json (new file)

@@ -0,0 +1,20 @@
+[
+	{
+		"paramName": "issm",
+		"paramLongName": "isSparkSessionManaged",
+		"paramDescription": "when true will stop SparkSession after job execution",
+		"paramRequired": false
+	},
+	{
+		"paramName": "gin",
+		"paramLongName": "graphInputPath",
+		"paramDescription": "the graph root path",
+		"paramRequired": true
+	},
+	{
+		"paramName": "out",
+		"paramLongName": "outputPath",
+		"paramDescription": "the output merged graph root path",
+		"paramRequired": true
+	}
+]
Oozie workflow definition (BETA/PROD graph merge)

@@ -2,11 +2,11 @@
 
 	<parameters>
 		<property>
-			<name>betaInputGgraphPath</name>
+			<name>betaInputGraphPath</name>
 			<description>the beta graph root path</description>
 		</property>
 		<property>
-			<name>prodInputGgraphPath</name>
+			<name>prodInputGraphPath</name>
 			<description>the production graph root path</description>
 		</property>
 		<property>
@@ -76,7 +76,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge publications</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -88,8 +88,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/publication</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/publication</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/publication</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/publication</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -103,7 +103,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge datasets</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -115,8 +115,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/dataset</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/dataset</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/dataset</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/dataset</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -130,7 +130,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge otherresearchproducts</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -142,8 +142,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/otherresearchproduct</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/otherresearchproduct</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/otherresearchproduct</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/otherresearchproduct</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -157,7 +157,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge softwares</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -169,8 +169,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/software</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/software</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/software</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/software</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -184,7 +184,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge datasources</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -196,8 +196,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/datasource</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/datasource</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/datasource</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/datasource</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -211,7 +211,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge organizations</name>
-			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
+			<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
 			<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
@@ -223,8 +223,8 @@
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 				--conf spark.sql.shuffle.partitions=7680
 			</spark-opts>
-			<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/organization</arg>
-			<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/organization</arg>
+			<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/organization</arg>
+			<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/organization</arg>
 			<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
 			<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
 			<arg>--priority</arg><arg>${priority}</arg>
@@ -238,7 +238,7 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>Merge projects</name>
|
<name>Merge projects</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
|
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
@ -250,8 +250,8 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=7680
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/project</arg>
|
<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/project</arg>
|
||||||
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/project</arg>
|
<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/project</arg>
|
||||||
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
|
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||||
<arg>--priority</arg><arg>${priority}</arg>
|
<arg>--priority</arg><arg>${priority}</arg>
|
||||||
|
@ -265,7 +265,7 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Merge relations</name>
|
<name>Merge relations</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob</class>
|
<class>eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
@ -277,8 +277,8 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=7680
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--betaInputPath</arg><arg>${betaInputGgraphPath}/relation</arg>
|
<arg>--betaInputPath</arg><arg>${betaInputGraphPath}/relation</arg>
|
||||||
<arg>--prodInputPath</arg><arg>${prodInputGgraphPath}/relation</arg>
|
<arg>--prodInputPath</arg><arg>${prodInputGraphPath}/relation</arg>
|
||||||
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
|
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
<arg>--priority</arg><arg>${priority}</arg>
|
<arg>--priority</arg><arg>${priority}</arg>
|
||||||
|
|
|
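Each of the merge actions above invokes the same MergeGraphTableSparkJob, changing only the graph table name and the model class passed through --graphTableClassName. A compact sketch of that shared argument layout, as a hypothetical helper that is not part of this workflow, assuming the concrete values come from the Oozie properties:

import java.util.Arrays;
import java.util.List;

public class MergeGraphArgsSketch {

	// mirrors the <arg> elements repeated for each graph table in the workflow above
	public static List<String> argsFor(String table, String modelClass,
		String betaGraph, String prodGraph, String outGraph, String priority) {
		return Arrays
			.asList(
				"--betaInputPath", betaGraph + "/" + table,
				"--prodInputPath", prodGraph + "/" + table,
				"--outputPath", outGraph + "/" + table,
				"--graphTableClassName", modelClass,
				"--priority", priority);
	}
}

For example, argsFor("software", "eu.dnetlib.dhp.schema.oaf.Software", betaInputGraphPath, prodInputGraphPath, graphOutputPath, priority) reproduces the argument list of the "Merge softwares" action.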
@ -19,7 +19,10 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
|
||||||
public class MergeGraphSparkJobTest {
|
public class MergeGraphTableSparkJobTest {
|
||||||
|
|
||||||
private ObjectMapper mapper;
|
private ObjectMapper mapper;
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ public class MergeGraphSparkJobTest {
|
||||||
public void testMergeDatasources() throws IOException {
|
public void testMergeDatasources() throws IOException {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"openaire-cris_1.1",
|
"openaire-cris_1.1",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_cris.json"),
|
d("datasource_cris.json"),
|
||||||
d("datasource_UNKNOWN.json"))
|
d("datasource_UNKNOWN.json"))
|
||||||
|
@ -36,7 +36,7 @@ public class MergeGraphSparkJobTest {
|
||||||
.getClassid());
|
.getClassid());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"openaire-cris_1.1",
|
"openaire-cris_1.1",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_UNKNOWN.json"),
|
d("datasource_UNKNOWN.json"),
|
||||||
d("datasource_cris.json"))
|
d("datasource_cris.json"))
|
||||||
|
@ -44,7 +44,7 @@ public class MergeGraphSparkJobTest {
|
||||||
.getClassid());
|
.getClassid());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"driver-openaire2.0",
|
"driver-openaire2.0",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_native.json"),
|
d("datasource_native.json"),
|
||||||
d("datasource_driver-openaire2.0.json"))
|
d("datasource_driver-openaire2.0.json"))
|
||||||
|
@ -52,7 +52,7 @@ public class MergeGraphSparkJobTest {
|
||||||
.getClassid());
|
.getClassid());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"driver-openaire2.0",
|
"driver-openaire2.0",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_driver-openaire2.0.json"),
|
d("datasource_driver-openaire2.0.json"),
|
||||||
d("datasource_native.json"))
|
d("datasource_native.json"))
|
||||||
|
@ -60,7 +60,7 @@ public class MergeGraphSparkJobTest {
|
||||||
.getClassid());
|
.getClassid());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"openaire4.0",
|
"openaire4.0",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_notCompatible.json"),
|
d("datasource_notCompatible.json"),
|
||||||
d("datasource_openaire4.0.json"))
|
d("datasource_openaire4.0.json"))
|
||||||
|
@ -68,7 +68,7 @@ public class MergeGraphSparkJobTest {
|
||||||
.getClassid());
|
.getClassid());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"notCompatible",
|
"notCompatible",
|
||||||
MergeGraphSparkJob
|
MergeGraphTableSparkJob
|
||||||
.mergeDatasource(
|
.mergeDatasource(
|
||||||
d("datasource_notCompatible.json"),
|
d("datasource_notCompatible.json"),
|
||||||
d("datasource_UNKNOWN.json"))
|
d("datasource_UNKNOWN.json"))
|
|
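The assertions above pin down how the renamed MergeGraphTableSparkJob picks the OpenAIRE compatibility when merging a BETA and a PROD datasource: a meaningful compatibility wins over UNKNOWN, native or notCompatible, and the BETA side is kept otherwise. A rough sketch that satisfies exactly these assertions, assuming the fixture classids mirror their file names; the actual mergeDatasource implementation is not part of this diff and may differ in signature and detail:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

import eu.dnetlib.dhp.schema.oaf.Datasource;

public class DatasourceCompatibilitySketch {

	// compatibility values that carry no usable information
	private static final Set<String> WEAK = new HashSet<>(
		Arrays.asList("UNKNOWN", "native", "notCompatible"));

	// keep the BETA (left) datasource unless only the PROD (right) one declares a
	// meaningful OpenAIRE compatibility
	public static Datasource merge(Datasource left, Datasource right) {
		return WEAK.contains(classid(left)) && !WEAK.contains(classid(right)) ? right : left;
	}

	private static String classid(Datasource d) {
		return Optional
			.ofNullable(d.getOpenairecompatibility())
			.map(q -> q.getClassid())
			.orElse("UNKNOWN");
	}
}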
@ -70,7 +70,7 @@ public class GenerateEntitiesApplicationTest {
|
||||||
|
|
||||||
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
|
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
|
||||||
String resultType) {
|
String resultType) {
|
||||||
final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset);
|
final Result merge = OafMapperUtils.mergeResults(publication, dataset);
|
||||||
assertTrue(clazz.isAssignableFrom(merge.getClass()));
|
assertTrue(clazz.isAssignableFrom(merge.getClass()));
|
||||||
assertEquals(resultType, merge.getResulttype().getClassid());
|
assertEquals(resultType, merge.getResulttype().getClassid());
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,6 +72,8 @@ public class MappersTest {
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
assertFalse(p.getDataInfo().getInvisible());
|
assertFalse(p.getDataInfo().getInvisible());
|
||||||
assertTrue(p.getSource().size() == 1);
|
assertTrue(p.getSource().size() == 1);
|
||||||
|
assertTrue(StringUtils.isNotBlank(p.getDateofcollection()));
|
||||||
|
assertTrue(StringUtils.isNotBlank(p.getDateoftransformation()));
|
||||||
|
|
||||||
assertTrue(p.getAuthor().size() > 0);
|
assertTrue(p.getAuthor().size() > 0);
|
||||||
final Optional<Author> author = p
|
final Optional<Author> author = p
|
||||||
|
@ -317,7 +319,7 @@ public class MappersTest {
|
||||||
@Test
|
@Test
|
||||||
void testODFRecord() throws IOException {
|
void testODFRecord() throws IOException {
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
|
||||||
List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
|
@ -328,6 +330,22 @@ public class MappersTest {
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testTextGrid() throws IOException {
|
||||||
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml"));
|
||||||
|
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
|
|
||||||
|
System.out.println("***************");
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||||
|
System.out.println("***************");
|
||||||
|
|
||||||
|
final Dataset p = (Dataset) list.get(0);
|
||||||
|
assertValidId(p.getId());
|
||||||
|
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||||
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
|
System.out.println(p.getTitle().get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
private void assertValidId(final String id) {
|
private void assertValidId(final String id) {
|
||||||
assertEquals(49, id.length());
|
assertEquals(49, id.length());
|
||||||
assertEquals('|', id.charAt(2));
|
assertEquals('|', id.charAt(2));
|
||||||
|
|
|
@ -28,13 +28,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class MigrateDbEntitiesApplicationTest {
|
public class MigrateDbEntitiesApplicationTest {
|
||||||
|
|
|
@ -31,8 +31,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "trust",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "0.9"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferenceprovenance",
|
"field": "inferenceprovenance",
|
||||||
|
|
|
@ -114,8 +114,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "trust",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "0.9"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferenceprovenance",
|
"field": "inferenceprovenance",
|
||||||
|
|
|
@ -7,13 +7,12 @@
|
||||||
<header xmlns="http://namespace.openaire.eu/">
|
<header xmlns="http://namespace.openaire.eu/">
|
||||||
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
|
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
|
||||||
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
|
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
|
||||||
<dri:dateOfCollection/>
|
|
||||||
<dri:mdFormat/>
|
<dri:mdFormat/>
|
||||||
<dri:mdFormatInterpretation/>
|
<dri:mdFormatInterpretation/>
|
||||||
<dri:repositoryId/>
|
<dri:repositoryId/>
|
||||||
<dr:objectIdentifier/>
|
<dr:objectIdentifier/>
|
||||||
<dr:dateOfCollection>2020-03-23T00:20:51.392Z</dr:dateOfCollection>
|
<dri:dateOfCollection>2020-03-23T00:20:51.392Z</dri:dateOfCollection>
|
||||||
<dr:dateOfTransformation>2020-03-23T00:26:59.078Z</dr:dateOfTransformation>
|
<dri:dateOfTransformation>2020-03-23T00:26:59.078Z</dri:dateOfTransformation>
|
||||||
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
|
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
|
||||||
</header>
|
</header>
|
||||||
<metadata xmlns="http://namespace.openaire.eu/">
|
<metadata xmlns="http://namespace.openaire.eu/">
|
||||||
|
|
|
@ -96,8 +96,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "trust",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "0.9"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferenceprovenance",
|
"field": "inferenceprovenance",
|
||||||
|
|
|
@ -41,8 +41,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "trust",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "0.9"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferenceprovenance",
|
"field": "inferenceprovenance",
|
||||||
|
|
|
@ -86,8 +86,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "trust",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "0.9"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferenceprovenance",
|
"field": "inferenceprovenance",
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||||
|
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||||
|
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||||
|
<oai:header xmlns="http://namespace.openaire.eu/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||||
|
<dri:objIdentifier>r3f52792889d::000051aa1f61d77d2c0b340091f8024e</dri:objIdentifier>
|
||||||
|
<dri:recordIdentifier>textgrid:q9cv.0</dri:recordIdentifier>
|
||||||
|
<dri:dateOfCollection>2020-11-17T09:34:11.128+01:00</dri:dateOfCollection>
|
||||||
|
<oaf:datasourceprefix>r3f52792889d</oaf:datasourceprefix>
|
||||||
|
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:q9cv.0</identifier>
|
||||||
|
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2012-01-21T13:35:20Z</datestamp>
|
||||||
|
<dr:dateOfTransformation>2020-11-17T09:46:21.551+01:00</dr:dateOfTransformation>
|
||||||
|
</oai:header>
|
||||||
|
<metadata>
|
||||||
|
<datacite:resource xmlns="http://www.openarchives.org/OAI/2.0/"
|
||||||
|
xmlns:datacite="http://datacite.org/schema/kernel-3"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||||
|
<datacite:identifier identifierType="Handle">hdl:11858/00-1734-0000-0003-7664-F</datacite:identifier>
|
||||||
|
<datacite:creators>
|
||||||
|
<datacite:creator>
|
||||||
|
<datacite:creatorName>Hoffmann von Fallersleben, August Heinrich</datacite:creatorName>
|
||||||
|
<datacite:nameIdentifier nameIdentifierScheme="pnd" schemeURI="https://de.dariah.eu/pnd-service">118552589</datacite:nameIdentifier>
|
||||||
|
</datacite:creator>
|
||||||
|
</datacite:creators>
|
||||||
|
<datacite:titles>
|
||||||
|
<datacite:title titleType="Other">Mailied</datacite:title>
|
||||||
|
<datacite:title titleType="Other">August Heinrich Hoffmann von Fallersleben: Unpolitische Lieder von Hoffmann von Fallersleben, 1. + 2. Theil, 1. Theil, Hamburg: Hoffmann und Campe, 1841.</datacite:title>
|
||||||
|
</datacite:titles>
|
||||||
|
<datacite:publisher>TextGrid</datacite:publisher>
|
||||||
|
<datacite:publicationYear>2012</datacite:publicationYear>
|
||||||
|
<datacite:contributors>
|
||||||
|
<datacite:contributor contributorType="DataManager">
|
||||||
|
<datacite:contributorName>tvitt@textgrid.de</datacite:contributorName>
|
||||||
|
</datacite:contributor>
|
||||||
|
<datacite:contributor contributorType="Other">
|
||||||
|
<datacite:contributorName>Digitale Bibliothek</datacite:contributorName>
|
||||||
|
<datacite:nameIdentifier nameIdentifierScheme="textgrid">TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c</datacite:nameIdentifier>
|
||||||
|
</datacite:contributor>
|
||||||
|
</datacite:contributors>
|
||||||
|
<datacite:dates>
|
||||||
|
<datacite:date dateType="Created">2012-01-21T13:35:20Z</datacite:date>
|
||||||
|
<datacite:date dateType="Issued">2012-01-21T13:35:20Z</datacite:date>
|
||||||
|
<datacite:date dateType="Updated">2012-01-21T13:35:20Z</datacite:date>
|
||||||
|
</datacite:dates>
|
||||||
|
<datacite:resourceType resourceTypeGeneral="Dataset"/>
|
||||||
|
<alternateIdentifiers>
|
||||||
|
<datacite:alternateIdentifier alternateIdentifierType="URI">textgrid:q9cv.0</datacite:alternateIdentifier>
|
||||||
|
<alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/hdl:11858/00-1734-0000-0003-7664-F</alternateIdentifier>
|
||||||
|
</alternateIdentifiers>
|
||||||
|
<datacite:relatedIdentifiers>
|
||||||
|
<datacite:relatedIdentifier relatedIdentifierType="Handle" relationType="IsPartOf">hdl:11858/00-1734-0000-0003-7666-B</datacite:relatedIdentifier>
|
||||||
|
</datacite:relatedIdentifiers>
|
||||||
|
<datacite:sizes>
|
||||||
|
<datacite:size>527 Bytes</datacite:size>
|
||||||
|
</datacite:sizes>
|
||||||
|
<datacite:formats>
|
||||||
|
<datacite:format>text/tg.edition+tg.aggregation+xml</datacite:format>
|
||||||
|
</datacite:formats>
|
||||||
|
<datacite:version>0</datacite:version>
|
||||||
|
<datacite:rightsList>
|
||||||
|
<datacite:rights rightsURI="http://creativecommons.org/licenses/by/3.0/de/legalcode"> Der annotierte Datenbestand der Digitalen Bibliothek inklusive
|
||||||
|
Metadaten sowie davon einzeln zugängliche Teile sind eine Abwandlung
|
||||||
|
des Datenbestandes von www.editura.de durch TextGrid und werden
|
||||||
|
unter der Lizenz Creative Commons Namensnennung 3.0 Deutschland
|
||||||
|
Lizenz (by-Nennung TextGrid) veröffentlicht. Die Lizenz bezieht sich
|
||||||
|
nicht auf die der Annotation zu Grunde liegenden allgemeinfreien
|
||||||
|
Texte (Siehe auch Punkt 2 der Lizenzbestimmungen).</datacite:rights>
|
||||||
|
<datacite:rights rightsURI="info:eu-repo/semantics/openAccess"/>
|
||||||
|
</datacite:rightsList>
|
||||||
|
<datacite:descriptions>
|
||||||
|
<datacite:description descriptionType="Abstract"/>
|
||||||
|
</datacite:descriptions>
|
||||||
|
<datacite:geoLocations>
|
||||||
|
<datacite:geoLocation>
|
||||||
|
<datacite:geoLocationPlace
|
||||||
|
xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">Hamburg</datacite:geoLocationPlace>
|
||||||
|
</datacite:geoLocation>
|
||||||
|
</datacite:geoLocations>
|
||||||
|
</datacite:resource>
|
||||||
|
<oaf:identifier identifierType="handle">hdl:11858/00-1734-0000-0003-7664-F</oaf:identifier>
|
||||||
|
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||||
|
<oaf:refereed>0002</oaf:refereed>
|
||||||
|
<oaf:dateAccepted>2012-01-01</oaf:dateAccepted>
|
||||||
|
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||||
|
<oaf:license>http://creativecommons.org/licenses/by/3.0/de/legalcode</oaf:license>
|
||||||
|
<oaf:language>und</oaf:language>
|
||||||
|
<oaf:hostedBy id="re3data_____::r3d100011365" name="TextGrid Repository"/>
|
||||||
|
<oaf:collectedFrom id="re3data_____::r3d100011365" name="TextGrid Repository"/>
|
||||||
|
</metadata>
|
||||||
|
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||||
|
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
|
||||||
|
<originDescription altered="true" harvestDate="2020-11-17T09:34:11.128+01:00">
|
||||||
|
<baseURL>https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai</baseURL>
|
||||||
|
<identifier>textgrid:q9cv.0</identifier>
|
||||||
|
<datestamp>2012-01-21T13:35:20Z</datestamp>
|
||||||
|
<metadataNamespace>http://schema.datacite.org/oai/oai-1.0/</metadataNamespace>
|
||||||
|
</originDescription>
|
||||||
|
</provenance>
|
||||||
|
<oaf:datainfo>
|
||||||
|
<oaf:inferred>false</oaf:inferred>
|
||||||
|
<oaf:deletedbyinference>false</oaf:deletedbyinference>
|
||||||
|
<oaf:trust>0.9</oaf:trust>
|
||||||
|
<oaf:inferenceprovenance/>
|
||||||
|
<oaf:provenanceaction classid="sysimport:crosswalk"
|
||||||
|
classname="sysimport:crosswalk"
|
||||||
|
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||||
|
</oaf:datainfo>
|
||||||
|
</about>
|
||||||
|
</record>
|
|
@ -22,6 +22,12 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.jayway.jsonpath</groupId>
|
<groupId>com.jayway.jsonpath</groupId>
|
||||||
<artifactId>json-path</artifactId>
|
<artifactId>json-path</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>dom4j</groupId>
|
<groupId>dom4j</groupId>
|
||||||
|
@ -82,9 +88,6 @@
|
||||||
<groupId>org.codehaus.woodstox</groupId>
|
<groupId>org.codehaus.woodstox</groupId>
|
||||||
<artifactId>*</artifactId>
|
<artifactId>*</artifactId>
|
||||||
</exclusion>
|
</exclusion>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>com.github.ben-manes.caffeine</groupId>
|
<groupId>com.github.ben-manes.caffeine</groupId>
|
||||||
<artifactId>*</artifactId>
|
<artifactId>*</artifactId>
|
||||||
|
@ -109,11 +112,10 @@
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>*</artifactId>
|
<artifactId>*</artifactId>
|
||||||
</exclusion>
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.apache.zookeeper</groupId>
|
||||||
|
<artifactId>zookeeper</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
public class ProvisionConstants {
|
||||||
|
|
||||||
|
public static final String LAYOUT = "index";
|
||||||
|
public static final String INTERPRETATION = "openaire";
|
||||||
|
public static final String SEPARATOR = "-";
|
||||||
|
|
||||||
|
public static String getCollectionName(String format) {
|
||||||
|
return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
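ProvisionConstants centralises the Solr collection naming that the provision jobs previously assembled inline as format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION. A quick illustration, where the format name "TMF" is only an example value:

import eu.dnetlib.dhp.oa.provision.ProvisionConstants;

public class CollectionNameExample {

	public static void main(String[] args) {
		// prints "TMF-index-openaire"
		System.out.println(ProvisionConstants.getCollectionName("TMF"));
	}
}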
@ -14,11 +14,12 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ZkServers;
|
import eu.dnetlib.dhp.oa.provision.utils.ZkServers;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class SolrAdminApplication extends SolrApplication implements Closeable {
|
public class SolrAdminApplication implements Closeable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class);
|
private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class);
|
||||||
|
|
||||||
|
@ -54,12 +55,12 @@ public class SolrAdminApplication extends SolrApplication implements Closeable {
|
||||||
.orElse(false);
|
.orElse(false);
|
||||||
log.info("commit: {}", commit);
|
log.info("commit: {}", commit);
|
||||||
|
|
||||||
final ISLookUpService isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
|
|
||||||
final String zkHost = getZkHost(isLookup);
|
final String zkHost = isLookup.getZkHost();
|
||||||
log.info("zkHost: {}", zkHost);
|
log.info("zkHost: {}", zkHost);
|
||||||
|
|
||||||
final String collection = format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
|
final String collection = ProvisionConstants.getCollectionName(format);
|
||||||
log.info("collection: {}", collection);
|
log.info("collection: {}", collection);
|
||||||
|
|
||||||
try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {
|
try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public abstract class SolrApplication {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SolrApplication.class);
|
|
||||||
|
|
||||||
protected static final String LAYOUT = "index";
|
|
||||||
protected static final String INTERPRETATION = "openaire";
|
|
||||||
protected static final String SEPARATOR = "-";
|
|
||||||
protected static final String DATE_FORMAT = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method retrieves from the information system the zookeeper quorum of the Solr server
|
|
||||||
*
|
|
||||||
* @param isLookup
|
|
||||||
* @return the zookeeper quorum of the Solr server
|
|
||||||
* @throws ISLookUpException
|
|
||||||
*/
|
|
||||||
protected static String getZkHost(ISLookUpService isLookup) throws ISLookUpException {
|
|
||||||
return doLookup(
|
|
||||||
isLookup,
|
|
||||||
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()");
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static String doLookup(ISLookUpService isLookup, String xquery) throws ISLookUpException {
|
|
||||||
log.info(String.format("running xquery: %s", xquery));
|
|
||||||
final String res = isLookup.getResourceProfileByQuery(xquery);
|
|
||||||
log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ..."));
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -2,12 +2,11 @@
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
@ -28,13 +27,11 @@ import com.google.common.collect.Maps;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.*;
|
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.collection.JavaConverters;
|
|
||||||
import scala.collection.Seq;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* XmlConverterJob converts the JoinedEntities as XML records
|
* XmlConverterJob converts the JoinedEntities as XML records
|
||||||
|
@ -43,8 +40,6 @@ public class XmlConverterJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
|
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
@ -129,10 +124,6 @@ public class XmlConverterJob {
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Seq<String> toSeq(List<String> list) {
|
|
||||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
|
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
|
||||||
Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
||||||
accumulators
|
accumulators
|
||||||
|
|
|
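XmlConverterJob drops its private toSeq helper (removed above) in favour of the one imported from DHPUtils. A sketch of an equivalent conversion, assuming DHPUtils.toSeq keeps the same List<String> to Seq<String> shape as the removed method:

import java.util.List;

import scala.collection.JavaConverters;
import scala.collection.Seq;

public class SeqConversionSketch {

	// same conversion as the helper removed from XmlConverterJob
	public static Seq<String> toSeq(List<String> list) {
		return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
	}
}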
@ -20,27 +20,42 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.rdd.RDD;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.lucidworks.spark.util.SolrSupport;
|
import com.lucidworks.spark.util.SolrSupport;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class XmlIndexingJob extends SolrApplication {
|
public class XmlIndexingJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);
|
private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);
|
||||||
|
|
||||||
private static final Integer DEFAULT_BATCH_SIZE = 1000;
|
private static final Integer DEFAULT_BATCH_SIZE = 1000;
|
||||||
|
|
||||||
|
protected static final String DATE_FORMAT = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
||||||
|
|
||||||
|
private String inputPath;
|
||||||
|
|
||||||
|
private String format;
|
||||||
|
|
||||||
|
private int batchSize;
|
||||||
|
|
||||||
|
private String outputPath;
|
||||||
|
|
||||||
|
private SparkSession spark;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -60,27 +75,53 @@ public class XmlIndexingJob extends SolrApplication {
|
||||||
final String inputPath = parser.get("inputPath");
|
final String inputPath = parser.get("inputPath");
|
||||||
log.info("inputPath: {}", inputPath);
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
|
||||||
|
|
||||||
final String format = parser.get("format");
|
final String format = parser.get("format");
|
||||||
log.info("format: {}", format);
|
log.info("format: {}", format);
|
||||||
|
|
||||||
|
final String outputPath = Optional
|
||||||
|
.ofNullable(parser.get("outputPath"))
|
||||||
|
.orElse(null);
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final Integer batchSize = parser.getObjectMap().containsKey("batchSize")
|
final Integer batchSize = parser.getObjectMap().containsKey("batchSize")
|
||||||
? Integer.valueOf(parser.get("batchSize"))
|
? Integer.valueOf(parser.get("batchSize"))
|
||||||
: DEFAULT_BATCH_SIZE;
|
: DEFAULT_BATCH_SIZE;
|
||||||
log.info("batchSize: {}", batchSize);
|
log.info("batchSize: {}", batchSize);
|
||||||
|
|
||||||
final ISLookUpService isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
final SparkConf conf = new SparkConf();
|
||||||
final String fields = getLayoutSource(isLookup, format);
|
conf.registerKryoClasses(new Class[] {
|
||||||
|
SerializableSolrInputDocument.class
|
||||||
|
});
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
|
new XmlIndexingJob(spark, inputPath, format, batchSize, outputPath).run(isLookup);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize, String outputPath) {
|
||||||
|
this.spark = spark;
|
||||||
|
this.inputPath = inputPath;
|
||||||
|
this.format = format;
|
||||||
|
this.batchSize = batchSize;
|
||||||
|
this.outputPath = outputPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
|
||||||
|
final String fields = isLookup.getLayoutSource(format);
|
||||||
log.info("fields: {}", fields);
|
log.info("fields: {}", fields);
|
||||||
|
|
||||||
final String xslt = getLayoutTransformer(isLookup);
|
final String xslt = isLookup.getLayoutTransformer();
|
||||||
|
|
||||||
final String dsId = getDsId(format, isLookup);
|
final String dsId = isLookup.getDsId(format);
|
||||||
log.info("dsId: {}", dsId);
|
log.info("dsId: {}", dsId);
|
||||||
|
|
||||||
final String zkHost = getZkHost(isLookup);
|
final String zkHost = isLookup.getZkHost();
|
||||||
log.info("zkHost: {}", zkHost);
|
log.info("zkHost: {}", zkHost);
|
||||||
|
|
||||||
final String version = getRecordDatestamp();
|
final String version = getRecordDatestamp();
|
||||||
|
@ -88,24 +129,26 @@ public class XmlIndexingJob extends SolrApplication {
|
||||||
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
|
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
|
||||||
log.info("indexRecordTransformer {}", indexRecordXslt);
|
log.info("indexRecordTransformer {}", indexRecordXslt);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
runWithSparkSession(
|
JavaRDD<SolrInputDocument> docs = sc
|
||||||
conf,
|
.sequenceFile(inputPath, Text.class, Text.class)
|
||||||
isSparkSessionManaged,
|
.map(t -> t._2().toString())
|
||||||
spark -> {
|
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
.map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s));
|
||||||
|
|
||||||
RDD<SolrInputDocument> docs = sc
|
if (StringUtils.isNotBlank(outputPath)) {
|
||||||
.sequenceFile(inputPath, Text.class, Text.class)
|
spark
|
||||||
.map(t -> t._2().toString())
|
.createDataset(
|
||||||
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
|
docs.map(s -> new SerializableSolrInputDocument(s)).rdd(),
|
||||||
.map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s))
|
Encoders.kryo(SerializableSolrInputDocument.class))
|
||||||
.rdd();
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
final String collection = format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
|
.parquet(outputPath);
|
||||||
SolrSupport.indexDocs(zkHost, collection, batchSize, docs);
|
} else {
|
||||||
});
|
final String collection = ProvisionConstants.getCollectionName(format);
|
||||||
|
SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static String toIndexRecord(Transformer tr, final String record) {
|
protected static String toIndexRecord(Transformer tr, final String record) {
|
||||||
|
@ -151,56 +194,4 @@ public class XmlIndexingJob extends SolrApplication {
|
||||||
return new SimpleDateFormat(DATE_FORMAT).format(new Date());
|
return new SimpleDateFormat(DATE_FORMAT).format(new Date());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Method retrieves from the information system the list of fields associated to the given MDFormat name
|
|
||||||
*
|
|
||||||
* @param isLookup the ISLookup service stub
|
|
||||||
* @param format the Metadata format name
|
|
||||||
* @return the string representation of the list of fields to be indexed
|
|
||||||
* @throws ISLookUpDocumentNotFoundException
|
|
||||||
* @throws ISLookUpException
|
|
||||||
*/
|
|
||||||
private static String getLayoutSource(final ISLookUpService isLookup, final String format)
|
|
||||||
throws ISLookUpDocumentNotFoundException, ISLookUpException {
|
|
||||||
return doLookup(
|
|
||||||
isLookup,
|
|
||||||
String
|
|
||||||
.format(
|
|
||||||
"collection('')//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'MDFormatDSResourceType' and .//NAME='%s']//LAYOUT[@name='%s']",
|
|
||||||
format, LAYOUT));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method retrieves from the information system the openaireLayoutToRecordStylesheet
|
|
||||||
*
|
|
||||||
* @param isLookup the ISLookup service stub
|
|
||||||
* @return the string representation of the XSLT contained in the transformation rule profile
|
|
||||||
* @throws ISLookUpDocumentNotFoundException
|
|
||||||
* @throws ISLookUpException
|
|
||||||
*/
|
|
||||||
private static String getLayoutTransformer(ISLookUpService isLookup) throws ISLookUpException {
|
|
||||||
return doLookup(
|
|
||||||
isLookup,
|
|
||||||
"collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType')"
|
|
||||||
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/SCRIPT/TITLE/text() = 'openaireLayoutToRecordStylesheet']//CODE/node()");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method retrieves from the information system the IndexDS profile ID associated to the given MDFormat name
|
|
||||||
*
|
|
||||||
* @param format
|
|
||||||
* @param isLookup
|
|
||||||
* @return the IndexDS identifier
|
|
||||||
* @throws ISLookUpException
|
|
||||||
*/
|
|
||||||
private static String getDsId(String format, ISLookUpService isLookup) throws ISLookUpException {
|
|
||||||
return doLookup(
|
|
||||||
isLookup,
|
|
||||||
String
|
|
||||||
.format(
|
|
||||||
"collection('/db/DRIVER/IndexDSResources/IndexDSResourceType')"
|
|
||||||
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/METADATA_FORMAT/text() = '%s']//RESOURCE_IDENTIFIER/@value/string()",
|
|
||||||
format));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
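The reworked XmlIndexingJob above now holds a SparkSession and exposes run(ISLookupClient): when --outputPath is blank the SolrInputDocuments are indexed into the <format>-index-openaire collection, otherwise they are written as a Kryo-encoded parquet Dataset of SerializableSolrInputDocument. A minimal driver sketch; the lookup URL, paths and format are placeholders:

import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.provision.XmlIndexingJob;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;

public class XmlIndexingJobSketch {

	public static void main(String[] args) throws Exception {
		// assumes a SparkSession configured as in XmlIndexingJob.main, i.e. with
		// SerializableSolrInputDocument registered among the Kryo classes
		SparkSession spark = SparkSession.builder().appName("xml-indexing-sketch").getOrCreate();

		ISLookupClient isLookup = new ISLookupClient(
			ISLookupClientFactory.getLookUpService("http://is.example.org/isLookUp")); // placeholder URL

		// non-blank outputPath: dump the documents as parquet instead of indexing them
		new XmlIndexingJob(spark, "/user/provision/xml", "TMF", 1000, "/user/provision/solr_documents")
			.run(isLookup);
	}
}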
@ -0,0 +1,23 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision.model;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.SolrInputField;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class needed to make the SolrInputDocument compatible with the Kryo serialization mechanism.
|
||||||
|
*/
|
||||||
|
public class SerializableSolrInputDocument extends SolrInputDocument {
|
||||||
|
|
||||||
|
public SerializableSolrInputDocument() {
|
||||||
|
super(new HashMap<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
public SerializableSolrInputDocument(Map<String, SolrInputField> fields) {
|
||||||
|
super(fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
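SerializableSolrInputDocument exists only to let SolrInputDocument instances travel through Spark's Kryo encoder, which is what XmlIndexingJob relies on for its parquet output. A minimal sketch of reading such a dump back, assuming the path points at output produced by that job:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;

public class KryoSolrDocumentSketch {

	// reads the parquet dump written by XmlIndexingJob with the same Kryo encoder used to write it
	public static Dataset<SerializableSolrInputDocument> readDump(SparkSession spark, String path) {
		return spark
			.read()
			.parquet(path)
			.as(Encoders.kryo(SerializableSolrInputDocument.class));
	}
}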
@ -0,0 +1,95 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.provision.ProvisionConstants;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public class ISLookupClient {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ISLookupClient.class);
|
||||||
|
|
||||||
|
private ISLookUpService isLookup;
|
||||||
|
|
||||||
|
public ISLookupClient(ISLookUpService isLookup) {
|
||||||
|
this.isLookup = isLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method retrieves from the information system the list of fields associated to the given MDFormat name
|
||||||
|
*
|
||||||
|
* @param format the Metadata format name
|
||||||
|
* @return the string representation of the list of fields to be indexed
|
||||||
|
* @throws ISLookUpDocumentNotFoundException
|
||||||
|
* @throws ISLookUpException
|
||||||
|
*/
|
||||||
|
public String getLayoutSource(final String format)
|
||||||
|
throws ISLookUpDocumentNotFoundException, ISLookUpException {
|
||||||
|
return doLookup(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"collection('')//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'MDFormatDSResourceType' and .//NAME='%s']//LAYOUT[@name='%s']",
|
||||||
|
format, ProvisionConstants.LAYOUT));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method retrieves from the information system the openaireLayoutToRecordStylesheet
|
||||||
|
*
|
||||||
|
* @return the string representation of the XSLT contained in the transformation rule profile
|
||||||
|
* @throws ISLookUpDocumentNotFoundException
|
||||||
|
* @throws ISLookUpException
|
||||||
|
*/
|
||||||
|
public String getLayoutTransformer() throws ISLookUpException {
|
||||||
|
return doLookup(
|
||||||
|
"collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType')"
|
||||||
|
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/SCRIPT/TITLE/text() = 'openaireLayoutToRecordStylesheet']//CODE/node()");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method retrieves from the information system the IndexDS profile ID associated to the given MDFormat name
|
||||||
|
*
|
||||||
|
* @param format
|
||||||
|
* @return the IndexDS identifier
|
||||||
|
* @throws ISLookUpException
|
||||||
|
*/
|
||||||
|
public String getDsId(String format) throws ISLookUpException {
|
||||||
|
return doLookup(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"collection('/db/DRIVER/IndexDSResources/IndexDSResourceType')"
|
||||||
|
+ "//RESOURCE_PROFILE[./BODY/CONFIGURATION/METADATA_FORMAT/text() = '%s']//RESOURCE_IDENTIFIER/@value/string()",
|
||||||
|
format));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method retrieves from the information system the zookeeper quorum of the Solr server
|
||||||
|
*
|
||||||
|
* @return the zookeeper quorum of the Solr server
|
||||||
|
* @throws ISLookUpException
|
||||||
|
*/
|
||||||
|
public String getZkHost() throws ISLookUpException {
|
||||||
|
return doLookup(
|
||||||
|
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String doLookup(String xquery) throws ISLookUpException {
|
||||||
|
log.info(String.format("running xquery: %s", xquery));
|
||||||
|
final String res = getIsLookup().getResourceProfileByQuery(xquery);
|
||||||
|
log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ..."));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ISLookUpService getIsLookup() {
|
||||||
|
return isLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIsLookup(ISLookUpService isLookup) {
|
||||||
|
this.isLookup = isLookup;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
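ISLookupClient gathers in one place the XQuery lookups that used to live in SolrApplication and XmlIndexingJob. A short usage sketch; the service URL is a placeholder and "TMF" is only an example metadata format name:

import eu.dnetlib.dhp.oa.provision.ProvisionConstants;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;

public class ISLookupClientSketch {

	public static void main(String[] args) throws Exception {
		ISLookupClient client = new ISLookupClient(
			ISLookupClientFactory.getLookUpService("http://is.example.org/isLookUp")); // placeholder

		String format = "TMF";                          // example metadata format name
		String fields = client.getLayoutSource(format); // index layout fields
		String xslt = client.getLayoutTransformer();    // openaireLayoutToRecordStylesheet
		String dsId = client.getDsId(format);           // IndexDS profile identifier
		String zkHost = client.getZkHost();             // Solr zookeeper quorum

		System.out
			.printf(
				"indexing %s (ds %s) into %s via %s%n",
				format, dsId, ProvisionConstants.getCollectionName(format), zkHost);
	}
}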
@ -46,11 +46,6 @@ public class StreamingInputDocumentFactory {
|
||||||
|
|
||||||
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
|
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
|
||||||
|
|
||||||
private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
|
||||||
|
|
||||||
private static final List<String> dateFormats = Arrays
|
|
||||||
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
|
|
||||||
|
|
||||||
private static final String DEFAULTDNETRESULT = "dnetResult";
|
private static final String DEFAULTDNETRESULT = "dnetResult";
|
||||||
|
|
||||||
private static final String TARGETFIELDS = "targetFields";
|
private static final String TARGETFIELDS = "targetFields";
|
||||||
|
@ -125,13 +120,12 @@ public class StreamingInputDocumentFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
|
if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
|
||||||
indexDocument.clear();
|
throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
|
||||||
System.err.println("missing indexrecord id:\n" + inputDocument);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return indexDocument;
|
return indexDocument;
|
||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
return new SolrInputDocument();
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
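With the change above, StreamingInputDocumentFactory.parseDocument no longer hides broken records behind an empty SolrInputDocument: both a missing index record identifier and an XMLStreamException now surface as an IllegalStateException. A sketch of how a caller might wrap the new behaviour; version, dsId and the record string are whatever XmlIndexingJob prepares:

import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;

public class ParseDocumentSketch {

	private static final Logger log = LoggerFactory.getLogger(ParseDocumentSketch.class);

	// parse one serialized index record, letting malformed input fail fast
	public static SolrInputDocument parseOrFail(String version, String dsId, String xml) {
		try {
			return new StreamingInputDocumentFactory(version, dsId).parseDocument(xml);
		} catch (IllegalStateException e) {
			log.error("cannot parse index record", e);
			throw e;
		}
	}
}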
@ -22,5 +22,11 @@
|
||||||
"paramLongName": "batchSize",
|
"paramLongName": "batchSize",
|
||||||
"paramDescription": "size of the batch of documents sent to solr",
|
"paramDescription": "size of the batch of documents sent to solr",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "o",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "path on hdfs activating an alternative output for the SolrInputDocuments",
|
||||||
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -638,6 +638,7 @@
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--format</arg><arg>${format}</arg>
|
<arg>--format</arg><arg>${format}</arg>
|
||||||
<arg>--batchSize</arg><arg>${batchSize}</arg>
|
<arg>--batchSize</arg><arg>${batchSize}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="commit_solr_collection"/>
|
<ok to="commit_solr_collection"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@@ -1,107 +1,18 @@

 package eu.dnetlib.dhp.oa.provision;

-import java.io.File;
-import java.nio.file.Path;
-
-import org.apache.solr.client.solrj.SolrResponse;
-import org.apache.solr.client.solrj.embedded.JettyConfig;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.XMLResponseParser;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
-import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.client.solrj.request.RequestWriter;
-import org.apache.solr.client.solrj.response.CollectionAdminResponse;
-import org.apache.solr.client.solrj.response.ConfigSetAdminResponse;
 import org.apache.solr.client.solrj.response.SolrPingResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.cloud.MiniSolrCloudCluster;
-import org.apache.solr.common.params.CollectionParams;
-import org.apache.solr.common.params.CoreAdminParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;

 import junit.framework.Assert;

-public class SolrAdminApplicationTest {
+public class SolrAdminApplicationTest extends SolrTest {

-    private static final Logger log = LoggerFactory.getLogger(SolrAdminApplicationTest.class);
-
-    public static final String DEFAULT_COLLECTION = "testCollection";
-    public static final String CONFIG_NAME = "testConfig";
-
-    private static MiniSolrCloudCluster miniCluster;
-    private static CloudSolrClient cloudSolrClient;
-
-    @TempDir
-    public static Path tempDir;
-
-    @BeforeAll
-    public static void setup() throws Exception {
-
-        // random unassigned HTTP port
-        final int jettyPort = 0;
-
-        final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
-
-        // create a MiniSolrCloudCluster instance
-        miniCluster = new MiniSolrCloudCluster(2, tempDir, jettyConfig);
-
-        // Upload Solr configuration directory to ZooKeeper
-        String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig";
-        File configDir = new File(solrZKConfigDir);
-
-        miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
-
-        // override settings in the solrconfig include
-        System.setProperty("solr.tests.maxBufferedDocs", "100000");
-        System.setProperty("solr.tests.maxIndexingThreads", "-1");
-        System.setProperty("solr.tests.ramBufferSizeMB", "100");
-
-        // use non-test classes so RandomizedRunner isn't necessary
-        System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
-        System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
-
-        cloudSolrClient = miniCluster.getSolrClient();
-        cloudSolrClient.setRequestWriter(new RequestWriter());
-        cloudSolrClient.setParser(new XMLResponseParser());
-        cloudSolrClient.setDefaultCollection(DEFAULT_COLLECTION);
-        cloudSolrClient.connect();
-
-        log.info(new ConfigSetAdminRequest.List().process(cloudSolrClient).toString());
-        log.info(CollectionAdminRequest.ClusterStatus.getClusterStatus().process(cloudSolrClient).toString());
-
-        createCollection(cloudSolrClient, DEFAULT_COLLECTION, 2, 1, CONFIG_NAME);
-    }
-
-    @AfterAll
-    public static void shutDown() throws Exception {
-        miniCluster.shutdown();
-    }
-
-    protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
-        int replicationFactor, String configName) throws Exception {
-        ModifiableSolrParams modParams = new ModifiableSolrParams();
-        modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
-        modParams.set("name", name);
-        modParams.set("numShards", numShards);
-        modParams.set("replicationFactor", replicationFactor);
-        modParams.set("collection.configName", configName);
-        QueryRequest request = new QueryRequest(modParams);
-        request.setPath("/admin/collections");
-        return client.request(request);
-    }

     @Test
     public void testPing() throws Exception {
-        SolrPingResponse pingResponse = cloudSolrClient.ping();
+        SolrPingResponse pingResponse = miniCluster.getSolrClient().ping();
         log.info("pingResponse: '{}'", pingResponse.getStatus());
         Assert.assertTrue(pingResponse.getStatus() == 0);
     }
 }
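The createCollection helper removed here (and reintroduced in the shared SolrTest base below) drives collection creation through a raw QueryRequest against /admin/collections. For comparison only, a minimal sketch of the same call through SolrJ's typed admin API; this is not part of the commit, and the client, collection and configset names are assumed:

// Sketch: equivalent of the createCollection(...) helper using the typed
// CollectionAdminRequest builder instead of a hand-built /admin/collections request.
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;

public class CreateCollectionSketch {
    public static void create(CloudSolrClient client) throws Exception {
        // 2 shards, 1 replica, using the "testConfig" configset uploaded to ZooKeeper
        CollectionAdminResponse rsp = CollectionAdminRequest
            .createCollection("testCollection", "testConfig", 2, 1)
            .process(client);
        System.out.println("create collection status: " + rsp.getStatus());
    }
}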
@@ -0,0 +1,109 @@
+
+package eu.dnetlib.dhp.oa.provision;
+
+import java.io.File;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.cloud.MiniSolrCloudCluster;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.params.CoreAdminParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.io.TempDir;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class SolrTest {
+
+    protected static final Logger log = LoggerFactory.getLogger(SolrTest.class);
+
+    protected static final String FORMAT = "test";
+    protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire";
+    protected static final String CONFIG_NAME = "testConfig";
+
+    protected static MiniSolrCloudCluster miniCluster;
+
+    @TempDir
+    public static Path workingDir;
+
+    @BeforeAll
+    public static void setup() throws Exception {
+
+        // random unassigned HTTP port
+        final int jettyPort = 0;
+        final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
+
+        log.info(String.format("working directory: %s", workingDir.toString()));
+        System.setProperty("solr.log.dir", workingDir.resolve("logs").toString());
+
+        // create a MiniSolrCloudCluster instance
+        miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig);
+
+        // Upload Solr configuration directory to ZooKeeper
+        String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig";
+        File configDir = new File(solrZKConfigDir);
+
+        miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
+
+        // override settings in the solrconfig include
+        System.setProperty("solr.tests.maxBufferedDocs", "100000");
+        System.setProperty("solr.tests.maxIndexingThreads", "-1");
+        System.setProperty("solr.tests.ramBufferSizeMB", "100");
+
+        // use non-test classes so RandomizedRunner isn't necessary
+        System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
+        System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
+        System.setProperty("solr.lock.type", "single");
+
+        log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
+        log
+            .info(
+                CollectionAdminRequest.ClusterStatus
+                    .getClusterStatus()
+                    .process(miniCluster.getSolrClient())
+                    .toString());
+
+        NamedList<Object> res = createCollection(
+            miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME);
+        res.forEach(o -> log.info(o.toString()));
+
+        miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION);
+
+        log
+            .info(
+                CollectionAdminRequest.ClusterStatus
+                    .getClusterStatus()
+                    .process(miniCluster.getSolrClient())
+                    .toString());
+
+    }
+
+    @AfterAll
+    public static void shutDown() throws Exception {
+        miniCluster.shutdown();
+        FileUtils.deleteDirectory(workingDir.toFile());
+    }
+
+    protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
+        int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
+        ModifiableSolrParams modParams = new ModifiableSolrParams();
+        modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
+        modParams.set("name", name);
+        modParams.set("numShards", numShards);
+        modParams.set("replicationFactor", replicationFactor);
+        modParams.set("collection.configName", configName);
+        modParams.set("maxShardsPerNode", maxShardsPerNode);
+        QueryRequest request = new QueryRequest(modParams);
+        request.setPath("/admin/collections");
+        return client.request(request);
+    }
+
+}
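For orientation, a concrete test is expected to extend SolrTest and talk to the embedded cluster through miniCluster.getSolrClient(). A minimal sketch, not part of this commit; the field name and the assumption that the test schema accepts such a document are illustrative only:

package eu.dnetlib.dhp.oa.provision;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.SolrInputDocument;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

public class SolrTestUsageSketch extends SolrTest {

    @Test
    public void indexAndQuerySingleDocument() throws Exception {
        // index one document into the default collection created by SolrTest.setup()
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("__indexrecordidentifier", "id-1"); // assumed to exist in the test schema
        miniCluster.getSolrClient().add(doc);
        miniCluster.getSolrClient().commit();

        // once committed, the document should be findable
        long found = miniCluster
            .getSolrClient()
            .query(new SolrQuery("*:*"))
            .getResults()
            .getNumFound();
        Assertions.assertEquals(1, found);
    }
}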
@@ -0,0 +1,147 @@
+
+package eu.dnetlib.dhp.oa.provision;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.net.URI;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.dom4j.io.SAXReader;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
+import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+@ExtendWith(MockitoExtension.class)
+public class XmlIndexingJobTest extends SolrTest {
+
+    protected static SparkSession spark;
+
+    private static final Integer batchSize = 100;
+
+    @Mock
+    private ISLookUpService isLookUpService;
+
+    @Mock
+    private ISLookupClient isLookupClient;
+
+    @BeforeEach
+    public void prepareMocks() throws ISLookUpException, IOException {
+        isLookupClient.setIsLookup(isLookUpService);
+
+        int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort();
+
+        Mockito
+            .when(isLookupClient.getDsId(Mockito.anyString()))
+            .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
+        Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort));
+        Mockito
+            .when(isLookupClient.getLayoutSource(Mockito.anyString()))
+            .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
+        Mockito
+            .when(isLookupClient.getLayoutTransformer())
+            .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")));
+    }
+
+    @BeforeAll
+    public static void before() {
+
+        SparkConf conf = new SparkConf();
+        conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
+        conf.registerKryoClasses(new Class[] {
+            SerializableSolrInputDocument.class
+        });
+
+        conf.setMaster("local[1]");
+        conf.set("spark.driver.host", "localhost");
+        conf.set("hive.metastore.local", "true");
+        conf.set("spark.ui.enabled", "false");
+        conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
+
+        spark = SparkSession
+            .builder()
+            .appName(XmlIndexingJobTest.class.getSimpleName())
+            .config(conf)
+            .getOrCreate();
+    }
+
+    @AfterAll
+    public static void tearDown() {
+        spark.stop();
+    }
+
+    @Test
+    public void testXmlIndexingJob_onSolr() throws Exception {
+
+        String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
+
+        long nRecord = JavaSparkContext
+            .fromSparkContext(spark.sparkContext())
+            .sequenceFile(inputPath, Text.class, Text.class)
+            .count();
+
+        new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, null).run(isLookupClient);
+
+        Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
+
+        QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*"));
+
+        Assertions
+            .assertEquals(
+                nRecord, rsp.getResults().getNumFound(),
+                "the number of indexed records should be equal to the number of input records");
+    }
+
+    @Test
+    public void testXmlIndexingJob_saveOnHDFS() throws Exception {
+        final String ID_XPATH = "//header/*[local-name()='objIdentifier']";
+
+        String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
+
+        final JavaPairRDD<Text, Text> xmlRecords = JavaSparkContext
+            .fromSparkContext(spark.sparkContext())
+            .sequenceFile(inputPath, Text.class, Text.class);
+        long nRecord = xmlRecords.count();
+        long xmlIdUnique = xmlRecords
+            .map(t -> t._2().toString())
+            .map(s -> new SAXReader().read(new StringReader(s)).valueOf(ID_XPATH))
+            .distinct()
+            .count();
+        Assertions.assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records");
+
+        final String outputPath = workingDir.resolve("outputPath").toAbsolutePath().toString();
+        new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, outputPath).run(isLookupClient);
+
+        final Dataset<SerializableSolrInputDocument> solrDocs = spark
+            .read()
+            .load(outputPath)
+            .as(Encoders.kryo(SerializableSolrInputDocument.class));
+        long docIdUnique = solrDocs.map((MapFunction<SerializableSolrInputDocument, String>) doc -> {
+            final SolrInputField id = doc.getField("__indexrecordidentifier");
+            return id.getFirstValue().toString();
+        }, Encoders.STRING())
+            .distinct()
+            .count();
+        Assertions.assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output records");
+
+    }
+
+}
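Once XmlIndexingJob has populated the collection, individual records can be looked up through the same __indexrecordidentifier field the HDFS test reads back. A minimal SolrJ sketch, not part of this commit; the client and record identifier are assumed:

// Sketch: fetch a single indexed record by its identifier field.
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class IndexedRecordLookupSketch {
    public static long countById(CloudSolrClient client, String recordId) throws Exception {
        SolrQuery query = new SolrQuery("__indexrecordidentifier:\"" + recordId + "\"");
        query.setRows(1);
        QueryResponse rsp = client.query(query);
        return rsp.getResults().getNumFound(); // expected to be 1 for an indexed record
    }
}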
@@ -105,7 +105,7 @@
 <FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
 <FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
 <FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
-<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/resulttype/@classid)"/>
+<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
 <FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
 <FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
 <FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
@@ -123,7 +123,8 @@
 <FIELD indexable="true" name="relfundername" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@name)"/>
 <FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected from of the related entity. Available for result-result relationships -->
 <FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
-<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/><!-- COMMON FIELDS -->
+<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
+<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
 <FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="pdate" value="//header/*[local-name()='dateOfCollection']"/>
 <FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
 <FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
@@ -0,0 +1,31 @@
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup. If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+
+     See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
+<elevate>
+  <!-- Query elevation examples
+    <query text="foo bar">
+      <doc id="1" />
+      <doc id="2" />
+      <doc id="3" />
+    </query>
+
+  for use with techproducts example
+
+    <query text="ipod">
+      <doc id="MA147LL/A" />  put the actual ipod at the top
+      <doc id="IW-02" exclude="true" /> exclude this cable
+    </query>
+  -->
+
+</elevate>
File diff suppressed because it is too large
@@ -83,6 +83,7 @@

 <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" />
 <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" />

 <!-- an exact 'path' can be used instead of a 'dir' to specify a
      specific jar file. This will cause a serious error to be logged
      if it can't be loaded.
@@ -112,7 +113,8 @@
      One can force a particular implementation via solr.MMapDirectoryFactory,
      solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.

-     solr.RAMDirectoryFactory is memory based and not persistent.
+     solr.RAMDirectoryFactory is memory based, not
+     persistent, and doesn't work with replication.
    -->
 <directoryFactory name="DirectoryFactory"
                   class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
@@ -204,7 +206,7 @@
      More details on the nuances of each LockFactory...
      http://wiki.apache.org/lucene-java/AvailableLockFactories
    -->
-    <lockType>${solr.lock.type:single}</lockType>
+    <lockType>${solr.lock.type:native}</lockType>

    <!-- Commit Deletion Policy
         Custom deletion policies can be specified here. The class must
@@ -331,6 +333,29 @@
          postCommit - fired after every commit or optimize command
          postOptimize - fired after every optimize command
    -->
+    <!-- The RunExecutableListener executes an external command from a
+         hook such as postCommit or postOptimize.
+
+         exe - the name of the executable to run
+         dir - dir to use as the current working directory. (default=".")
+         wait - the calling thread waits until the executable returns.
+                (default="true")
+         args - the arguments to pass to the program. (default is none)
+         env - environment variables to set. (default is none)
+      -->
+    <!-- This example shows how RunExecutableListener could be used
+         with the script based replication...
+         http://wiki.apache.org/solr/CollectionDistribution
+      -->
+    <!--
+       <listener event="postCommit" class="solr.RunExecutableListener">
+         <str name="exe">solr/bin/snapshooter</str>
+         <str name="dir">.</str>
+         <bool name="wait">true</bool>
+         <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
+         <arr name="env"> <str>MYVAR=val1</str> </arr>
+       </listener>
+      -->

  </updateHandler>

@@ -366,14 +391,22 @@
      Query section - these settings control query time things like caches
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  <query>
-    <!-- Maximum number of clauses in each BooleanQuery, an exception
-         is thrown if exceeded. It is safe to increase or remove this setting,
-         since it is purely an arbitrary limit to try and catch user errors where
-         large boolean queries may not be the best implementation choice.
+    <!-- Max Boolean Clauses
+
+         Maximum number of clauses in each BooleanQuery, an exception
+         is thrown if exceeded.
+
+         ** WARNING **
+
+         This option actually modifies a global Lucene property that
+         will affect all SolrCores. If multiple solrconfig.xml files
+         disagree on this property, the value at any given moment will
+         be based on the last SolrCore to be initialized.
+
      -->
    <maxBooleanClauses>1024</maxBooleanClauses>


    <!-- Solr Internal Query Caches

         There are two implementations of cache available for Solr,
@@ -575,8 +608,21 @@
      This section contains instructions for how the SolrDispatchFilter
      should behave when processing requests for this SolrCore.
+
+     handleSelect is a legacy option that affects the behavior of requests
+     such as /select?qt=XXX
+
+     handleSelect="true" will cause the SolrDispatchFilter to process
+     the request and dispatch the query to a handler specified by the
+     "qt" param, assuming "/select" isn't already registered.
+
+     handleSelect="false" will cause the SolrDispatchFilter to
+     ignore "/select" requests, resulting in a 404 unless a handler
+     is explicitly registered with the name "/select"
+
+     handleSelect="true" is not recommended for new users, but is the default
+     for backwards compatibility
    -->
-  <requestDispatcher>
+  <requestDispatcher handleSelect="false" >
    <!-- Request Parsing

         These settings indicate how Solr Requests may be parsed, and
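With handleSelect="false", clients have to address a registered handler path directly instead of relying on the legacy /select?qt= dispatch. A minimal SolrJ sketch, not part of this commit; the "/query" handler name is an assumption:

// Sketch: target a registered handler path explicitly rather than using qt dispatch.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;

public class HandlerPathSketch {
    public static void query(SolrClient client) throws Exception {
        SolrQuery q = new SolrQuery("*:*");
        q.setRequestHandler("/query"); // explicit handler path, no /select?qt=... fallback
        System.out.println(client.query(q).getResults().getNumFound());
    }
}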
@@ -602,14 +648,15 @@
        plugins.

        *** WARNING ***
-       Before enabling remote streaming, you should make sure your
-       system has authentication enabled.
+       The settings below authorize Solr to fetch remote files, You
+       should make sure your system has some authentication before
+       using enableRemoteStreaming="true"

-    <requestParsers enableRemoteStreaming="false"
-                    multipartUploadLimitInKB="-1"
-                    formdataUploadLimitInKB="-1"
-                    addHttpRequestToContext="false"/>
      -->
+    <requestParsers enableRemoteStreaming="true"
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"
+                    addHttpRequestToContext="false"/>

    <!-- HTTP Caching

@@ -673,6 +720,14 @@
      Incoming queries will be dispatched to a specific handler by name
      based on the path specified in the request.
+
+     Legacy behavior: If the request path uses "/select" but no Request
+     Handler has that name, and if handleSelect="true" has been specified in
+     the requestDispatcher, then the Request Handler is dispatched based on
+     the qt parameter. Handlers without a leading '/' are accessed this way
+     like so: http://host/app/[core/]select?qt=name If no qt is
+     given, then the requestHandler that declares default="true" will be
+     used or the one named "standard".

      If a Request Handler is declared with startup="lazy", then it will
      not be initialized until the first request that uses it.
@@ -692,13 +747,9 @@
    -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
+      <str name="q.op">AND</str>
      <int name="rows">10</int>
-      <!-- Default search field
-         <str name="df">text</str>
-        -->
-      <!-- Change from JSON to XML format (the default prior to Solr 7.0)
-         <str name="wt">xml</str>
-        -->
+      <!-- <str name="df">text</str> -->
    </lst>
    <!-- In addition to defaults, "appends" params can be specified
         to identify values which should be appended to the list of
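The new q.op default changes how multi-term queries behave: with AND, every term must match unless the request overrides the operator. A minimal SolrJ sketch of the difference, not part of this commit and assuming an already configured client:

// Sketch: with the handler default q.op=AND, "mouse trap" means mouse AND trap;
// a single request can still override the operator per query.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;

public class QopSketch {
    public static void compare(SolrClient client) throws Exception {
        SolrQuery allTerms = new SolrQuery("mouse trap"); // uses the q.op=AND default
        SolrQuery anyTerm = new SolrQuery("mouse trap");
        anyTerm.set("q.op", "OR");                        // per-request override
        System.out.println(client.query(allTerms).getResults().getNumFound());
        System.out.println(client.query(anyTerm).getResults().getNumFound());
    }
}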
@@ -781,10 +832,18 @@

  <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
    <lst name="defaults">
-      <str name="df">_text_</str>
+      <str name="df">__all</str>
    </lst>
  </initParams>

+  <!-- This enabled schemaless mode
+  <initParams path="/update/**">
+    <lst name="defaults">
+      <str name="update.chain">add-unknown-fields-to-the-schema</str>
+    </lst>
+  </initParams>
+  -->
+
  <!-- Solr Cell Update Request Handler

       http://wiki.apache.org/solr/ExtractingRequestHandler
@@ -796,10 +855,9 @@
    <lst name="defaults">
      <str name="lowernames">true</str>
      <str name="fmap.meta">ignored_</str>
-      <str name="fmap.content">_text_</str>
+      <str name="fmap.content">__all</str>
    </lst>
  </requestHandler>

  <!-- Search Components

       Search components are registered to SolrCore and used by
@@ -861,7 +919,7 @@
    <!-- a spellchecker built from a field of the main index -->
    <lst name="spellchecker">
      <str name="name">default</str>
-      <str name="field">_text_</str>
+      <str name="field">__all</str>
      <str name="classname">solr.DirectSolrSpellChecker</str>
      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
      <str name="distanceMeasure">internal</str>
@@ -986,6 +1044,7 @@
  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
    <!-- pick a fieldType to analyze queries -->
    <str name="queryFieldType">string</str>
+    <str name="config-file">elevate.xml</str>
  </searchComponent>

  <!-- A request handler for demonstrating the elevator component -->
@@ -1116,81 +1175,70 @@

  <!-- Add unknown fields to the schema

-       Field type guessing update processors that will
+       An example field type guessing update processor that will
        attempt to parse string-typed field values as Booleans, Longs,
        Doubles, or Dates, and then add schema fields with the guessed
-       field types. Text content will be indexed as "text_general" as
-       well as a copy to a plain string version in *_str.
+       field types.

-       These require that the schema is both managed and mutable, by
+       This requires that the schema is both managed and mutable, by
        declaring schemaFactory as ManagedIndexSchemaFactory, with
        mutable specified as true.

        See http://wiki.apache.org/solr/GuessingFieldTypes
    -->
-  <updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/>
-  <updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/>
-  <updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating">
-    <str name="pattern">[^\w-\.]</str>
-    <str name="replacement">_</str>
-  </updateProcessor>
-  <updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/>
-  <updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/>
-  <updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/>
-  <updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date">
-    <arr name="format">
-      <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
-      <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
-      <str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
-      <str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
-      <str>yyyy-MM-dd'T'HH:mm:ssZ</str>
-      <str>yyyy-MM-dd'T'HH:mm:ss</str>
-      <str>yyyy-MM-dd'T'HH:mmZ</str>
-      <str>yyyy-MM-dd'T'HH:mm</str>
-      <str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
-      <str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
-      <str>yyyy-MM-dd HH:mm:ss.SSS</str>
-      <str>yyyy-MM-dd HH:mm:ss,SSS</str>
-      <str>yyyy-MM-dd HH:mm:ssZ</str>
-      <str>yyyy-MM-dd HH:mm:ss</str>
-      <str>yyyy-MM-dd HH:mmZ</str>
-      <str>yyyy-MM-dd HH:mm</str>
-      <str>yyyy-MM-dd</str>
-    </arr>
-  </updateProcessor>
-  <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields">
-    <lst name="typeMapping">
-      <str name="valueClass">java.lang.String</str>
-      <str name="fieldType">text_general</str>
-      <lst name="copyField">
-        <str name="dest">*_str</str>
-        <int name="maxChars">256</int>
-      </lst>
-      <!-- Use as default mapping instead of defaultFieldType -->
-      <bool name="default">true</bool>
-    </lst>
-    <lst name="typeMapping">
-      <str name="valueClass">java.lang.Boolean</str>
-      <str name="fieldType">booleans</str>
-    </lst>
-    <lst name="typeMapping">
-      <str name="valueClass">java.util.Date</str>
-      <str name="fieldType">pdates</str>
-    </lst>
-    <lst name="typeMapping">
-      <str name="valueClass">java.lang.Long</str>
-      <str name="valueClass">java.lang.Integer</str>
-      <str name="fieldType">plongs</str>
-    </lst>
-    <lst name="typeMapping">
-      <str name="valueClass">java.lang.Number</str>
-      <str name="fieldType">pdoubles</str>
-    </lst>
-  </updateProcessor>
-
-  <!-- The update.autoCreateFields property can be turned to false to disable schemaless mode -->
-  <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}"
-    processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields">
+  <updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
+    <!-- UUIDUpdateProcessorFactory will generate an id if none is present in the incoming document -->
+    <processor class="solr.UUIDUpdateProcessorFactory" />
+    <processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
+    <processor class="solr.FieldNameMutatingUpdateProcessorFactory">
+      <str name="pattern">[^\w-\.]</str>
+      <str name="replacement">_</str>
+    </processor>
+    <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseDateFieldUpdateProcessorFactory">
+      <arr name="format">
+        <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
+        <str>yyyy-MM-dd'T'HH:mm:ssZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss</str>
+        <str>yyyy-MM-dd'T'HH:mmZ</str>
+        <str>yyyy-MM-dd'T'HH:mm</str>
+        <str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
+        <str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
+        <str>yyyy-MM-dd HH:mm:ss.SSS</str>
+        <str>yyyy-MM-dd HH:mm:ss,SSS</str>
+        <str>yyyy-MM-dd HH:mm:ssZ</str>
+        <str>yyyy-MM-dd HH:mm:ss</str>
+        <str>yyyy-MM-dd HH:mmZ</str>
+        <str>yyyy-MM-dd HH:mm</str>
+        <str>yyyy-MM-dd</str>
+      </arr>
+    </processor>
+    <processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
+      <str name="defaultFieldType">strings</str>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Boolean</str>
+        <str name="fieldType">booleans</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.util.Date</str>
+        <str name="fieldType">tdates</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Long</str>
+        <str name="valueClass">java.lang.Integer</str>
+        <str name="fieldType">tlongs</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Number</str>
+        <str name="fieldType">tdoubles</str>
+      </lst>
+    </processor>
    <processor class="solr.LogUpdateProcessorFactory"/>
    <processor class="solr.DistributedUpdateProcessorFactory"/>
    <processor class="solr.RunUpdateProcessorFactory"/>
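The commented-out initParams earlier in the file show one way to wire this chain in by default; alternatively a client can request it per update through the update.chain parameter. A minimal SolrJ sketch, not part of this commit, assuming a managed, mutable schema and an "id" uniqueKey:

// Sketch: invoking the "add-unknown-fields-to-the-schema" chain explicitly
// via the update.chain request parameter.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;

public class SchemalessUpdateSketch {
    public static void indexWithGuessing(SolrClient client) throws Exception {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "doc-1");                // assumed uniqueKey field
        doc.addField("new_numeric_field", 42);      // field not yet in the schema
        UpdateRequest req = new UpdateRequest();
        req.add(doc);
        req.setParam("update.chain", "add-unknown-fields-to-the-schema");
        req.process(client);
        client.commit();
    }
}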
@@ -1313,7 +1361,7 @@

  <!-- Query Parsers

-       https://lucene.apache.org/solr/guide/query-syntax-and-parsing.html
+       https://cwiki.apache.org/confluence/display/solr/Query+Syntax+and+Parsing

       Multiple QParserPlugins can be registered by name, and then
       used in either the "defType" param for the QueryComponent (used
Binary file not shown.
@@ -46,7 +46,7 @@
        </configuration>
    </global>

-    <start to="Step18"/>
+    <start to="Step1"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
pom.xml (26 changed lines)
@@ -50,7 +50,7 @@
        <repository>
            <id>dnet45-releases</id>
            <name>D-Net 45 releases</name>
-           <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+           <url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
            <layout>default</layout>
            <snapshots>
                <enabled>false</enabled>
@@ -70,6 +70,26 @@
                <enabled>false</enabled>
            </snapshots>
        </repository>
+       <repository>
+           <id>dnet45-releases-old</id>
+           <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+           <releases>
+               <enabled>false</enabled>
+           </releases>
+           <snapshots>
+               <enabled>false</enabled>
+           </snapshots>
+       </repository>
+       <repository>
+           <id>dnet45-snapshots-old</id>
+           <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
+           <releases>
+               <enabled>false</enabled>
+           </releases>
+           <snapshots>
+               <enabled>false</enabled>
+           </snapshots>
+       </repository>
    </repositories>

    <dependencies>
@@ -639,12 +659,12 @@
        <snapshotRepository>
            <id>dnet45-snapshots</id>
            <name>DNet45 Snapshots</name>
-           <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
+           <url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <repository>
            <id>dnet45-releases</id>
-           <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+           <url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
        </repository>
    </distributionManagement>
    <reporting>