Solr JSON payload #399
|
@ -506,6 +506,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.map(GraphCleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
|
.sorted((s1, s2) -> s2.getValue().length() - s1.getValue().length())
|
||||||
|
.limit(ModelHardLimits.MAX_ABSTRACTS)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
|
||||||
|
import javax.xml.transform.Transformer;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
import javax.xml.transform.stream.StreamSource;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||||
|
|
||||||
|
public abstract class AbstractSolrRecordTransformJob {
|
||||||
|
|
||||||
|
protected static String toIndexRecord(Transformer tr, final String xmlRecord) {
|
||||||
|
final StreamResult res = new StreamResult(new StringWriter());
|
||||||
|
try {
|
||||||
|
tr.transform(new StreamSource(new StringReader(xmlRecord)), res);
|
||||||
|
return res.getWriter().toString();
|
||||||
|
} catch (TransformerException e) {
|
||||||
|
throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates the XSLT responsible for building the index xml records.
|
||||||
|
*
|
||||||
|
* @param format Metadata format name (DMF|TMF)
|
||||||
|
* @param xslt xslt for building the index record transformer
|
||||||
|
* @param fields the list of fields
|
||||||
|
* @return the javax.xml.transform.Transformer
|
||||||
|
* @throws TransformerException could happen
|
||||||
|
*/
|
||||||
|
protected static String getLayoutTransformer(String format, String fields, String xslt)
|
||||||
|
throws TransformerException {
|
||||||
|
|
||||||
|
final Transformer layoutTransformer = SaxonTransformerFactory.newInstance(xslt);
|
||||||
|
final StreamResult layoutToXsltXslt = new StreamResult(new StringWriter());
|
||||||
|
|
||||||
|
layoutTransformer.setParameter("format", format);
|
||||||
|
layoutTransformer.transform(new StreamSource(new StringReader(fields)), layoutToXsltXslt);
|
||||||
|
|
||||||
|
return layoutToXsltXslt.getWriter().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -27,14 +27,7 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
@ -156,10 +149,33 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
case software:
|
case software:
|
||||||
final Result result = (Result) entity;
|
final Result result = (Result) entity;
|
||||||
|
|
||||||
if (result.getTitle() != null && !result.getTitle().isEmpty()) {
|
if (Objects.nonNull(result.getTitle()) && !result.getTitle().isEmpty()) {
|
||||||
final StructuredProperty title = result.getTitle().stream().findFirst().get();
|
result
|
||||||
title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH));
|
.getTitle()
|
||||||
re.setTitle(title);
|
.stream()
|
||||||
|
.findFirst()
|
||||||
|
.map(StructuredProperty::getValue)
|
||||||
|
.ifPresent(
|
||||||
|
title -> re.getTitle().setValue(StringUtils.left(title, ModelHardLimits.MAX_TITLE_LENGTH)));
|
||||||
|
}
|
||||||
|
if (Objects.nonNull(result.getDescription()) && !result.getDescription().isEmpty()) {
|
||||||
|
result
|
||||||
|
.getDescription()
|
||||||
|
.stream()
|
||||||
|
.findFirst()
|
||||||
|
.map(Field::getValue)
|
||||||
|
.ifPresent(
|
||||||
|
d -> re.setDescription(StringUtils.left(d, ModelHardLimits.MAX_RELATED_ABSTRACT_LENGTH)));
|
||||||
|
}
|
||||||
|
if (Objects.nonNull(result.getAuthor()) && !result.getAuthor().isEmpty()) {
|
||||||
|
re
|
||||||
|
.setAuthor(
|
||||||
|
result
|
||||||
|
.getAuthor()
|
||||||
|
.stream()
|
||||||
|
.map(Author::getFullname)
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
||||||
re.setDateofacceptance(getValue(result.getDateofacceptance()));
|
re.setDateofacceptance(getValue(result.getDateofacceptance()));
|
||||||
|
|
|
@ -0,0 +1,144 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoder;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.TupleWrapper;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
|
||||||
|
public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SolrRecordDumpJob.class);
|
||||||
|
|
||||||
|
private static final Integer DEFAULT_BATCH_SIZE = 1000;
|
||||||
|
|
||||||
|
private final String inputPath;
|
||||||
|
|
||||||
|
private final String format;
|
||||||
|
|
||||||
|
private final String outputPath;
|
||||||
|
|
||||||
|
private final SparkSession spark;
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
SolrRecordDumpJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json")));
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("inputPath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String format = parser.get("format");
|
||||||
|
log.info("format: {}", format);
|
||||||
|
|
||||||
|
final String outputPath = Optional
|
||||||
|
.ofNullable(parser.get("outputPath"))
|
||||||
|
.map(StringUtils::trim)
|
||||||
|
.orElse(null);
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final Integer batchSize = Optional
|
||||||
|
.ofNullable(parser.get("batchSize"))
|
||||||
|
.map(Integer::valueOf)
|
||||||
|
.orElse(DEFAULT_BATCH_SIZE);
|
||||||
|
log.info("batchSize: {}", batchSize);
|
||||||
|
|
||||||
|
final boolean shouldIndex = Optional
|
||||||
|
.ofNullable(parser.get("shouldIndex"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(false);
|
||||||
|
log.info("shouldIndex: {}", shouldIndex);
|
||||||
|
|
||||||
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
conf.registerKryoClasses(new Class[] {
|
||||||
|
SerializableSolrInputDocument.class
|
||||||
|
});
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
|
new SolrRecordDumpJob(spark, inputPath, format, outputPath).run(isLookup);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public SolrRecordDumpJob(SparkSession spark, String inputPath, String format, String outputPath) {
|
||||||
|
this.spark = spark;
|
||||||
|
this.inputPath = inputPath;
|
||||||
|
this.format = format;
|
||||||
|
this.outputPath = outputPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
|
||||||
|
final String fields = isLookup.getLayoutSource(format);
|
||||||
|
log.info("fields: {}", fields);
|
||||||
|
|
||||||
|
final String xslt = isLookup.getLayoutTransformer();
|
||||||
|
|
||||||
|
final String dsId = isLookup.getDsId(format);
|
||||||
|
log.info("dsId: {}", dsId);
|
||||||
|
|
||||||
|
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
|
||||||
|
log.info("indexRecordTransformer {}", indexRecordXslt);
|
||||||
|
|
||||||
|
final Encoder<TupleWrapper> encoder = Encoders.bean(TupleWrapper.class);
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.schema(encoder.schema())
|
||||||
|
.json(inputPath)
|
||||||
|
.as(encoder)
|
||||||
|
.map(
|
||||||
|
(MapFunction<TupleWrapper, TupleWrapper>) t -> new TupleWrapper(
|
||||||
|
toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), t.getXml()),
|
||||||
|
t.getJson()),
|
||||||
|
Encoders.bean(TupleWrapper.class))
|
||||||
|
.map(
|
||||||
|
(MapFunction<TupleWrapper, SerializableSolrInputDocument>) t -> {
|
||||||
|
SolrInputDocument s = new StreamingInputDocumentFactory()
|
||||||
|
.parseDocument(t.getXml(), t.getJson());
|
||||||
|
return new SerializableSolrInputDocument(s);
|
||||||
|
},
|
||||||
|
Encoders.kryo(SerializableSolrInputDocument.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.parquet(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
|
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
|
||||||
|
import static org.apache.spark.sql.functions.*;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -14,22 +15,31 @@ import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.SparkContext;
|
import org.apache.spark.SparkContext;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.PairFunction;
|
import org.apache.spark.api.java.function.PairFunction;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.*;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.expressions.UserDefinedFunction;
|
||||||
|
import org.apache.spark.sql.types.DataTypes;
|
||||||
import org.apache.spark.util.LongAccumulator;
|
import org.apache.spark.util.LongAccumulator;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.TupleWrapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.SolrRecord;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -65,13 +75,20 @@ public class XmlConverterJob {
|
||||||
final String contextApiBaseUrl = parser.get("contextApiBaseUrl");
|
final String contextApiBaseUrl = parser.get("contextApiBaseUrl");
|
||||||
log.info("contextApiBaseUrl: {}", contextApiBaseUrl);
|
log.info("contextApiBaseUrl: {}", contextApiBaseUrl);
|
||||||
|
|
||||||
|
String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
|
ISLookUpService isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
||||||
|
|
||||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
convertToXml(spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl));
|
convertToXml(
|
||||||
|
spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl),
|
||||||
|
VocabularyGroup.loadVocsFromIS(isLookup));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +96,8 @@ public class XmlConverterJob {
|
||||||
final SparkSession spark,
|
final SparkSession spark,
|
||||||
final String inputPath,
|
final String inputPath,
|
||||||
final String outputPath,
|
final String outputPath,
|
||||||
final ContextMapper contextMapper) {
|
final ContextMapper contextMapper,
|
||||||
|
final VocabularyGroup vocabularies) {
|
||||||
|
|
||||||
final XmlRecordFactory recordFactory = new XmlRecordFactory(
|
final XmlRecordFactory recordFactory = new XmlRecordFactory(
|
||||||
prepareAccumulators(spark.sparkContext()),
|
prepareAccumulators(spark.sparkContext()),
|
||||||
|
@ -92,20 +110,25 @@ public class XmlConverterJob {
|
||||||
|
|
||||||
log.info("Found paths: {}", String.join(",", paths));
|
log.info("Found paths: {}", String.join(",", paths));
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
|
||||||
spark
|
spark
|
||||||
.read()
|
.read()
|
||||||
.load(toSeq(paths))
|
.load(toSeq(paths))
|
||||||
.as(Encoders.kryo(JoinedEntity.class))
|
.as(Encoders.kryo(JoinedEntity.class))
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<JoinedEntity, Tuple2<String, String>>) je -> new Tuple2<>(
|
(MapFunction<JoinedEntity, Tuple2<String, SolrRecord>>) je -> new Tuple2<>(
|
||||||
je.getEntity().getId(),
|
recordFactory.build(je),
|
||||||
recordFactory.build(je)),
|
ProvisionModelSupport.transform(je, contextMapper, vocabularies)),
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
|
Encoders.tuple(Encoders.STRING(), Encoders.bean(SolrRecord.class)))
|
||||||
.javaRDD()
|
.map(
|
||||||
.mapToPair(
|
(MapFunction<Tuple2<String, SolrRecord>, TupleWrapper>) t -> new TupleWrapper(
|
||||||
(PairFunction<Tuple2<String, String>, Text, Text>) t -> new Tuple2<>(new Text(t._1()),
|
t._1(), mapper.writeValueAsString(t._2())),
|
||||||
new Text(t._2())))
|
Encoders.bean(TupleWrapper.class))
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(final SparkSession spark, final String path) {
|
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||||
|
|
|
@ -3,26 +3,17 @@ package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import javax.xml.transform.Transformer;
|
|
||||||
import javax.xml.transform.TransformerException;
|
import javax.xml.transform.TransformerException;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
|
||||||
import javax.xml.transform.stream.StreamSource;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoder;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -31,34 +22,25 @@ import com.lucidworks.spark.util.SolrSupport;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.TupleWrapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
|
||||||
public class XmlIndexingJob {
|
public class XmlIndexingJob extends AbstractSolrRecordTransformJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);
|
private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);
|
||||||
|
|
||||||
public enum OutputFormat {
|
|
||||||
SOLR, HDFS
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final Integer DEFAULT_BATCH_SIZE = 1000;
|
private static final Integer DEFAULT_BATCH_SIZE = 1000;
|
||||||
|
|
||||||
protected static final String DATE_FORMAT = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
|
||||||
|
|
||||||
private final String inputPath;
|
private final String inputPath;
|
||||||
|
|
||||||
private final String format;
|
private final String format;
|
||||||
|
|
||||||
private final int batchSize;
|
private final int batchSize;
|
||||||
|
|
||||||
private final OutputFormat outputFormat;
|
|
||||||
|
|
||||||
private final String outputPath;
|
|
||||||
|
|
||||||
private final SparkSession spark;
|
private final SparkSession spark;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
@ -83,25 +65,14 @@ public class XmlIndexingJob {
|
||||||
final String format = parser.get("format");
|
final String format = parser.get("format");
|
||||||
log.info("format: {}", format);
|
log.info("format: {}", format);
|
||||||
|
|
||||||
final String outputPath = Optional
|
|
||||||
.ofNullable(parser.get("outputPath"))
|
|
||||||
.map(StringUtils::trim)
|
|
||||||
.orElse(null);
|
|
||||||
log.info("outputPath: {}", outputPath);
|
|
||||||
|
|
||||||
final Integer batchSize = Optional
|
final Integer batchSize = Optional
|
||||||
.ofNullable(parser.get("batchSize"))
|
.ofNullable(parser.get("batchSize"))
|
||||||
.map(Integer::valueOf)
|
.map(Integer::valueOf)
|
||||||
.orElse(DEFAULT_BATCH_SIZE);
|
.orElse(DEFAULT_BATCH_SIZE);
|
||||||
log.info("batchSize: {}", batchSize);
|
log.info("batchSize: {}", batchSize);
|
||||||
|
|
||||||
final OutputFormat outputFormat = Optional
|
|
||||||
.ofNullable(parser.get("outputFormat"))
|
|
||||||
.map(OutputFormat::valueOf)
|
|
||||||
.orElse(OutputFormat.SOLR);
|
|
||||||
log.info("outputFormat: {}", outputFormat);
|
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
conf.registerKryoClasses(new Class[] {
|
conf.registerKryoClasses(new Class[] {
|
||||||
SerializableSolrInputDocument.class
|
SerializableSolrInputDocument.class
|
||||||
});
|
});
|
||||||
|
@ -113,19 +84,16 @@ public class XmlIndexingJob {
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
new XmlIndexingJob(spark, inputPath, format, batchSize, outputFormat, outputPath).run(isLookup);
|
new XmlIndexingJob(spark, inputPath, format, batchSize)
|
||||||
|
.run(isLookup);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize,
|
public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize) {
|
||||||
OutputFormat outputFormat,
|
|
||||||
String outputPath) {
|
|
||||||
this.spark = spark;
|
this.spark = spark;
|
||||||
this.inputPath = inputPath;
|
this.inputPath = inputPath;
|
||||||
this.format = format;
|
this.format = format;
|
||||||
this.batchSize = batchSize;
|
this.batchSize = batchSize;
|
||||||
this.outputFormat = outputFormat;
|
|
||||||
this.outputPath = outputPath;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
|
public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
|
||||||
|
@ -137,84 +105,31 @@ public class XmlIndexingJob {
|
||||||
final String dsId = isLookup.getDsId(format);
|
final String dsId = isLookup.getDsId(format);
|
||||||
log.info("dsId: {}", dsId);
|
log.info("dsId: {}", dsId);
|
||||||
|
|
||||||
|
final String collection = ProvisionConstants.getCollectionName(format);
|
||||||
|
log.info("collection: {}", collection);
|
||||||
|
|
||||||
final String zkHost = isLookup.getZkHost();
|
final String zkHost = isLookup.getZkHost();
|
||||||
log.info("zkHost: {}", zkHost);
|
log.info("zkHost: {}", zkHost);
|
||||||
|
|
||||||
final String version = getRecordDatestamp();
|
|
||||||
|
|
||||||
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
|
final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
|
||||||
log.info("indexRecordTransformer {}", indexRecordXslt);
|
log.info("indexRecordTransformer {}", indexRecordXslt);
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final Encoder<TupleWrapper> encoder = Encoders.bean(TupleWrapper.class);
|
||||||
|
|
||||||
JavaRDD<SolrInputDocument> docs = sc
|
JavaRDD<SolrInputDocument> docs = spark
|
||||||
.sequenceFile(inputPath, Text.class, Text.class)
|
.read()
|
||||||
.map(t -> t._2().toString())
|
.schema(encoder.schema())
|
||||||
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
|
.json(inputPath)
|
||||||
.map(s -> new StreamingInputDocumentFactory().parseDocument(s));
|
.as(encoder)
|
||||||
|
.map(
|
||||||
switch (outputFormat) {
|
(MapFunction<TupleWrapper, TupleWrapper>) t -> new TupleWrapper(
|
||||||
case SOLR:
|
toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), t.getXml()),
|
||||||
final String collection = ProvisionConstants.getCollectionName(format);
|
t.getJson()),
|
||||||
|
Encoders.bean(TupleWrapper.class))
|
||||||
// SparkSolr >= 4
|
.javaRDD()
|
||||||
// com.lucidworks.spark.BatchSizeType bt = com.lucidworks.spark.BatchSizeType.NUM_DOCS;
|
.map(
|
||||||
// SolrSupport.indexDocs(zkHost, collection, batchSize, bt, docs.rdd());
|
t -> new StreamingInputDocumentFactory().parseDocument(t.getXml(), t.getJson()));
|
||||||
// SparkSolr < 4
|
SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
|
||||||
SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
|
|
||||||
break;
|
|
||||||
case HDFS:
|
|
||||||
spark
|
|
||||||
.createDataset(
|
|
||||||
docs.map(SerializableSolrInputDocument::new).rdd(),
|
|
||||||
Encoders.kryo(SerializableSolrInputDocument.class))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.parquet(outputPath);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("invalid outputFormat: " + outputFormat);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static String toIndexRecord(Transformer tr, final String xmlRecord) {
|
|
||||||
final StreamResult res = new StreamResult(new StringWriter());
|
|
||||||
try {
|
|
||||||
tr.transform(new StreamSource(new StringReader(xmlRecord)), res);
|
|
||||||
return res.getWriter().toString();
|
|
||||||
} catch (TransformerException e) {
|
|
||||||
throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates the XSLT responsible for building the index xml records.
|
|
||||||
*
|
|
||||||
* @param format Metadata format name (DMF|TMF)
|
|
||||||
* @param xslt xslt for building the index record transformer
|
|
||||||
* @param fields the list of fields
|
|
||||||
* @return the javax.xml.transform.Transformer
|
|
||||||
* @throws TransformerException could happen
|
|
||||||
*/
|
|
||||||
protected static String getLayoutTransformer(String format, String fields, String xslt)
|
|
||||||
throws TransformerException {
|
|
||||||
|
|
||||||
final Transformer layoutTransformer = SaxonTransformerFactory.newInstance(xslt);
|
|
||||||
final StreamResult layoutToXsltXslt = new StreamResult(new StringWriter());
|
|
||||||
|
|
||||||
layoutTransformer.setParameter("format", format);
|
|
||||||
layoutTransformer.transform(new StreamSource(new StringReader(fields)), layoutToXsltXslt);
|
|
||||||
|
|
||||||
return layoutToXsltXslt.getWriter().toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* method return a solr-compatible string representation of a date, used to mark all records as indexed today
|
|
||||||
*
|
|
||||||
* @return the parsed date
|
|
||||||
*/
|
|
||||||
public static String getRecordDatestamp() {
|
|
||||||
return new SimpleDateFormat(DATE_FORMAT).format(new Date());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,15 @@ import java.io.Serializable;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
|
|
||||||
public class JoinedEntity<E extends OafEntity> implements Serializable {
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
private E entity;
|
public class JoinedEntity implements Serializable {
|
||||||
|
|
||||||
|
private OafEntity entity;
|
||||||
|
|
||||||
private List<RelatedEntityWrapper> links;
|
private List<RelatedEntityWrapper> links;
|
||||||
|
|
||||||
|
@ -17,16 +21,16 @@ public class JoinedEntity<E extends OafEntity> implements Serializable {
|
||||||
links = new LinkedList<>();
|
links = new LinkedList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public JoinedEntity(E entity) {
|
public JoinedEntity(OafEntity entity) {
|
||||||
this();
|
this();
|
||||||
this.entity = entity;
|
this.entity = entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
public E getEntity() {
|
public OafEntity getEntity() {
|
||||||
return entity;
|
return entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEntity(E entity) {
|
public void setEntity(OafEntity entity) {
|
||||||
this.entity = entity;
|
this.entity = entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,46 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision.model;
|
package eu.dnetlib.dhp.oa.provision.model;
|
||||||
|
|
||||||
import java.util.List;
|
import static org.apache.commons.lang3.StringUtils.substringBefore;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
|
||||||
import eu.dnetlib.dhp.oa.provision.RelationList;
|
import eu.dnetlib.dhp.oa.provision.RelationList;
|
||||||
import eu.dnetlib.dhp.oa.provision.SortableRelation;
|
import eu.dnetlib.dhp.oa.provision.SortableRelation;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ContextDef;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.*;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.AccessRight;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Context;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Country;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Datasource;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.Subject;
|
||||||
|
|
||||||
public class ProvisionModelSupport {
|
public class ProvisionModelSupport {
|
||||||
|
|
||||||
|
@ -28,4 +61,636 @@ public class ProvisionModelSupport {
|
||||||
RelationList.class));
|
RelationList.class));
|
||||||
return modelClasses.toArray(new Class[] {});
|
return modelClasses.toArray(new Class[] {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static SolrRecord transform(JoinedEntity je, ContextMapper contextMapper, VocabularyGroup vocs) {
|
||||||
|
SolrRecord r = new SolrRecord();
|
||||||
|
final OafEntity e = je.getEntity();
|
||||||
|
final RecordType type = RecordType.valueOf(e.getClass().getSimpleName().toLowerCase());
|
||||||
|
final Boolean deletedbyinference = Optional
|
||||||
|
.ofNullable(e.getDataInfo())
|
||||||
|
.map(DataInfo::getDeletedbyinference)
|
||||||
|
.orElse(null);
|
||||||
|
r
|
||||||
|
.setHeader(
|
||||||
|
SolrRecordHeader
|
||||||
|
.newInstance(
|
||||||
|
e.getId(), e.getOriginalId(), type, deletedbyinference));
|
||||||
|
r.setCollectedfrom(asProvenance(e.getCollectedfrom()));
|
||||||
|
r.setContext(asContext(e.getContext(), contextMapper));
|
||||||
|
r.setPid(asPid(e.getPid()));
|
||||||
|
|
||||||
|
if (e instanceof eu.dnetlib.dhp.schema.oaf.Result) {
|
||||||
|
r.setResult(mapResult((eu.dnetlib.dhp.schema.oaf.Result) e));
|
||||||
|
} else if (e instanceof eu.dnetlib.dhp.schema.oaf.Datasource) {
|
||||||
|
r.setDatasource(mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) e));
|
||||||
|
} else if (e instanceof eu.dnetlib.dhp.schema.oaf.Organization) {
|
||||||
|
r.setOrganization(mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) e));
|
||||||
|
} else if (e instanceof eu.dnetlib.dhp.schema.oaf.Project) {
|
||||||
|
r.setProject(mapProject((eu.dnetlib.dhp.schema.oaf.Project) e, vocs));
|
||||||
|
}
|
||||||
|
r
|
||||||
|
.setLinks(
|
||||||
|
Optional
|
||||||
|
.ofNullable(je.getLinks())
|
||||||
|
.map(
|
||||||
|
links -> links
|
||||||
|
.stream()
|
||||||
|
.map(rew -> mapRelatedRecord(rew, vocs))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static RelatedRecord mapRelatedRecord(RelatedEntityWrapper rew, VocabularyGroup vocs) {
|
||||||
|
RelatedRecord rr = new RelatedRecord();
|
||||||
|
|
||||||
|
final RelatedEntity re = rew.getTarget();
|
||||||
|
final RecordType relatedRecordType = RecordType.valueOf(re.getType());
|
||||||
|
final Relation relation = rew.getRelation();
|
||||||
|
rr
|
||||||
|
.setHeader(
|
||||||
|
RelatedRecordHeader
|
||||||
|
.newInstance(
|
||||||
|
relation.getRelType(),
|
||||||
|
relation.getRelClass(),
|
||||||
|
relation.getTarget(), relatedRecordType));
|
||||||
|
|
||||||
|
rr.setAcronym(re.getAcronym());
|
||||||
|
rr.setCode(re.getCode());
|
||||||
|
rr.setContracttype(mapCodeLabel(re.getContracttype()));
|
||||||
|
rr.setCollectedfrom(asProvenance(re.getCollectedfrom()));
|
||||||
|
rr.setCodeRepositoryUrl(re.getCodeRepositoryUrl());
|
||||||
|
rr.setCountry(asCountry(re.getCountry()));
|
||||||
|
rr.setDatasourcetype(mapCodeLabel(re.getDatasourcetype()));
|
||||||
|
rr.setDatasourcetypeui(mapCodeLabel(re.getDatasourcetypeui()));
|
||||||
|
rr.setDateofacceptance(re.getDateofacceptance());
|
||||||
|
rr.setFunding(mapFunding(re.getFundingtree(), vocs));
|
||||||
|
rr.setInstances(mapInstances(re.getInstances()));
|
||||||
|
rr.setLegalname(re.getLegalname());
|
||||||
|
rr.setLegalshortname(re.getLegalshortname());
|
||||||
|
rr.setOfficialname(re.getOfficialname());
|
||||||
|
rr.setOpenairecompatibility(mapCodeLabel(re.getOpenairecompatibility()));
|
||||||
|
rr.setPid(asPid(re.getPid()));
|
||||||
|
rr.setProjectTitle(rr.getProjectTitle());
|
||||||
|
rr.setPublisher(re.getPublisher());
|
||||||
|
rr.setResulttype(mapQualifier(re.getResulttype()));
|
||||||
|
rr.setTitle(Optional.ofNullable(re.getTitle()).map(StructuredProperty::getValue).orElse(null));
|
||||||
|
|
||||||
|
return rr;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p, VocabularyGroup vocs) {
|
||||||
|
Project ps = new Project();
|
||||||
|
ps.setAcronym(mapField(p.getAcronym()));
|
||||||
|
ps.setCode(mapField(p.getCode()));
|
||||||
|
ps.setContracttype(mapCodeLabel(p.getContracttype()));
|
||||||
|
ps.setCurrency(mapField(p.getCurrency()));
|
||||||
|
ps.setDuration(mapField(p.getDuration()));
|
||||||
|
ps.setCallidentifier(mapField(p.getCallidentifier()));
|
||||||
|
ps.setEcarticle29_3(mapField(p.getEcarticle29_3()));
|
||||||
|
ps.setEnddate(mapField(p.getEnddate()));
|
||||||
|
ps.setFundedamount(p.getFundedamount());
|
||||||
|
ps.setKeywords(mapField(p.getKeywords()));
|
||||||
|
ps.setStartdate(mapField(p.getStartdate()));
|
||||||
|
ps.setSubjects(asSubjectSP(p.getSubjects()));
|
||||||
|
ps.setSummary(mapField(p.getSummary()));
|
||||||
|
ps.setTitle(mapField(p.getTitle()));
|
||||||
|
ps.setTotalcost(p.getTotalcost());
|
||||||
|
ps.setWebsiteurl(mapField(p.getWebsiteurl()));
|
||||||
|
ps.setFunding(mapFundingField(p.getFundingtree(), vocs));
|
||||||
|
return ps;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Funding mapFunding(List<String> fundingtree, VocabularyGroup vocs) {
|
||||||
|
SAXReader reader = new SAXReader();
|
||||||
|
return Optional
|
||||||
|
.ofNullable(fundingtree)
|
||||||
|
.flatMap(
|
||||||
|
ftree -> ftree
|
||||||
|
.stream()
|
||||||
|
.map(ft -> {
|
||||||
|
try {
|
||||||
|
Document doc = reader.read(new StringReader(ft));
|
||||||
|
String countryCode = doc.valueOf("/fundingtree/funder/jurisdiction/text()");
|
||||||
|
Country country = vocs
|
||||||
|
.find("dnet:countries")
|
||||||
|
.map(voc -> voc.getTerm(countryCode))
|
||||||
|
.map(VocabularyTerm::getName)
|
||||||
|
.map(label -> Country.newInstance(countryCode, label))
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
String level0_id = doc.valueOf("//funding_level_0/id/text()");
|
||||||
|
String level1_id = doc.valueOf("//funding_level_1/id/text()");
|
||||||
|
String level2_id = doc.valueOf("//funding_level_2/id/text()");
|
||||||
|
|
||||||
|
return Funding
|
||||||
|
.newInstance(
|
||||||
|
Funder
|
||||||
|
.newInstance(
|
||||||
|
doc.valueOf("/fundingtree/funder/id/text()"),
|
||||||
|
doc.valueOf("/fundingtree/funder/shortname/text()"),
|
||||||
|
doc.valueOf("/fundingtree/funder/name/text()"),
|
||||||
|
country, new ArrayList<>()),
|
||||||
|
Optional
|
||||||
|
.ofNullable(level0_id)
|
||||||
|
.map(
|
||||||
|
id -> FundingLevel
|
||||||
|
.newInstance(
|
||||||
|
id,
|
||||||
|
doc.valueOf("//funding_level_0/description/text()"),
|
||||||
|
doc.valueOf("//funding_level_0/name/text()")))
|
||||||
|
.orElse(null),
|
||||||
|
Optional
|
||||||
|
.ofNullable(level1_id)
|
||||||
|
.map(
|
||||||
|
id -> FundingLevel
|
||||||
|
.newInstance(
|
||||||
|
id,
|
||||||
|
doc.valueOf("//funding_level_1/description/text()"),
|
||||||
|
doc.valueOf("//funding_level_1/name/text()")))
|
||||||
|
.orElse(null),
|
||||||
|
Optional
|
||||||
|
.ofNullable(level2_id)
|
||||||
|
.map(
|
||||||
|
id -> FundingLevel
|
||||||
|
.newInstance(
|
||||||
|
id,
|
||||||
|
doc.valueOf("//funding_level_2/description/text()"),
|
||||||
|
doc.valueOf("//funding_level_2/name/text()")))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
} catch (DocumentException e) {
|
||||||
|
throw new IllegalArgumentException(e);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.findFirst())
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Funding mapFundingField(List<Field<String>> fundingtree, VocabularyGroup vocs) {
|
||||||
|
return mapFunding(
|
||||||
|
Optional
|
||||||
|
.ofNullable(fundingtree)
|
||||||
|
.map(fts -> fts.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
|
.orElse(null),
|
||||||
|
vocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization o) {
|
||||||
|
Organization org = new Organization();
|
||||||
|
org.setCountry(mapCodeLabel(o.getCountry()));
|
||||||
|
org.setLegalname(mapField(o.getLegalname()));
|
||||||
|
org.setLegalshortname(mapField(o.getLegalshortname()));
|
||||||
|
org.setAlternativeNames(mapFieldList(o.getAlternativeNames()));
|
||||||
|
org.setWebsiteurl(mapField(o.getWebsiteurl()));
|
||||||
|
org.setLogourl(mapField(o.getLogourl()));
|
||||||
|
|
||||||
|
org.setEcenterprise(mapField(o.getEcenterprise()));
|
||||||
|
org.setEchighereducation(mapField(o.getEchighereducation()));
|
||||||
|
org.setEclegalbody(mapField(o.getEclegalbody()));
|
||||||
|
org.setEcinternationalorganization(mapField(o.getEcinternationalorganization()));
|
||||||
|
org.setEcinternationalorganizationeurinterests(mapField(o.getEcinternationalorganizationeurinterests()));
|
||||||
|
org.setEclegalperson(mapField(o.getEclegalperson()));
|
||||||
|
org.setEcnonprofit(mapField(o.getEcnonprofit()));
|
||||||
|
org.setEcnutscode(mapField(o.getEcnutscode()));
|
||||||
|
org.setEcresearchorganization(mapField(o.getEcresearchorganization()));
|
||||||
|
org.setEcsmevalidated(mapField(o.getEcsmevalidated()));
|
||||||
|
|
||||||
|
return org;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
|
||||||
|
Datasource ds = new Datasource();
|
||||||
|
ds.setEnglishname(mapField(d.getEnglishname()));
|
||||||
|
ds.setOfficialname(mapField(d.getOfficialname()));
|
||||||
|
ds.setDescription(mapField(d.getDescription()));
|
||||||
|
ds.setJournal(mapJournal(d.getJournal()));
|
||||||
|
ds.setLogourl(mapField(d.getLogourl()));
|
||||||
|
ds.setAccessinfopackage(mapFieldList(d.getAccessinfopackage()));
|
||||||
|
ds.setCertificates(mapField(d.getCertificates()));
|
||||||
|
ds.setCitationguidelineurl(mapField(d.getCitationguidelineurl()));
|
||||||
|
ds.setConsenttermsofuse(d.getConsenttermsofuse());
|
||||||
|
ds.setConsenttermsofusedate(d.getConsenttermsofusedate());
|
||||||
|
ds.setContactemail(mapField(d.getContactemail()));
|
||||||
|
ds.setContentpolicies(mapCodeLabel(d.getContentpolicies()));
|
||||||
|
ds.setDatabaseaccessrestriction(mapField(d.getDatabaseaccessrestriction()));
|
||||||
|
ds.setDatabaseaccesstype(mapField(d.getDatabaseaccesstype()));
|
||||||
|
ds.setDataprovider(mapField(d.getDataprovider()));
|
||||||
|
ds.setDatasourcetype(mapCodeLabel(d.getDatasourcetype()));
|
||||||
|
ds.setDatasourcetypeui(mapCodeLabel(d.getDatasourcetypeui()));
|
||||||
|
ds.setDatauploadrestriction(mapField(d.getDatauploadrestriction()));
|
||||||
|
ds.setDatauploadtype(mapField(d.getDatauploadtype()));
|
||||||
|
ds.setDateofvalidation(mapField(d.getDateofvalidation()));
|
||||||
|
ds.setEoscdatasourcetype(mapCodeLabel(d.getEoscdatasourcetype()));
|
||||||
|
ds.setEosctype(mapCodeLabel(d.getEosctype()));
|
||||||
|
ds.setFulltextdownload(d.getFulltextdownload());
|
||||||
|
ds.setJurisdiction(mapCodeLabel(d.getJurisdiction()));
|
||||||
|
ds.setLanguages(d.getLanguages());
|
||||||
|
ds.setLatitude(mapField(d.getLatitude()));
|
||||||
|
ds.setLongitude(mapField(d.getLongitude()));
|
||||||
|
ds.setLastconsenttermsofusedate(d.getLastconsenttermsofusedate());
|
||||||
|
ds.setMissionstatementurl(mapField(d.getMissionstatementurl()));
|
||||||
|
ds.setNamespaceprefix(mapField(d.getNamespaceprefix()));
|
||||||
|
ds.setOdcontenttypes(mapFieldList(d.getOdcontenttypes()));
|
||||||
|
ds.setOdlanguages(mapFieldList(d.getOdlanguages()));
|
||||||
|
ds.setOdnumberofitems(mapField(d.getOdnumberofitems()));
|
||||||
|
ds.setOdnumberofitemsdate(mapField(d.getOdnumberofitemsdate()));
|
||||||
|
ds.setOdpolicies(mapField(d.getOdpolicies()));
|
||||||
|
ds.setOpenairecompatibility(mapCodeLabel(d.getOpenairecompatibility()));
|
||||||
|
ds.setPidsystems(mapField(d.getPidsystems()));
|
||||||
|
ds.setPolicies(mapCodeLabelKV(d.getPolicies()));
|
||||||
|
ds.setPreservationpolicyurl(d.getPreservationpolicyurl());
|
||||||
|
ds.setProvidedproducttypes(ds.getProvidedproducttypes());
|
||||||
|
ds.setReleaseenddate(mapField(d.getReleasestartdate()));
|
||||||
|
ds.setReleasestartdate(mapField(d.getReleasestartdate()));
|
||||||
|
ds.setResearchentitytypes(ds.getResearchentitytypes());
|
||||||
|
ds.setResearchproductaccesspolicies(d.getResearchproductaccesspolicies());
|
||||||
|
ds.setResearchproductmetadataaccesspolicies(d.getResearchproductmetadataaccesspolicies());
|
||||||
|
ds.setServiceprovider(mapField(d.getServiceprovider()));
|
||||||
|
ds.setSubjects(asSubjectSP(d.getSubjects()));
|
||||||
|
ds.setSubmissionpolicyurl(d.getSubmissionpolicyurl());
|
||||||
|
ds.setThematic(d.getThematic());
|
||||||
|
ds.setVersioncontrol(d.getVersioncontrol());
|
||||||
|
ds.setVersioning(mapField(d.getVersioning()));
|
||||||
|
|
||||||
|
return ds;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Result mapResult(eu.dnetlib.dhp.schema.oaf.Result r) {
|
||||||
|
Result rs = new Result();
|
||||||
|
|
||||||
|
rs.setResulttype(mapQualifier(r.getResulttype()));
|
||||||
|
rs.setAuthor(asAuthor(r.getAuthor()));
|
||||||
|
rs.setMaintitle(getMaintitle(r.getTitle()));
|
||||||
|
rs.setOtherTitles(getOtherTitles(r.getTitle()));
|
||||||
|
rs.setDescription(mapFieldList(r.getDescription()));
|
||||||
|
rs.setSubject(asSubject(r.getSubject()));
|
||||||
|
rs.setPublicationdate(mapField(r.getDateofacceptance()));
|
||||||
|
rs.setPublisher(mapField(r.getPublisher()));
|
||||||
|
rs.setEmbargoenddate(mapField(r.getEmbargoenddate()));
|
||||||
|
rs.setSource(mapFieldList(r.getSource()));
|
||||||
|
rs.setFormat(mapFieldList(r.getFormat()));
|
||||||
|
rs.setContributor(mapFieldList(r.getContributor()));
|
||||||
|
rs.setCoverage(mapFieldList(r.getCoverage()));
|
||||||
|
rs
|
||||||
|
.setBestaccessright(
|
||||||
|
BestAccessRight
|
||||||
|
.newInstance(r.getBestaccessright().getClassid(), r.getBestaccessright().getClassname()));
|
||||||
|
rs.setFulltext(mapFieldList(r.getFulltext()));
|
||||||
|
rs.setCountry(asCountry(r.getCountry()));
|
||||||
|
rs.setEoscifguidelines(asEOSCIF(r.getEoscifguidelines()));
|
||||||
|
|
||||||
|
rs.setGreen(r.getIsGreen());
|
||||||
|
rs
|
||||||
|
.setOpenAccessColor(
|
||||||
|
Optional
|
||||||
|
.ofNullable(r.getOpenAccessColor())
|
||||||
|
.map(color -> OpenAccessColor.valueOf(color.toString()))
|
||||||
|
.orElse(null));
|
||||||
|
rs.setInDiamondJournal(r.getIsInDiamondJournal());
|
||||||
|
rs.setPubliclyFunded(r.getPubliclyFunded());
|
||||||
|
rs.setTransformativeAgreement(r.getTransformativeAgreement());
|
||||||
|
|
||||||
|
rs.setInstance(mapInstances(r.getInstance()));
|
||||||
|
|
||||||
|
if (r instanceof Publication) {
|
||||||
|
Publication pub = (Publication) r;
|
||||||
|
rs.setJournal(mapJournal(pub.getJournal()));
|
||||||
|
} else if (r instanceof Dataset) {
|
||||||
|
Dataset d = (Dataset) r;
|
||||||
|
rs.setSize(mapField(d.getSize()));
|
||||||
|
rs.setVersion(mapField(d.getVersion()));
|
||||||
|
} else if (r instanceof Software) {
|
||||||
|
Software sw = (Software) r;
|
||||||
|
rs.setCodeRepositoryUrl(mapField(sw.getCodeRepositoryUrl()));
|
||||||
|
rs.setProgrammingLanguage(mapQualifier(sw.getProgrammingLanguage()));
|
||||||
|
rs.setDocumentationUrl(mapFieldList(sw.getDocumentationUrl()));
|
||||||
|
} else if (r instanceof OtherResearchProduct) {
|
||||||
|
OtherResearchProduct orp = (OtherResearchProduct) r;
|
||||||
|
rs.setContactperson(mapFieldList(orp.getContactperson()));
|
||||||
|
rs.setContactgroup(mapFieldList(orp.getContactgroup()));
|
||||||
|
rs.setTool(mapFieldList(orp.getTool()));
|
||||||
|
}
|
||||||
|
return rs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
private static List<String> getOtherTitles(List<StructuredProperty> titleList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(titleList)
|
||||||
|
.map(
|
||||||
|
titles -> titles
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
t -> !"main title"
|
||||||
|
.equals(
|
||||||
|
Optional
|
||||||
|
.ofNullable(t.getQualifier())
|
||||||
|
.map(Qualifier::getClassid)
|
||||||
|
.orElse(null)))
|
||||||
|
.map(StructuredProperty::getValue)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getMaintitle(List<StructuredProperty> titleList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(titleList)
|
||||||
|
.flatMap(
|
||||||
|
titles -> titles
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
t -> "main title"
|
||||||
|
.equals(
|
||||||
|
Optional
|
||||||
|
.ofNullable(t.getQualifier())
|
||||||
|
.map(Qualifier::getClassid)
|
||||||
|
.orElse(null)))
|
||||||
|
.map(StructuredProperty::getValue)
|
||||||
|
.findFirst())
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Instance> mapInstances(List<eu.dnetlib.dhp.schema.oaf.Instance> instanceList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(instanceList)
|
||||||
|
.map(
|
||||||
|
instances -> instances
|
||||||
|
.stream()
|
||||||
|
.map(instance -> {
|
||||||
|
Instance i = new Instance();
|
||||||
|
i.setCollectedfrom(asProvenance(instance.getCollectedfrom()));
|
||||||
|
i.setHostedby(asProvenance(instance.getHostedby()));
|
||||||
|
i.setFulltext(i.getFulltext());
|
||||||
|
i.setPid(asPid(instance.getPid()));
|
||||||
|
i.setAlternateIdentifier(asPid(instance.getAlternateIdentifier()));
|
||||||
|
i.setAccessright(mapAccessRight(instance.getAccessright()));
|
||||||
|
i.setInstancetype(mapQualifier(instance.getInstancetype()));
|
||||||
|
i.setLicense(mapField(instance.getLicense()));
|
||||||
|
i.setUrl(instance.getUrl());
|
||||||
|
i.setRefereed(mapQualifier(instance.getRefereed()));
|
||||||
|
i.setDateofacceptance(mapField(instance.getDateofacceptance()));
|
||||||
|
i.setDistributionlocation(instance.getDistributionlocation());
|
||||||
|
i.setProcessingcharges(getProcessingcharges(instance));
|
||||||
|
return i;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static APC getProcessingcharges(eu.dnetlib.dhp.schema.oaf.Instance instance) {
|
||||||
|
return Optional
|
||||||
|
.of(
|
||||||
|
APC
|
||||||
|
.newInstance(
|
||||||
|
mapField(instance.getProcessingchargecurrency()),
|
||||||
|
mapField(instance.getProcessingchargeamount())))
|
||||||
|
.filter(apc -> Objects.nonNull(apc.getAmount()) && Objects.nonNull(apc.getCurrency()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AccessRight mapAccessRight(eu.dnetlib.dhp.schema.oaf.AccessRight accessright) {
|
||||||
|
return AccessRight
|
||||||
|
.newInstance(
|
||||||
|
mapQualifier(accessright),
|
||||||
|
Optional
|
||||||
|
.ofNullable(accessright.getOpenAccessRoute())
|
||||||
|
.map(route -> OpenAccessRoute.valueOf(route.toString()))
|
||||||
|
.orElse(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> T mapField(eu.dnetlib.dhp.schema.oaf.Field<T> f) {
|
||||||
|
return Optional.ofNullable(f).map(Field::getValue).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> List<T> mapFieldList(List<eu.dnetlib.dhp.schema.oaf.Field<T>> fl) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(fl)
|
||||||
|
.map(v -> v.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String mapQualifier(eu.dnetlib.dhp.schema.oaf.Qualifier q) {
|
||||||
|
return Optional.ofNullable(q).map(Qualifier::getClassid).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Journal mapJournal(eu.dnetlib.dhp.schema.oaf.Journal joaf) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(joaf)
|
||||||
|
.map(jo -> {
|
||||||
|
Journal j = new Journal();
|
||||||
|
j.setConferencedate(jo.getConferencedate());
|
||||||
|
j.setConferenceplace(jo.getConferenceplace());
|
||||||
|
j.setEdition(jo.getEdition());
|
||||||
|
j.setSp(jo.getSp());
|
||||||
|
j.setEp(jo.getEp());
|
||||||
|
j.setVol(jo.getVol());
|
||||||
|
j.setIss(jo.getEdition());
|
||||||
|
j.setName(jo.getName());
|
||||||
|
j.setIssnPrinted(jo.getIssnPrinted());
|
||||||
|
j.setIssnOnline(jo.getIssnOnline());
|
||||||
|
j.setIssnLinking(jo.getIssnLinking());
|
||||||
|
return j;
|
||||||
|
})
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Provenance> asProvenance(List<KeyValue> keyValueList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(keyValueList)
|
||||||
|
.map(
|
||||||
|
kvs -> kvs
|
||||||
|
.stream()
|
||||||
|
.map(ProvisionModelSupport::asProvenance)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Provenance asProvenance(KeyValue keyValue) {
|
||||||
|
return Optional.ofNullable(keyValue).map(cf -> Provenance.newInstance(cf.getKey(), cf.getValue())).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
||||||
|
ContextMapper contextMapper) {
|
||||||
|
|
||||||
|
final Set<String> contexts = Optional
|
||||||
|
.ofNullable(ctxList)
|
||||||
|
.map(
|
||||||
|
ctx -> ctx
|
||||||
|
.stream()
|
||||||
|
.map(eu.dnetlib.dhp.schema.oaf.Context::getId)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new)))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
|
||||||
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
||||||
|
if (contexts.contains("dh-ch::subcommunity::2")) {
|
||||||
|
contexts.add("clarin");
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional
|
||||||
|
.ofNullable(contexts)
|
||||||
|
.map(
|
||||||
|
ctx -> ctx
|
||||||
|
.stream()
|
||||||
|
.map(contextPath -> {
|
||||||
|
Context context = new Context();
|
||||||
|
String id = "";
|
||||||
|
Map<String, Category> categoryMap = Maps.newHashMap();
|
||||||
|
for (final String token : Splitter.on("::").split(contextPath)) {
|
||||||
|
id += token;
|
||||||
|
|
||||||
|
final ContextDef def = contextMapper.get(id);
|
||||||
|
|
||||||
|
if (def == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (def.getName().equals("context")) {
|
||||||
|
context.setId(def.getId());
|
||||||
|
context.setLabel(def.getLabel());
|
||||||
|
context.setType(def.getType());
|
||||||
|
}
|
||||||
|
if (def.getName().equals("category")) {
|
||||||
|
Category category = Category.newInstance(def.getId(), def.getLabel());
|
||||||
|
if (Objects.isNull(context.getCategory())) {
|
||||||
|
context.setCategory(Lists.newArrayList());
|
||||||
|
}
|
||||||
|
context.getCategory().add(category);
|
||||||
|
categoryMap.put(def.getId(), category);
|
||||||
|
}
|
||||||
|
if (def.getName().equals("concept")) {
|
||||||
|
String parentId = StringUtils.substringBeforeLast(def.getId(), "::");
|
||||||
|
if (categoryMap.containsKey(parentId)) {
|
||||||
|
categoryMap
|
||||||
|
.get(parentId)
|
||||||
|
.getConcept()
|
||||||
|
.add(Concept.newInstance(def.getId(), def.getLabel()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id += "::";
|
||||||
|
}
|
||||||
|
return context;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Pid> asPid(List<StructuredProperty> pidList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(pidList)
|
||||||
|
.map(
|
||||||
|
pids -> pids
|
||||||
|
.stream()
|
||||||
|
.map(p -> Pid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Author> asAuthor(List<eu.dnetlib.dhp.schema.oaf.Author> authorList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(authorList)
|
||||||
|
.map(
|
||||||
|
authors -> authors
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
a -> Author
|
||||||
|
.newInstance(a.getFullname(), a.getName(), a.getSurname(), a.getRank(), asPid(a.getPid())))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Subject> asSubject(List<eu.dnetlib.dhp.schema.oaf.Subject> subjectList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(subjectList)
|
||||||
|
.map(
|
||||||
|
subjects -> subjects
|
||||||
|
.stream()
|
||||||
|
.filter(s -> Objects.nonNull(s.getQualifier()))
|
||||||
|
.filter(s -> Objects.nonNull(s.getQualifier().getClassid()))
|
||||||
|
.map(s -> Subject.newInstance(s.getValue(), s.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Subject> asSubjectSP(List<eu.dnetlib.dhp.schema.oaf.StructuredProperty> subjectList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(subjectList)
|
||||||
|
.map(
|
||||||
|
subjects -> subjects
|
||||||
|
.stream()
|
||||||
|
.filter(s -> Objects.nonNull(s.getQualifier()))
|
||||||
|
.filter(s -> Objects.nonNull(s.getQualifier().getClassid()))
|
||||||
|
.map(s -> Subject.newInstance(s.getValue(), s.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Country asCountry(eu.dnetlib.dhp.schema.oaf.Qualifier country) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(country)
|
||||||
|
.filter(c -> Objects.nonNull(c.getClassid()) && Objects.nonNull(c.getClassname()))
|
||||||
|
.map(c -> Country.newInstance(c.getClassid(), c.getClassname()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Country> asCountry(List<eu.dnetlib.dhp.schema.oaf.Country> countryList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(countryList)
|
||||||
|
.map(
|
||||||
|
countries -> countries
|
||||||
|
.stream()
|
||||||
|
.map(c -> Country.newInstance(c.getClassid(), c.getClassname()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<EoscIfGuidelines> asEOSCIF(List<eu.dnetlib.dhp.schema.oaf.EoscIfGuidelines> eoscIfGuidelines) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(eoscIfGuidelines)
|
||||||
|
.map(
|
||||||
|
eoscif -> eoscif
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
e -> EoscIfGuidelines
|
||||||
|
.newInstance(e.getCode(), e.getLabel(), e.getUrl(), e.getSemanticRelation()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<CodeLabel> mapCodeLabelKV(List<KeyValue> kvList) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(kvList)
|
||||||
|
.map(
|
||||||
|
kvs -> kvs
|
||||||
|
.stream()
|
||||||
|
.map(ProvisionModelSupport::mapCodeLabel)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<CodeLabel> mapCodeLabel(List<Qualifier> qualifiers) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(qualifiers)
|
||||||
|
.map(
|
||||||
|
list -> list
|
||||||
|
.stream()
|
||||||
|
.map(ProvisionModelSupport::mapCodeLabel)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CodeLabel mapCodeLabel(Qualifier qualifier) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(qualifier)
|
||||||
|
.map(q -> CodeLabel.newInstance(q.getClassid(), q.getClassname()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CodeLabel mapCodeLabel(KeyValue kv) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(kv)
|
||||||
|
.map(q -> CodeLabel.newInstance(kv.getKey(), kv.getValue()))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class RelatedEntity implements Serializable {
|
public class RelatedEntity implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = -4982643490443810597L;
|
||||||
|
|
||||||
private String id;
|
private String id;
|
||||||
private String type;
|
private String type;
|
||||||
|
|
||||||
|
@ -21,6 +23,8 @@ public class RelatedEntity implements Serializable {
|
||||||
private String websiteurl; // datasource, organizations, projects
|
private String websiteurl; // datasource, organizations, projects
|
||||||
|
|
||||||
// results
|
// results
|
||||||
|
private String description;
|
||||||
|
private List<String> author;
|
||||||
private String dateofacceptance;
|
private String dateofacceptance;
|
||||||
private String publisher;
|
private String publisher;
|
||||||
private List<StructuredProperty> pid;
|
private List<StructuredProperty> pid;
|
||||||
|
@ -75,6 +79,22 @@ public class RelatedEntity implements Serializable {
|
||||||
return websiteurl;
|
return websiteurl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDescription(String description) {
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getAuthor() {
|
||||||
|
return author;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthor(List<String> author) {
|
||||||
|
this.author = author;
|
||||||
|
}
|
||||||
|
|
||||||
public void setWebsiteurl(String websiteurl) {
|
public void setWebsiteurl(String websiteurl) {
|
||||||
this.websiteurl = websiteurl;
|
this.websiteurl = websiteurl;
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,8 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
public class RelatedEntityWrapper implements Serializable {
|
public class RelatedEntityWrapper implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = -2624854064081757234L;
|
||||||
|
|
||||||
private Relation relation;
|
private Relation relation;
|
||||||
private RelatedEntity target;
|
private RelatedEntity target;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class TupleWrapper implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = -1418439827125577822L;
|
||||||
|
private String xml;
|
||||||
|
|
||||||
|
private String json;
|
||||||
|
|
||||||
|
public TupleWrapper() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public TupleWrapper(String xml, String json) {
|
||||||
|
this.xml = xml;
|
||||||
|
this.json = json;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getXml() {
|
||||||
|
return xml;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setXml(String xml) {
|
||||||
|
this.xml = xml;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJson() {
|
||||||
|
return json;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setJson(String json) {
|
||||||
|
this.json = json;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision.utils;
|
package eu.dnetlib.dhp.oa.provision.utils;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -32,7 +33,7 @@ import com.google.common.collect.Lists;
|
||||||
*
|
*
|
||||||
* @author claudio
|
* @author claudio
|
||||||
*/
|
*/
|
||||||
public class StreamingInputDocumentFactory {
|
public class StreamingInputDocumentFactory implements Serializable {
|
||||||
|
|
||||||
private static final String INDEX_FIELD_PREFIX = "__";
|
private static final String INDEX_FIELD_PREFIX = "__";
|
||||||
|
|
||||||
|
@ -40,6 +41,8 @@ public class StreamingInputDocumentFactory {
|
||||||
|
|
||||||
private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;
|
private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;
|
||||||
|
|
||||||
|
private static final String INDEX_JSON_RESULT = INDEX_FIELD_PREFIX + "json";
|
||||||
|
|
||||||
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
|
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
|
||||||
|
|
||||||
private static final String DEFAULTDNETRESULT = "dnetResult";
|
private static final String DEFAULTDNETRESULT = "dnetResult";
|
||||||
|
@ -71,13 +74,17 @@ public class StreamingInputDocumentFactory {
|
||||||
this.resultName = resultName;
|
this.resultName = resultName;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SolrInputDocument parseDocument(final String inputDocument) {
|
public SolrInputDocument parseDocument(final String xml) {
|
||||||
|
return parseDocument(xml, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
public SolrInputDocument parseDocument(final String xml, final String json) {
|
||||||
|
|
||||||
final StringWriter results = new StringWriter();
|
final StringWriter results = new StringWriter();
|
||||||
final List<Namespace> nsList = Lists.newLinkedList();
|
final List<Namespace> nsList = Lists.newLinkedList();
|
||||||
try {
|
try {
|
||||||
|
|
||||||
XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument));
|
XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(xml));
|
||||||
|
|
||||||
final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>());
|
final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>());
|
||||||
|
|
||||||
|
@ -95,13 +102,13 @@ public class StreamingInputDocumentFactory {
|
||||||
} else if (TARGETFIELDS.equals(localName)) {
|
} else if (TARGETFIELDS.equals(localName)) {
|
||||||
parseTargetFields(indexDocument, parser);
|
parseTargetFields(indexDocument, parser);
|
||||||
} else if (resultName.equals(localName)) {
|
} else if (resultName.equals(localName)) {
|
||||||
copyResult(indexDocument, results, parser, nsList, resultName);
|
copyResult(indexDocument, json, results, parser, nsList, resultName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
|
if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
|
||||||
throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
|
throw new IllegalStateException("cannot extract record ID from: " + xml);
|
||||||
}
|
}
|
||||||
|
|
||||||
return indexDocument;
|
return indexDocument;
|
||||||
|
@ -171,6 +178,7 @@ public class StreamingInputDocumentFactory {
|
||||||
*/
|
*/
|
||||||
protected void copyResult(
|
protected void copyResult(
|
||||||
final SolrInputDocument indexDocument,
|
final SolrInputDocument indexDocument,
|
||||||
|
final String json,
|
||||||
final StringWriter results,
|
final StringWriter results,
|
||||||
final XMLEventReader parser,
|
final XMLEventReader parser,
|
||||||
final List<Namespace> nsList,
|
final List<Namespace> nsList,
|
||||||
|
@ -205,6 +213,7 @@ public class StreamingInputDocumentFactory {
|
||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
indexDocument.addField(INDEX_RESULT, results.toString());
|
indexDocument.addField(INDEX_RESULT, results.toString());
|
||||||
|
indexDocument.addField(INDEX_JSON_RESULT, json);
|
||||||
} finally {
|
} finally {
|
||||||
outputFactory.remove();
|
outputFactory.remove();
|
||||||
eventFactory.remove();
|
eventFactory.remove();
|
||||||
|
|
|
@ -100,13 +100,17 @@ public class TemplateFactory {
|
||||||
|
|
||||||
public String getInstance(
|
public String getInstance(
|
||||||
final List<String> instancemetadata, final String url) {
|
final List<String> instancemetadata, final String url) {
|
||||||
|
return getInstance(instancemetadata, Lists.newArrayList(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getInstance(
|
||||||
|
final List<String> instancemetadata, final List<String> url) {
|
||||||
return getTemplate(resources.getInstance())
|
return getTemplate(resources.getInstance())
|
||||||
.add("metadata", instancemetadata)
|
.add("metadata", instancemetadata)
|
||||||
.add(
|
.add(
|
||||||
"webresources",
|
"webresources",
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(url)
|
.ofNullable(url)
|
||||||
.map(u -> Lists.newArrayList(url))
|
|
||||||
.orElse(Lists.newArrayList())
|
.orElse(Lists.newArrayList())
|
||||||
.stream()
|
.stream()
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
|
|
|
@ -49,6 +49,7 @@ import eu.dnetlib.dhp.schema.common.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class XmlRecordFactory implements Serializable {
|
public class XmlRecordFactory implements Serializable {
|
||||||
|
@ -365,6 +366,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.getDescription()
|
.getDescription()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.limit(ModelHardLimits.MAX_ABSTRACTS)
|
||||||
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
|
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
|
||||||
.collect(Collectors.toCollection(HashSet::new)));
|
.collect(Collectors.toCollection(HashSet::new)));
|
||||||
}
|
}
|
||||||
|
@ -1057,7 +1059,8 @@ public class XmlRecordFactory implements Serializable {
|
||||||
return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue());
|
return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> mapFields(final RelatedEntityWrapper link, final Set<String> contexts) {
|
private List<String> mapFields(final TemplateFactory templateFactory, final RelatedEntityWrapper link,
|
||||||
|
final Set<String> contexts) {
|
||||||
final Relation rel = link.getRelation();
|
final Relation rel = link.getRelation();
|
||||||
final RelatedEntity re = link.getTarget();
|
final RelatedEntity re = link.getTarget();
|
||||||
final String targetType = link.getTarget().getType();
|
final String targetType = link.getTarget().getType();
|
||||||
|
@ -1071,6 +1074,18 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
|
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
|
||||||
metadata.add(XmlSerializationUtils.mapStructuredProperty("title", re.getTitle()));
|
metadata.add(XmlSerializationUtils.mapStructuredProperty("title", re.getTitle()));
|
||||||
}
|
}
|
||||||
|
if (StringUtils.isNotBlank(re.getDescription())) {
|
||||||
|
metadata.add(XmlSerializationUtils.asXmlElement("description", re.getDescription()));
|
||||||
|
}
|
||||||
|
if (re.getAuthor() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
re
|
||||||
|
.getAuthor()
|
||||||
|
.stream()
|
||||||
|
.map(author -> XmlSerializationUtils.asXmlElement("creator", author))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
if (isNotBlank(re.getDateofacceptance())) {
|
if (isNotBlank(re.getDateofacceptance())) {
|
||||||
metadata
|
metadata
|
||||||
.add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
|
.add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
|
||||||
|
@ -1104,6 +1119,54 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
|
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
if (re.getInstances() != null) {
|
||||||
|
re
|
||||||
|
.getInstances()
|
||||||
|
.forEach(i -> {
|
||||||
|
final List<String> instanceFields = Lists.newArrayList();
|
||||||
|
if (i.getAccessright() != null && !i.getAccessright().isBlank()) {
|
||||||
|
instanceFields
|
||||||
|
.add(XmlSerializationUtils.mapQualifier("accessright", i.getAccessright()));
|
||||||
|
}
|
||||||
|
if (i.getHostedby() != null) {
|
||||||
|
instanceFields.add(XmlSerializationUtils.mapKeyValue("hostedby", i.getHostedby()));
|
||||||
|
}
|
||||||
|
if (i.getDateofacceptance() != null && isNotBlank(i.getDateofacceptance().getValue())) {
|
||||||
|
instanceFields
|
||||||
|
.add(
|
||||||
|
XmlSerializationUtils
|
||||||
|
.asXmlElement("dateofacceptance", i.getDateofacceptance().getValue()));
|
||||||
|
}
|
||||||
|
if (i.getInstancetype() != null && !i.getInstancetype().isBlank()) {
|
||||||
|
instanceFields
|
||||||
|
.add(XmlSerializationUtils.mapQualifier("instancetype", i.getInstancetype()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i.getRefereed() != null && !i.getRefereed().isBlank()) {
|
||||||
|
instanceFields.add(XmlSerializationUtils.mapQualifier("refereed", i.getRefereed()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i.getLicense() != null && isNotBlank(i.getLicense().getValue())) {
|
||||||
|
instanceFields
|
||||||
|
.add(XmlSerializationUtils.asXmlElement("license", i.getLicense().getValue()));
|
||||||
|
}
|
||||||
|
if (isNotBlank(i.getFulltext())) {
|
||||||
|
instanceFields.add(XmlSerializationUtils.asXmlElement("fulltext", i.getFulltext()));
|
||||||
|
}
|
||||||
|
if (i.getUrl() != null && !i.getUrl().isEmpty()) {
|
||||||
|
instanceFields
|
||||||
|
.addAll(
|
||||||
|
i
|
||||||
|
.getUrl()
|
||||||
|
.stream()
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.map(url -> XmlSerializationUtils.asXmlElement("url", url))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
metadata.add(templateFactory.getInstance(instanceFields, i.getUrl()));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case datasource:
|
case datasource:
|
||||||
if (isNotBlank(re.getOfficialname())) {
|
if (isNotBlank(re.getOfficialname())) {
|
||||||
|
@ -1133,6 +1196,9 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (re.getCountry() != null && !re.getCountry().isBlank()) {
|
if (re.getCountry() != null && !re.getCountry().isBlank()) {
|
||||||
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
|
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
|
||||||
}
|
}
|
||||||
|
if (StringUtils.isNotBlank(re.getWebsiteurl())) {
|
||||||
|
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", re.getWebsiteurl()));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case project:
|
case project:
|
||||||
if (isNotBlank(re.getProjectTitle())) {
|
if (isNotBlank(re.getProjectTitle())) {
|
||||||
|
@ -1182,7 +1248,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
String.format("missing scheme for: <%s - %s>", type, targetType));
|
String.format("missing scheme for: <%s - %s>", type, targetType));
|
||||||
}
|
}
|
||||||
final HashSet<String> fields = Sets.newHashSet(mapFields(link, contexts));
|
final HashSet<String> fields = Sets.newHashSet(mapFields(templateFactory, link, contexts));
|
||||||
if (rel.getValidated() == null) {
|
if (rel.getValidated() == null) {
|
||||||
rel.setValidated(false);
|
rel.setValidated(false);
|
||||||
}
|
}
|
||||||
|
@ -1206,7 +1272,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.map(link -> {
|
.map(link -> {
|
||||||
final String targetType = link.getTarget().getType();
|
final String targetType = link.getTarget().getType();
|
||||||
final String name = ModelSupport.getMainType(EntityType.valueOf(targetType));
|
final String name = ModelSupport.getMainType(EntityType.valueOf(targetType));
|
||||||
final HashSet<String> fields = Sets.newHashSet(mapFields(link, null));
|
final HashSet<String> fields = Sets.newHashSet(mapFields(templateFactory, link, null));
|
||||||
return templateFactory
|
return templateFactory
|
||||||
.getChild(name, link.getTarget().getId(), Lists.newArrayList(fields));
|
.getChild(name, link.getTarget().getId(), Lists.newArrayList(fields));
|
||||||
})
|
})
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "is",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "i",
|
||||||
|
"paramLongName": "inputPath",
|
||||||
|
"paramDescription": "the path of the sequence file to read the XML records",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "f",
|
||||||
|
"paramLongName": "format",
|
||||||
|
"paramDescription": "MDFormat name found in the IS profile",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "op",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "path on hdfs activating an alternative output for the SolrInputDocuments",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@ -22,17 +22,5 @@
|
||||||
"paramLongName": "batchSize",
|
"paramLongName": "batchSize",
|
||||||
"paramDescription": "size of the batch of documents sent to solr",
|
"paramDescription": "size of the batch of documents sent to solr",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "of",
|
|
||||||
"paramLongName": "outputFormat",
|
|
||||||
"paramDescription": "decides the job output format, SOLR | HDFS",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "op",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "path on hdfs activating an alternative output for the SolrInputDocuments",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -16,5 +16,11 @@
|
||||||
"paramLongName": "contextApiBaseUrl",
|
"paramLongName": "contextApiBaseUrl",
|
||||||
"paramDescription": "URL of the context API",
|
"paramDescription": "URL of the context API",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "isu",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "URL of the context ISLookup Service",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -592,8 +592,9 @@
|
||||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/xml_json</arg>
|
||||||
<arg>--contextApiBaseUrl</arg><arg>${contextApiBaseUrl}</arg>
|
<arg>--contextApiBaseUrl</arg><arg>${contextApiBaseUrl}</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="should_index"/>
|
<ok to="should_index"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -602,8 +603,8 @@
|
||||||
<decision name="should_index">
|
<decision name="should_index">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="drop_solr_collection">${wf:conf('shouldIndex') eq 'true'}</case>
|
<case to="drop_solr_collection">${wf:conf('shouldIndex') eq 'true'}</case>
|
||||||
<case to="End">${wf:conf('shouldIndex') eq 'false'}</case>
|
<case to="dump_solr_records_hdfs">${wf:conf('shouldIndex') eq 'false'}</case>
|
||||||
<default to="drop_solr_collection"/>
|
<default to="dump_solr_records_hdfs"/>
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
|
||||||
|
@ -646,12 +647,10 @@
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/xml</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/xml_json</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--format</arg><arg>${format}</arg>
|
<arg>--format</arg><arg>${format}</arg>
|
||||||
<arg>--batchSize</arg><arg>${batchSize}</arg>
|
<arg>--batchSize</arg><arg>${batchSize}</arg>
|
||||||
<arg>--outputFormat</arg><arg>${outputFormat}</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/solr_documents</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="commit_solr_collection"/>
|
<ok to="commit_solr_collection"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -674,5 +673,30 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_solr_records_hdfs">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>dump_solr_records_hdfs</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.provision.SolrRecordDumpJob</class>
|
||||||
|
<jar>dhp-graph-provision-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${workingDir}/xml_json</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--format</arg><arg>${format}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/solr_documents</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -57,7 +57,7 @@ public class EOSCFuture_Test {
|
||||||
IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")),
|
IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")),
|
||||||
OtherResearchProduct.class);
|
OtherResearchProduct.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
|
|
@ -63,7 +63,7 @@ public class IndexRecordTransformerTest {
|
||||||
final Project pj = load("project.json", Project.class);
|
final Project pj = load("project.json", Project.class);
|
||||||
final Relation rel = load("relToValidatedProject.json", Relation.class);
|
final Relation rel = load("relToValidatedProject.json", Relation.class);
|
||||||
|
|
||||||
final JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity(p);
|
||||||
je
|
je
|
||||||
.setLinks(
|
.setLinks(
|
||||||
Lists
|
Lists
|
||||||
|
@ -86,7 +86,7 @@ public class IndexRecordTransformerTest {
|
||||||
|
|
||||||
final Publication p = load("publication.json", Publication.class);
|
final Publication p = load("publication.json", Publication.class);
|
||||||
|
|
||||||
final JoinedEntity<Publication> je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity(p);
|
||||||
final String record = xmlRecordFactory.build(je);
|
final String record = xmlRecordFactory.build(je);
|
||||||
assertNotNull(record);
|
assertNotNull(record);
|
||||||
SolrInputDocument solrDoc = testRecordTransformation(record);
|
SolrInputDocument solrDoc = testRecordTransformation(record);
|
||||||
|
@ -102,7 +102,7 @@ public class IndexRecordTransformerTest {
|
||||||
|
|
||||||
final Publication p = load("riunet.json", Publication.class);
|
final Publication p = load("riunet.json", Publication.class);
|
||||||
|
|
||||||
final JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity(p);
|
||||||
final String record = xmlRecordFactory.build(je);
|
final String record = xmlRecordFactory.build(je);
|
||||||
assertNotNull(record);
|
assertNotNull(record);
|
||||||
testRecordTransformation(record);
|
testRecordTransformation(record);
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
|
||||||
|
class JoinedEntityTest {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(JoinedEntityTest.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test_serialisation() throws IOException {
|
||||||
|
|
||||||
|
Publication p = new Publication();
|
||||||
|
p.setId("p1");
|
||||||
|
Journal j = new Journal();
|
||||||
|
j.setIss("1234-5678");
|
||||||
|
p.setJournal(j);
|
||||||
|
|
||||||
|
Organization o = new Organization();
|
||||||
|
o.setId("o1");
|
||||||
|
Field<String> lName = new Field<>();
|
||||||
|
lName.setValue("CNR");
|
||||||
|
o.setLegalname(lName);
|
||||||
|
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
|
||||||
|
|
||||||
|
final String json = mapper.writeValueAsString(new JoinedEntity(p));
|
||||||
|
log.info(json);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -86,8 +86,7 @@ public class SolrConfigExploreTest extends SolrExploreTest {
|
||||||
|
|
||||||
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null)
|
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient);
|
||||||
.run(isLookupClient);
|
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
||||||
|
|
||||||
String[] queryStrings = {
|
String[] queryStrings = {
|
||||||
|
|
|
@ -95,7 +95,7 @@ public class SolrConfigTest extends SolrTest {
|
||||||
|
|
||||||
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null)
|
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize)
|
||||||
.run(isLookupClient);
|
.run(isLookupClient);
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,170 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.solr.common.SolrInputField;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.TupleWrapper;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class SolrRecordDumpJobTest {
|
||||||
|
|
||||||
|
protected static final Logger log = LoggerFactory.getLogger(SolrRecordDumpJobTest.class);
|
||||||
|
|
||||||
|
protected static SparkSession spark;
|
||||||
|
|
||||||
|
protected static final String FORMAT = "test";
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private ISLookupClient isLookupClient;
|
||||||
|
|
||||||
|
@TempDir
|
||||||
|
public static Path workingDir;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void before() {
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(SolrRecordDumpJobTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.registerKryoClasses(new Class[] {
|
||||||
|
SerializableSolrInputDocument.class
|
||||||
|
});
|
||||||
|
|
||||||
|
conf.setMaster("local[1]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(SolrRecordDumpJobTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void shutDown() throws Exception {
|
||||||
|
spark.stop();
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void prepareMocks() throws ISLookUpException, IOException {
|
||||||
|
isLookupClient.setIsLookup(isLookUpService);
|
||||||
|
|
||||||
|
Mockito
|
||||||
|
.when(isLookupClient.getDsId(Mockito.anyString()))
|
||||||
|
.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
|
||||||
|
Mockito
|
||||||
|
.when(isLookupClient.getLayoutSource(Mockito.anyString()))
|
||||||
|
.thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
|
||||||
|
Mockito
|
||||||
|
.when(isLookupClient.getLayoutTransformer())
|
||||||
|
.thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testXmlIndexingJob_saveOnHDFS() throws Exception {
|
||||||
|
final String ID_XPATH = "//*[local-name()='header']/*[local-name()='objIdentifier']";
|
||||||
|
|
||||||
|
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
||||||
|
// String inputPath = "/Users/claudio/workspace/data/index";
|
||||||
|
|
||||||
|
Dataset<TupleWrapper> records = spark
|
||||||
|
.read()
|
||||||
|
.schema(Encoders.bean(TupleWrapper.class).schema())
|
||||||
|
.json(inputPath)
|
||||||
|
.as(Encoders.bean(TupleWrapper.class));
|
||||||
|
|
||||||
|
records.printSchema();
|
||||||
|
|
||||||
|
long nRecord = records.count();
|
||||||
|
log.info("found {} records", nRecord);
|
||||||
|
|
||||||
|
final Dataset<String> ids = records
|
||||||
|
.map((MapFunction<TupleWrapper, String>) TupleWrapper::getXml, Encoders.STRING())
|
||||||
|
.map(
|
||||||
|
(MapFunction<String, String>) s -> new SAXReader().read(new StringReader(s)).valueOf(ID_XPATH),
|
||||||
|
Encoders.STRING());
|
||||||
|
|
||||||
|
log.info("found {} ids", ids.count());
|
||||||
|
|
||||||
|
long xmlIdUnique = ids
|
||||||
|
.distinct()
|
||||||
|
.count();
|
||||||
|
|
||||||
|
log.info("found {} unique ids", xmlIdUnique);
|
||||||
|
|
||||||
|
assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records");
|
||||||
|
|
||||||
|
final String outputPath = workingDir.resolve("outputPath").toAbsolutePath().toString();
|
||||||
|
new SolrRecordDumpJob(spark, inputPath, FORMAT, outputPath).run(isLookupClient);
|
||||||
|
|
||||||
|
final Dataset<SerializableSolrInputDocument> solrDocs = spark
|
||||||
|
.read()
|
||||||
|
.load(outputPath)
|
||||||
|
.as(Encoders.kryo(SerializableSolrInputDocument.class));
|
||||||
|
|
||||||
|
solrDocs.foreach(doc -> {
|
||||||
|
assertNotNull(doc.get("__result"));
|
||||||
|
assertNotNull(doc.get("__json"));
|
||||||
|
});
|
||||||
|
|
||||||
|
long docIdUnique = solrDocs.map((MapFunction<SerializableSolrInputDocument, String>) doc -> {
|
||||||
|
final SolrInputField id = doc.getField("__indexrecordidentifier");
|
||||||
|
return id.getFirstValue().toString();
|
||||||
|
}, Encoders.STRING())
|
||||||
|
.distinct()
|
||||||
|
.count();
|
||||||
|
assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output XML records");
|
||||||
|
|
||||||
|
long jsonUnique = solrDocs
|
||||||
|
.map(
|
||||||
|
(MapFunction<SerializableSolrInputDocument, String>) je -> (String) je.getField("__json").getValue(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.distinct()
|
||||||
|
.count();
|
||||||
|
|
||||||
|
assertEquals(jsonUnique, docIdUnique, "IDs should be unique among the output JSON records");
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -92,7 +92,7 @@ public abstract class SolrTest {
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
public static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||||
|
|
|
@ -2,34 +2,31 @@
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.Map;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.common.SolrInputField;
|
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaPairRDD;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.junit.jupiter.api.AfterAll;
|
||||||
import org.junit.jupiter.api.*;
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.TupleWrapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
@ -70,6 +67,7 @@ public class XmlIndexingJobTest extends SolrTest {
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
||||||
|
|
||||||
conf.registerKryoClasses(new Class[] {
|
conf.registerKryoClasses(new Class[] {
|
||||||
SerializableSolrInputDocument.class
|
SerializableSolrInputDocument.class
|
||||||
});
|
});
|
||||||
|
@ -97,13 +95,15 @@ public class XmlIndexingJobTest extends SolrTest {
|
||||||
|
|
||||||
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
||||||
|
|
||||||
long nRecord = JavaSparkContext
|
Dataset<TupleWrapper> records = spark
|
||||||
.fromSparkContext(spark.sparkContext())
|
.read()
|
||||||
.sequenceFile(inputPath, Text.class, Text.class)
|
.schema(Encoders.bean(TupleWrapper.class).schema())
|
||||||
.count();
|
.json(inputPath)
|
||||||
|
.as(Encoders.bean(TupleWrapper.class));
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null)
|
long nRecord = records.count();
|
||||||
.run(isLookupClient);
|
|
||||||
|
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient);
|
||||||
|
|
||||||
assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
|
||||||
|
|
||||||
|
@ -137,40 +137,26 @@ public class XmlIndexingJobTest extends SolrTest {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
0, rsp.getResults().getNumFound(),
|
0, rsp.getResults().getNumFound(),
|
||||||
"the number of indexed records having peerreviewed = true");
|
"the number of indexed records having peerreviewed = true");
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
rsp = miniCluster
|
||||||
void testXmlIndexingJob_saveOnHDFS() throws Exception {
|
.getSolrClient()
|
||||||
final String ID_XPATH = "//header/*[local-name()='objIdentifier']";
|
.query(
|
||||||
|
new SolrQuery()
|
||||||
|
.add(CommonParams.Q, "objidentifier:\"iddesignpres::ae77e56e84ad058d9e7f19fa2f7325db\"")
|
||||||
|
.add(CommonParams.FL, "__json"));
|
||||||
|
assertEquals(
|
||||||
|
1, rsp.getResults().getNumFound(),
|
||||||
|
"the number of indexed records having the given identifier");
|
||||||
|
Optional<Object> json = rsp
|
||||||
|
.getResults()
|
||||||
|
.stream()
|
||||||
|
.map(d -> d.getFieldValues("__json"))
|
||||||
|
.flatMap(d -> d.stream())
|
||||||
|
.findFirst();
|
||||||
|
|
||||||
String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";
|
assertTrue(json.isPresent());
|
||||||
|
|
||||||
final JavaPairRDD<Text, Text> xmlRecords = JavaSparkContext
|
log.info((String) json.get());
|
||||||
.fromSparkContext(spark.sparkContext())
|
|
||||||
.sequenceFile(inputPath, Text.class, Text.class);
|
|
||||||
long nRecord = xmlRecords.count();
|
|
||||||
long xmlIdUnique = xmlRecords
|
|
||||||
.map(t -> t._2().toString())
|
|
||||||
.map(s -> new SAXReader().read(new StringReader(s)).valueOf(ID_XPATH))
|
|
||||||
.distinct()
|
|
||||||
.count();
|
|
||||||
assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records");
|
|
||||||
|
|
||||||
final String outputPath = workingDir.resolve("outputPath").toAbsolutePath().toString();
|
|
||||||
new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.HDFS, outputPath)
|
|
||||||
.run(isLookupClient);
|
|
||||||
|
|
||||||
final Dataset<SerializableSolrInputDocument> solrDocs = spark
|
|
||||||
.read()
|
|
||||||
.load(outputPath)
|
|
||||||
.as(Encoders.kryo(SerializableSolrInputDocument.class));
|
|
||||||
long docIdUnique = solrDocs.map((MapFunction<SerializableSolrInputDocument, String>) doc -> {
|
|
||||||
final SolrInputField id = doc.getField("__indexrecordidentifier");
|
|
||||||
return id.getFirstValue().toString();
|
|
||||||
}, Encoders.STRING())
|
|
||||||
.distinct()
|
|
||||||
.count();
|
|
||||||
assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output records");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class XmlRecordFactoryTest {
|
||||||
final Publication p = OBJECT_MAPPER
|
final Publication p = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ public class XmlRecordFactoryTest {
|
||||||
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||||
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||||
links.add(rew);
|
links.add(rew);
|
||||||
final JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity(p);
|
||||||
je.setLinks(links);
|
je.setLinks(links);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(je);
|
final String xml = xmlRecordFactory.build(je);
|
||||||
|
@ -148,7 +148,7 @@ public class XmlRecordFactoryTest {
|
||||||
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||||
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||||
links.add(rew);
|
links.add(rew);
|
||||||
final JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity(p);
|
||||||
je.setLinks(links);
|
je.setLinks(links);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(je);
|
final String xml = xmlRecordFactory.build(je);
|
||||||
|
@ -171,7 +171,7 @@ public class XmlRecordFactoryTest {
|
||||||
final Datasource d = OBJECT_MAPPER
|
final Datasource d = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(d));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(d));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
@ -210,7 +210,7 @@ public class XmlRecordFactoryTest {
|
||||||
IOUtils.toString(getClass().getResourceAsStream("d4science-1-training.json")),
|
IOUtils.toString(getClass().getResourceAsStream("d4science-1-training.json")),
|
||||||
OtherResearchProduct.class);
|
OtherResearchProduct.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ public class XmlRecordFactoryTest {
|
||||||
IOUtils.toString(getClass().getResourceAsStream("d4science-2-dataset.json")),
|
IOUtils.toString(getClass().getResourceAsStream("d4science-2-dataset.json")),
|
||||||
OtherResearchProduct.class);
|
OtherResearchProduct.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
@ -256,7 +256,7 @@ public class XmlRecordFactoryTest {
|
||||||
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
|
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
|
||||||
Publication.class);
|
Publication.class);
|
||||||
|
|
||||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
|
|
@ -202,6 +202,7 @@
|
||||||
<field name="__dsversion" type="pdate" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
|
<field name="__dsversion" type="pdate" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
|
||||||
<field name="__indexrecordidentifier" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
|
<field name="__indexrecordidentifier" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
|
||||||
<field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
<field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
||||||
|
<field name="__json" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
||||||
<field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
|
<field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
|
||||||
<field name="_version_" type="long" multiValued="false" indexed="true" stored="true"/>
|
<field name="_version_" type="long" multiValued="false" indexed="true" stored="true"/>
|
||||||
<field name="authorid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
<field name="authorid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
@ -223,17 +224,22 @@
|
||||||
<field name="datasourcecompatibilityid" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="datasourcecompatibilityid" type="string" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="datasourcecompatibilityname" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="datasourcecompatibilityname" type="string" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="datasourceenglishname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceenglishname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
<field name="datasourcejurisdiction" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodcontenttypes" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceodcontenttypes" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceoddescription" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceoddescription" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodlanguages" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceodlanguages" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodsubjects" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceodsubjects" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceofficialname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourceofficialname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcesubject" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourcesubject" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
<field name="datasourcethematic" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypename" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="datasourcetypename" type="string" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypeuiid" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="datasourcetypeuiid" type="string" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypeuiname" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="datasourcetypeuiname" type="string" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="dateofcollection" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
<field name="dateofcollection" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="deletedbyinference" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="deletedbyinference" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
<field name="eoscdatasourcetype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
<field name="eoscifguidelines" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
<field name="eosctype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalrefclass" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="externalrefclass" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalrefid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="externalrefid" type="string" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalreflabel" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="externalreflabel" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
||||||
|
|
|
@ -195,186 +195,147 @@
|
||||||
<fieldType name="tlong" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="8"/>
|
<fieldType name="tlong" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="8"/>
|
||||||
<fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
|
<fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
|
||||||
<field name="__all" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="__all" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="__deleted" type="boolean" default="false" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="false"/>
|
|
||||||
<field name="__dsid" type="string" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
|
|
||||||
<field name="__dsversion" type="pdate" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
|
|
||||||
<field name="__indexrecordidentifier" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
|
<field name="__indexrecordidentifier" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
|
||||||
<field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
<field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
||||||
|
<field name="__json" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
|
||||||
<field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
|
<field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
|
||||||
<field name="_version_" type="long" multiValued="false" indexed="true" stored="true"/>
|
<field name="_version_" type="long" multiValued="false" indexed="true" stored="true"/>
|
||||||
<field name="authorid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
<field name="authorid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="authoridtype" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
<field name="categoryid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="categoryid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="citation_count" type="pint" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="categoryname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="citation_count_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="collectedfrom" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="collectedfromdatasourceid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="collectedfromdatasourceid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="collectedfromname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="collectedfromname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="community" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="community" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="communityid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="communityid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="conceptname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="communityname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="contextid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="conceptid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="contextname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="conceptname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="country" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="contextid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="countrynojurisdiction" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="contextname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourcecompatibilityid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="contexttype" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="datasourcecompatibilityname" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="counter_affiliation" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceenglishname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_authorship" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcejurisdiction" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_dedup" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceodcontenttypes" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_doi" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceoddescription" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_outcome" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceodlanguages" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_outcome_claimed" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceodsubjects" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_outcome_collected" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourceofficialname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_outcome_inferred" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcesubject" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_participation" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcethematic" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="counter_publicationdataset" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcetypename" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="counter_publicationdataset_claimed" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcetypeuiid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="counter_publicationdataset_collected" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="datasourcetypeuiname" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="counter_publicationdataset_inferred" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="dateofcollection" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="counter_similarity" type="pint" multiValued="false" indexed="true" stored="true"/>
|
<field name="deletedbyinference" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="country" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="eoscdatasourcetype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcecompatibilityid" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="eoscifguidelines" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcecompatibilityname" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="eosctype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceenglishname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="externalreflabel" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodcontenttypes" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="fos" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceoddescription" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="foslabel" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodlanguages" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="funder" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceodsubjects" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="funderid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourceofficialname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundershortname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcesubject" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundinglevel0_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypename" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="fundinglevel0_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypeuiid" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="fundinglevel0_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="datasourcetypeuiname" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="fundinglevel1_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="dateofcollection" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
<field name="fundinglevel1_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="deletedbyinference" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundinglevel1_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalrefclass" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundinglevel2_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalrefid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundinglevel2_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalreflabel" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="fundinglevel2_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="externalrefsite" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="haslicense" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="funder" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="impulse" type="pint" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="funderid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="impulse_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="funderjurisdiction" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="influence" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="fundername" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="influence_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="funderoriginalname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="instancetypename" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundershortname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="isgreen" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel0_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="isindiamondjournal" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel0_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="oafentity" type="string" docValues="false" multiValued="true" indexed="false" stored="false"/>
|
||||||
<field name="fundinglevel0_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="oaftype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel1_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="objidentifier" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel1_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="openaccesscolor" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel1_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="orcidtypevalue" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel2_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="organizationalternativenames" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel2_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="organizationdupid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="fundinglevel2_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="organizationlegalname" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="inferenceprovenance" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="organizationlegalshortname" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="inferred" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="originalid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="instancetypename" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="peerreviewed" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="oafentity" type="string" multiValued="true" indexed="false" stored="false"/>
|
<field name="pid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="oaftype" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="pidclassid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="objidentifier" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="popularity" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationalternativenames" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="popularity_alt" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationcountryname" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="popularity_alt_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationdupid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="popularity_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecenterprise" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectacronym" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="organizationecinternationalorganization" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectcallidentifier" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecinternationalorganizationeurinterests" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectcode" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="organizationeclegalbody" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectcode_nt" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="organizationeclegalperson" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectduration" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecnonprofit" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectecsc39" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecnutscode" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectenddate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecresearchorganization" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectendyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationecsmevalidated" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectkeywords" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="organizationlegalname" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectoamandatepublications" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="organizationlegalshortname" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectstartdate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="originalid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
<field name="projectstartyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="pid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
<field name="projecttitle" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="pidclassid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="projecttitle_alternative" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="pidclassname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="provenanceactionclassid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectacronym" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="publiclyfunded" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="projectcallidentifier" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relclass" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectcode" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="relcontracttypename" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectcode_nt" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="reldatasourcecompatibilityid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectcontracttypename" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfunder" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectduration" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfunderid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectecarticle29_3" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundershortname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectecsc39" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundinglevel0_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectenddate" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundinglevel0_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectendyear" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundinglevel1_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectkeywords" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="relfundinglevel1_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectoamandatepublications" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundinglevel2_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectstartdate" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
<field name="relfundinglevel2_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectstartyear" type="string" multiValued="false" indexed="true" stored="false"/>
|
<field name="relorganizationcountryid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projectsubject" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relorganizationcountryname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="projecttitle" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
|
<field name="relorganizationid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="provenanceactionclassid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relorganizationname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relclass" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relorganizationshortname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relcollectedfromid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relproject" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relcollectedfromname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relprojectcode" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relcontracttypeid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relprojectid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relcontracttypename" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="relprojectname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="reldatasourcecompatibilityid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relprojecttitle" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfunder" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relresultid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfunderid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="relresulttype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfunderjurisdiction" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultacceptanceyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relfundername" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultauthor" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfundershortname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultbestaccessright" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel0_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultdateofacceptance" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel0_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultdescription" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel1_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultdupid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel1_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultembargoenddate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel2_id" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultembargoendyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relfundinglevel2_name" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resulthostingdatasource" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relinferenceprovenance" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resulthostingdatasourceid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relinferred" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultidentifier" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relorganizationcountryid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultlanguagename" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relorganizationcountryname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultpublisher" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relorganizationid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultsource" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relorganizationname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="resultsubject" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relorganizationshortname" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
<field name="resulttitle" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relproject" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="resulttypeid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
|
||||||
<field name="relprojectcode" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="sdg" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relprojectid" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="semrelid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relprojectname" type="string" multiValued="true" indexed="true" stored="false"/>
|
<field name="status" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
|
||||||
<field name="relprojecttitle" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="relprovenanceactionclassid" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="relresultid" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="relresulttype" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="reltrust" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultacceptanceyear" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resultaccessright" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultauthor" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultauthor_nt" type="string_ci" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultbestaccessright" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resultdateofacceptance" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resultdescription" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultdupid" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultembargoenddate" type="pdate" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resultembargoendyear" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resulthostingdatasource" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resulthostingdatasourceid" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resulthostingdatasourcename" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultidentifier" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultlanguagename" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resultlicense" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultpublisher" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultsource" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultsubject" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resultsubjectclass" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resulttitle" type="text_common" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="resulttypeid" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="resulttypename" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="semrelid" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="text" type="text_common" indexed="false" stored="false"/>
|
<field name="text" type="text_common" indexed="false" stored="false"/>
|
||||||
<field name="trust" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
|
|
||||||
<field name="versioning" type="string" multiValued="true" indexed="true" stored="false"/>
|
|
||||||
<field name="isgreen" type="boolean" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="openaccesscolor" type="string" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="isindiamondjournal" type="boolean" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="publiclyfunded" type="boolean" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
<field name="peerreviewed" type="boolean" multiValued="false" indexed="true" stored="false"/>
|
|
||||||
|
|
||||||
<copyField source="datasourceenglishname" dest="__all"/>
|
<copyField source="datasourceenglishname" dest="__all"/>
|
||||||
<copyField source="datasourceoddescription" dest="__all"/>
|
<copyField source="datasourceoddescription" dest="__all"/>
|
||||||
|
<copyField source="datasourceodsubjects" dest="__all"/>
|
||||||
<copyField source="datasourceofficialname" dest="__all"/>
|
<copyField source="datasourceofficialname" dest="__all"/>
|
||||||
<copyField source="datasourcesubject" dest="__all"/>
|
<copyField source="datasourcesubject" dest="__all"/>
|
||||||
<copyField source="externalreflabel" dest="__all"/>
|
<copyField source="externalreflabel" dest="__all"/>
|
||||||
|
@ -382,26 +343,20 @@
|
||||||
<copyField source="fundinglevel1_description" dest="__all"/>
|
<copyField source="fundinglevel1_description" dest="__all"/>
|
||||||
<copyField source="fundinglevel2_description" dest="__all"/>
|
<copyField source="fundinglevel2_description" dest="__all"/>
|
||||||
<copyField source="organizationalternativenames" dest="__all"/>
|
<copyField source="organizationalternativenames" dest="__all"/>
|
||||||
<copyField source="organizationecenterprise" dest="__all"/>
|
|
||||||
<copyField source="organizationecinternationalorganization" dest="__all"/>
|
|
||||||
<copyField source="organizationecinternationalorganizationeurinterests" dest="__all"/>
|
|
||||||
<copyField source="organizationeclegalbody" dest="__all"/>
|
|
||||||
<copyField source="organizationeclegalperson" dest="__all"/>
|
|
||||||
<copyField source="organizationecnonprofit" dest="__all"/>
|
|
||||||
<copyField source="organizationecnutscode" dest="__all"/>
|
|
||||||
<copyField source="organizationecresearchorganization" dest="__all"/>
|
|
||||||
<copyField source="organizationecsmevalidated" dest="__all"/>
|
|
||||||
<copyField source="organizationlegalname" dest="__all"/>
|
<copyField source="organizationlegalname" dest="__all"/>
|
||||||
<copyField source="organizationlegalshortname" dest="__all"/>
|
<copyField source="organizationlegalshortname" dest="__all"/>
|
||||||
<copyField source="projectacronym" dest="__all"/>
|
<copyField source="projectacronym" dest="__all"/>
|
||||||
<copyField source="projectcode" dest="__all"/>
|
<copyField source="projectcode" dest="__all"/>
|
||||||
<copyField source="projectkeywords" dest="__all"/>
|
<copyField source="projectkeywords" dest="__all"/>
|
||||||
<copyField source="projecttitle" dest="__all"/>
|
<copyField source="projecttitle" dest="__all"/>
|
||||||
|
<copyField source="projecttitle_alternative" dest="__all"/>
|
||||||
<copyField source="relcontracttypename" dest="__all"/>
|
<copyField source="relcontracttypename" dest="__all"/>
|
||||||
<copyField source="relorganizationcountryname" dest="__all"/>
|
<copyField source="relorganizationcountryname" dest="__all"/>
|
||||||
<copyField source="relorganizationname" dest="__all"/>
|
<copyField source="relorganizationname" dest="__all"/>
|
||||||
<copyField source="relorganizationshortname" dest="__all"/>
|
<copyField source="relorganizationshortname" dest="__all"/>
|
||||||
|
<copyField source="relprojectname" dest="__all"/>
|
||||||
<copyField source="relprojecttitle" dest="__all"/>
|
<copyField source="relprojecttitle" dest="__all"/>
|
||||||
|
<copyField source="resultacceptanceyear" dest="__all"/>
|
||||||
<copyField source="resultauthor" dest="__all"/>
|
<copyField source="resultauthor" dest="__all"/>
|
||||||
<copyField source="resultdescription" dest="__all"/>
|
<copyField source="resultdescription" dest="__all"/>
|
||||||
<copyField source="resultidentifier" dest="__all"/>
|
<copyField source="resultidentifier" dest="__all"/>
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue