forked from antonis.lempesis/dnet-hadoop
merging with branch beta
This commit is contained in:
commit
d4fc62c2f6
|
@ -107,7 +107,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=2560
|
||||
--conf spark.sql.shuffle.partitions=7000
|
||||
</spark-opts>
|
||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/dataset</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
|
@ -159,7 +159,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=2560
|
||||
--conf spark.sql.shuffle.partitions=7000
|
||||
</spark-opts>
|
||||
<arg>--inputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
|
|
|
@ -107,7 +107,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=5000
|
||||
--conf spark.sql.shuffle.partitions=7000
|
||||
</spark-opts>
|
||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
|
@ -159,7 +159,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=5000
|
||||
--conf spark.sql.shuffle.partitions=7000
|
||||
</spark-opts>
|
||||
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
|
|
|
@ -99,7 +99,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=5000
|
||||
--conf spark.sql.shuffle.partitions=10000
|
||||
</spark-opts>
|
||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||
|
|
|
@ -22,6 +22,7 @@ public class Constants {
|
|||
public static final String DOI_CLASSNAME = "Digital Object Identifier";
|
||||
|
||||
public static final String DEFAULT_DELIMITER = ",";
|
||||
public static final String DEFAULT_FOS_DELIMITER = "\t";
|
||||
|
||||
public static final String UPDATE_DATA_INFO_TYPE = "update";
|
||||
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
|
||||
|
@ -61,7 +62,7 @@ public class Constants {
|
|||
|
||||
public static Subject getSubject(String sbj, String classid, String classname,
|
||||
String diqualifierclassid) {
|
||||
if (sbj.equals(NULL))
|
||||
if (sbj == null || sbj.equals(NULL))
|
||||
return null;
|
||||
Subject s = new Subject();
|
||||
s.setValue(sbj);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_FOS_DELIMITER;
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
|
@ -9,8 +9,6 @@ import java.io.Serializable;
|
|||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
|
@ -49,7 +47,7 @@ public class GetFOSSparkJob implements Serializable {
|
|||
|
||||
final String delimiter = Optional
|
||||
.ofNullable(parser.get("delimiter"))
|
||||
.orElse(DEFAULT_DELIMITER);
|
||||
.orElse(DEFAULT_FOS_DELIMITER);
|
||||
|
||||
SparkConf sconf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
|
|
|
@ -86,7 +86,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the unresolved from bip finder!</name>
|
||||
<name>Produces the unresolved from BIP! Finder</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -135,7 +135,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the unresolved from FOS!</name>
|
||||
<name>Produces the unresolved from FOS</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -185,7 +185,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the unresolved from FOS!</name>
|
||||
<name>Produces the unresolved from FOS</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
|
|
@ -78,16 +78,6 @@ object DataciteModelConstants {
|
|||
OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME)
|
||||
|
||||
val subRelTypeMapping: Map[String, OAFRelations] = Map(
|
||||
ModelConstants.REFERENCES -> OAFRelations(
|
||||
ModelConstants.REFERENCES,
|
||||
ModelConstants.IS_REFERENCED_BY,
|
||||
ModelConstants.RELATIONSHIP
|
||||
),
|
||||
ModelConstants.IS_REFERENCED_BY -> OAFRelations(
|
||||
ModelConstants.IS_REFERENCED_BY,
|
||||
ModelConstants.REFERENCES,
|
||||
ModelConstants.RELATIONSHIP
|
||||
),
|
||||
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(
|
||||
ModelConstants.IS_SUPPLEMENTED_BY,
|
||||
ModelConstants.IS_SUPPLEMENT_TO,
|
||||
|
@ -163,16 +153,6 @@ object DataciteModelConstants {
|
|||
ModelConstants.IS_SOURCE_OF,
|
||||
ModelConstants.VERSION
|
||||
),
|
||||
ModelConstants.CITES -> OAFRelations(
|
||||
ModelConstants.CITES,
|
||||
ModelConstants.IS_CITED_BY,
|
||||
ModelConstants.CITATION
|
||||
),
|
||||
ModelConstants.IS_CITED_BY -> OAFRelations(
|
||||
ModelConstants.IS_CITED_BY,
|
||||
ModelConstants.CITES,
|
||||
ModelConstants.CITATION
|
||||
),
|
||||
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(
|
||||
ModelConstants.IS_VARIANT_FORM_OF,
|
||||
ModelConstants.IS_DERIVED_FROM,
|
||||
|
|
|
@ -645,7 +645,7 @@ object DataciteToOAFTransformation {
|
|||
id: String,
|
||||
date: String
|
||||
): List[Relation] = {
|
||||
rels
|
||||
val bidirectionalRels: List[Relation] = rels
|
||||
.filter(r =>
|
||||
subRelTypeMapping
|
||||
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
||||
|
@ -653,27 +653,49 @@ object DataciteToOAFTransformation {
|
|||
r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
|
||||
)
|
||||
.map(r => {
|
||||
val subRelType = subRelTypeMapping(r.relationType).relType
|
||||
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
relation(id, target, subRelType, r.relationType, date)
|
||||
})
|
||||
val citationRels: List[Relation] = rels
|
||||
.filter(r =>
|
||||
(r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
||||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
|
||||
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) &&
|
||||
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
|
||||
)
|
||||
.map(r => {
|
||||
r.relationType match {
|
||||
case ModelConstants.CITES | ModelConstants.REFERENCES =>
|
||||
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
relation(id, target, ModelConstants.CITATION, ModelConstants.CITES, date)
|
||||
case ModelConstants.IS_CITED_BY | ModelConstants.IS_REFERENCED_BY =>
|
||||
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
relation(source, id, ModelConstants.CITATION, ModelConstants.CITES, date)
|
||||
}
|
||||
})
|
||||
|
||||
citationRels ::: bidirectionalRels
|
||||
}
|
||||
|
||||
def relation(source: String, target: String, subRelType: String, relClass: String, date: String): Relation = {
|
||||
val rel = new Relation
|
||||
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
||||
rel.setDataInfo(dataInfo)
|
||||
|
||||
val subRelType = subRelTypeMapping(r.relationType).relType
|
||||
rel.setRelType(REL_TYPE_VALUE)
|
||||
rel.setSubRelType(subRelType)
|
||||
rel.setRelClass(r.relationType)
|
||||
rel.setRelClass(relClass)
|
||||
|
||||
val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
|
||||
|
||||
rel.setProperties(List(dateProps).asJava)
|
||||
|
||||
rel.setSource(id)
|
||||
rel.setTarget(
|
||||
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
)
|
||||
rel.setSource(source)
|
||||
rel.setTarget(target)
|
||||
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
||||
rel.getCollectedfrom.asScala.map(c => c.getValue).toList
|
||||
rel
|
||||
})
|
||||
}
|
||||
|
||||
def generateDSId(input: String): String = {
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 13/02/23
|
||||
*/
|
||||
public class GetFosTest {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
|
||||
|
||||
private static Path workingDir;
|
||||
private static SparkSession spark;
|
||||
private static LocalFileSystem fs;
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
|
||||
|
||||
fs = FileSystem.getLocal(new Configuration());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(ProduceTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(PrepareTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void test3() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv")
|
||||
.getPath();
|
||||
|
||||
final String outputPath = workingDir.toString() + "/fos.json";
|
||||
GetFOSSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath", sourcePath,
|
||||
|
||||
"-outputPath", outputPath
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<FOSDataModel> tmp = sc
|
||||
.textFile(outputPath)
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
|
||||
|
||||
tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
|
||||
tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
|
||||
tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
|
||||
tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
doi level1 level2 level3
|
||||
10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
|
||||
10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
|
||||
10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
|
||||
10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
|
||||
10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
|
||||
10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
|
||||
10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
|
||||
10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
|
||||
10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
|
||||
10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||
10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||
10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
|
||||
10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
|
||||
10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
|
||||
10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
|
||||
10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
|
||||
10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||
10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
|
||||
10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
|
||||
10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||
10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
|
||||
10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
|
||||
10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
|
||||
10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
|
||||
10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
|
||||
10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
|
||||
10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
|
||||
10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
|
||||
10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
|
||||
10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
|
||||
10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
|
||||
10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
|
||||
10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
|
|
|
@ -397,17 +397,7 @@ case object Crossref2Oaf {
|
|||
from.setDataInfo(source.getDataInfo)
|
||||
from.setLastupdatetimestamp(source.getLastupdatetimestamp)
|
||||
|
||||
val to = new Relation
|
||||
to.setTarget(source.getId)
|
||||
to.setSource(targetId)
|
||||
to.setRelType(ModelConstants.RESULT_RESULT)
|
||||
to.setRelClass(ModelConstants.IS_CITED_BY)
|
||||
to.setSubRelType(ModelConstants.CITATION)
|
||||
to.setCollectedfrom(source.getCollectedfrom)
|
||||
to.setDataInfo(source.getDataInfo)
|
||||
to.setLastupdatetimestamp(source.getLastupdatetimestamp)
|
||||
|
||||
List(from, to)
|
||||
List(from)
|
||||
}
|
||||
|
||||
def generateCitationRelations(dois: List[String], result: Result): List[Relation] = {
|
||||
|
@ -505,6 +495,13 @@ case object Crossref2Oaf {
|
|||
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
|
||||
case "10.13039/100020031" =>
|
||||
val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
|
||||
case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a)
|
||||
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
|
||||
case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
|
||||
case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
|
||||
|
@ -541,21 +538,21 @@ case object Crossref2Oaf {
|
|||
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
|
||||
//FCT
|
||||
case "10.13039/501100001871" =>
|
||||
generateSimpleRelationFromAward(funder, "fct_________", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "fct_________", a => a)
|
||||
//NHMRC
|
||||
case "10.13039/501100000925" =>
|
||||
generateSimpleRelationFromAward(funder, "mhmrc_______", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "nhmrc_______", a => a)
|
||||
//NIH
|
||||
case "10.13039/100000002" =>
|
||||
generateSimpleRelationFromAward(funder, "nih_________", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "nih_________", a => a)
|
||||
//NWO
|
||||
case "10.13039/501100003246" =>
|
||||
generateSimpleRelationFromAward(funder, "nwo_________", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "nwo_________", a => a)
|
||||
//UKRI
|
||||
case "10.13039/100014013" | "10.13039/501100000267" | "10.13039/501100000268" | "10.13039/501100000269" |
|
||||
"10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
|
||||
"10.13039/501100013589" | "10.13039/501100000271" =>
|
||||
generateSimpleRelationFromAward(funder, "nwo_________", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "ukri________", a => a)
|
||||
|
||||
case _ => logger.debug("no match for " + funder.DOI.get)
|
||||
|
||||
|
@ -568,10 +565,11 @@ case object Crossref2Oaf {
|
|||
case "European Union's" =>
|
||||
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
|
||||
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
|
||||
generateSimpleRelationFromAward(funder, "anr_________", a => a)
|
||||
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
|
||||
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "conicytf____", a => a)
|
||||
case "Wellcome Trust Masters Fellowship" =>
|
||||
generateSimpleRelationFromAward(funder, "wt__________", a => a)
|
||||
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
|
|
|
@ -6,11 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -24,6 +20,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
|
|||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
@ -110,6 +107,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
|
|||
.read()
|
||||
.parquet(validPaths)
|
||||
.map((MapFunction<Row, String>) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING())
|
||||
.filter((FilterFunction<String>) Objects::nonNull)
|
||||
.toJavaRDD()
|
||||
.mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml)))
|
||||
// .coalesce(1)
|
||||
|
@ -135,13 +133,14 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
|
|||
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
final Document doc = reader.read(new StringReader(xml));
|
||||
final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']");
|
||||
|
||||
head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id"));
|
||||
head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate);
|
||||
head.addElement(new QName("dateOfTransformation", DRI_NS_PREFIX)).addText(tranDate);
|
||||
return doc.asXML();
|
||||
} catch (final Exception e) {
|
||||
log.error("Error patching record: " + xml);
|
||||
throw new RuntimeException("Error patching record: " + xml, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -131,7 +131,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
|
|||
// that is the hdfs path basePath/MDSTOREID/timestamp is missing
|
||||
// So we have to synch it
|
||||
if (!hdfsMDStores.containsKey(currentMDStore.getMdstore())) {
|
||||
log.info("Adding store " + currentMDStore.getMdstore());
|
||||
log.info("Adding store {}", currentMDStore.getMdstore());
|
||||
try {
|
||||
synchMDStoreIntoHDFS(
|
||||
mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb,
|
||||
|
@ -145,14 +145,14 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
|
|||
// basePath/MDSTOREID/timestamp but the timestamp on hdfs is older than the
|
||||
// new one in mongo so we have to synch the new mdstore and delete the old one
|
||||
if (currentMDStore.getLatestTimestamp() > current.getLatestTimestamp()) {
|
||||
log.info("Updating MDStore " + currentMDStore.getMdstore());
|
||||
log.info("Updating MDStore {}", currentMDStore.getMdstore());
|
||||
final String mdstoreDir = createMDStoreDir(hdfsPath, currentMDStore.getMdstore());
|
||||
final String rmPath = createMDStoreDir(mdstoreDir, current.getLatestTimestamp().toString());
|
||||
try {
|
||||
synchMDStoreIntoHDFS(
|
||||
mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb,
|
||||
currentMDStore);
|
||||
log.info("deleting " + rmPath);
|
||||
log.info("deleting {}", rmPath);
|
||||
// DELETE THE OLD MDSTORE
|
||||
fileSystem.delete(new Path(rmPath), true);
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -214,16 +214,14 @@
|
|||
|
||||
<action name="ImportODF_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/odf_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${contentPath}/odf_claims</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>ODF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>claim</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
|
||||
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||
<arg>--mdFormat</arg><arg>ODF</arg>
|
||||
<arg>--mdLayout</arg><arg>store</arg>
|
||||
<arg>--mdInterpretation</arg><arg>claim</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
</java>
|
||||
<ok to="reuse_oaf_claims"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -239,16 +237,14 @@
|
|||
|
||||
<action name="ImportOAF_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/oaf_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${contentPath}/oaf_claims</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>OAF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>claim</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
|
||||
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||
<arg>--mdFormat</arg><arg>OAF</arg>
|
||||
<arg>--mdLayout</arg><arg>store</arg>
|
||||
<arg>--mdInterpretation</arg><arg>claim</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
</java>
|
||||
<ok to="wait_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -291,11 +287,8 @@
|
|||
|
||||
<action name="ImportODF">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/odf_records"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
|
||||
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||
<arg>--mdFormat</arg><arg>ODF</arg>
|
||||
|
@ -317,11 +310,8 @@
|
|||
|
||||
<action name="ImportOAF">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/oaf_records"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
|
||||
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||
<arg>--mdFormat</arg><arg>OAF</arg>
|
||||
|
@ -335,11 +325,8 @@
|
|||
|
||||
<action name="ImportOAF_invisible">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/oaf_records_invisible"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records_invisible</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
|
||||
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||
<arg>--mdFormat</arg><arg>OAF</arg>
|
||||
|
@ -375,7 +362,7 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records_hdfs</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/odf_mdstore_hdfs</arg>
|
||||
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
|
||||
<arg>--mdFormat</arg><arg>ODF</arg>
|
||||
<arg>--mdLayout</arg><arg>store</arg>
|
||||
|
@ -409,7 +396,7 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records_hdfs</arg>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_mdstore_hdfs</arg>
|
||||
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
|
||||
<arg>--mdFormat</arg><arg>OAF</arg>
|
||||
<arg>--mdLayout</arg><arg>store</arg>
|
||||
|
@ -542,7 +529,7 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs,${contentPath}/oaf_records_invisible</arg>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/*</arg>
|
||||
<arg>--invalidPath</arg><arg>${workingDir}/invalid_records</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
|
@ -566,7 +553,7 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs,${contentPath}/oaf_records_invisible</arg>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/*</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--shouldHashId</arg><arg>${shouldHashId}</arg>
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,162 +0,0 @@
|
|||
<workflow-app name="import Claims as Graph" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>reuseContent</name>
|
||||
<value>false</value>
|
||||
<description>should import content from the aggregator or reuse a previous version</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>contentPath</name>
|
||||
<description>path location to store (or reuse) content from the aggregator</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the postgres URL to access to the database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresUser</name>
|
||||
<description>the user postgres</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresPassword</name>
|
||||
<description>the password postgres</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dbSchema</name>
|
||||
<value>beta</value>
|
||||
<description>the database schema according to the D-Net infrastructure (beta or production)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>mongoURL</name>
|
||||
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>mongoDb</name>
|
||||
<description>mongo database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>nsPrefixBlacklist</name>
|
||||
<value></value>
|
||||
<description>a blacklist of nsprefixes (comma separeted)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="ImportDB_claims"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ImportDB_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/db_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
|
||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--action</arg><arg>claims</arg>
|
||||
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
<ok to="ImportODF_claims"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ImportODF_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/odf_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${contentPath}/odf_claims</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>ODF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>claim</arg>
|
||||
</java>
|
||||
<ok to="ImportOAF_claims"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ImportOAF_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/oaf_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${contentPath}/oaf_claims</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>OAF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>claim</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,195 +0,0 @@
|
|||
<workflow-app name="import DB entities" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>contentPath</name>
|
||||
<description>path location to store (or reuse) content from the aggregator</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the postgres URL to access to the database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresUser</name>
|
||||
<description>the user postgres</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresPassword</name>
|
||||
<description>the password postgres</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dbSchema</name>
|
||||
<value>beta</value>
|
||||
<description>the database schema according to the D-Net infrastructure (beta or production)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>nsPrefixBlacklist</name>
|
||||
<value></value>
|
||||
<description>a blacklist of nsprefixes (comma separeted)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>reuseContent</name>
|
||||
<value>false</value>
|
||||
<description>reuse content in the aggregator database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reuse_db"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<decision name="reuse_db">
|
||||
<switch>
|
||||
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
|
||||
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
|
||||
<default to="ImportDB"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="ImportDB">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/db_records"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/db_records</arg>
|
||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--action</arg><arg>openaire</arg>
|
||||
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
<ok to="ImportDB_claims"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ImportDB_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/db_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
|
||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
||||
<arg>--action</arg><arg>claims</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
<ok to="GenerateEntities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEntities</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--shouldHashId</arg><arg>true</arg>
|
||||
</spark>
|
||||
<ok to="GenerateGraph"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateGraph">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateGraph</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,157 +0,0 @@
|
|||
<workflow-app name="Test Import of Hdfs Stores" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>graphOutputPath</name>
|
||||
<description>the target path to store raw graph</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>contentPath</name>
|
||||
<description>path location to store (or reuse) content from the aggregator</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>mdstoreManagerUrl</name>
|
||||
<description>the address of the Mdstore Manager</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="ImportODF_hdfs"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ImportODF_hdfs">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>ImportODF_hdfs</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.MigrateHdfsMdstoresApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records_hdfs</arg>
|
||||
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
|
||||
<arg>--mdFormat</arg><arg>ODF</arg>
|
||||
<arg>--mdLayout</arg><arg>store</arg>
|
||||
<arg>--mdInterpretation</arg><arg>cleaned</arg>
|
||||
</spark>
|
||||
<ok to="GenerateEntities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEntities</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/odf_records_hdfs</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--shouldHashId</arg><arg>${shouldHashId}</arg>
|
||||
</spark>
|
||||
<ok to="GenerateGraph"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateGraph">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateGraph</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--graphRawPath</arg><arg>${workingDir}/graph_raw</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,67 +0,0 @@
|
|||
<workflow-app name="import regular entities as Graph (step 1)" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>migrationPathStep1</name>
|
||||
<description>the base path to store hdfs file</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>mongoURL</name>
|
||||
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>mongoDb</name>
|
||||
<description>mongo database</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${migrationPathStep1}'/>
|
||||
<mkdir path='${migrationPathStep1}'/>
|
||||
</fs>
|
||||
<ok to="ImportODF"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="ImportODF">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${migrationPathStep1}</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>ODF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>cleaned</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
|
||||
</java>
|
||||
<ok to="ImportOAF"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ImportOAF">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||
<arg>-p</arg><arg>${migrationPathStep1}</arg>
|
||||
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||
<arg>-f</arg><arg>OAF</arg>
|
||||
<arg>-l</arg><arg>store</arg>
|
||||
<arg>-i</arg><arg>cleaned</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,65 +0,0 @@
|
|||
<workflow-app name="import regular entities as Graph (step 2)" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>migrationPathStep1</name>
|
||||
<description>the base path to store hdfs file</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>migrationPathStep2</name>
|
||||
<description>the temporary path to store entities before dispatching</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetEntities"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetEntities">
|
||||
<fs>
|
||||
<delete path='${migrationPathStep2}'/>
|
||||
<mkdir path='${migrationPathStep2}'/>
|
||||
</fs>
|
||||
<ok to="GenerateEntities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEntities</name>
|
||||
<class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||
<arg>-s</arg><arg>${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records</arg>
|
||||
<arg>-t</arg><arg>${migrationPathStep2}/all_entities</arg>
|
||||
<arg>--islookup</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,18 +0,0 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -1,60 +0,0 @@
|
|||
<workflow-app name="import regular entities as Graph (step 3)" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
|
||||
<property>
|
||||
<name>migrationPathStep2</name>
|
||||
<description>the temporary path to store entities before dispatching</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>migrationPathStep3</name>
|
||||
<description>the graph Raw base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetGraph"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetGraph">
|
||||
<fs>
|
||||
<delete path='${migrationPathStep3}'/>
|
||||
<mkdir path='${migrationPathStep3}'/>
|
||||
</fs>
|
||||
<ok to="GenerateGraph"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateGraph">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateGraph</name>
|
||||
<class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||
<arg>-s</arg><arg>${migrationPathStep2}/all_entities</arg>
|
||||
<arg>-g</arg><arg>${migrationPathStep3}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -151,7 +151,7 @@ public class XmlIndexingJob {
|
|||
.sequenceFile(inputPath, Text.class, Text.class)
|
||||
.map(t -> t._2().toString())
|
||||
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
|
||||
.map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s));
|
||||
.map(s -> new StreamingInputDocumentFactory().parseDocument(s));
|
||||
|
||||
switch (outputFormat) {
|
||||
case SOLR:
|
||||
|
|
|
@ -36,10 +36,6 @@ public class StreamingInputDocumentFactory {
|
|||
|
||||
private static final String INDEX_FIELD_PREFIX = "__";
|
||||
|
||||
private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion";
|
||||
|
||||
private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid";
|
||||
|
||||
private static final String RESULT = "result";
|
||||
|
||||
private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;
|
||||
|
@ -65,20 +61,13 @@ public class StreamingInputDocumentFactory {
|
|||
private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
|
||||
.withInitial(XMLEventFactory::newInstance);
|
||||
|
||||
private final String version;
|
||||
|
||||
private final String dsId;
|
||||
|
||||
private String resultName = DEFAULTDNETRESULT;
|
||||
|
||||
public StreamingInputDocumentFactory(final String version, final String dsId) {
|
||||
this(version, dsId, DEFAULTDNETRESULT);
|
||||
public StreamingInputDocumentFactory() {
|
||||
this(DEFAULTDNETRESULT);
|
||||
}
|
||||
|
||||
public StreamingInputDocumentFactory(
|
||||
final String version, final String dsId, final String resultName) {
|
||||
this.version = version;
|
||||
this.dsId = dsId;
|
||||
public StreamingInputDocumentFactory(final String resultName) {
|
||||
this.resultName = resultName;
|
||||
}
|
||||
|
||||
|
@ -111,14 +100,6 @@ public class StreamingInputDocumentFactory {
|
|||
}
|
||||
}
|
||||
|
||||
if (version != null) {
|
||||
indexDocument.addField(DS_VERSION, version);
|
||||
}
|
||||
|
||||
if (dsId != null) {
|
||||
indexDocument.addField(DS_ID, dsId);
|
||||
}
|
||||
|
||||
if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
|
||||
throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
|
||||
}
|
||||
|
|
|
@ -79,8 +79,7 @@ public class EOSCFuture_Test {
|
|||
|
||||
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
|
||||
.parseDocument(indexRecordXML);
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML);
|
||||
|
||||
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
||||
|
||||
|
|
|
@ -39,9 +39,6 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
|||
*/
|
||||
public class IndexRecordTransformerTest {
|
||||
|
||||
public static final String VERSION = "2021-04-15T10:05:53Z";
|
||||
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
|
||||
|
||||
private ContextMapper contextMapper;
|
||||
|
||||
@BeforeEach
|
||||
|
@ -197,8 +194,7 @@ public class IndexRecordTransformerTest {
|
|||
|
||||
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
|
||||
.parseDocument(indexRecordXML);
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML);
|
||||
|
||||
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
||||
|
||||
|
|
|
@ -115,16 +115,8 @@ public class SolrConfigTest extends SolrTest {
|
|||
for (SolrDocument doc : rsp.getResults()) {
|
||||
System.out
|
||||
.println(
|
||||
doc.get("score") + "\t" +
|
||||
doc.get("__indexrecordidentifier") + "\t" +
|
||||
doc.get("resultidentifier") + "\t" +
|
||||
doc.get("resultauthor") + "\t" +
|
||||
doc.get("resultacceptanceyear") + "\t" +
|
||||
doc.get("resultsubject") + "\t" +
|
||||
doc.get("resulttitle") + "\t" +
|
||||
doc.get("relprojectname") + "\t" +
|
||||
doc.get("resultdescription") + "\t" +
|
||||
doc.get("__all") + "\t");
|
||||
doc.get("__result") + "\t");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,165 +1,116 @@
|
|||
<LAYOUT name="index">
|
||||
<FIELDS>
|
||||
<FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/>
|
||||
<FIELD indexable="true" name="oaftype" result="false" stat="false" tokenizable="false" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])"/>
|
||||
<FIELD indexable="true" name="objIdentifier" result="false" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier"/><!-- DATASOURCE FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/>
|
||||
<FIELD indexable="true" name="datasourceodlanguages" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages"/>
|
||||
<FIELD indexable="true" name="datasourceodcontenttypes" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcetypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/>
|
||||
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/><!-- datasource fields for EOSC -->
|
||||
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname"/>
|
||||
<FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/>
|
||||
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy/@classname"/>
|
||||
<FIELD indexable="true" name="eosctype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/>
|
||||
<FIELD indexable="true" name="eoscdatasourcetype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname"/><!-- ORGANIZATION FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/>
|
||||
<FIELD copy="true" indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/>
|
||||
<FIELD copy="true" indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/>
|
||||
<FIELD indexable="true" name="organizationeclegalbody" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalbody"/>
|
||||
<FIELD indexable="true" name="organizationeclegalperson" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalperson"/>
|
||||
<FIELD indexable="true" name="organizationecnonprofit" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnonprofit"/>
|
||||
<FIELD indexable="true" name="organizationecresearchorganization" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecresearchorganization"/>
|
||||
<FIELD indexable="true" name="organizationecinternationalorganizationeurinterests" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecinternationalorganizationeurinterests"/>
|
||||
<FIELD indexable="true" name="organizationecinternationalorganization" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecinternationalorganization"/>
|
||||
<FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/>
|
||||
<FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/>
|
||||
<FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/>
|
||||
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/><!-- PROJECT FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD copy="true" indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/>
|
||||
<FIELD copy="true" indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectstartyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectcallidentifier" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier"/>
|
||||
<FIELD copy="true" indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectduration" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectecsc39" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectecarticle29_3" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/ecarticle29_3"/>
|
||||
<FIELD indexable="true" name="projectsubject" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/subjects"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectcontracttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/contracttype/@classname"/>
|
||||
<FIELD indexable="true" name="fundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name"/>
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/>
|
||||
<FIELD indexable="true" name="fundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name"/>
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/>
|
||||
<FIELD indexable="true" name="fundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name"/>
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD indexable="true" name="funder" result="false" stat="false" tokenizable="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder"/>
|
||||
<FIELD indexable="true" name="fundershortname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname"/>
|
||||
<FIELD indexable="true" name="funderid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id"/>
|
||||
<FIELD indexable="true" name="fundername" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/name"/>
|
||||
<FIELD indexable="true" name="funderoriginalname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/originalname"/>
|
||||
<FIELD indexable="true" name="funderjurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/jurisdiction"/><!-- RESULT FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="resulttitle" result="false" stat="false" type="text_en" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/>
|
||||
<FIELD indexable="true" name="resultsubject" result="false" stat="false" type="text_en" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
|
||||
<FIELD indexable="true" name="resultsubjectclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject/@classname)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultembargoendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resulttypeid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resulttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultlanguagename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname"/>
|
||||
<FIELD copy="true" indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/>
|
||||
<FIELD copy="true" indexable="true" name="resultdescription" result="false" stat="false" type="text_en" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/>
|
||||
<FIELD indexable="true" name="resultlicense" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/license"/>
|
||||
<FIELD indexable="true" name="resultaccessright" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/accessright/@classname"/>
|
||||
<FIELD indexable="true" name="resultresourcetypename" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/resourcetype/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultbestaccessright" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/>
|
||||
<FIELD indexable="true" multivalued="true" name="orcidtypevalue" result="false" stat="false" type="string_ci" value="string-join((./@*[local-name() = 'orcid' or local-name() = 'orcid_pending'], ./@*[local-name() = 'orcid' or local-name() = 'orcid_pending']/local-name()), '||' )" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasource" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@id)"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasourcename" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@name)"/>
|
||||
<FIELD indexable="true" name="instancetypename" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='instancetype']/@classname)"/>
|
||||
<FIELD indexable="true" name="resultdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier"/>
|
||||
<FIELD indexable="true" name="organizationdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/>
|
||||
<FIELD indexable="true" name="externalrefsite" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/sitename)"/>
|
||||
<FIELD copy="true" indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
|
||||
<FIELD indexable="true" name="externalrefclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/qualifier/@classid)"/>
|
||||
<FIELD indexable="true" name="externalrefid" result="false" stat="false" tokenizable="false" xpath="(//*[local-name()='entity']/*//children/externalreference/refidentifier)"/>
|
||||
<FIELD copy="true" indexable="true" name="resultidentifier" result="false" stat="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/>
|
||||
<FIELD copy="true" indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/>
|
||||
<FIELD indexable="true" name="eoscifguidelines" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs: non-tokenizable, for faceted search -->
|
||||
<FIELD indexable="true" name="fos" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])"/>
|
||||
<FIELD indexable="true" name="sdg" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])"/><!-- REL FIELDS -->
|
||||
<FIELD indexable="true" name="reldatasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)"/>
|
||||
<FIELD indexable="true" name="relproject" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" xpath="//*[local-name()='entity']/*//rel/to[@type='project']"/>
|
||||
<FIELD indexable="true" name="relprojectid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])"/>
|
||||
<FIELD indexable="true" name="relprojectcode" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)"/>
|
||||
<FIELD copy="true" indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/>
|
||||
<FIELD copy="true" indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
|
||||
<FIELD indexable="true" name="relcontracttypeid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classid)"/>
|
||||
<FIELD copy="true" indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
|
||||
<FIELD indexable="true" name="relorganizationcountryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/>
|
||||
<FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
|
||||
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
|
||||
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
|
||||
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
|
||||
<FIELD indexable="true" name="relfundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1"/>
|
||||
<FIELD indexable="true" name="relfundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1/@name/string()"/>
|
||||
<FIELD indexable="true" name="relfundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2"/>
|
||||
<FIELD indexable="true" name="relfundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2/@name/string()"/>
|
||||
<FIELD indexable="true" name="relinferred" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@inferred)"/>
|
||||
<FIELD indexable="true" name="reltrust" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@trust)"/>
|
||||
<FIELD indexable="true" name="relinferenceprovenance" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@inferenceprovenance)"/>
|
||||
<FIELD indexable="true" name="relprovenanceactionclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@provenanceaction)"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD indexable="true" name="relfunder" result="false" stat="false" tokenizable="false" value="distinct-values(concat(@id, '||', @name, '||', @shortname))" xpath="//*[local-name()='entity']//rel/funding/funder"/>
|
||||
<FIELD indexable="true" name="relfunderid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@id)"/>
|
||||
<FIELD indexable="true" name="relfundershortname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@shortname)"/>
|
||||
<FIELD indexable="true" name="relfundername" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@name)"/>
|
||||
<FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- "Collected from" information of the related entity. Available for result-result relationships. -->
|
||||
<FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
|
||||
<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
|
||||
<FIELD indexable="true" name="relvalidated" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./validated]/to[@type='project'])"/>
|
||||
<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
|
||||
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="date" value="//header/*[local-name()='dateOfCollection']"/>
|
||||
<FIELD indexable="true" name="status" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//header/*[local-name()='status']"/>
|
||||
<FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
|
||||
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
|
||||
<FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/>
|
||||
<FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/>
|
||||
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/pid/text()"/>
|
||||
<FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/>
|
||||
<FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/>
|
||||
<FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/>
|
||||
<FIELD indexable="true" name="deletedbyinference" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/deletedbyinference"/>
|
||||
<FIELD indexable="true" name="trust" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/trust"/>
|
||||
<FIELD indexable="true" name="inferenceprovenance" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferenceprovenance"/>
|
||||
<FIELD indexable="true" name="provenanceactionclassid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/provenanceaction/@classid"/>
|
||||
<FIELD indexable="true" name="contextid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@id)"/>
|
||||
<FIELD indexable="true" name="contexttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@type)"/>
|
||||
<FIELD indexable="true" name="contextname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@label)"/><!-- Need special fields for community (research initiative) context in order to exclude funders from the context browse -->
|
||||
<FIELD indexable="true" name="community" result="false" stat="false" tokenizable="false" value="distinct-values(concat(@id, '||', @label))" xpath="//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']"/>
|
||||
<FIELD indexable="true" name="communityname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@label)"/>
|
||||
<FIELD indexable="true" name="communityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@id)"/>
|
||||
<FIELD indexable="true" name="categoryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@id)"/>
|
||||
<FIELD indexable="true" name="categoryname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@label)"/>
|
||||
<FIELD indexable="true" name="conceptid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@id)"/>
|
||||
<FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity -->
|
||||
<FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/>
|
||||
<FIELD name="oaftype" indexable="true" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])" tokenizable="false" result="false" stat="false"/>
|
||||
<FIELD name="objidentifier" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier" result="false" indexable="true"/><!-- DATASOURCE FIELDS -->
|
||||
<FIELD name="datasourceofficialname" stat="false" result="false" copy="true" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
|
||||
<FIELD name="datasourceenglishname" indexable="true" copy="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
|
||||
<FIELD name="datasourceoddescription" stat="false" result="false" indexable="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
|
||||
<FIELD name="datasourceodsubjects" result="false" stat="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects" indexable="true" tokenizable="false"/>
|
||||
<FIELD name="datasourceodlanguages" indexable="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages" tokenizable="false"/>
|
||||
<FIELD name="datasourceodcontenttypes" result="false" stat="false" indexable="true" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/>
|
||||
<FIELD name="datasourcetypename" tokenizable="false" indexable="true" result="false" multivalued="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/>
|
||||
<FIELD name="datasourcetypeuiid" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classid" result="false" indexable="true" multivalued="false" tokenizable="false" stat="false"/>
|
||||
<FIELD name="datasourcetypeuiname" result="false" indexable="true" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname" tokenizable="false" multivalued="false"/>
|
||||
<FIELD name="datasourcecompatibilityid" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid" multivalued="false" stat="false" indexable="true" tokenizable="false"/>
|
||||
<FIELD name="datasourcecompatibilityname" indexable="true" multivalued="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname" stat="false" tokenizable="false" result="false"/>
|
||||
<FIELD name="datasourcesubject" type="ngramtext" multivalued="true" copy="true" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects" indexable="true" stat="false"/><!-- datasource fields for EOSC -->
|
||||
<FIELD name="datasourcejurisdiction" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname" indexable="true" tokenizable="false" result="false" stat="false"/>
|
||||
<FIELD name="datasourcethematic" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic" result="false" tokenizable="false" indexable="true"/>
|
||||
<FIELD name="eosctype" indexable="true" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/>
|
||||
<FIELD name="eoscdatasourcetype" result="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname" tokenizable="false" stat="false"/><!-- ORGANIZATION FIELDS -->
|
||||
<FIELD name="organizationlegalshortname" copy="true" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)" type="ngramtext" result="false" stat="false"/>
|
||||
<FIELD name="organizationlegalname" indexable="true" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)" type="ngramtext" copy="true" result="false"/>
|
||||
<FIELD name="organizationalternativenames" stat="false" copy="true" type="ngramtext" result="false" indexable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/><!-- PROJECT FIELDS -->
|
||||
<FIELD name="projectcode" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code" copy="true" indexable="false" stat="false" type="ngramtext"/>
|
||||
<FIELD name="projectcode_nt" indexable="true" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code" stat="false"/>
|
||||
<FIELD name="projectacronym" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym" copy="true" result="false" stat="false" type="ngramtext"/>
|
||||
<FIELD name="projecttitle" copy="true" stat="false" result="false" indexable="true" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
|
||||
<FIELD name="projectstartdate" indexable="true" type="date" result="false" multivalued="false" value="//*[local-name()='entity']/*[local-name()='project']/startdate" stat="false"/>
|
||||
<FIELD name="projectstartyear" indexable="true" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)" tokenizable="false" multivalued="false" result="false" stat="false"/>
|
||||
<FIELD name="projectenddate" stat="false" multivalued="false" result="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate" indexable="true"/>
|
||||
<FIELD name="projectendyear" result="false" tokenizable="false" multivalued="false" indexable="true" stat="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/>
|
||||
<FIELD name="projectcallidentifier" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier" stat="false" tokenizable="false" multivalued="false" result="false"/>
|
||||
<FIELD name="projectkeywords" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords" copy="true" stat="false" indexable="false"/>
|
||||
<FIELD name="projectduration" multivalued="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration" indexable="true" tokenizable="false" result="false"/>
|
||||
<FIELD name="projectecsc39" multivalued="false" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)" indexable="true"/>
|
||||
<FIELD name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications" indexable="true" multivalued="false"/>
|
||||
<FIELD name="fundinglevel0_id" indexable="true" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id" stat="false"/>
|
||||
<FIELD name="fundinglevel0_name" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name" indexable="true" tokenizable="false" result="false" stat="false"/>
|
||||
<FIELD name="fundinglevel0_description" indexable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description" result="false" stat="false" copy="true"/>
|
||||
<FIELD name="fundinglevel1_id" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id" indexable="true"/>
|
||||
<FIELD name="fundinglevel1_name" stat="false" result="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name" tokenizable="false"/>
|
||||
<FIELD name="fundinglevel1_description" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description" stat="false" copy="true" indexable="false"/>
|
||||
<FIELD name="fundinglevel2_id" indexable="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id" tokenizable="false"/>
|
||||
<FIELD name="fundinglevel2_name" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name" result="false" indexable="true" stat="false"/>
|
||||
<FIELD name="fundinglevel2_description" indexable="false" copy="true" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description" stat="false"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD name="funder" result="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder" indexable="true" stat="false" tokenizable="false"/>
|
||||
<FIELD name="fundershortname" stat="false" tokenizable="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname" result="false"/>
|
||||
<FIELD name="funderid" stat="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id" result="false" tokenizable="false"/><!-- RESULT FIELDS -->
|
||||
<FIELD name="resulttitle" indexable="true" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title" stat="false" type="text_en" copy="true" result="false"/>
|
||||
<FIELD name="resultsubject" stat="false" type="text_en" indexable="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
|
||||
<FIELD name="resultembargoenddate" type="date" multivalued="false" result="false" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate" indexable="true" stat="false"/>
|
||||
<FIELD name="resultembargoendyear" multivalued="false" tokenizable="false" stat="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)" indexable="true" result="false"/>
|
||||
<FIELD name="resulttypeid" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid" stat="false" multivalued="false" indexable="true" result="false"/>
|
||||
<FIELD name="resultlanguagename" result="false" indexable="true" tokenizable="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname" multivalued="false"/>
|
||||
<FIELD name="resultpublisher" stat="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']" result="false" indexable="true"/>
|
||||
<FIELD name="resultdescription" result="false" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']" copy="true" type="text_en" indexable="true" stat="false"/>
|
||||
<FIELD name="resultbestaccessright" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)" stat="false" indexable="true" tokenizable="false" result="false" multivalued="false"/>
|
||||
<FIELD name="resultdateofacceptance" type="date" multivalued="false" stat="false" result="false" indexable="true" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
|
||||
<FIELD name="resultacceptanceyear" result="false" copy="true" indexable="true" multivalued="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)" stat="false" tokenizable="false"/>
|
||||
<FIELD name="resultauthor" copy="true" indexable="true" multivalued="true" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator" result="false"/>
|
||||
<FIELD name="authorid" indexable="true" result="false" stat="false" type="string_ci" multivalued="true" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
|
||||
<FIELD name="orcidtypevalue" xpath="//*[local-name()='entity']/*[local-name()='result']/creator" type="string_ci" result="false" value="string-join((./@*[local-name() = 'orcid' or local-name() = 'orcid_pending'], ./@*[local-name() = 'orcid' or local-name() = 'orcid_pending']/local-name()), '||' )" stat="false" multivalued="true" indexable="true"/>
|
||||
<FIELD result="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']" stat="false" indexable="true" name="resulthostingdatasource" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))"/>
|
||||
<FIELD name="resulthostingdatasourceid" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@id)" result="false" tokenizable="false" stat="false"/>
|
||||
<FIELD name="instancetypename" indexable="true" tokenizable="false" stat="false" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='instancetype']/@classname)"/>
|
||||
<FIELD name="resultdupid" indexable="true" stat="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier" result="false" tokenizable="false"/>
|
||||
<FIELD name="organizationdupid" stat="false" result="false" tokenizable="false" indexable="true" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/>
|
||||
<FIELD name="externalreflabel" stat="false" tokenizable="true" result="false" indexable="false" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
|
||||
<FIELD name="resultidentifier" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])" copy="true" type="string_ci" stat="false" indexable="true" result="false"/>
|
||||
<FIELD name="resultsource" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)" indexable="false" copy="true" stat="false"/>
|
||||
<FIELD name="eoscifguidelines" stat="false" indexable="true" tokenizable="false" result="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs non tokenizable for faceted search-->
|
||||
<FIELD name="fos" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])" indexable="true" stat="false" tokenizable="false" result="false"/>
|
||||
<FIELD name="sdg" tokenizable="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])" stat="false" result="false"/><!-- REL FIELDS -->
|
||||
<FIELD name="reldatasourcecompatibilityid" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)" indexable="true" stat="false" tokenizable="false"/>
|
||||
<FIELD name="relproject" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" result="false" xpath="//*[local-name()='entity']/*//rel/to[@type='project']" stat="false" tokenizable="false" indexable="true"/>
|
||||
<FIELD name="relprojectid" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])" indexable="true" tokenizable="false" result="false" stat="false"/>
|
||||
<FIELD name="relprojectcode" tokenizable="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)" stat="false" result="false"/>
|
||||
<FIELD name="relprojectname" stat="false" result="false" indexable="true" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)" tokenizable="false"/>
|
||||
<FIELD name="relprojecttitle" indexable="false" stat="false" copy="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
|
||||
<FIELD name="relcontracttypename" stat="false" indexable="false" copy="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
|
||||
<FIELD name="relorganizationcountryid" stat="false" result="false" indexable="true" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
|
||||
<FIELD name="relorganizationcountryname" result="false" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)" indexable="false" stat="false"/>
|
||||
<FIELD name="relorganizationid" result="false" indexable="true" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
|
||||
<FIELD name="relorganizationname" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)" result="false" stat="false" indexable="true" copy="true"/>
|
||||
<FIELD name="relorganizationshortname" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)" indexable="true" result="false" copy="true" stat="false"/>
|
||||
<FIELD name="relresulttype" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)" tokenizable="false" stat="false" result="false"/>
|
||||
<FIELD name="relclass" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)" result="false" indexable="true"/>
|
||||
<FIELD name="relfundinglevel0_id" stat="false" indexable="true" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0" tokenizable="false"/>
|
||||
<FIELD name="relfundinglevel0_name" indexable="true" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()" result="false" stat="false"/>
|
||||
<FIELD name="relfundinglevel1_id" indexable="true" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1"/>
|
||||
<FIELD name="relfundinglevel1_name" xpath="//*[local-name()='entity']//rel/funding/funding_level_1/@name/string()" result="false" indexable="true" stat="false" tokenizable="false"/>
|
||||
<FIELD name="relfundinglevel2_id" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2" indexable="true" tokenizable="false" stat="false"/>
|
||||
<FIELD name="relfundinglevel2_name" xpath="//*[local-name()='entity']//rel/funding/funding_level_2/@name/string()" tokenizable="false" result="false" stat="false" indexable="true"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD name="relfunder" indexable="true" tokenizable="false" stat="false" result="false" value="distinct-values(concat(@id, '||', @name, '||', @shortname))" xpath="//*[local-name()='entity']//rel/funding/funder"/>
|
||||
<FIELD name="relfunderid" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@id)" stat="false" tokenizable="false" result="false" indexable="true"/>
|
||||
<FIELD name="relfundershortname" indexable="true" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@shortname)" result="false" stat="false" tokenizable="false"/>
|
||||
<FIELD name="semrelid" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" indexable="true" result="false" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
|
||||
<FIELD name="dateofcollection" stat="false" type="date" indexable="true" result="false" multivalued="false" value="//header/*[local-name()='dateOfCollection']"/>
|
||||
<FIELD name="status" type="string_ci" tokenizable="false" stat="false" indexable="true" xpath="//header/*[local-name()='status']" result="false"/>
|
||||
<FIELD name="collectedfromdatasourceid" indexable="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)" tokenizable="false" stat="false"/>
|
||||
<FIELD name="collectedfromname" stat="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)" result="false" tokenizable="false" indexable="true"/>
|
||||
<FIELD name="originalid" result="false" indexable="true" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']" tokenizable="false" stat="false"/>
|
||||
<FIELD name="pid" type="string_ci" tokenizable="false" indexable="true" result="false" xpath="//*[local-name()='entity']/*/pid/text()" stat="false"/>
|
||||
<FIELD name="pidclassid" result="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)" stat="false" indexable="true" tokenizable="false"/>
|
||||
<FIELD name="deletedbyinference" indexable="true" stat="false" xpath="//*[local-name()='entity']//datainfo/deletedbyinference" result="false" tokenizable="false"/>
|
||||
<FIELD name="provenanceactionclassid" result="false" stat="false" xpath="//*[local-name()='entity']//datainfo/provenanceaction/@classid" tokenizable="false" indexable="true"/>
|
||||
<FIELD name="contextid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@id)" tokenizable="false" indexable="true" stat="false" result="false"/>
|
||||
<FIELD name="contextname" indexable="true" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@label)"/><!-- Need special fields for community (research initiative) context in order to exclude funders from the context browse -->
|
||||
<FIELD name="community" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']" value="distinct-values(concat(@id, '||', @label))" stat="false" indexable="true"/>
|
||||
<FIELD name="communityid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@id)" stat="false" tokenizable="false" result="false" indexable="true"/>
|
||||
<FIELD name="categoryid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@id)" stat="false" indexable="true" result="false" tokenizable="false"/>
|
||||
<FIELD name="conceptname" stat="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)" result="false" tokenizable="false"/><!-- new index field for country info from different xpaths for any type of entity -->
|
||||
<FIELD name="country" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)" tokenizable="false" indexable="true"/>
|
||||
<FIELD name="oafentity" indexable="false" result="true" tokenizable="false" stat="false" xpath="//*[local-name() = 'entity']"/>
|
||||
</FIELDS>
|
||||
</LAYOUT>
|
Loading…
Reference in New Issue