[stats wf] indicators across stats dbs & updates in the org ids #248

Closed
dimitris.pierrakos wants to merge 1742 commits from beta into beta2master_sept_2022
32 changed files with 357 additions and 1128 deletions
Showing only changes of commit d4fc62c2f6 - Show all commits

View File

@ -107,7 +107,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=2560 --conf spark.sql.shuffle.partitions=7000
</spark-opts> </spark-opts>
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/dataset</arg> <arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
@ -159,7 +159,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=2560 --conf spark.sql.shuffle.partitions=7000
</spark-opts> </spark-opts>
<arg>--inputGraphTablePath</arg><arg>${workingDir}/dataset</arg> <arg>--inputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>

View File

@ -107,7 +107,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=5000 --conf spark.sql.shuffle.partitions=7000
</spark-opts> </spark-opts>
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg> <arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
@ -159,7 +159,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=5000 --conf spark.sql.shuffle.partitions=7000
</spark-opts> </spark-opts>
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg> <arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>

View File

@ -99,7 +99,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=5000 --conf spark.sql.shuffle.partitions=10000
</spark-opts> </spark-opts>
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg> <arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>

View File

@ -22,6 +22,7 @@ public class Constants {
public static final String DOI_CLASSNAME = "Digital Object Identifier"; public static final String DOI_CLASSNAME = "Digital Object Identifier";
public static final String DEFAULT_DELIMITER = ","; public static final String DEFAULT_DELIMITER = ",";
public static final String DEFAULT_FOS_DELIMITER = "\t";
public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_DATA_INFO_TYPE = "update";
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
@ -61,7 +62,7 @@ public class Constants {
public static Subject getSubject(String sbj, String classid, String classname, public static Subject getSubject(String sbj, String classid, String classname,
String diqualifierclassid) { String diqualifierclassid) {
if (sbj.equals(NULL)) if (sbj == null || sbj.equals(NULL))
return null; return null;
Subject s = new Subject(); Subject s = new Subject();
s.setValue(sbj); s.setValue(sbj);

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities; package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_FOS_DELIMITER;
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -9,8 +9,6 @@ import java.io.Serializable;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*; import org.apache.spark.sql.*;
@ -49,7 +47,7 @@ public class GetFOSSparkJob implements Serializable {
final String delimiter = Optional final String delimiter = Optional
.ofNullable(parser.get("delimiter")) .ofNullable(parser.get("delimiter"))
.orElse(DEFAULT_DELIMITER); .orElse(DEFAULT_FOS_DELIMITER);
SparkConf sconf = new SparkConf(); SparkConf sconf = new SparkConf();
runWithSparkSession( runWithSparkSession(

View File

@ -86,7 +86,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from bip finder!</name> <name>Produces the unresolved from BIP! Finder</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -135,7 +135,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from FOS!</name> <name>Produces the unresolved from FOS</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -185,7 +185,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Produces the unresolved from FOS!</name> <name>Produces the unresolved from FOS</name>
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob</class> <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar> <jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>

View File

@ -78,16 +78,6 @@ object DataciteModelConstants {
OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME)
val subRelTypeMapping: Map[String, OAFRelations] = Map( val subRelTypeMapping: Map[String, OAFRelations] = Map(
ModelConstants.REFERENCES -> OAFRelations(
ModelConstants.REFERENCES,
ModelConstants.IS_REFERENCED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_REFERENCED_BY -> OAFRelations(
ModelConstants.IS_REFERENCED_BY,
ModelConstants.REFERENCES,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations( ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(
ModelConstants.IS_SUPPLEMENTED_BY, ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.IS_SUPPLEMENT_TO, ModelConstants.IS_SUPPLEMENT_TO,
@ -163,16 +153,6 @@ object DataciteModelConstants {
ModelConstants.IS_SOURCE_OF, ModelConstants.IS_SOURCE_OF,
ModelConstants.VERSION ModelConstants.VERSION
), ),
ModelConstants.CITES -> OAFRelations(
ModelConstants.CITES,
ModelConstants.IS_CITED_BY,
ModelConstants.CITATION
),
ModelConstants.IS_CITED_BY -> OAFRelations(
ModelConstants.IS_CITED_BY,
ModelConstants.CITES,
ModelConstants.CITATION
),
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations( ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(
ModelConstants.IS_VARIANT_FORM_OF, ModelConstants.IS_VARIANT_FORM_OF,
ModelConstants.IS_DERIVED_FROM, ModelConstants.IS_DERIVED_FROM,

View File

@ -645,7 +645,7 @@ object DataciteToOAFTransformation {
id: String, id: String,
date: String date: String
): List[Relation] = { ): List[Relation] = {
rels val bidirectionalRels: List[Relation] = rels
.filter(r => .filter(r =>
subRelTypeMapping subRelTypeMapping
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
@ -653,27 +653,49 @@ object DataciteToOAFTransformation {
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
) )
.map(r => { .map(r => {
val subRelType = subRelTypeMapping(r.relationType).relType
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, subRelType, r.relationType, date)
})
val citationRels: List[Relation] = rels
.filter(r =>
(r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) &&
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
)
.map(r => {
r.relationType match {
case ModelConstants.CITES | ModelConstants.REFERENCES =>
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, ModelConstants.CITATION, ModelConstants.CITES, date)
case ModelConstants.IS_CITED_BY | ModelConstants.IS_REFERENCED_BY =>
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(source, id, ModelConstants.CITATION, ModelConstants.CITES, date)
}
})
citationRels ::: bidirectionalRels
}
def relation(source: String, target: String, subRelType: String, relClass: String, date: String): Relation = {
val rel = new Relation val rel = new Relation
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.setDataInfo(dataInfo) rel.setDataInfo(dataInfo)
val subRelType = subRelTypeMapping(r.relationType).relType
rel.setRelType(REL_TYPE_VALUE) rel.setRelType(REL_TYPE_VALUE)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(r.relationType) rel.setRelClass(relClass)
val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava) rel.setProperties(List(dateProps).asJava)
rel.setSource(id) rel.setSource(source)
rel.setTarget( rel.setTarget(target)
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
)
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel.getCollectedfrom.asScala.map(c => c.getValue).toList
rel rel
})
} }
def generateDSId(input: String): String = { def generateDSId(input: String): String = {

View File

@ -0,0 +1,99 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
/**
* @author miriam.baglioni
* @Date 13/02/23
*/
public class GetFosTest {
private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
private static Path workingDir;
private static SparkSession spark;
private static LocalFileSystem fs;
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
fs = FileSystem.getLocal(new Configuration());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(ProduceTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void test3() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv")
.getPath();
final String outputPath = workingDir.toString() + "/fos.json";
GetFOSSparkJob
.main(
new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(),
"--sourcePath", sourcePath,
"-outputPath", outputPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<FOSDataModel> tmp = sc
.textFile(outputPath)
.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
}
}

View File

@ -0,0 +1,40 @@
doi level1 level2 level3
10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
1 doi level1 level2 level3
2 10.1080/09638237.2018.1466033 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
3 10.1016/j.dsi.2015.10.003 03 medical and health sciences 0301 basic medicine 030105 genetics & heredity
4 10.1007/s10072-017-2914-9 03 medical and health sciences 0302 clinical medicine 030217 neurology & neurosurgery
5 10.1016/j.bspc.2021.102726 02 engineering and technology 0206 medical engineering 020601 biomedical engineering
6 10.1177/0306312706069439 06 humanities and the arts 0601 history and archaeology 060101 anthropology
7 10.1016/j.jacep.2016.05.010 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
8 10.1111/anae.13418 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
9 10.1142/s1793744210000168 01 natural sciences 0103 physical sciences 010306 general physics
10 10.1016/j.jadohealth.2019.04.029 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
11 10.1109/icais50930.2021.9395847 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
12 10.1145/3154837 01 natural sciences 0101 mathematics 010102 general mathematics
13 10.1038/srep38130 03 medical and health sciences 0301 basic medicine 030106 microbiology
14 10.1007/s13369-017-2871-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020201 artificial intelligence & image processing
15 10.1063/1.4964718 03 medical and health sciences 0301 basic medicine 030104 developmental biology
16 10.1007/s12603-019-1276-9 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
17 10.1002/cam4.1463 03 medical and health sciences 0301 basic medicine 030104 developmental biology
18 10.1164/rccm.201611-2290ed 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
19 10.1088/1757-899x/225/1/012132 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
20 10.1117/1.jmm.15.1.015501 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
21 10.1088/1361-6587/ab569d 01 natural sciences 0103 physical sciences 010303 astronomy & astrophysics
22 10.1016/j.rser.2015.11.092 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy
23 10.1016/j.jhydrol.2013.06.035 01 natural sciences 0105 earth and related environmental sciences 010504 meteorology & atmospheric sciences
24 10.1111/php.12892 03 medical and health sciences 0301 basic medicine 030104 developmental biology
25 10.1088/0264-9381/27/10/105001 01 natural sciences 0103 physical sciences 010308 nuclear & particles physics
26 10.1016/j.matchemphys.2018.02.039 02 engineering and technology 0210 nano-technology 021001 nanoscience & nanotechnology
27 10.1098/rsos.160993 03 medical and health sciences 0301 basic medicine 030104 developmental biology
28 10.1016/j.rinp.2017.07.054 02 engineering and technology 0209 industrial biotechnology 020901 industrial engineering & automation
29 10.1111/eip.12348 03 medical and health sciences 0302 clinical medicine 030227 psychiatry
30 10.20965/jrm.2016.p0371 02 engineering and technology 0201 civil engineering 020101 civil engineering
31 10.2337/dci19-0036 03 medical and health sciences 0302 clinical medicine 030212 general & internal medicine
32 10.1155/2018/7692913 01 natural sciences 0104 chemical sciences 010404 medicinal & biomolecular chemistry
33 10.1117/12.2262306 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020206 networking & telecommunications
34 10.1021/acs.jpcb.7b01885 01 natural sciences 0104 chemical sciences 010405 organic chemistry
35 10.1177/0033294117711131 05 social sciences 0502 economics and business 050203 business & management
36 10.1016/j.jrurstud.2017.08.019 05 social sciences 0502 economics and business 050203 business & management
37 10.1111/febs.15296 03 medical and health sciences 0301 basic medicine 030104 developmental biology
38 10.3923/jeasci.2017.6922.6927 05 social sciences 0505 law 050501 criminology
39 10.1007/s10854-017-6376-x 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020208 electrical & electronic engineering
40 10.3390/app10176095 02 engineering and technology 0202 electrical engineering, electronic engineering, information engineering 020209 energy

View File

@ -397,17 +397,7 @@ case object Crossref2Oaf {
from.setDataInfo(source.getDataInfo) from.setDataInfo(source.getDataInfo)
from.setLastupdatetimestamp(source.getLastupdatetimestamp) from.setLastupdatetimestamp(source.getLastupdatetimestamp)
val to = new Relation List(from)
to.setTarget(source.getId)
to.setSource(targetId)
to.setRelType(ModelConstants.RESULT_RESULT)
to.setRelClass(ModelConstants.IS_CITED_BY)
to.setSubRelType(ModelConstants.CITATION)
to.setCollectedfrom(source.getCollectedfrom)
to.setDataInfo(source.getDataInfo)
to.setLastupdatetimestamp(source.getLastupdatetimestamp)
List(from, to)
} }
def generateCitationRelations(dois: List[String], result: Result): List[Relation] = { def generateCitationRelations(dois: List[String], result: Result): List[Relation] = {
@ -505,6 +495,13 @@ case object Crossref2Oaf {
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
case "10.13039/100020031" =>
val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a)
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a) case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
@ -541,21 +538,21 @@ case object Crossref2Oaf {
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward) generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
//FCT //FCT
case "10.13039/501100001871" => case "10.13039/501100001871" =>
generateSimpleRelationFromAward(funder, "fct_________", extractECAward) generateSimpleRelationFromAward(funder, "fct_________", a => a)
//NHMRC //NHMRC
case "10.13039/501100000925" => case "10.13039/501100000925" =>
generateSimpleRelationFromAward(funder, "mhmrc_______", extractECAward) generateSimpleRelationFromAward(funder, "nhmrc_______", a => a)
//NIH //NIH
case "10.13039/100000002" => case "10.13039/100000002" =>
generateSimpleRelationFromAward(funder, "nih_________", extractECAward) generateSimpleRelationFromAward(funder, "nih_________", a => a)
//NWO //NWO
case "10.13039/501100003246" => case "10.13039/501100003246" =>
generateSimpleRelationFromAward(funder, "nwo_________", extractECAward) generateSimpleRelationFromAward(funder, "nwo_________", a => a)
//UKRI //UKRI
case "10.13039/100014013" | "10.13039/501100000267" | "10.13039/501100000268" | "10.13039/501100000269" | case "10.13039/100014013" | "10.13039/501100000267" | "10.13039/501100000268" | "10.13039/501100000269" |
"10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" | "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
"10.13039/501100013589" | "10.13039/501100000271" => "10.13039/501100013589" | "10.13039/501100000271" =>
generateSimpleRelationFromAward(funder, "nwo_________", extractECAward) generateSimpleRelationFromAward(funder, "ukri________", a => a)
case _ => logger.debug("no match for " + funder.DOI.get) case _ => logger.debug("no match for " + funder.DOI.get)
@ -568,10 +565,11 @@ case object Crossref2Oaf {
case "European Union's" => case "European Union's" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
case "The French National Research Agency (ANR)" | "The French National Research Agency" => case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
generateSimpleRelationFromAward(funder, "anr_________", a => a) generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" => case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) generateSimpleRelationFromAward(funder, "conicytf____", a => a)
case "Wellcome Trust Masters Fellowship" => case "Wellcome Trust Masters Fellowship" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a) generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")

View File

@ -6,11 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Arrays; import java.util.*;
import java.util.Date;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -24,6 +20,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
@ -110,6 +107,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
.read() .read()
.parquet(validPaths) .parquet(validPaths)
.map((MapFunction<Row, String>) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .map((MapFunction<Row, String>) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING())
.filter((FilterFunction<String>) Objects::nonNull)
.toJavaRDD() .toJavaRDD()
.mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml)))
// .coalesce(1) // .coalesce(1)
@ -135,13 +133,14 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
final Document doc = reader.read(new StringReader(xml)); final Document doc = reader.read(new StringReader(xml));
final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']");
head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id"));
head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate);
head.addElement(new QName("dateOfTransformation", DRI_NS_PREFIX)).addText(tranDate); head.addElement(new QName("dateOfTransformation", DRI_NS_PREFIX)).addText(tranDate);
return doc.asXML(); return doc.asXML();
} catch (final Exception e) { } catch (final Exception e) {
log.error("Error patching record: " + xml); log.error("Error patching record: " + xml);
throw new RuntimeException("Error patching record: " + xml, e); return null;
} }
} }

View File

@ -131,7 +131,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
// that is the hdfs path basePath/MDSTOREID/timestamp is missing // that is the hdfs path basePath/MDSTOREID/timestamp is missing
// So we have to synch it // So we have to synch it
if (!hdfsMDStores.containsKey(currentMDStore.getMdstore())) { if (!hdfsMDStores.containsKey(currentMDStore.getMdstore())) {
log.info("Adding store " + currentMDStore.getMdstore()); log.info("Adding store {}", currentMDStore.getMdstore());
try { try {
synchMDStoreIntoHDFS( synchMDStoreIntoHDFS(
mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb, mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb,
@ -145,14 +145,14 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
// basePath/MDSTOREID/timestamp but the timestamp on hdfs is older that the // basePath/MDSTOREID/timestamp but the timestamp on hdfs is older that the
// new one in mongo so we have to synch the new mdstore and delete the old one // new one in mongo so we have to synch the new mdstore and delete the old one
if (currentMDStore.getLatestTimestamp() > current.getLatestTimestamp()) { if (currentMDStore.getLatestTimestamp() > current.getLatestTimestamp()) {
log.info("Updating MDStore " + currentMDStore.getMdstore()); log.info("Updating MDStore {}", currentMDStore.getMdstore());
final String mdstoreDir = createMDStoreDir(hdfsPath, currentMDStore.getMdstore()); final String mdstoreDir = createMDStoreDir(hdfsPath, currentMDStore.getMdstore());
final String rmPath = createMDStoreDir(mdstoreDir, current.getLatestTimestamp().toString()); final String rmPath = createMDStoreDir(mdstoreDir, current.getLatestTimestamp().toString());
try { try {
synchMDStoreIntoHDFS( synchMDStoreIntoHDFS(
mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb, mdFormat, mdLayout, mdInterpretation, hdfsPath, fileSystem, mongoBaseUrl, mongoDb,
currentMDStore); currentMDStore);
log.info("deleting " + rmPath); log.info("deleting {}", rmPath);
// DELETE THE OLD MDSTORE // DELETE THE OLD MDSTORE
fileSystem.delete(new Path(rmPath), true); fileSystem.delete(new Path(rmPath), true);
} catch (IOException e) { } catch (IOException e) {

View File

@ -214,16 +214,14 @@
<action name="ImportODF_claims"> <action name="ImportODF_claims">
<java> <java>
<prepare>
<delete path="${contentPath}/odf_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class> <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${contentPath}/odf_claims</arg> <arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg> <arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg> <arg>--mongoDb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>ODF</arg> <arg>--mdFormat</arg><arg>ODF</arg>
<arg>-l</arg><arg>store</arg> <arg>--mdLayout</arg><arg>store</arg>
<arg>-i</arg><arg>claim</arg> <arg>--mdInterpretation</arg><arg>claim</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
</java> </java>
<ok to="reuse_oaf_claims"/> <ok to="reuse_oaf_claims"/>
<error to="Kill"/> <error to="Kill"/>
@ -239,16 +237,14 @@
<action name="ImportOAF_claims"> <action name="ImportOAF_claims">
<java> <java>
<prepare>
<delete path="${contentPath}/oaf_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class> <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${contentPath}/oaf_claims</arg> <arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg> <arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg> <arg>--mongoDb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>OAF</arg> <arg>--mdFormat</arg><arg>OAF</arg>
<arg>-l</arg><arg>store</arg> <arg>--mdLayout</arg><arg>store</arg>
<arg>-i</arg><arg>claim</arg> <arg>--mdInterpretation</arg><arg>claim</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
</java> </java>
<ok to="wait_import"/> <ok to="wait_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -291,11 +287,8 @@
<action name="ImportODF"> <action name="ImportODF">
<java> <java>
<prepare>
<delete path="${contentPath}/odf_records"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class> <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records</arg> <arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg> <arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
<arg>--mongoDb</arg><arg>${mongoDb}</arg> <arg>--mongoDb</arg><arg>${mongoDb}</arg>
<arg>--mdFormat</arg><arg>ODF</arg> <arg>--mdFormat</arg><arg>ODF</arg>
@ -317,11 +310,8 @@
<action name="ImportOAF"> <action name="ImportOAF">
<java> <java>
<prepare>
<delete path="${contentPath}/oaf_records"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class> <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records</arg> <arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg> <arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
<arg>--mongoDb</arg><arg>${mongoDb}</arg> <arg>--mongoDb</arg><arg>${mongoDb}</arg>
<arg>--mdFormat</arg><arg>OAF</arg> <arg>--mdFormat</arg><arg>OAF</arg>
@ -335,11 +325,8 @@
<action name="ImportOAF_invisible"> <action name="ImportOAF_invisible">
<java> <java>
<prepare>
<delete path="${contentPath}/oaf_records_invisible"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class> <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records_invisible</arg> <arg>--hdfsPath</arg><arg>${contentPath}/mdstore</arg>
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg> <arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
<arg>--mongoDb</arg><arg>${mongoDb}</arg> <arg>--mongoDb</arg><arg>${mongoDb}</arg>
<arg>--mdFormat</arg><arg>OAF</arg> <arg>--mdFormat</arg><arg>OAF</arg>
@ -375,7 +362,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records_hdfs</arg> <arg>--hdfsPath</arg><arg>${contentPath}/odf_mdstore_hdfs</arg>
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg> <arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
<arg>--mdFormat</arg><arg>ODF</arg> <arg>--mdFormat</arg><arg>ODF</arg>
<arg>--mdLayout</arg><arg>store</arg> <arg>--mdLayout</arg><arg>store</arg>
@ -409,7 +396,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records_hdfs</arg> <arg>--hdfsPath</arg><arg>${contentPath}/oaf_mdstore_hdfs</arg>
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg> <arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
<arg>--mdFormat</arg><arg>OAF</arg> <arg>--mdFormat</arg><arg>OAF</arg>
<arg>--mdLayout</arg><arg>store</arg> <arg>--mdLayout</arg><arg>store</arg>
@ -542,7 +529,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs,${contentPath}/oaf_records_invisible</arg> <arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/*</arg>
<arg>--invalidPath</arg><arg>${workingDir}/invalid_records</arg> <arg>--invalidPath</arg><arg>${workingDir}/invalid_records</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
@ -566,7 +553,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs,${contentPath}/oaf_records_invisible</arg> <arg>--sourcePaths</arg><arg>${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/*</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg> <arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--shouldHashId</arg><arg>${shouldHashId}</arg> <arg>--shouldHashId</arg><arg>${shouldHashId}</arg>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,162 +0,0 @@
<workflow-app name="import Claims as Graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>reuseContent</name>
<value>false</value>
<description>should import content from the aggregator or reuse a previous version</description>
</property>
<property>
<name>contentPath</name>
<description>path location to store (or reuse) content from the aggregator</description>
</property>
<property>
<name>postgresURL</name>
<description>the postgres URL to access to the database</description>
</property>
<property>
<name>postgresUser</name>
<description>the user postgres</description>
</property>
<property>
<name>postgresPassword</name>
<description>the password postgres</description>
</property>
<property>
<name>dbSchema</name>
<value>beta</value>
<description>the database schema according to the D-Net infrastructure (beta or production)</description>
</property>
<property>
<name>mongoURL</name>
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
</property>
<property>
<name>mongoDb</name>
<description>mongo database</description>
</property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property>
<name>nsPrefixBlacklist</name>
<value></value>
<description>a blacklist of nsprefixes (comma separeted)</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="ImportDB_claims"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ImportDB_claims">
<java>
<prepare>
<delete path="${contentPath}/db_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--action</arg><arg>claims</arg>
<arg>--dbschema</arg><arg>${dbSchema}</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
<ok to="ImportODF_claims"/>
<error to="Kill"/>
</action>
<action name="ImportODF_claims">
<java>
<prepare>
<delete path="${contentPath}/odf_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${contentPath}/odf_claims</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>ODF</arg>
<arg>-l</arg><arg>store</arg>
<arg>-i</arg><arg>claim</arg>
</java>
<ok to="ImportOAF_claims"/>
<error to="Kill"/>
</action>
<action name="ImportOAF_claims">
<java>
<prepare>
<delete path="${contentPath}/oaf_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${contentPath}/oaf_claims</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>OAF</arg>
<arg>-l</arg><arg>store</arg>
<arg>-i</arg><arg>claim</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,195 +0,0 @@
<workflow-app name="import DB entities" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>contentPath</name>
<description>path location to store (or reuse) content from the aggregator</description>
</property>
<property>
<name>postgresURL</name>
<description>the postgres URL to access to the database</description>
</property>
<property>
<name>postgresUser</name>
<description>the user postgres</description>
</property>
<property>
<name>postgresPassword</name>
<description>the password postgres</description>
</property>
<property>
<name>dbSchema</name>
<value>beta</value>
<description>the database schema according to the D-Net infrastructure (beta or production)</description>
</property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property>
<name>nsPrefixBlacklist</name>
<value></value>
<description>a blacklist of nsprefixes (comma separeted)</description>
</property>
<property>
<name>reuseContent</name>
<value>false</value>
<description>reuse content in the aggregator database</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reuse_db"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<decision name="reuse_db">
<switch>
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
<default to="ImportDB"/>
</switch>
</decision>
<action name="ImportDB">
<java>
<prepare>
<delete path="${contentPath}/db_records"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/db_records</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--action</arg><arg>openaire</arg>
<arg>--dbschema</arg><arg>${dbSchema}</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
<ok to="ImportDB_claims"/>
<error to="Kill"/>
</action>
<action name="ImportDB_claims">
<java>
<prepare>
<delete path="${contentPath}/db_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--dbschema</arg><arg>${dbSchema}</arg>
<arg>--action</arg><arg>claims</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
<ok to="GenerateEntities"/>
<error to="Kill"/>
</action>
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--shouldHashId</arg><arg>true</arg>
</spark>
<ok to="GenerateGraph"/>
<error to="Kill"/>
</action>
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,157 +0,0 @@
<workflow-app name="Test Import of Hdfs Stores" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>graphOutputPath</name>
<description>the target path to store raw graph</description>
</property>
<property>
<name>contentPath</name>
<description>path location to store (or reuse) content from the aggregator</description>
</property>
<property>
<name>mdstoreManagerUrl</name>
<description>the address of the Mdstore Manager</description>
</property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="ImportODF_hdfs"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ImportODF_hdfs">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>ImportODF_hdfs</name>
<class>eu.dnetlib.dhp.oa.graph.raw.MigrateHdfsMdstoresApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--hdfsPath</arg><arg>${contentPath}/odf_records_hdfs</arg>
<arg>--mdstoreManagerUrl</arg><arg>${mdstoreManagerUrl}</arg>
<arg>--mdFormat</arg><arg>ODF</arg>
<arg>--mdLayout</arg><arg>store</arg>
<arg>--mdInterpretation</arg><arg>cleaned</arg>
</spark>
<ok to="GenerateEntities"/>
<error to="Kill"/>
</action>
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/odf_records_hdfs</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--shouldHashId</arg><arg>${shouldHashId}</arg>
</spark>
<ok to="GenerateGraph"/>
<error to="Kill"/>
</action>
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
<arg>--graphRawPath</arg><arg>${workingDir}/graph_raw</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,67 +0,0 @@
<workflow-app name="import regular entities as Graph (step 1)" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>migrationPathStep1</name>
<description>the base path to store hdfs file</description>
</property>
<property>
<name>mongoURL</name>
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
</property>
<property>
<name>mongoDb</name>
<description>mongo database</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${migrationPathStep1}'/>
<mkdir path='${migrationPathStep1}'/>
</fs>
<ok to="ImportODF"/>
<error to="Kill"/>
</action>
<action name="ImportODF">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${migrationPathStep1}</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>ODF</arg>
<arg>-l</arg><arg>store</arg>
<arg>-i</arg><arg>cleaned</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
</java>
<ok to="ImportOAF"/>
<error to="Kill"/>
</action>
<action name="ImportOAF">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${migrationPathStep1}</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
<arg>-mongodb</arg><arg>${mongoDb}</arg>
<arg>-f</arg><arg>OAF</arg>
<arg>-l</arg><arg>store</arg>
<arg>-i</arg><arg>cleaned</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,65 +0,0 @@
<workflow-app name="import regular entities as Graph (step 2)" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>migrationPathStep1</name>
<description>the base path to store hdfs file</description>
</property>
<property>
<name>migrationPathStep2</name>
<description>the temporary path to store entities before dispatching</description>
</property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="ResetEntities"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetEntities">
<fs>
<delete path='${migrationPathStep2}'/>
<mkdir path='${migrationPathStep2}'/>
</fs>
<ok to="GenerateEntities"/>
<error to="Kill"/>
</action>
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
<arg>-mt</arg> <arg>yarn-cluster</arg>
<arg>-s</arg><arg>${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records</arg>
<arg>-t</arg><arg>${migrationPathStep2}/all_entities</arg>
<arg>--islookup</arg><arg>${isLookupUrl}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,18 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -1,60 +0,0 @@
<workflow-app name="import regular entities as Graph (step 3)" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>migrationPathStep2</name>
<description>the temporary path to store entities before dispatching</description>
</property>
<property>
<name>migrationPathStep3</name>
<description>the graph Raw base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="ResetGraph"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetGraph">
<fs>
<delete path='${migrationPathStep3}'/>
<mkdir path='${migrationPathStep3}'/>
</fs>
<ok to="GenerateGraph"/>
<error to="Kill"/>
</action>
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
<arg>-mt</arg> <arg>yarn-cluster</arg>
<arg>-s</arg><arg>${migrationPathStep2}/all_entities</arg>
<arg>-g</arg><arg>${migrationPathStep3}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -151,7 +151,7 @@ public class XmlIndexingJob {
.sequenceFile(inputPath, Text.class, Text.class) .sequenceFile(inputPath, Text.class, Text.class)
.map(t -> t._2().toString()) .map(t -> t._2().toString())
.map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s)) .map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s))
.map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s)); .map(s -> new StreamingInputDocumentFactory().parseDocument(s));
switch (outputFormat) { switch (outputFormat) {
case SOLR: case SOLR:

View File

@ -36,10 +36,6 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_FIELD_PREFIX = "__"; private static final String INDEX_FIELD_PREFIX = "__";
private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion";
private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid";
private static final String RESULT = "result"; private static final String RESULT = "result";
private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT; private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;
@ -65,20 +61,13 @@ public class StreamingInputDocumentFactory {
private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
.withInitial(XMLEventFactory::newInstance); .withInitial(XMLEventFactory::newInstance);
private final String version;
private final String dsId;
private String resultName = DEFAULTDNETRESULT; private String resultName = DEFAULTDNETRESULT;
public StreamingInputDocumentFactory(final String version, final String dsId) { public StreamingInputDocumentFactory() {
this(version, dsId, DEFAULTDNETRESULT); this(DEFAULTDNETRESULT);
} }
public StreamingInputDocumentFactory( public StreamingInputDocumentFactory(final String resultName) {
final String version, final String dsId, final String resultName) {
this.version = version;
this.dsId = dsId;
this.resultName = resultName; this.resultName = resultName;
} }
@ -111,14 +100,6 @@ public class StreamingInputDocumentFactory {
} }
} }
if (version != null) {
indexDocument.addField(DS_VERSION, version);
}
if (dsId != null) {
indexDocument.addField(DS_ID, dsId);
}
if (!indexDocument.containsKey(INDEX_RECORD_ID)) { if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
throw new IllegalStateException("cannot extract record ID from: " + inputDocument); throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
} }

View File

@ -79,8 +79,7 @@ public class EOSCFuture_Test {
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML);
.parseDocument(indexRecordXML);
final String xmlDoc = ClientUtils.toXML(solrDoc); final String xmlDoc = ClientUtils.toXML(solrDoc);

View File

@ -39,9 +39,6 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
*/ */
public class IndexRecordTransformerTest { public class IndexRecordTransformerTest {
public static final String VERSION = "2021-04-15T10:05:53Z";
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
private ContextMapper contextMapper; private ContextMapper contextMapper;
@BeforeEach @BeforeEach
@ -197,8 +194,7 @@ public class IndexRecordTransformerTest {
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML);
.parseDocument(indexRecordXML);
final String xmlDoc = ClientUtils.toXML(solrDoc); final String xmlDoc = ClientUtils.toXML(solrDoc);

View File

@ -115,16 +115,8 @@ public class SolrConfigTest extends SolrTest {
for (SolrDocument doc : rsp.getResults()) { for (SolrDocument doc : rsp.getResults()) {
System.out System.out
.println( .println(
doc.get("score") + "\t" +
doc.get("__indexrecordidentifier") + "\t" + doc.get("__indexrecordidentifier") + "\t" +
doc.get("resultidentifier") + "\t" + doc.get("__result") + "\t");
doc.get("resultauthor") + "\t" +
doc.get("resultacceptanceyear") + "\t" +
doc.get("resultsubject") + "\t" +
doc.get("resulttitle") + "\t" +
doc.get("relprojectname") + "\t" +
doc.get("resultdescription") + "\t" +
doc.get("__all") + "\t");
} }
} }
} }

View File

@ -1,165 +1,116 @@
<LAYOUT name="index"> <LAYOUT name="index">
<FIELDS> <FIELDS>
<FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/> <FIELD name="oaftype" indexable="true" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])" tokenizable="false" result="false" stat="false"/>
<FIELD indexable="true" name="oaftype" result="false" stat="false" tokenizable="false" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])"/> <FIELD name="objidentifier" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier" result="false" indexable="true"/><!-- DATASOURCE FIELDS -->
<FIELD indexable="true" name="objIdentifier" result="false" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier"/><!-- DATASOURCE FIELDS --> <FIELD name="datasourceofficialname" stat="false" result="false" copy="true" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
<FIELD copy="true" indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/> <FIELD name="datasourceenglishname" indexable="true" copy="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
<FIELD copy="true" indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/> <FIELD name="datasourceoddescription" stat="false" result="false" indexable="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
<FIELD copy="true" indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/> <FIELD name="datasourceodsubjects" result="false" stat="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects" indexable="true" tokenizable="false"/>
<FIELD copy="true" indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/> <FIELD name="datasourceodlanguages" indexable="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages" tokenizable="false"/>
<FIELD indexable="true" name="datasourceodlanguages" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages"/> <FIELD name="datasourceodcontenttypes" result="false" stat="false" indexable="true" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/>
<FIELD indexable="true" name="datasourceodcontenttypes" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/> <FIELD name="datasourcetypename" tokenizable="false" indexable="true" result="false" multivalued="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/>
<FIELD indexable="true" multivalued="false" name="datasourcetypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/> <FIELD name="datasourcetypeuiid" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classid" result="false" indexable="true" multivalued="false" tokenizable="false" stat="false"/>
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classid"/> <FIELD name="datasourcetypeuiname" result="false" indexable="true" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname" tokenizable="false" multivalued="false"/>
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname"/> <FIELD name="datasourcecompatibilityid" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid" multivalued="false" stat="false" indexable="true" tokenizable="false"/>
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/> <FIELD name="datasourcecompatibilityname" indexable="true" multivalued="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname" stat="false" tokenizable="false" result="false"/>
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/> <FIELD name="datasourcesubject" type="ngramtext" multivalued="true" copy="true" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects" indexable="true" stat="false"/><!-- datasource fields for EOSC -->
<FIELD copy="true" indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/> <FIELD name="datasourcejurisdiction" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname" indexable="true" tokenizable="false" result="false" stat="false"/>
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/><!-- datasource fields for EOSC --> <FIELD name="datasourcethematic" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic" result="false" tokenizable="false" indexable="true"/>
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname"/> <FIELD name="eosctype" indexable="true" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/>
<FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/> <FIELD name="eoscdatasourcetype" result="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname" tokenizable="false" stat="false"/><!-- ORGANIZATION FIELDS -->
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy/@classname"/> <FIELD name="organizationlegalshortname" copy="true" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)" type="ngramtext" result="false" stat="false"/>
<FIELD indexable="true" name="eosctype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/> <FIELD name="organizationlegalname" indexable="true" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)" type="ngramtext" copy="true" result="false"/>
<FIELD indexable="true" name="eoscdatasourcetype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname"/><!-- ORGANIZATION FIELDS --><!-- ORGANIZATION FIELDS --><!-- ORGANIZATION FIELDS --> <FIELD name="organizationalternativenames" stat="false" copy="true" type="ngramtext" result="false" indexable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/><!-- PROJECT FIELDS -->
<FIELD copy="true" indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/> <FIELD name="projectcode" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code" copy="true" indexable="false" stat="false" type="ngramtext"/>
<FIELD copy="true" indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/> <FIELD name="projectcode_nt" indexable="true" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code" stat="false"/>
<FIELD copy="true" indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/> <FIELD name="projectacronym" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym" copy="true" result="false" stat="false" type="ngramtext"/>
<FIELD indexable="true" name="organizationeclegalbody" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalbody"/> <FIELD name="projecttitle" copy="true" stat="false" result="false" indexable="true" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
<FIELD indexable="true" name="organizationeclegalperson" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalperson"/> <FIELD name="projectstartdate" indexable="true" type="date" result="false" multivalued="false" value="//*[local-name()='entity']/*[local-name()='project']/startdate" stat="false"/>
<FIELD indexable="true" name="organizationecnonprofit" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnonprofit"/> <FIELD name="projectstartyear" indexable="true" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)" tokenizable="false" multivalued="false" result="false" stat="false"/>
<FIELD indexable="true" name="organizationecresearchorganization" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecresearchorganization"/> <FIELD name="projectenddate" stat="false" multivalued="false" result="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate" indexable="true"/>
<FIELD indexable="true" name="organizationecinternationalorganizationeurinterests" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecinternationalorganizationeurinterests"/> <FIELD name="projectendyear" result="false" tokenizable="false" multivalued="false" indexable="true" stat="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/>
<FIELD indexable="true" name="organizationecinternationalorganization" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecinternationalorganization"/> <FIELD name="projectcallidentifier" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier" stat="false" tokenizable="false" multivalued="false" result="false"/>
<FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/> <FIELD name="projectkeywords" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords" copy="true" stat="false" indexable="false"/>
<FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/> <FIELD name="projectduration" multivalued="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration" indexable="true" tokenizable="false" result="false"/>
<FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/> <FIELD name="projectecsc39" multivalued="false" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)" indexable="true"/>
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/><!-- PROJECT FIELDS --> <FIELD name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications" indexable="true" multivalued="false"/>
<FIELD copy="true" indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/> <FIELD name="fundinglevel0_id" indexable="true" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id" stat="false"/>
<FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/> <FIELD name="fundinglevel0_name" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name" indexable="true" tokenizable="false" result="false" stat="false"/>
<FIELD copy="true" indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/> <FIELD name="fundinglevel0_description" indexable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description" result="false" stat="false" copy="true"/>
<FIELD copy="true" indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/> <FIELD name="fundinglevel1_id" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id" indexable="true"/>
<FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/> <FIELD name="fundinglevel1_name" stat="false" result="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name" tokenizable="false"/>
<FIELD indexable="true" multivalued="false" name="projectstartyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)"/> <FIELD name="fundinglevel1_description" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description" stat="false" copy="true" indexable="false"/>
<FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/> <FIELD name="fundinglevel2_id" indexable="true" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id" tokenizable="false"/>
<FIELD indexable="true" multivalued="false" name="projectendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/> <FIELD name="fundinglevel2_name" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name" result="false" indexable="true" stat="false"/>
<FIELD indexable="true" multivalued="false" name="projectcallidentifier" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier"/> <FIELD name="fundinglevel2_description" indexable="false" copy="true" result="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description" stat="false"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
<FIELD copy="true" indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/> <FIELD name="funder" result="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder" indexable="true" stat="false" tokenizable="false"/>
<FIELD indexable="true" multivalued="false" name="projectduration" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration"/> <FIELD name="fundershortname" stat="false" tokenizable="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname" result="false"/>
<FIELD indexable="true" multivalued="false" name="projectecsc39" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)"/> <FIELD name="funderid" stat="false" indexable="true" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id" result="false" tokenizable="false"/><!-- RESULT FIELDS -->
<FIELD indexable="true" multivalued="false" name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications"/> <FIELD name="resulttitle" indexable="true" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title" stat="false" type="text_en" copy="true" result="false"/>
<FIELD indexable="true" multivalued="false" name="projectecarticle29_3" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/ecarticle29_3"/> <FIELD name="resultsubject" stat="false" type="text_en" indexable="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
<FIELD indexable="true" name="projectsubject" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/subjects"/> <FIELD name="resultembargoenddate" type="date" multivalued="false" result="false" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate" indexable="true" stat="false"/>
<FIELD indexable="true" multivalued="false" name="projectcontracttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/contracttype/@classname"/> <FIELD name="resultembargoendyear" multivalued="false" tokenizable="false" stat="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)" indexable="true" result="false"/>
<FIELD indexable="true" name="fundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/> <FIELD name="resulttypeid" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid" stat="false" multivalued="false" indexable="true" result="false"/>
<FIELD indexable="true" name="fundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name"/> <FIELD name="resultlanguagename" result="false" indexable="true" tokenizable="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname" multivalued="false"/>
<FIELD copy="true" indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/> <FIELD name="resultpublisher" stat="false" copy="true" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']" result="false" indexable="true"/>
<FIELD indexable="true" name="fundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id"/> <FIELD name="resultdescription" result="false" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']" copy="true" type="text_en" indexable="true" stat="false"/>
<FIELD indexable="true" name="fundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name"/> <FIELD name="resultbestaccessright" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)" stat="false" indexable="true" tokenizable="false" result="false" multivalued="false"/>
<FIELD copy="true" indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/> <FIELD name="resultdateofacceptance" type="date" multivalued="false" stat="false" result="false" indexable="true" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
<FIELD indexable="true" name="fundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id"/> <FIELD name="resultacceptanceyear" result="false" copy="true" indexable="true" multivalued="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)" stat="false" tokenizable="false"/>
<FIELD indexable="true" name="fundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name"/> <FIELD name="resultauthor" copy="true" indexable="true" multivalued="true" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator" result="false"/>
<FIELD copy="true" indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling --> <FIELD name="authorid" indexable="true" result="false" stat="false" type="string_ci" multivalued="true" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
<FIELD indexable="true" name="funder" result="false" stat="false" tokenizable="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder"/> <FIELD name="orcidtypevalue" xpath="//*[local-name()='entity']/*[local-name()='result']/creator" type="string_ci" result="false" value="string-join((./@*[local-name() = 'orcid' or local-name() = 'orcid_pending'], ./@*[local-name() = 'orcid' or local-name() = 'orcid_pending']/local-name()), '||' )" stat="false" multivalued="true" indexable="true"/>
<FIELD indexable="true" name="fundershortname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname"/> <FIELD result="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']" stat="false" indexable="true" name="resulthostingdatasource" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))"/>
<FIELD indexable="true" name="funderid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id"/> <FIELD name="resulthostingdatasourceid" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@id)" result="false" tokenizable="false" stat="false"/>
<FIELD indexable="true" name="fundername" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/name"/> <FIELD name="instancetypename" indexable="true" tokenizable="false" stat="false" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='instancetype']/@classname)"/>
<FIELD indexable="true" name="funderoriginalname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/originalname"/> <FIELD name="resultdupid" indexable="true" stat="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier" result="false" tokenizable="false"/>
<FIELD indexable="true" name="funderjurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/jurisdiction"/><!-- RESULT FIELDS --> <FIELD name="organizationdupid" stat="false" result="false" tokenizable="false" indexable="true" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/>
<FIELD copy="true" indexable="true" name="resulttitle" result="false" stat="false" type="text_en" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/> <FIELD name="externalreflabel" stat="false" tokenizable="true" result="false" indexable="false" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
<FIELD indexable="true" name="resultsubject" result="false" stat="false" type="text_en" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/> <FIELD name="resultidentifier" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])" copy="true" type="string_ci" stat="false" indexable="true" result="false"/>
<FIELD indexable="true" name="resultsubjectclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject/@classname)"/> <FIELD name="resultsource" result="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)" indexable="false" copy="true" stat="false"/>
<FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/> <FIELD name="eoscifguidelines" stat="false" indexable="true" tokenizable="false" result="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs non tokenizable for faceted search-->
<FIELD indexable="true" multivalued="false" name="resultembargoendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)"/> <FIELD name="fos" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])" indexable="true" stat="false" tokenizable="false" result="false"/>
<FIELD indexable="true" multivalued="false" name="resulttypeid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/> <FIELD name="sdg" tokenizable="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])" stat="false" result="false"/><!-- REL FIELDS -->
<FIELD indexable="true" multivalued="false" name="resulttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classname"/> <FIELD name="reldatasourcecompatibilityid" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)" indexable="true" stat="false" tokenizable="false"/>
<FIELD indexable="true" multivalued="false" name="resultlanguagename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname"/> <FIELD name="relproject" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" result="false" xpath="//*[local-name()='entity']/*//rel/to[@type='project']" stat="false" tokenizable="false" indexable="true"/>
<FIELD copy="true" indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/> <FIELD name="relprojectid" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])" indexable="true" tokenizable="false" result="false" stat="false"/>
<FIELD copy="true" indexable="true" name="resultdescription" result="false" stat="false" type="text_en" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/> <FIELD name="relprojectcode" tokenizable="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)" stat="false" result="false"/>
<FIELD indexable="true" name="resultlicense" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/license"/> <FIELD name="relprojectname" stat="false" result="false" indexable="true" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)" tokenizable="false"/>
<FIELD indexable="true" name="resultaccessright" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/accessright/@classname"/> <FIELD name="relprojecttitle" indexable="false" stat="false" copy="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
<FIELD indexable="true" name="resultresourcetypename" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/resourcetype/@classname"/> <FIELD name="relcontracttypename" stat="false" indexable="false" copy="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
<FIELD indexable="true" multivalued="false" name="resultbestaccessright" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)"/> <FIELD name="relorganizationcountryid" stat="false" result="false" indexable="true" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
<FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/> <FIELD name="relorganizationcountryname" result="false" copy="true" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)" indexable="false" stat="false"/>
<FIELD copy="true" indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/> <FIELD name="relorganizationid" result="false" indexable="true" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
<FIELD copy="true" indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/> <FIELD name="relorganizationname" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)" result="false" stat="false" indexable="true" copy="true"/>
<FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/> <FIELD name="relorganizationshortname" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)" indexable="true" result="false" copy="true" stat="false"/>
<FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/> <FIELD name="relresulttype" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)" tokenizable="false" stat="false" result="false"/>
<FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/> <FIELD name="relclass" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)" result="false" indexable="true"/>
<FIELD indexable="true" multivalued="true" name="orcidtypevalue" result="false" stat="false" type="string_ci" value="string-join((./@*[local-name() = 'orcid' or local-name() = 'orcid_pending'], ./@*[local-name() = 'orcid' or local-name() = 'orcid_pending']/local-name()), '||' )" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/> <FIELD name="relfundinglevel0_id" stat="false" indexable="true" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0" tokenizable="false"/>
<FIELD indexable="true" name="resulthostingdatasource" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']"/> <FIELD name="relfundinglevel0_name" indexable="true" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()" result="false" stat="false"/>
<FIELD indexable="true" name="resulthostingdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@id)"/> <FIELD name="relfundinglevel1_id" indexable="true" stat="false" tokenizable="false" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1"/>
<FIELD indexable="true" name="resulthostingdatasourcename" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@name)"/> <FIELD name="relfundinglevel1_name" xpath="//*[local-name()='entity']//rel/funding/funding_level_1/@name/string()" result="false" indexable="true" stat="false" tokenizable="false"/>
<FIELD indexable="true" name="instancetypename" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='instancetype']/@classname)"/> <FIELD name="relfundinglevel2_id" result="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2" indexable="true" tokenizable="false" stat="false"/>
<FIELD indexable="true" name="resultdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier"/> <FIELD name="relfundinglevel2_name" xpath="//*[local-name()='entity']//rel/funding/funding_level_2/@name/string()" tokenizable="false" result="false" stat="false" indexable="true"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
<FIELD indexable="true" name="organizationdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/> <FIELD name="relfunder" indexable="true" tokenizable="false" stat="false" result="false" value="distinct-values(concat(@id, '||', @name, '||', @shortname))" xpath="//*[local-name()='entity']//rel/funding/funder"/>
<FIELD indexable="true" name="externalrefsite" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/sitename)"/> <FIELD name="relfunderid" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@id)" stat="false" tokenizable="false" result="false" indexable="true"/>
<FIELD copy="true" indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/> <FIELD name="relfundershortname" indexable="true" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@shortname)" result="false" stat="false" tokenizable="false"/>
<FIELD indexable="true" name="externalrefclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/qualifier/@classid)"/> <FIELD name="semrelid" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" indexable="true" result="false" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
<FIELD indexable="true" name="externalrefid" result="false" stat="false" tokenizable="false" xpath="(//*[local-name()='entity']/*//children/externalreference/refidentifier)"/> <FIELD name="dateofcollection" stat="false" type="date" indexable="true" result="false" multivalued="false" value="//header/*[local-name()='dateOfCollection']"/>
<FIELD copy="true" indexable="true" name="resultidentifier" result="false" stat="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/> <FIELD name="status" type="string_ci" tokenizable="false" stat="false" indexable="true" xpath="//header/*[local-name()='status']" result="false"/>
<FIELD copy="true" indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/> <FIELD name="collectedfromdatasourceid" indexable="true" result="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)" tokenizable="false" stat="false"/>
<FIELD indexable="true" name="eoscifguidelines" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs non tokenizable for faceted search--> <FIELD name="collectedfromname" stat="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)" result="false" tokenizable="false" indexable="true"/>
<FIELD indexable="true" name="fos" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])"/> <FIELD name="originalid" result="false" indexable="true" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']" tokenizable="false" stat="false"/>
<FIELD indexable="true" name="sdg" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])"/><!-- REL FIELDS --> <FIELD name="pid" type="string_ci" tokenizable="false" indexable="true" result="false" xpath="//*[local-name()='entity']/*/pid/text()" stat="false"/>
<FIELD indexable="true" name="reldatasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)"/> <FIELD name="pidclassid" result="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)" stat="false" indexable="true" tokenizable="false"/>
<FIELD indexable="true" name="relproject" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" xpath="//*[local-name()='entity']/*//rel/to[@type='project']"/> <FIELD name="deletedbyinference" indexable="true" stat="false" xpath="//*[local-name()='entity']//datainfo/deletedbyinference" result="false" tokenizable="false"/>
<FIELD indexable="true" name="relprojectid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])"/> <FIELD name="provenanceactionclassid" result="false" stat="false" xpath="//*[local-name()='entity']//datainfo/provenanceaction/@classid" tokenizable="false" indexable="true"/>
<FIELD indexable="true" name="relprojectcode" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)"/> <FIELD name="contextid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@id)" tokenizable="false" indexable="true" stat="false" result="false"/>
<FIELD copy="true" indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/> <FIELD name="contextname" indexable="true" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@label)"/><!-- Need special fields for community (research initiative) context in order to exclude funders from the context browse -->
<FIELD copy="true" indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/> <FIELD name="community" tokenizable="false" result="false" xpath="//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']" value="distinct-values(concat(@id, '||', @label))" stat="false" indexable="true"/>
<FIELD indexable="true" name="relcontracttypeid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classid)"/> <FIELD name="communityid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@id)" stat="false" tokenizable="false" result="false" indexable="true"/>
<FIELD copy="true" indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/> <FIELD name="categoryid" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@id)" stat="false" indexable="true" result="false" tokenizable="false"/>
<FIELD indexable="true" name="relorganizationcountryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/> <FIELD name="conceptname" stat="false" indexable="true" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)" result="false" tokenizable="false"/><!-- new index field for country info from different xpaths for any type of entity -->
<FIELD copy="true" indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/> <FIELD name="country" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)" tokenizable="false" indexable="true"/>
<FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/> <FIELD name="oafentity" indexable="false" result="true" tokenizable="false" stat="false" xpath="//*[local-name() = 'entity']"/>
<FIELD copy="true" indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
<FIELD copy="true" indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
<FIELD indexable="true" name="relfundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1"/>
<FIELD indexable="true" name="relfundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_1/@name/string()"/>
<FIELD indexable="true" name="relfundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2"/>
<FIELD indexable="true" name="relfundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_2/@name/string()"/>
<FIELD indexable="true" name="relinferred" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@inferred)"/>
<FIELD indexable="true" name="reltrust" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@trust)"/>
<FIELD indexable="true" name="relinferenceprovenance" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@inferenceprovenance)"/>
<FIELD indexable="true" name="relprovenanceactionclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/@provenanceaction)"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
<FIELD indexable="true" name="relfunder" result="false" stat="false" tokenizable="false" value="distinct-values(concat(@id, '||', @name, '||', @shortname))" xpath="//*[local-name()='entity']//rel/funding/funder"/>
<FIELD indexable="true" name="relfunderid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@id)"/>
<FIELD indexable="true" name="relfundershortname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@shortname)"/>
<FIELD indexable="true" name="relfundername" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@name)"/>
<FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected from of the related entity. Available for result-result relationships -->
<FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
<FIELD indexable="true" name="relvalidated" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./validated]/to[@type='project'])"/>
<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="date" value="//header/*[local-name()='dateOfCollection']"/>
<FIELD indexable="true" name="status" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//header/*[local-name()='status']"/>
<FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
<FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/>
<FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/>
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/pid/text()"/>
<FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/>
<FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/>
<FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/>
<FIELD indexable="true" name="deletedbyinference" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/deletedbyinference"/>
<FIELD indexable="true" name="trust" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/trust"/>
<FIELD indexable="true" name="inferenceprovenance" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferenceprovenance"/>
<FIELD indexable="true" name="provenanceactionclassid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/provenanceaction/@classid"/>
<FIELD indexable="true" name="contextid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@id)"/>
<FIELD indexable="true" name="contexttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@type)"/>
<FIELD indexable="true" name="contextname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/@label)"/><!-- Need special fields for community (research initiative) context in order to exclude funders from the context browse -->
<FIELD indexable="true" name="community" result="false" stat="false" tokenizable="false" value="distinct-values(concat(@id, '||', @label))" xpath="//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']"/>
<FIELD indexable="true" name="communityname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@label)"/>
<FIELD indexable="true" name="communityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context[@type='community' or @type='ri']/@id)"/>
<FIELD indexable="true" name="categoryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@id)"/>
<FIELD indexable="true" name="categoryname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@label)"/>
<FIELD indexable="true" name="conceptid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@id)"/>
<FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity -->
<FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/>
</FIELDS> </FIELDS>
</LAYOUT> </LAYOUT>