[ENRICHMENT][BETA] Use of community API in enrichment process AND addition to tagging result for communities through projects #359
|
@ -11,6 +11,9 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import javax.management.Query;
|
import javax.management.Query;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.amazonaws.util.StringUtils;
|
import com.amazonaws.util.StringUtils;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
@ -21,6 +24,7 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||||
import eu.dnetlib.dhp.bulktag.community.Provider;
|
import eu.dnetlib.dhp.bulktag.community.Provider;
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||||
|
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
|
@ -30,6 +34,8 @@ public class Utils implements Serializable {
|
||||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(Utils.class);
|
||||||
|
|
||||||
public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException {
|
public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException {
|
||||||
final Map<String, Community> communities = Maps.newHashMap();
|
final Map<String, Community> communities = Maps.newHashMap();
|
||||||
List<Community> validCommunities = new ArrayList<>();
|
List<Community> validCommunities = new ArrayList<>();
|
||||||
|
@ -126,6 +132,7 @@ public class Utils implements Serializable {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return organizationMap;
|
return organizationMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -95,10 +95,7 @@ public class SparkBulkTagJob {
|
||||||
|
|
||||||
Dataset<String> datasources = readPath(
|
Dataset<String> datasources = readPath(
|
||||||
spark, inputPath
|
spark, inputPath
|
||||||
.substring(
|
+ "datasource",
|
||||||
0,
|
|
||||||
inputPath.lastIndexOf("/"))
|
|
||||||
+ "/datasource",
|
|
||||||
Datasource.class)
|
Datasource.class)
|
||||||
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
|
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
|
||||||
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
|
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
|
||||||
|
@ -106,10 +103,10 @@ public class SparkBulkTagJob {
|
||||||
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
|
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
|
||||||
|
|
||||||
for (String ds : datasources.collectAsList()) {
|
for (String ds : datasources.collectAsList()) {
|
||||||
final String dsId = ds.substring(3);
|
// final String dsId = ds.substring(3);
|
||||||
if (!dsm.containsKey(dsId)) {
|
if (!dsm.containsKey(ds)) {
|
||||||
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
|
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
|
||||||
dsm.put(dsId, eoscList);
|
dsm.put(ds, eoscList);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
|
||||||
|
|
||||||
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
|
|
||||||
throws ISLookUpException, DocumentException, SAXException, IOException {
|
|
||||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
|
||||||
final List<String> res = isLookUp
|
|
||||||
.quickSearchProfile(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
QueryInformationSystem.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/bulktag/query.xq")));
|
|
||||||
|
|
||||||
final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
|
|
||||||
|
|
||||||
return CommunityConfigurationFactory.newInstance(xmlConf);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -2,7 +2,7 @@
|
||||||
package eu.dnetlib.dhp.resulttocommunityfromorganization;
|
package eu.dnetlib.dhp.resulttocommunityfromorganization;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -53,22 +54,15 @@ public class SparkResultToCommunityFromOrganizationJob {
|
||||||
final String possibleupdatespath = parser.get("preparedInfoPath");
|
final String possibleupdatespath = parser.get("preparedInfoPath");
|
||||||
log.info("preparedInfoPath: {}", possibleupdatespath);
|
log.info("preparedInfoPath: {}", possibleupdatespath);
|
||||||
|
|
||||||
final String resultClassName = parser.get("resultTableName");
|
|
||||||
log.info("resultTableName: {}", resultClassName);
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
|
||||||
|
|
||||||
runWithSparkHiveSession(
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
removeOutputDir(spark, outputPath);
|
// removeOutputDir(spark, outputPath);
|
||||||
|
|
||||||
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
|
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -77,11 +71,18 @@ public class SparkResultToCommunityFromOrganizationJob {
|
||||||
SparkSession spark,
|
SparkSession spark,
|
||||||
String inputPath,
|
String inputPath,
|
||||||
String outputPath,
|
String outputPath,
|
||||||
Class<R> resultClazz,
|
|
||||||
String possibleUpdatesPath) {
|
String possibleUpdatesPath) {
|
||||||
|
|
||||||
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
|
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
|
||||||
Dataset<R> result = readPath(spark, inputPath, resultClazz);
|
|
||||||
|
ModelSupport.entityTypes
|
||||||
|
.keySet()
|
||||||
|
.parallelStream()
|
||||||
|
.forEach(e -> {
|
||||||
|
if (ModelSupport.isResult(e)) {
|
||||||
|
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||||
|
removeOutputDir(spark, outputPath + e.name());
|
||||||
|
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
|
||||||
|
|
||||||
result
|
result
|
||||||
.joinWith(
|
.joinWith(
|
||||||
|
@ -92,7 +93,10 @@ public class SparkResultToCommunityFromOrganizationJob {
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath + e.name());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
|
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
|
||||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
@ -51,16 +52,15 @@ public class PrepareResultCommunitySet {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final boolean production = Boolean.valueOf(parser.get("outputPath"));
|
final boolean production = Boolean.valueOf(parser.get("production"));
|
||||||
log.info("production: {}", production);
|
log.info("production: {}", production);
|
||||||
|
|
||||||
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production);
|
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production);
|
||||||
log.info("projectsMap: {}", new Gson().toJson(projectsMap));
|
log.info("projectsMap: {}", new Gson().toJson(projectsMap));
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
|
||||||
|
|
||||||
runWithSparkHiveSession(
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
|
@ -94,24 +94,27 @@ public class PrepareResultCommunitySet {
|
||||||
.select(
|
.select(
|
||||||
new Column("source").as("resultId"),
|
new Column("source").as("resultId"),
|
||||||
new Column("target").as("projectId"))
|
new Column("target").as("projectId"))
|
||||||
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("source"), Encoders.STRING())
|
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
|
.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
|
||||||
ResultProjectList rpl = new ResultProjectList();
|
ResultProjectList rpl = new ResultProjectList();
|
||||||
rpl.setResultId(k);
|
rpl.setResultId(k);
|
||||||
ArrayList<String> cl = new ArrayList<>();
|
ArrayList<String> cl = new ArrayList<>();
|
||||||
cl.addAll(projectMap.get(v.next().getAs("target")));
|
cl.addAll(projectMap.get(v.next().getAs("projectId")));
|
||||||
v.forEachRemaining(r -> {
|
v.forEachRemaining(r -> {
|
||||||
projectMap
|
projectMap
|
||||||
.get(r.getAs("target"))
|
.get(r.getAs("projectId"))
|
||||||
.forEach(c -> {
|
.forEach(c -> {
|
||||||
if (!cl.contains(c))
|
if (!cl.contains(c))
|
||||||
cl.add(c);
|
cl.add(c);
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
if(cl.size() == 0)
|
||||||
|
return null;
|
||||||
rpl.setCommunityList(cl);
|
rpl.setCommunityList(cl);
|
||||||
return rpl;
|
return rpl;
|
||||||
}, Encoders.bean(ResultProjectList.class))
|
}, Encoders.bean(ResultProjectList.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
|
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -61,20 +62,14 @@ public class SparkResultToCommunityFromProject implements Serializable {
|
||||||
final String possibleupdatespath = parser.get("preparedInfoPath");
|
final String possibleupdatespath = parser.get("preparedInfoPath");
|
||||||
log.info("preparedInfoPath: {}", possibleupdatespath);
|
log.info("preparedInfoPath: {}", possibleupdatespath);
|
||||||
|
|
||||||
final String resultClassName = parser.get("resultTableName");
|
|
||||||
log.info("resultTableName: {}", resultClassName);
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
|
||||||
|
|
||||||
runWithSparkHiveSession(
|
|
||||||
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
// removeOutputDir(spark, outputPath);
|
|
||||||
|
|
||||||
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
|
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
|
||||||
|
|
||||||
|
@ -108,7 +103,7 @@ public class SparkResultToCommunityFromProject implements Serializable {
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath + e.name());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -5,12 +5,7 @@
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName": "out",
|
"paramName": "out",
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
|
@ -23,12 +18,6 @@
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"paramName":"tn",
|
|
||||||
"paramLongName":"resultTableName",
|
|
||||||
"paramDescription": "the name of the result table we are currently working on",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName": "p",
|
"paramName": "p",
|
||||||
"paramLongName": "preparedInfoPath",
|
"paramLongName": "preparedInfoPath",
|
||||||
|
|
|
@ -5,12 +5,6 @@
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"paramName":"ocm",
|
|
||||||
"paramLongName":"organizationtoresultcommunitymap",
|
|
||||||
"paramDescription": "the map for the association organization communities",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName":"h",
|
"paramName":"h",
|
||||||
"paramLongName":"hive_metastore_uris",
|
"paramLongName":"hive_metastore_uris",
|
||||||
|
@ -28,6 +22,12 @@
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
"paramDescription": "the path used to store temporary output files",
|
"paramDescription": "the path used to store temporary output files",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "p",
|
||||||
|
"paramLongName": "production",
|
||||||
|
"paramDescription": "true if the production community API must be used, false otherwise",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
|
@ -22,7 +22,7 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="prepare_result_communitylist"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -90,8 +90,8 @@
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
|
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=4
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=10G
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
@ -103,20 +103,13 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--production</arg><arg>${production}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="fork-join-exec-propagation"/>
|
<ok to="exec-propagation"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<fork name="fork-join-exec-propagation">
|
<action name="exec-propagation">
|
||||||
<path start="join_propagate_publication"/>
|
|
||||||
<path start="join_propagate_dataset"/>
|
|
||||||
<path start="join_propagate_otherresearchproduct"/>
|
|
||||||
<path start="join_propagate_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_propagate_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
|
@ -135,104 +128,14 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="join_propagate_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>community2resultfromorganization-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>community2resultfromorganization-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>community2resultfromorganization-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait2" to="End"/>
|
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "p",
|
||||||
|
"paramLongName": "preparedInfoPath",
|
||||||
|
"paramDescription": "the path where prepared info have been stored",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,33 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"h",
|
||||||
|
"paramLongName":"hive_metastore_uris",
|
||||||
|
"paramDescription": "the hive metastore uris",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "p",
|
||||||
|
"paramLongName": "production",
|
||||||
|
"paramDescription": "true if the production community API must be used, false otherwise",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.api;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -98,14 +98,22 @@ public class QueryCommunityAPITest {
|
||||||
@Test
|
@Test
|
||||||
void getCommunityProjects() throws Exception {
|
void getCommunityProjects() throws Exception {
|
||||||
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);
|
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);
|
||||||
Assertions.assertFalse(projectMap.containsKey("mes"));
|
|
||||||
Assertions.assertEquals(33, projectMap.size());
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertTrue(
|
||||||
projectMap
|
projectMap
|
||||||
.keySet()
|
.keySet()
|
||||||
.stream()
|
.stream()
|
||||||
.allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
|
.allMatch(k -> k.startsWith("40|")));
|
||||||
|
|
||||||
|
System.out.println(projectMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getCommunityOrganizations() throws Exception {
|
||||||
|
CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true);
|
||||||
|
Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|")));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -0,0 +1,95 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.resulttocommunityfromorganization;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.api.Utils;
|
||||||
|
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||||
|
import eu.dnetlib.dhp.bulktag.BulkTagJobTest;
|
||||||
|
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 13/10/23
|
||||||
|
*/
|
||||||
|
public class PrepareAssocTest {
|
||||||
|
|
||||||
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class);
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(BulkTagJobTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(PrepareAssocTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test1() throws Exception {
|
||||||
|
|
||||||
|
PrepareResultCommunitySet
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath",
|
||||||
|
getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/relation/").getPath(),
|
||||||
|
"-outputPath", workingDir.toString() + "/prepared",
|
||||||
|
"-production", Boolean.TRUE.toString(),
|
||||||
|
"-hive_metastore_uris", ""
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<ResultCommunityList> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/prepared")
|
||||||
|
.map(item -> new ObjectMapper().readValue(item, ResultCommunityList.class));
|
||||||
|
|
||||||
|
tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.BulkTagJobTest;
|
||||||
|
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 13/10/23
|
||||||
|
*/
|
||||||
|
public class PrepareAssocTest {
|
||||||
|
|
||||||
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class);
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(BulkTagJobTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(PrepareAssocTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test1() throws Exception {
|
||||||
|
|
||||||
|
PrepareResultCommunitySet
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath",
|
||||||
|
getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/relation/").getPath(),
|
||||||
|
"-outputPath", workingDir.toString() + "/prepared",
|
||||||
|
"-production", Boolean.TRUE.toString(),
|
||||||
|
"-hive_metastore_uris", ""
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<ResultProjectList> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/prepared")
|
||||||
|
.map(item -> new ObjectMapper().readValue(item, ResultProjectList.class));
|
||||||
|
|
||||||
|
tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,323 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||||
|
|
||||||
|
import static org.apache.spark.sql.functions.desc;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.Row;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
|
||||||
|
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
|
||||||
|
public class ResultToCommunityJobTest {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(OrcidPropagationJobTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testSparkResultToCommunityFromProjectJob() throws Exception {
|
||||||
|
final String preparedInfoPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToCommunityFromProject
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath", getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
|
||||||
|
.getPath(),
|
||||||
|
|
||||||
|
"-outputPath", workingDir.toString() + "/",
|
||||||
|
"-preparedInfoPath", preparedInfoPath
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Dataset> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||||
|
|
||||||
|
tmp.foreach(d -> System.out.println(new ObjectMapper().writeValueAsString(d)));
|
||||||
|
// Assertions.assertEquals(10, tmp.count());
|
||||||
|
// org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
|
||||||
|
// .createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
|
||||||
|
//
|
||||||
|
// verificationDataset.createOrReplaceTempView("dataset");
|
||||||
|
//
|
||||||
|
// String query = "select id, MyT.id community "
|
||||||
|
// + "from dataset "
|
||||||
|
// + "lateral view explode(context) c as MyT "
|
||||||
|
// + "lateral view explode(MyT.datainfo) d as MyD "
|
||||||
|
// + "where MyD.inferenceprovenance = 'propagation'";
|
||||||
|
//
|
||||||
|
// org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);
|
||||||
|
// Assertions.assertEquals(5, resultExplodedProvenance.count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 0,
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 1,
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "beopen",
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 2,
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "mes",
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "euromarine",
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(1)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 1,
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "mes",
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 1,
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .filter("id = '50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "mes",
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultExplodedProvenance
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// query = "select id, MyT.id community "
|
||||||
|
// + "from dataset "
|
||||||
|
// + "lateral view explode(context) c as MyT "
|
||||||
|
// + "lateral view explode(MyT.datainfo) d as MyD ";
|
||||||
|
//
|
||||||
|
// org.apache.spark.sql.Dataset<Row> resultCommunityId = spark.sql(query);
|
||||||
|
//
|
||||||
|
// Assertions.assertEquals(10, resultCommunityId.count());
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 1,
|
||||||
|
// resultCommunityId
|
||||||
|
// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "beopen",
|
||||||
|
// resultCommunityId
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultCommunityId
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 1,
|
||||||
|
// resultCommunityId
|
||||||
|
// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'")
|
||||||
|
// .count());
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 3,
|
||||||
|
// resultCommunityId
|
||||||
|
// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "beopen",
|
||||||
|
// resultCommunityId
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultCommunityId
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(2)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 2,
|
||||||
|
// resultCommunityId
|
||||||
|
// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "euromarine",
|
||||||
|
// resultCommunityId
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultCommunityId
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(1)
|
||||||
|
// .getString(0));
|
||||||
|
//
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// 3,
|
||||||
|
// resultCommunityId
|
||||||
|
// .filter("id = '50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'")
|
||||||
|
// .count());
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "euromarine",
|
||||||
|
// resultCommunityId
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultCommunityId
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(2)
|
||||||
|
// .getString(0));
|
||||||
|
// Assertions
|
||||||
|
// .assertEquals(
|
||||||
|
// "ni",
|
||||||
|
// resultCommunityId
|
||||||
|
// .select("community")
|
||||||
|
// .where(
|
||||||
|
// resultCommunityId
|
||||||
|
// .col("id")
|
||||||
|
// .equalTo(
|
||||||
|
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
|
||||||
|
// .sort(desc("community"))
|
||||||
|
// .collectAsList()
|
||||||
|
// .get(0)
|
||||||
|
// .getString(0));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
{"communityList":["beopen"],"resultId":"50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe"}
|
||||||
|
{"communityList":["beopen"],"resultId":"50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::53b70ea6e0769d02ddf93307ec8e3e92"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::ef1ac6efc10f420fa9e190e49644f1f2"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b738aa93950dddfb0294df2e8fdf0579"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::befccb1f9e6b833fd82e587737ae9e7d"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::bf1cba621615e27db1692865a5f35a0b"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::e105de571b336daae05f0e75cf740c5c"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1fe4f347c9df657b7ba520987d79436e"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::6d7c00a8c8e59f0215459e2e4ee3fd6c"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::39ec88ef4127db0ea1b88938f1c52889"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::3496709db804d98f76c45d7ed023dd95"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::dc97fffbdb6d35f792fc0ab428ff065c"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b61d082d96619d9b7a876e6dce44cf65"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::32a96881c3036cf2d2165bb2d276ea82"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::76e9e6a959ba588483c74ec580369864"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1487a0a92572376d95d6cc3f066504b7"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::413a0a0656f888cce9c15f6be6df60e3"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::25c3e91960cbd7a8f95a2e511cbffddd"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::4d523b52094a689856e479bb99063c7a"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|od______2663::393c7262bb71642b7bb4c67cfeab02c5"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0b3333d875b91ffa4db0735efec94e7a"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0699c30043edfae40786d80acd20d300"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7f29ade677e66ffbf1312fa837bc73ca"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7ba6627ac7590d367cc01bbac4d518e8"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::8ad9bc047433401947dc0cdb4a989cee"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::cce70f27d85df658479d0ec0046a4eb3"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::74304be834b7013dbaeb73c3a19a654b"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::46fc13a87befb6a83ac9c63580528ab0"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::66f1867488b62d9c9fb734273775e203"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::79236995d5c30e5234a47cee4a728cae"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|od______2386::cb7f6cb01d1a835612731d645842f699"}
|
||||||
|
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::d424daa43f97a434eb0a12289410cade"}
|
Binary file not shown.
|
@ -0,0 +1,20 @@
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|openorgs____::775eab3c1281cb91d53a31c4a1ba1090", "lastupdatetimestamp": 1694431186898, "relType": "resultOrganization", "source": "50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::775eab3c1281cb91d53a31c4a1ba1090", "lastupdatetimestamp": 1694431155490, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a", "lastupdatetimestamp": 1694431195409, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|pending_org_::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431195538, "relType": "resultOrganization", "source": "50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431216929, "relType": "resultOrganization", "source": "50|RECOLECTA___::031d8312287a0108202acd8c5957fcb5", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::26d4324bfca459ab17e1efd966fba8d7", "lastupdatetimestamp": 1694431156296, "relType": "resultOrganization", "source": "50|RECOLECTA___::0ac43c9933175f1d0e091ba4dc814565", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::fc8f734ba211cfc8a3769189808338c2", "lastupdatetimestamp": 1694431218717, "relType": "resultOrganization", "source": "50|RECOLECTA___::1c6c582aa2d57c932069bc4382653229", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::15c4bd8602727bff4ecbaf69e7ac43af", "lastupdatetimestamp": 1694431211627, "relType": "resultOrganization", "source": "50|RECOLECTA___::21c4e85b761c898dd0f6d59d9f9b85f1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::6e4694bb54928162e3736e2042663ff1", "lastupdatetimestamp": 1694431213910, "relType": "resultOrganization", "source": "50|RECOLECTA___::22436b0491ae186b64671a0667551830", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::5665ec851e301a1f816ee0be3e98757b", "lastupdatetimestamp": 1694431212739, "relType": "resultOrganization", "source": "50|RECOLECTA___::6739f9dda9d6e4703b70f908a4ab6259", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::7ccc3885d6c13b55952c4af07e95f6a6", "lastupdatetimestamp": 1694431203420, "relType": "resultOrganization", "source": "50|RECOLECTA___::a3baa9c23ff95de4f09bf711bcf934a5", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8997"}, "target": "20|openorgs____::2cd85ac41e550e32a488a1b26a71fdd8", "lastupdatetimestamp": 1694431162512, "relType": "resultOrganization", "source": "50|RECOLECTA___::ce1f62947f2773709b75e57ec91eca51", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::2f224aa8c572c740fac1ed8b1a38aa21", "lastupdatetimestamp": 1694431204317, "relType": "resultOrganization", "source": "50|RECOLECTA___::df660a72ea2d758e542cdd0d63dcdbd1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|openorgs____::d4c564d89cc281f00a27ff7684da5668", "lastupdatetimestamp": 1694431222761, "relType": "resultOrganization", "source": "50|altaiap_____::5c53480b2300ae410d183822b0c1ee9b", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::5f8192f98afda4a06afb4386b7c913cc", "lastupdatetimestamp": 1694431180171, "relType": "resultOrganization", "source": "50|altaiap_____::d036acbdb3155339d2fa77455a0b3d95", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|pending_org_::afc6d809fd8a8db5ee592469c12a7dc8", "lastupdatetimestamp": 1694431193653, "relType": "resultOrganization", "source": "50|arXiv_______::0007ee48e3aa0c409dc68f172bbc76a8", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|openorgs____::5ab85d71ee109cc81a2bc28cbdeba96a", "lastupdatetimestamp": 1694431224324, "relType": "resultOrganization", "source": "50|arXiv_______::00285f6ee59920bcdb4d96d6cd240095", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::7d22ea802b3405becbde1e2e1ea59197", "lastupdatetimestamp": 1694431220671, "relType": "resultOrganization", "source": "50|arXiv_______::002eb5d6ee54ba872b882bbd3824aa96", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8847"}, "target": "20|pending_org_::c44367f733bf59e75fc7e92eb9ad4489", "lastupdatetimestamp": 1694431179461, "relType": "resultOrganization", "source": "50|arXiv_______::00480a6b5ae2cd88dbd7781e1ec341aa", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|openorgs____::293e60068d63e17e20f2de54944cae01", "lastupdatetimestamp": 1694431219918, "relType": "resultOrganization", "source": "50|arXiv_______::00766c4ea96b8b797528b511b793ba62", "collectedfrom": [], "validated": false, "properties": []}
|
|
@ -0,0 +1,4 @@
|
||||||
|
{"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
|
||||||
|
{"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
|
||||||
|
{"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
|
||||||
|
{"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}
|
|
@ -0,0 +1,20 @@
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|corda__h2020::5c16b849965ee04493a5e244471aae16", "lastupdatetimestamp": 1694431186898, "relType": "resultOrganization", "source": "50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|nwo_________::a9f6d38fb3626d6659d385f71be9657e", "lastupdatetimestamp": 1694431155490, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|ukri________::93b0983bc4fb1a17a8c3d1e6ab45c03b", "lastupdatetimestamp": 1694431195409, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|ukri________::93b0983bc4fb1a17a8c3d1e6ab45c03b", "lastupdatetimestamp": 1694431195538, "relType": "resultOrganization", "source": "50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|openorgs____::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431216929, "relType": "resultOrganization", "source": "50|RECOLECTA___::031d8312287a0108202acd8c5957fcb5", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::26d4324bfca459ab17e1efd966fba8d7", "lastupdatetimestamp": 1694431156296, "relType": "resultOrganization", "source": "50|RECOLECTA___::0ac43c9933175f1d0e091ba4dc814565", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::fc8f734ba211cfc8a3769189808338c2", "lastupdatetimestamp": 1694431218717, "relType": "resultOrganization", "source": "50|RECOLECTA___::1c6c582aa2d57c932069bc4382653229", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::15c4bd8602727bff4ecbaf69e7ac43af", "lastupdatetimestamp": 1694431211627, "relType": "resultOrganization", "source": "50|RECOLECTA___::21c4e85b761c898dd0f6d59d9f9b85f1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::6e4694bb54928162e3736e2042663ff1", "lastupdatetimestamp": 1694431213910, "relType": "resultOrganization", "source": "50|RECOLECTA___::22436b0491ae186b64671a0667551830", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|openorgs____::5665ec851e301a1f816ee0be3e98757b", "lastupdatetimestamp": 1694431212739, "relType": "resultOrganization", "source": "50|RECOLECTA___::6739f9dda9d6e4703b70f908a4ab6259", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::7ccc3885d6c13b55952c4af07e95f6a6", "lastupdatetimestamp": 1694431203420, "relType": "resultOrganization", "source": "50|RECOLECTA___::a3baa9c23ff95de4f09bf711bcf934a5", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8997"}, "target": "40|openorgs____::2cd85ac41e550e32a488a1b26a71fdd8", "lastupdatetimestamp": 1694431162512, "relType": "resultOrganization", "source": "50|RECOLECTA___::ce1f62947f2773709b75e57ec91eca51", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::2f224aa8c572c740fac1ed8b1a38aa21", "lastupdatetimestamp": 1694431204317, "relType": "resultOrganization", "source": "50|RECOLECTA___::df660a72ea2d758e542cdd0d63dcdbd1", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|openorgs____::d4c564d89cc281f00a27ff7684da5668", "lastupdatetimestamp": 1694431222761, "relType": "resultOrganization", "source": "50|altaiap_____::5c53480b2300ae410d183822b0c1ee9b", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::5f8192f98afda4a06afb4386b7c913cc", "lastupdatetimestamp": 1694431180171, "relType": "resultOrganization", "source": "50|altaiap_____::d036acbdb3155339d2fa77455a0b3d95", "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|pending_org_::afc6d809fd8a8db5ee592469c12a7dc8", "lastupdatetimestamp": 1694431193653, "relType": "resultOrganization", "source": "50|arXiv_______::0007ee48e3aa0c409dc68f172bbc76a8", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|openorgs____::5ab85d71ee109cc81a2bc28cbdeba96a", "lastupdatetimestamp": 1694431224324, "relType": "resultOrganization", "source": "50|arXiv_______::00285f6ee59920bcdb4d96d6cd240095", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::7d22ea802b3405becbde1e2e1ea59197", "lastupdatetimestamp": 1694431220671, "relType": "resultOrganization", "source": "50|arXiv_______::002eb5d6ee54ba872b882bbd3824aa96", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8847"}, "target": "40|pending_org_::c44367f733bf59e75fc7e92eb9ad4489", "lastupdatetimestamp": 1694431179461, "relType": "resultOrganization", "source": "50|arXiv_______::00480a6b5ae2cd88dbd7781e1ec341aa", "collectedfrom": [], "validated": false, "properties": []}
|
||||||
|
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|openorgs____::293e60068d63e17e20f2de54944cae01", "lastupdatetimestamp": 1694431219918, "relType": "resultOrganization", "source": "50|arXiv_______::00766c4ea96b8b797528b511b793ba62", "collectedfrom": [], "validated": false, "properties": []}
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue