testing and fix some issues

This commit is contained in:
Miriam Baglioni 2023-10-16 11:26:07 +02:00
parent 8e9493fad9
commit 159388f9c2
24 changed files with 736 additions and 207 deletions

View File

@ -11,6 +11,9 @@ import java.util.stream.Collectors;
import javax.management.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
@ -21,6 +24,7 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
/**
* @author miriam.baglioni
@ -30,6 +34,8 @@ public class Utils implements Serializable {
private static final ObjectMapper MAPPER = new ObjectMapper();
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
private static final Logger log = LoggerFactory.getLogger(Utils.class);
public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException {
final Map<String, Community> communities = Maps.newHashMap();
List<Community> validCommunities = new ArrayList<>();
@ -126,6 +132,7 @@ public class Utils implements Serializable {
throw new RuntimeException(e);
}
});
return organizationMap;
}

View File

@ -95,10 +95,7 @@ public class SparkBulkTagJob {
Dataset<String> datasources = readPath(
spark, inputPath
.substring(
0,
inputPath.lastIndexOf("/"))
+ "/datasource",
+ "datasource",
Datasource.class)
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
@ -106,10 +103,10 @@ public class SparkBulkTagJob {
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
for (String ds : datasources.collectAsList()) {
final String dsId = ds.substring(3);
if (!dsm.containsKey(dsId)) {
// final String dsId = ds.substring(3);
if (!dsm.containsKey(ds)) {
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
dsm.put(dsId, eoscList);
dsm.put(ds, eoscList);
}
}

View File

@ -1,34 +0,0 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.IOException;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
throws ISLookUpException, DocumentException, SAXException, IOException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
final List<String> res = isLookUp
.quickSearchProfile(
IOUtils
.toString(
QueryInformationSystem.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/query.xq")));
final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
return CommunityConfigurationFactory.newInstance(xmlConf);
}
}

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.Arrays;
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
@ -53,22 +54,15 @@ public class SparkResultToCommunityFromOrganizationJob {
final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
@SuppressWarnings("unchecked")
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
// removeOutputDir(spark, outputPath);
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
});
}
@ -77,22 +71,32 @@ public class SparkResultToCommunityFromOrganizationJob {
SparkSession spark,
String inputPath,
String outputPath,
Class<R> resultClazz,
String possibleUpdatesPath) {
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
Dataset<R> result = readPath(spark, inputPath, resultClazz);
result
.joinWith(
possibleUpdates,
result.col("id").equalTo(possibleUpdates.col("resultId")),
"left_outer")
.map(resultCommunityFn(), Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
removeOutputDir(spark, outputPath + e.name());
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
result
.joinWith(
possibleUpdates,
result.col("id").equalTo(possibleUpdates.col("resultId")),
"left_outer")
.map(resultCommunityFn(), Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + e.name());
}
});
}
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
@ -51,16 +52,15 @@ public class PrepareResultCommunitySet {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final boolean production = Boolean.valueOf(parser.get("outputPath"));
final boolean production = Boolean.valueOf(parser.get("production"));
log.info("production: {}", production);
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production);
log.info("projectsMap: {}", new Gson().toJson(projectsMap));
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
@ -94,24 +94,27 @@ public class PrepareResultCommunitySet {
.select(
new Column("source").as("resultId"),
new Column("target").as("projectId"))
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("source"), Encoders.STRING())
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
ResultProjectList rpl = new ResultProjectList();
rpl.setResultId(k);
ArrayList<String> cl = new ArrayList<>();
cl.addAll(projectMap.get(v.next().getAs("target")));
cl.addAll(projectMap.get(v.next().getAs("projectId")));
v.forEachRemaining(r -> {
projectMap
.get(r.getAs("target"))
.get(r.getAs("projectId"))
.forEach(c -> {
if (!cl.contains(c))
cl.add(c);
});
});
if(cl.size() == 0)
return null;
rpl.setCommunityList(cl);
return rpl;
}, Encoders.bean(ResultProjectList.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
@ -61,20 +62,14 @@ public class SparkResultToCommunityFromProject implements Serializable {
final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
@SuppressWarnings("unchecked")
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
// removeOutputDir(spark, outputPath);
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
@ -108,7 +103,7 @@ public class SparkResultToCommunityFromProject implements Serializable {
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
.json(outputPath + e.name());
}
});

View File

@ -5,12 +5,7 @@
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
@ -23,12 +18,6 @@
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",

View File

@ -5,12 +5,6 @@
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"ocm",
"paramLongName":"organizationtoresultcommunitymap",
"paramDescription": "the map for the association organization communities",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
@ -28,6 +22,12 @@
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "production",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
}
]

View File

@ -22,7 +22,7 @@
</configuration>
</global>
<start to="reset_outputpath"/>
<start to="prepare_result_communitylist"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -90,8 +90,8 @@
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--executor-cores=4
--executor-memory=10G
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -103,20 +103,13 @@
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--production</arg><arg>${production}</arg>
</spark>
<ok to="fork-join-exec-propagation"/>
<ok to="exec-propagation"/>
<error to="Kill"/>
</action>
<fork name="fork-join-exec-propagation">
<path start="join_propagate_publication"/>
<path start="join_propagate_dataset"/>
<path start="join_propagate_otherresearchproduct"/>
<path start="join_propagate_software"/>
</fork>
<action name="join_propagate_publication">
<action name="exec-propagation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
@ -135,104 +128,14 @@
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/</arg>
</spark>
<ok to="wait2"/>
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="join_propagate_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Dataset</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-ORP</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Software</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<join name="wait2" to="End"/>
<end name="End"/>

View File

@ -0,0 +1,28 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": true
}
]

View File

@ -0,0 +1,33 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "production",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
}
]

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.bulktag;
package eu.dnetlib.dhp.api;
import java.util.List;
@ -98,14 +98,22 @@ public class QueryCommunityAPITest {
@Test
void getCommunityProjects() throws Exception {
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);
Assertions.assertFalse(projectMap.containsKey("mes"));
Assertions.assertEquals(33, projectMap.size());
Assertions
.assertTrue(
projectMap
.keySet()
.stream()
.allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
.allMatch(k -> k.startsWith("40|")));
System.out.println(projectMap);
}
@Test
void getCommunityOrganizations() throws Exception {
CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true);
Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|")));
}
}

View File

@ -0,0 +1,95 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.bulktag.BulkTagJobTest;
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
import eu.dnetlib.dhp.schema.oaf.Dataset;
/**
* @author miriam.baglioni
* @Date 13/10/23
*/
public class PrepareAssocTest {
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(BulkTagJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareAssocTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void test1() throws Exception {
PrepareResultCommunitySet
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/relation/").getPath(),
"-outputPath", workingDir.toString() + "/prepared",
"-production", Boolean.TRUE.toString(),
"-hive_metastore_uris", ""
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultCommunityList> tmp = sc
.textFile(workingDir.toString() + "/prepared")
.map(item -> new ObjectMapper().readValue(item, ResultCommunityList.class));
tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
}
}

View File

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.bulktag.BulkTagJobTest;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
/**
* @author miriam.baglioni
* @Date 13/10/23
*/
public class PrepareAssocTest {
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(BulkTagJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareAssocTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void test1() throws Exception {
PrepareResultCommunitySet
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/relation/").getPath(),
"-outputPath", workingDir.toString() + "/prepared",
"-production", Boolean.TRUE.toString(),
"-hive_metastore_uris", ""
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultProjectList> tmp = sc
.textFile(workingDir.toString() + "/prepared")
.map(item -> new ObjectMapper().readValue(item, ResultProjectList.class));
tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
}
}

View File

@ -0,0 +1,323 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static org.apache.spark.sql.functions.desc;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
import eu.dnetlib.dhp.schema.oaf.Dataset;
public class ResultToCommunityJobTest {
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(OrcidPropagationJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testSparkResultToCommunityFromProjectJob() throws Exception {
final String preparedInfoPath = getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
.getPath();
SparkResultToCommunityFromProject
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
.getPath(),
"-outputPath", workingDir.toString() + "/",
"-preparedInfoPath", preparedInfoPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
tmp.foreach(d -> System.out.println(new ObjectMapper().writeValueAsString(d)));
// Assertions.assertEquals(10, tmp.count());
// org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
//
// verificationDataset.createOrReplaceTempView("dataset");
//
// String query = "select id, MyT.id community "
// + "from dataset "
// + "lateral view explode(context) c as MyT "
// + "lateral view explode(MyT.datainfo) d as MyD "
// + "where MyD.inferenceprovenance = 'propagation'";
//
// org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);
// Assertions.assertEquals(5, resultExplodedProvenance.count());
// Assertions
// .assertEquals(
// 0,
// resultExplodedProvenance
// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'")
// .count());
// Assertions
// .assertEquals(
// 1,
// resultExplodedProvenance
// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'")
// .count());
// Assertions
// .assertEquals(
// "beopen",
// resultExplodedProvenance
// .select("community")
// .where(
// resultExplodedProvenance
// .col("id")
// .equalTo(
// "50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b"))
// .collectAsList()
// .get(0)
// .getString(0));
//
// Assertions
// .assertEquals(
// 2,
// resultExplodedProvenance
// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'")
// .count());
// Assertions
// .assertEquals(
// "mes",
// resultExplodedProvenance
// .select("community")
// .where(
// resultExplodedProvenance
// .col("id")
// .equalTo(
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
// .sort(desc("community"))
// .collectAsList()
// .get(0)
// .getString(0));
// Assertions
// .assertEquals(
// "euromarine",
// resultExplodedProvenance
// .select("community")
// .where(
// resultExplodedProvenance
// .col("id")
// .equalTo(
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
// .sort(desc("community"))
// .collectAsList()
// .get(1)
// .getString(0));
//
// Assertions
// .assertEquals(
// 1,
// resultExplodedProvenance
// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'")
// .count());
// Assertions
// .assertEquals(
// "mes",
// resultExplodedProvenance
// .select("community")
// .where(
// resultExplodedProvenance
// .col("id")
// .equalTo(
// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8"))
// .sort(desc("community"))
// .collectAsList()
// .get(0)
// .getString(0));
//
// Assertions
// .assertEquals(
// 1,
// resultExplodedProvenance
// .filter("id = '50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'")
// .count());
// Assertions
// .assertEquals(
// "mes",
// resultExplodedProvenance
// .select("community")
// .where(
// resultExplodedProvenance
// .col("id")
// .equalTo(
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
// .sort(desc("community"))
// .collectAsList()
// .get(0)
// .getString(0));
//
// query = "select id, MyT.id community "
// + "from dataset "
// + "lateral view explode(context) c as MyT "
// + "lateral view explode(MyT.datainfo) d as MyD ";
//
// org.apache.spark.sql.Dataset<Row> resultCommunityId = spark.sql(query);
//
// Assertions.assertEquals(10, resultCommunityId.count());
//
// Assertions
// .assertEquals(
// 1,
// resultCommunityId
// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'")
// .count());
// Assertions
// .assertEquals(
// "beopen",
// resultCommunityId
// .select("community")
// .where(
// resultCommunityId
// .col("id")
// .equalTo(
// "50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe"))
// .collectAsList()
// .get(0)
// .getString(0));
//
// Assertions
// .assertEquals(
// 1,
// resultCommunityId
// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'")
// .count());
//
// Assertions
// .assertEquals(
// 3,
// resultCommunityId
// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'")
// .count());
// Assertions
// .assertEquals(
// "beopen",
// resultCommunityId
// .select("community")
// .where(
// resultCommunityId
// .col("id")
// .equalTo(
// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"))
// .sort(desc("community"))
// .collectAsList()
// .get(2)
// .getString(0));
//
// Assertions
// .assertEquals(
// 2,
// resultCommunityId
// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'")
// .count());
// Assertions
// .assertEquals(
// "euromarine",
// resultCommunityId
// .select("community")
// .where(
// resultCommunityId
// .col("id")
// .equalTo(
// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8"))
// .sort(desc("community"))
// .collectAsList()
// .get(1)
// .getString(0));
//
// Assertions
// .assertEquals(
// 3,
// resultCommunityId
// .filter("id = '50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'")
// .count());
// Assertions
// .assertEquals(
// "euromarine",
// resultCommunityId
// .select("community")
// .where(
// resultCommunityId
// .col("id")
// .equalTo(
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
// .sort(desc("community"))
// .collectAsList()
// .get(2)
// .getString(0));
// Assertions
// .assertEquals(
// "ni",
// resultCommunityId
// .select("community")
// .where(
// resultCommunityId
// .col("id")
// .equalTo(
// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"))
// .sort(desc("community"))
// .collectAsList()
// .get(0)
// .getString(0));
}
}

View File

@ -0,0 +1,36 @@
{"communityList":["beopen"],"resultId":"50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe"}
{"communityList":["beopen"],"resultId":"50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b"}
{"communityList":["euromarine","mes"],"resultId":"50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::53b70ea6e0769d02ddf93307ec8e3e92"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::ef1ac6efc10f420fa9e190e49644f1f2"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b738aa93950dddfb0294df2e8fdf0579"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::befccb1f9e6b833fd82e587737ae9e7d"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::bf1cba621615e27db1692865a5f35a0b"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::e105de571b336daae05f0e75cf740c5c"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1fe4f347c9df657b7ba520987d79436e"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::6d7c00a8c8e59f0215459e2e4ee3fd6c"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::39ec88ef4127db0ea1b88938f1c52889"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::3496709db804d98f76c45d7ed023dd95"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::dc97fffbdb6d35f792fc0ab428ff065c"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b61d082d96619d9b7a876e6dce44cf65"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::32a96881c3036cf2d2165bb2d276ea82"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::76e9e6a959ba588483c74ec580369864"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1487a0a92572376d95d6cc3f066504b7"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::413a0a0656f888cce9c15f6be6df60e3"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::25c3e91960cbd7a8f95a2e511cbffddd"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::4d523b52094a689856e479bb99063c7a"}
{"communityList":["euromarine","mes"],"resultId":"50|od______2663::393c7262bb71642b7bb4c67cfeab02c5"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0b3333d875b91ffa4db0735efec94e7a"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0699c30043edfae40786d80acd20d300"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7f29ade677e66ffbf1312fa837bc73ca"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7ba6627ac7590d367cc01bbac4d518e8"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::8ad9bc047433401947dc0cdb4a989cee"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::cce70f27d85df658479d0ec0046a4eb3"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::74304be834b7013dbaeb73c3a19a654b"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::46fc13a87befb6a83ac9c63580528ab0"}
{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::66f1867488b62d9c9fb734273775e203"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::79236995d5c30e5234a47cee4a728cae"}
{"communityList":["euromarine","mes"],"resultId":"50|od______2386::cb7f6cb01d1a835612731d645842f699"}
{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::d424daa43f97a434eb0a12289410cade"}

View File

@ -0,0 +1,20 @@
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|openorgs____::775eab3c1281cb91d53a31c4a1ba1090", "lastupdatetimestamp": 1694431186898, "relType": "resultOrganization", "source": "50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::775eab3c1281cb91d53a31c4a1ba1090", "lastupdatetimestamp": 1694431155490, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a", "lastupdatetimestamp": 1694431195409, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|pending_org_::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431195538, "relType": "resultOrganization", "source": "50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431216929, "relType": "resultOrganization", "source": "50|RECOLECTA___::031d8312287a0108202acd8c5957fcb5", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::26d4324bfca459ab17e1efd966fba8d7", "lastupdatetimestamp": 1694431156296, "relType": "resultOrganization", "source": "50|RECOLECTA___::0ac43c9933175f1d0e091ba4dc814565", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::fc8f734ba211cfc8a3769189808338c2", "lastupdatetimestamp": 1694431218717, "relType": "resultOrganization", "source": "50|RECOLECTA___::1c6c582aa2d57c932069bc4382653229", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::15c4bd8602727bff4ecbaf69e7ac43af", "lastupdatetimestamp": 1694431211627, "relType": "resultOrganization", "source": "50|RECOLECTA___::21c4e85b761c898dd0f6d59d9f9b85f1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::6e4694bb54928162e3736e2042663ff1", "lastupdatetimestamp": 1694431213910, "relType": "resultOrganization", "source": "50|RECOLECTA___::22436b0491ae186b64671a0667551830", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::5665ec851e301a1f816ee0be3e98757b", "lastupdatetimestamp": 1694431212739, "relType": "resultOrganization", "source": "50|RECOLECTA___::6739f9dda9d6e4703b70f908a4ab6259", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::7ccc3885d6c13b55952c4af07e95f6a6", "lastupdatetimestamp": 1694431203420, "relType": "resultOrganization", "source": "50|RECOLECTA___::a3baa9c23ff95de4f09bf711bcf934a5", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8997"}, "target": "20|openorgs____::2cd85ac41e550e32a488a1b26a71fdd8", "lastupdatetimestamp": 1694431162512, "relType": "resultOrganization", "source": "50|RECOLECTA___::ce1f62947f2773709b75e57ec91eca51", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::2f224aa8c572c740fac1ed8b1a38aa21", "lastupdatetimestamp": 1694431204317, "relType": "resultOrganization", "source": "50|RECOLECTA___::df660a72ea2d758e542cdd0d63dcdbd1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "20|openorgs____::d4c564d89cc281f00a27ff7684da5668", "lastupdatetimestamp": 1694431222761, "relType": "resultOrganization", "source": "50|altaiap_____::5c53480b2300ae410d183822b0c1ee9b", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::5f8192f98afda4a06afb4386b7c913cc", "lastupdatetimestamp": 1694431180171, "relType": "resultOrganization", "source": "50|altaiap_____::d036acbdb3155339d2fa77455a0b3d95", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|pending_org_::afc6d809fd8a8db5ee592469c12a7dc8", "lastupdatetimestamp": 1694431193653, "relType": "resultOrganization", "source": "50|arXiv_______::0007ee48e3aa0c409dc68f172bbc76a8", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|openorgs____::5ab85d71ee109cc81a2bc28cbdeba96a", "lastupdatetimestamp": 1694431224324, "relType": "resultOrganization", "source": "50|arXiv_______::00285f6ee59920bcdb4d96d6cd240095", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|pending_org_::7d22ea802b3405becbde1e2e1ea59197", "lastupdatetimestamp": 1694431220671, "relType": "resultOrganization", "source": "50|arXiv_______::002eb5d6ee54ba872b882bbd3824aa96", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8847"}, "target": "20|pending_org_::c44367f733bf59e75fc7e92eb9ad4489", "lastupdatetimestamp": 1694431179461, "relType": "resultOrganization", "source": "50|arXiv_______::00480a6b5ae2cd88dbd7781e1ec341aa", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "20|openorgs____::293e60068d63e17e20f2de54944cae01", "lastupdatetimestamp": 1694431219918, "relType": "resultOrganization", "source": "50|arXiv_______::00766c4ea96b8b797528b511b793ba62", "collectedfrom": [], "validated": false, "properties": []}

View File

@ -0,0 +1,4 @@
{"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
{"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
{"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
{"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}

View File

@ -0,0 +1,20 @@
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|corda__h2020::5c16b849965ee04493a5e244471aae16", "lastupdatetimestamp": 1694431186898, "relType": "resultOrganization", "source": "50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|nwo_________::a9f6d38fb3626d6659d385f71be9657e", "lastupdatetimestamp": 1694431155490, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|ukri________::93b0983bc4fb1a17a8c3d1e6ab45c03b", "lastupdatetimestamp": 1694431195409, "relType": "resultOrganization", "source": "50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|ukri________::93b0983bc4fb1a17a8c3d1e6ab45c03b", "lastupdatetimestamp": 1694431195538, "relType": "resultOrganization", "source": "50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|openorgs____::eb0669daa9efeb898a3090d8aac7c953", "lastupdatetimestamp": 1694431216929, "relType": "resultOrganization", "source": "50|RECOLECTA___::031d8312287a0108202acd8c5957fcb5", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::26d4324bfca459ab17e1efd966fba8d7", "lastupdatetimestamp": 1694431156296, "relType": "resultOrganization", "source": "50|RECOLECTA___::0ac43c9933175f1d0e091ba4dc814565", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::fc8f734ba211cfc8a3769189808338c2", "lastupdatetimestamp": 1694431218717, "relType": "resultOrganization", "source": "50|RECOLECTA___::1c6c582aa2d57c932069bc4382653229", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::15c4bd8602727bff4ecbaf69e7ac43af", "lastupdatetimestamp": 1694431211627, "relType": "resultOrganization", "source": "50|RECOLECTA___::21c4e85b761c898dd0f6d59d9f9b85f1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::6e4694bb54928162e3736e2042663ff1", "lastupdatetimestamp": 1694431213910, "relType": "resultOrganization", "source": "50|RECOLECTA___::22436b0491ae186b64671a0667551830", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|openorgs____::5665ec851e301a1f816ee0be3e98757b", "lastupdatetimestamp": 1694431212739, "relType": "resultOrganization", "source": "50|RECOLECTA___::6739f9dda9d6e4703b70f908a4ab6259", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::7ccc3885d6c13b55952c4af07e95f6a6", "lastupdatetimestamp": 1694431203420, "relType": "resultOrganization", "source": "50|RECOLECTA___::a3baa9c23ff95de4f09bf711bcf934a5", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8997"}, "target": "40|openorgs____::2cd85ac41e550e32a488a1b26a71fdd8", "lastupdatetimestamp": 1694431162512, "relType": "resultOrganization", "source": "50|RECOLECTA___::ce1f62947f2773709b75e57ec91eca51", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::2f224aa8c572c740fac1ed8b1a38aa21", "lastupdatetimestamp": 1694431204317, "relType": "resultOrganization", "source": "50|RECOLECTA___::df660a72ea2d758e542cdd0d63dcdbd1", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "40|openorgs____::d4c564d89cc281f00a27ff7684da5668", "lastupdatetimestamp": 1694431222761, "relType": "resultOrganization", "source": "50|altaiap_____::5c53480b2300ae410d183822b0c1ee9b", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::5f8192f98afda4a06afb4386b7c913cc", "lastupdatetimestamp": 1694431180171, "relType": "resultOrganization", "source": "50|altaiap_____::d036acbdb3155339d2fa77455a0b3d95", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|pending_org_::afc6d809fd8a8db5ee592469c12a7dc8", "lastupdatetimestamp": 1694431193653, "relType": "resultOrganization", "source": "50|arXiv_______::0007ee48e3aa0c409dc68f172bbc76a8", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|openorgs____::5ab85d71ee109cc81a2bc28cbdeba96a", "lastupdatetimestamp": 1694431224324, "relType": "resultOrganization", "source": "50|arXiv_______::00285f6ee59920bcdb4d96d6cd240095", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "40|pending_org_::7d22ea802b3405becbde1e2e1ea59197", "lastupdatetimestamp": 1694431220671, "relType": "resultOrganization", "source": "50|arXiv_______::002eb5d6ee54ba872b882bbd3824aa96", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8847"}, "target": "40|pending_org_::c44367f733bf59e75fc7e92eb9ad4489", "lastupdatetimestamp": 1694431179461, "relType": "resultOrganization", "source": "50|arXiv_______::00480a6b5ae2cd88dbd7781e1ec341aa", "collectedfrom": [], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "isProducedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.8998"}, "target": "40|openorgs____::293e60068d63e17e20f2de54944cae01", "lastupdatetimestamp": 1694431219918, "relType": "resultOrganization", "source": "50|arXiv_______::00766c4ea96b8b797528b511b793ba62", "collectedfrom": [], "validated": false, "properties": []}

File diff suppressed because one or more lines are too long