Merge pull request 'propagation of projects through parent-child relations' (#299) from propagationProjectThroughParentChils into beta

Reviewed-on: D-Net/dnet-hadoop#299
This commit is contained in:
Claudio Atzori 2023-06-12 09:57:20 +02:00
commit a98e6591e2
26 changed files with 1327 additions and 428 deletions

View File

@ -57,7 +57,10 @@ public class PropagationConstant {
public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository";
public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID = "result:organization:semrel"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID = "result:organization:semrel";
public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through sematic relations"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through semantic relations";
public static final String PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID = "project:organization:semrel";
public static final String PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of participation to project through semantic relations";
public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel";
public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation";
@ -171,6 +174,41 @@ public class PropagationConstant {
return newRelations; return newRelations;
} }
public static Relation getRelation(String source, String target, String rel_class) {
if (ModelConstants.HAS_PARTICIPANT.equals(rel_class)) {
return getParticipantRelation(source, target, rel_class);
} else
return getAffiliationRelation(source, target, rel_class);
}
public static Relation getParticipantRelation(
String source,
String target,
String rel_class) {
return getRelation(
source, target,
rel_class,
ModelConstants.PROJECT_ORGANIZATION,
ModelConstants.PARTICIPATION,
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID,
PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME);
}
public static Relation getAffiliationRelation(
String source,
String target,
String rel_class) {
return getRelation(
source, target,
rel_class,
ModelConstants.RESULT_ORGANIZATION,
ModelConstants.AFFILIATION,
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME);
}
public static Relation getRelation( public static Relation getRelation(
String source, String source,
String target, String target,

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
@ -47,13 +47,20 @@ public class PrepareInfo implements Serializable {
"' and datainfo.deletedbyinference = false " + "' and datainfo.deletedbyinference = false " +
"GROUP BY source"; "GROUP BY source";
// associate projects to all the participant orgs
private static final String PROJECT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " +
"FROM relation " +
"WHERE lower(relclass) = '" + ModelConstants.HAS_PARTICIPANT.toLowerCase() +
"' and datainfo.deletedbyinference = false " +
"GROUP BY source";
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
SparkResultToOrganizationFromIstRepoJob.class SparkResultToOrganizationFromIstRepoJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json")); "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -74,6 +81,9 @@ public class PrepareInfo implements Serializable {
final String resultOrganizationPath = parser.get("resultOrgPath"); final String resultOrganizationPath = parser.get("resultOrgPath");
log.info("resultOrganizationPath: {}", resultOrganizationPath); log.info("resultOrganizationPath: {}", resultOrganizationPath);
final String projectOrgPath = parser.get("projectOrganizationPath");
log.info("projectOrgPath: {}", projectOrgPath);
final String relationPath = parser.get("relationPath"); final String relationPath = parser.get("relationPath");
log.info("relationPath: {}", relationPath); log.info("relationPath: {}", relationPath);
@ -89,11 +99,13 @@ public class PrepareInfo implements Serializable {
childParentPath, childParentPath,
leavesPath, leavesPath,
resultOrganizationPath, resultOrganizationPath,
projectOrgPath,
relationPath)); relationPath));
} }
private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath,
String currentIterationPath, String resultOrganizationPath, String relationPath) { String currentIterationPath, String resultOrganizationPath, String projectOrganizationPath,
String relationPath) {
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class); Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
@ -113,6 +125,14 @@ public class PrepareInfo implements Serializable {
.option("compression", "gzip") .option("compression", "gzip")
.json(resultOrganizationPath); .json(resultOrganizationPath);
spark
.sql(PROJECT_ORGANIZATION_QUERY)
.as(Encoders.bean(KeyValueSet.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(projectOrganizationPath);
relation relation
.filter( .filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() && (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
@ -120,7 +140,16 @@ public class PrepareInfo implements Serializable {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(relationPath); .json(relationPath + "/result");
relation
.filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equals(ModelConstants.HAS_PARTICIPANT))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(relationPath + "/project");
Dataset<String> children = spark Dataset<String> children = spark
.sql( .sql(

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
@ -30,7 +30,8 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
public class SparkResultToOrganizationFromSemRel implements Serializable { public class SparkResultToOrganizationFromSemRel implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class);
private static final int MAX_ITERATION = 5; private static final int MAX_ITERATION = 5;
public static final String NEW_RELATION_PATH = "/newRelation"; public static final String NEW_RESULT_RELATION_PATH = "/newResultRelation";
public static final String NEW_PROJECT_RELATION_PATH = "/newProjectRelation";
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -38,7 +39,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
.toString( .toString(
SparkResultToOrganizationFromIstRepoJob.class SparkResultToOrganizationFromIstRepoJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -62,6 +63,9 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
final String resultOrganizationPath = parser.get("resultOrgPath"); final String resultOrganizationPath = parser.get("resultOrgPath");
log.info("resultOrganizationPath: {}", resultOrganizationPath); log.info("resultOrganizationPath: {}", resultOrganizationPath);
final String projectOrganizationPath = parser.get("projectOrganizationPath");
log.info("projectOrganizationPath: {}", projectOrganizationPath);
final String workingPath = parser.get("workingDir"); final String workingPath = parser.get("workingDir");
log.info("workingPath: {}", workingPath); log.info("workingPath: {}", workingPath);
@ -88,6 +92,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
leavesPath, leavesPath,
childParentPath, childParentPath,
resultOrganizationPath, resultOrganizationPath,
projectOrganizationPath,
relationPath, relationPath,
workingPath, workingPath,
outputPath, outputPath,
@ -98,13 +103,14 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
String leavesPath, String leavesPath,
String childParentPath, String childParentPath,
String resultOrganizationPath, String resultOrganizationPath,
String projectOrganizationPath,
String graphPath, String graphPath,
String workingPath, String workingPath,
String outputPath, String outputPath,
int iterations) { int iterations) {
if (iterations == 1) { if (iterations == 1) {
doPropagateOnce( doPropagateOnce(
spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath,
workingPath, outputPath); workingPath, outputPath);
} else { } else {
@ -123,26 +129,34 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
notReachedFirstParent); notReachedFirstParent);
doPropagate( doPropagate(
spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath,
workingPath, outputPath, propagationCounter); workingPath, outputPath, propagationCounter);
} }
} }
private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath, private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath,
String resultOrganizationPath, String graphPath, String workingPath, String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath,
String outputPath) { String outputPath) {
StepActions StepActions
.execStep( .execStep(
spark, graphPath, workingPath + NEW_RELATION_PATH, spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH,
leavesPath, childParentPath, resultOrganizationPath); leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION);
addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath);
StepActions
.execStep(
spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH,
leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT);
addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath);
} }
private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath,
String resultOrganizationPath, String graphPath, String workingPath, String outputPath, String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath,
String outputPath,
PropagationCounter propagationCounter) { PropagationCounter propagationCounter) {
int iteration = 0; int iteration = 0;
long leavesCount; long leavesCount;
@ -151,13 +165,18 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
iteration++; iteration++;
StepActions StepActions
.execStep( .execStep(
spark, graphPath, workingPath + NEW_RELATION_PATH, spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH,
leavesPath, childParentPath, resultOrganizationPath); leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION);
StepActions
.execStep(
spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH,
leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT);
StepActions StepActions
.prepareForNextStep( .prepareForNextStep(
spark, workingPath + NEW_RELATION_PATH, resultOrganizationPath, leavesPath, spark, workingPath, resultOrganizationPath, projectOrganizationPath, leavesPath,
childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); childParentPath, workingPath + "/leaves", workingPath + "/resOrg", workingPath + "/projOrg");
moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); moveOutput(spark, workingPath, leavesPath, resultOrganizationPath, projectOrganizationPath);
leavesCount = readPath(spark, leavesPath, Leaves.class).count(); leavesCount = readPath(spark, leavesPath, Leaves.class).count();
} while (leavesCount > 0 && iteration < MAX_ITERATION); } while (leavesCount > 0 && iteration < MAX_ITERATION);
@ -185,7 +204,8 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
propagationCounter.getNotReachedFirstParent().add(1); propagationCounter.getNotReachedFirstParent().add(1);
} }
addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath);
addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath);
} }
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
@ -204,6 +224,28 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
} }
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
String resultOrganizationPath, String projectOrganizationPath) {
readPath(spark, workingPath + "/leaves", Leaves.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(leavesPath);
readPath(spark, workingPath + "/resOrg", KeyValueSet.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(resultOrganizationPath);
readPath(spark, workingPath + "/projOrg", KeyValueSet.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(projectOrganizationPath);
}
private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) {
Dataset<Relation> relation = readPath(spark, newRelationPath, Relation.class); Dataset<Relation> relation = readPath(spark, newRelationPath, Relation.class);
@ -212,16 +254,21 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
.mapGroups( .mapGroups(
(MapGroupsFunction<String, Relation, Relation>) (k, it) -> it.next(), Encoders.bean(Relation.class)) (MapGroupsFunction<String, Relation, Relation>) (k, it) -> it.next(), Encoders.bean(Relation.class))
.flatMap( .flatMap(
(FlatMapFunction<Relation, Relation>) r -> Arrays (FlatMapFunction<Relation, Relation>) r -> {
.asList( if (r.getSource().startsWith("50|")) {
r, getRelation( return Arrays
r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF, .asList(
ModelConstants.RESULT_ORGANIZATION, r, getAffiliationRelation(
ModelConstants.AFFILIATION, r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF))
PROPAGATION_DATA_INFO_TYPE, .iterator();
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, } else {
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) return Arrays
.iterator() .asList(
r, getParticipantRelation(
r.getTarget(), r.getSource(), ModelConstants.IS_PARTICIPANT))
.iterator();
}
}
, Encoders.bean(Relation.class)) , Encoders.bean(Relation.class))
.write() .write()

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.PropagationConstant.readPath; import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH;
import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
@ -14,8 +16,6 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
@ -28,13 +28,14 @@ public class StepActions implements Serializable {
public static void execStep(SparkSession spark, public static void execStep(SparkSession spark,
String graphPath, String newRelationPath, String graphPath, String newRelationPath,
String leavesPath, String chldParentOrgPath, String resultOrgPath) { String leavesPath, String chldParentOrgPath, String entityOrgPath, String rel_class) {
Dataset<Relation> relationGraph = readPath(spark, graphPath, Relation.class); Dataset<Relation> relationGraph = readPath(spark, graphPath, Relation.class);
// select only the relation source target among those proposed by propagation that are not already existent // select only the relation source target among those proposed by propagation that are not already existent
getNewRels( getNewRels(
newRelationPath, relationGraph, newRelationPath, relationGraph,
getPropagationRelation(spark, leavesPath, chldParentOrgPath, resultOrgPath)); getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, rel_class));
} }
@ -45,16 +46,33 @@ public class StepActions implements Serializable {
changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath);
// add the new relations obtained from propagation to the keyvalueset result organization // add the new relations obtained from propagation to the keyvalueset result organization
updateResultOrganization( updateEntityOrganization(
spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath); spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath);
} }
private static void updateResultOrganization(SparkSession spark, String resultOrgPath, public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath,
String projectOrgPath,
String leavesPath, String chldParentOrgPath, String leavesOutputPath,
String orgOutputPath, String outputProjectPath) {
// use of the parents as new leaves set
changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath);
// add the new relations obtained from propagation to the keyvalueset result organization
updateEntityOrganization(
spark, resultOrgPath, readPath(spark, selectedRelsPath + NEW_RESULT_RELATION_PATH, Relation.class),
orgOutputPath);
updateEntityOrganization(
spark, projectOrgPath, readPath(spark, selectedRelsPath + NEW_PROJECT_RELATION_PATH, Relation.class),
outputProjectPath);
}
private static void updateEntityOrganization(SparkSession spark, String entityOrgPath,
Dataset<Relation> selectedRels, String outputPath) { Dataset<Relation> selectedRels, String outputPath) {
Dataset<KeyValueSet> resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); Dataset<KeyValueSet> entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class);
resultOrg entityOrg
.joinWith( .joinWith(
selectedRels, resultOrg selectedRels, entityOrg
.col("key") .col("key")
.equalTo(selectedRels.col("source")), .equalTo(selectedRels.col("source")),
"left") "left")
@ -129,7 +147,12 @@ public class StepActions implements Serializable {
.getDataInfo() .getDataInfo()
.getProvenanceaction() .getProvenanceaction()
.getClassid() .getClassid()
.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)
&& !rel
.getDataInfo()
.getProvenanceaction()
.getClassid()
.equals(PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID))
.count() > 0) { .count() > 0) {
return null; return null;
} }
@ -152,19 +175,20 @@ public class StepActions implements Serializable {
private static Dataset<Relation> getPropagationRelation(SparkSession spark, private static Dataset<Relation> getPropagationRelation(SparkSession spark,
String leavesPath, String leavesPath,
String chldParentOrgPath, String chldParentOrgPath,
String resultOrgPath) { String entityOrgPath,
String semantics) {
Dataset<KeyValueSet> childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); Dataset<KeyValueSet> childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class);
Dataset<KeyValueSet> resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); Dataset<KeyValueSet> entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class);
Dataset<Leaves> leaves = readPath(spark, leavesPath, Leaves.class); Dataset<Leaves> leaves = readPath(spark, leavesPath, Leaves.class);
childParent.createOrReplaceTempView("childParent"); childParent.createOrReplaceTempView("childParent");
resultOrg.createOrReplaceTempView("resultOrg"); entityOrg.createOrReplaceTempView("entityOrg");
leaves.createOrReplaceTempView("leaves"); leaves.createOrReplaceTempView("leaves");
Dataset<KeyValueSet> resultParent = spark Dataset<KeyValueSet> resultParent = spark
.sql( .sql(
"SELECT resId as key, " + "SELECT entityId as key, " +
"collect_set(parent) valueSet " + "collect_set(parent) valueSet " +
"FROM (SELECT key as child, parent " + "FROM (SELECT key as child, parent " +
" FROM childParent " + " FROM childParent " +
@ -172,14 +196,14 @@ public class StepActions implements Serializable {
"JOIN leaves " + "JOIN leaves " +
"ON leaves.value = cp.child " + "ON leaves.value = cp.child " +
"JOIN (" + "JOIN (" +
"SELECT key as resId, org " + "SELECT key as entityId, org " +
"FROM resultOrg " + "FROM entityOrg " +
"LATERAL VIEW explode (valueSet) ks as org ) as ro " + "LATERAL VIEW explode (valueSet) ks as org ) as ro " +
"ON leaves.value = ro.org " + "ON leaves.value = ro.org " +
"GROUP BY resId") "GROUP BY entityId")
.as(Encoders.bean(KeyValueSet.class)); .as(Encoders.bean(KeyValueSet.class));
// create new relations from result to organization for each result linked to a leaf // create new relations from entity to organization for each entity linked to a leaf
return resultParent return resultParent
.flatMap( .flatMap(
(FlatMapFunction<KeyValueSet, Relation>) v -> v (FlatMapFunction<KeyValueSet, Relation>) v -> v
@ -189,12 +213,7 @@ public class StepActions implements Serializable {
orgId -> getRelation( orgId -> getRelation(
v.getKey(), v.getKey(),
orgId, orgId,
ModelConstants.HAS_AUTHOR_INSTITUTION, semantics))
ModelConstants.RESULT_ORGANIZATION,
ModelConstants.AFFILIATION,
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME))
.collect(Collectors.toList()) .collect(Collectors.toList())
.iterator(), .iterator(),
Encoders.bean(Relation.class)); Encoders.bean(Relation.class));

View File

@ -40,5 +40,11 @@
"paramLongName": "relationPath", "paramLongName": "relationPath",
"paramDescription": "the path where to store the selected subset of relations", "paramDescription": "the path where to store the selected subset of relations",
"paramRequired": false "paramRequired": false
},
{
"paramName": "pop",
"paramLongName": "projectOrganizationPath",
"paramDescription": "the number of iterations to be computed",
"paramRequired": true
} }
] ]

View File

@ -52,5 +52,10 @@
"paramLongName": "iterations", "paramLongName": "iterations",
"paramDescription": "the number of iterations to be computed", "paramDescription": "the number of iterations to be computed",
"paramRequired": false "paramRequired": false
} },{
"paramName": "pop",
"paramLongName": "projectOrganizationPath",
"paramDescription": "the number of iterations to be computed",
"paramRequired": true
}
] ]

View File

@ -134,7 +134,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>PrepareResultOrganizationAssociation</name> <name>PrepareResultOrganizationAssociation</name>
<class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo</class> <class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
@ -150,6 +150,7 @@
<arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg> <arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg>
<arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg> <arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg>
<arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg> <arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg>
<arg>--projectOrganizationPath</arg><arg>${workingDir}/preparedInfo/projectOrganizationPath</arg>
<arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg> <arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg>
</spark> </spark>
<ok to="apply_resulttoorganization_propagation"/> <ok to="apply_resulttoorganization_propagation"/>
@ -161,7 +162,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>resultToOrganizationFromSemRel</name> <name>resultToOrganizationFromSemRel</name>
<class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class> <class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}

View File

@ -1,22 +1,17 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
@ -28,7 +23,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.KeyValueSet;
import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
public class PrepareInfoJobTest { public class PrepareInfoJobTest {
@ -78,11 +72,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"
@ -223,11 +218,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest2")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"
@ -343,11 +339,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"
@ -355,7 +352,38 @@ public class PrepareInfoJobTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> tmp = sc JavaRDD<Relation> tmp = sc
.textFile(workingDir.toString() + "/relation") .textFile(workingDir.toString() + "/relation/result")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
Assertions.assertEquals(7, verificationDs.count());
}
@Test
public void relationProjectTest() throws Exception {
PrepareInfo
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest")
.getPath(),
"-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> tmp = sc
.textFile(workingDir.toString() + "/relation/project")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
@ -373,11 +401,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"
@ -498,6 +527,141 @@ public class PrepareInfoJobTest {
} }
@Test
public void projectOrganizationTest1() throws Exception {
PrepareInfo
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest")
.getPath(),
"-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<KeyValueSet> tmp = sc
.textFile(workingDir.toString() + "/projectOrganization/")
.map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class));
Dataset<KeyValueSet> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class));
Assertions.assertEquals(5, verificationDs.count());
Assertions
.assertEquals(
2, verificationDs
.filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'")
.collectAsList()
.get(0)
.getValueSet()
.size());
Assertions
.assertTrue(
verificationDs
.filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
verificationDs
.filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertEquals(
2, verificationDs
.filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'")
.collectAsList()
.get(0)
.getValueSet()
.size());
Assertions
.assertTrue(
verificationDs
.filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"));
Assertions
.assertTrue(
verificationDs
.filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertEquals(
1, verificationDs
.filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'")
.collectAsList()
.get(0)
.getValueSet()
.size());
Assertions
.assertTrue(
verificationDs
.filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertEquals(
1, verificationDs
.filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'")
.collectAsList()
.get(0)
.getValueSet()
.size());
Assertions
.assertTrue(
verificationDs
.filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertEquals(
1, verificationDs
.filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'")
.collectAsList()
.get(0)
.getValueSet()
.size());
Assertions
.assertTrue(
verificationDs
.filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'")
.collectAsList()
.get(0)
.getValueSet()
.contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"));
verificationDs
.foreach((ForeachFunction<KeyValueSet>) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v)));
}
@Test @Test
public void foundLeavesTest1() throws Exception { public void foundLeavesTest1() throws Exception {
@ -507,11 +671,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"
@ -534,11 +699,12 @@ public class PrepareInfoJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphPath", getClass() "-graphPath", getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1")
.getPath(), .getPath(),
"-hive_metastore_uris", "", "-hive_metastore_uris", "",
"-leavesPath", workingDir.toString() + "/currentIteration/", "-leavesPath", workingDir.toString() + "/currentIteration/",
"-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/",
"-projectOrganizationPath", workingDir.toString() + "/projectOrganization/",
"-childParentPath", workingDir.toString() + "/childParentOrg/", "-childParentPath", workingDir.toString() + "/childParentOrg/",
"-relationPath", workingDir.toString() + "/relation" "-relationPath", workingDir.toString() + "/relation"

View File

@ -0,0 +1,900 @@
package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.KeyValueSet;
import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class SparkJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(PrepareInfoJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareInfoJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void completeResultExecution() throws Exception {
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
.getPath();
final String leavesPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath();
final String childParentPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath();
final String resultOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath();
final String projectOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
.getPath();
readPath(spark, leavesPath, Leaves.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/leavesInput");
readPath(spark, resultOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/orgsInput");
readPath(spark, projectOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/projectInput");
SparkResultToOrganizationFromSemRel
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-relationPath", graphPath,
"-hive_metastore_uris", "",
"-outputPath", workingDir.toString() + "/finalrelation",
"-leavesPath", workingDir.toString() + "/leavesInput",
"-resultOrgPath", workingDir.toString() + "/orgsInput",
"-projectOrganizationPath", workingDir.toString() + "/projectInput",
"-childParentPath", childParentPath,
"-workingDir", workingDir.toString()
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> temp = sc
.textFile(workingDir.toString() + "/finalrelation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Assertions.assertEquals(36, temp.count());
JavaRDD<Relation> result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|"));
Assertions.assertEquals(18, result.count());
result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
result
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance()));
result
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
r.getDataInfo().getProvenanceaction().getClassid()));
result
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME,
r.getDataInfo().getProvenanceaction().getClassname()));
result
.foreach(
r -> Assertions
.assertEquals(
"0.85",
r.getDataInfo().getTrust()));
Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
result
.filter(r -> r.getSource().substring(0, 3).equals("50|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
Assertions
.assertEquals(
2,
result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
3,
result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2,
result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
1,
result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
1,
result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
result
.filter(r -> r.getSource().substring(0, 3).equals("20|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass()));
Assertions
.assertEquals(
1,
result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
1,
result.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2,
result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
2,
result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
3,
result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
result
.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
}
@Test
public void completeProjectExecution() throws Exception {
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
.getPath();
final String leavesPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath();
final String childParentPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath();
final String resultOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath();
final String projectOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
.getPath();
readPath(spark, leavesPath, Leaves.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/leavesInput");
readPath(spark, resultOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/orgsInput");
readPath(spark, projectOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/projectInput");
SparkResultToOrganizationFromSemRel
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-relationPath", graphPath,
"-hive_metastore_uris", "",
"-outputPath", workingDir.toString() + "/finalrelation",
"-leavesPath", workingDir.toString() + "/leavesInput",
"-resultOrgPath", workingDir.toString() + "/orgsInput",
"-projectOrganizationPath", workingDir.toString() + "/projectInput",
"-childParentPath", childParentPath,
"-workingDir", workingDir.toString()
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> temp = sc
.textFile(workingDir.toString() + "/finalrelation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Assertions.assertEquals(36, temp.count());
JavaRDD<Relation> project = temp
.filter(r -> r.getSource().startsWith("40|") || r.getTarget().startsWith("40|"));
Assertions.assertEquals(18, project.count());
project.foreach(r -> Assertions.assertEquals(ModelConstants.PARTICIPATION, r.getSubRelType()));
project.foreach(r -> Assertions.assertEquals(ModelConstants.PROJECT_ORGANIZATION, r.getRelType()));
project
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance()));
project
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID,
r.getDataInfo().getProvenanceaction().getClassid()));
project
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME,
r.getDataInfo().getProvenanceaction().getClassname()));
project
.foreach(
r -> Assertions
.assertEquals(
"0.85",
r.getDataInfo().getTrust()));
Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("40|")).count());
project
.filter(r -> r.getSource().substring(0, 3).equals("40|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_PARTICIPANT, r.getRelClass()));
Assertions
.assertEquals(
2,
project.filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
3,
project.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2,
project.filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
1,
project.filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
1,
project.filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
project
.filter(r -> r.getSource().substring(0, 3).equals("20|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_PARTICIPANT, r.getRelClass()));
Assertions
.assertEquals(
1,
project.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
1,
project.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2,
project.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
2,
project.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
3,
project.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("40|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("40|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
project
.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
}
@Test
public void singleIterationExecution() throws Exception {
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
.getPath();
final String leavesPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath();
final String childParentPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath();
final String resultOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath();
final String projectOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
.getPath();
readPath(spark, leavesPath, Leaves.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/leavesInput");
readPath(spark, resultOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/orgsInput");
readPath(spark, projectOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/projectInput");
SparkResultToOrganizationFromSemRel
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-relationPath", graphPath,
"-hive_metastore_uris", "",
"-outputPath", workingDir.toString() + "/finalrelation",
"-leavesPath", workingDir.toString() + "/leavesInput",
"-resultOrgPath", workingDir.toString() + "/orgsInput",
"-projectOrganizationPath", workingDir.toString() + "/projectInput",
"-childParentPath", childParentPath,
"-workingDir", workingDir.toString(),
"-iterations", "1"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> temp = sc
.textFile(workingDir.toString() + "/finalrelation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Assertions.assertEquals(16, temp.count());
Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("50|")).count());
Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("50|")).count());
Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("40|")).count());
Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("40|")).count());
Assertions.assertEquals(8, temp.filter(r -> r.getSource().startsWith("20|")).count());
Assertions.assertEquals(8, temp.filter(r -> r.getSource().startsWith("20|")).count());
// JavaRDD<Relation> result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|"));
// Assertions.assertEquals(18, result.count());
// result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
// result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
// result
// .foreach(
// r -> Assertions
// .assertEquals(
// PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance()));
// result
// .foreach(
// r -> Assertions
// .assertEquals(
// PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
// r.getDataInfo().getProvenanceaction().getClassid()));
// result
// .foreach(
// r -> Assertions
// .assertEquals(
// PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME,
// r.getDataInfo().getProvenanceaction().getClassname()));
// result
// .foreach(
// r -> Assertions
// .assertEquals(
// "0.85",
// r.getDataInfo().getTrust()));
//
// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
// result
// .filter(r -> r.getSource().substring(0, 3).equals("50|"))
// .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
// Assertions
// .assertEquals(
// 2, result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
// Assertions
// .assertEquals(
// 3, result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
// Assertions
// .assertEquals(
// 2, result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
// Assertions
// .assertEquals(
// 1, result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
// Assertions
// .assertEquals(
// 1, result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
//
// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
// result
// .filter(r -> r.getSource().substring(0, 3).equals("20|"))
// .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass()));
// Assertions
// .assertEquals(
// 1, result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
// Assertions
// .assertEquals(
// 1, result.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
// Assertions
// .assertEquals(
// 2, result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
// Assertions
// .assertEquals(
// 2, result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
// Assertions
// .assertEquals(
// 3, result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
//
// Assertions
// .assertTrue(
// result
// .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
// .map(r -> r.getTarget())
// .collect()
// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel; package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
@ -73,21 +73,22 @@ public class StepActionsTest {
.execStep( .execStep(
spark, getClass() spark, getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result")
.getPath(), .getPath(),
workingDir.toString() + "/newRelationPath", workingDir.toString() + "/newRelationPath",
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath()); .getPath(),
ModelConstants.HAS_AUTHOR_INSTITUTION);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -203,19 +204,19 @@ public class StepActionsTest {
spark, spark,
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs");
@ -248,19 +249,19 @@ public class StepActionsTest {
spark, spark,
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs");

View File

@ -1,325 +0,0 @@
package eu.dnetlib.dhp.resulttoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.KeyValueSet;
import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class SparkJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(PrepareInfoJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(PrepareInfoJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void completeExecution() throws Exception {
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep")
.getPath();
final String leavesPath = getClass()
.getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/")
.getPath();
final String childParentPath = getClass()
.getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/")
.getPath();
final String resultOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/")
.getPath();
readPath(spark, leavesPath, Leaves.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/leavesInput");
readPath(spark, resultOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/orgsInput");
SparkResultToOrganizationFromSemRel
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-relationPath", graphPath,
"-hive_metastore_uris", "",
"-outputPath", workingDir.toString() + "/finalrelation",
"-leavesPath", workingDir.toString() + "/leavesInput",
"-resultOrgPath", workingDir.toString() + "/orgsInput",
"-childParentPath", childParentPath,
"-workingDir", workingDir.toString()
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> tmp = sc
.textFile(workingDir.toString() + "/finalrelation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
Assertions.assertEquals(18, tmp.count());
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
tmp
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance()));
tmp
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
r.getDataInfo().getProvenanceaction().getClassid()));
tmp
.foreach(
r -> Assertions
.assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME,
r.getDataInfo().getProvenanceaction().getClassname()));
tmp
.foreach(
r -> Assertions
.assertEquals(
"0.85",
r.getDataInfo().getTrust()));
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
tmp
.filter(r -> r.getSource().substring(0, 3).equals("50|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
Assertions
.assertEquals(
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
tmp
.filter(r -> r.getSource().substring(0, 3).equals("20|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass()));
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions
.assertEquals(
2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions
.assertEquals(
2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions
.assertEquals(
3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget())
.collect()
.contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget())
.collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions
.assertTrue(
tmp
.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget())
.collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
}
}

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","validated":false}

View File

@ -0,0 +1,5 @@
{"key":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]}
{"key":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]}
{"key":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]}
{"key":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]}
{"key":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]}

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}

View File

@ -1,14 +0,0 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}