propagation of projects through parent-child relations #299

Merged
claudio.atzori merged 4 commits from propagationProjectThroughParentChils into beta 2023-06-12 09:57:21 +02:00
12 changed files with 921 additions and 127 deletions
Showing only changes of commit 97d72d41c3 - Show all commits

View File

@ -174,6 +174,13 @@ public class PropagationConstant {
return newRelations; return newRelations;
} }
/**
 * Builds a relation from {@code source} to {@code target}, choosing the factory by relation class.
 *
 * @param source identifier used as the relation source
 * @param target identifier used as the relation target
 * @param rel_class relation class; when it equals {@code ModelConstants.HAS_PARTICIPANT} the
 *            relation is built by {@code getParticipantRelation}, in every other case (including
 *            {@code null}) by {@code getAffiliationRelation}
 * @return the relation produced by the selected factory
 */
public static Relation getRelation(String source, String target, String rel_class) {
    // the constant is the receiver of equals, so a null rel_class simply selects the affiliation branch
    return ModelConstants.HAS_PARTICIPANT.equals(rel_class)
        ? getParticipantRelation(source, target, rel_class)
        : getAffiliationRelation(source, target, rel_class);
}
public static Relation getParticipantRelation( public static Relation getParticipantRelation(
String source, String source,
String target, String target,

View File

@ -49,7 +49,7 @@ public class PrepareInfo implements Serializable {
// associate projects to all the participant orgs // associate projects to all the participant orgs
private static final String PROJECT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + private static final String PROJECT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " +
"FROM relation " + "FROM relation " +
"WHERE lower(relclass) = '" + ModelConstants.IS_PARTICIPANT.toLowerCase() + "WHERE lower(relclass) = '" + ModelConstants.HAS_PARTICIPANT.toLowerCase() +
"' and datainfo.deletedbyinference = false " + "' and datainfo.deletedbyinference = false " +
"GROUP BY source"; "GROUP BY source";
@ -103,7 +103,7 @@ public class PrepareInfo implements Serializable {
} }
private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath,
String currentIterationPath, String resultOrganizationPath, String resultProjectPath, String relationPath) { String currentIterationPath, String resultOrganizationPath, String projectOrganizationPath, String relationPath) {
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class); Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
@ -129,7 +129,7 @@ public class PrepareInfo implements Serializable {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(resultProjectPath); .json(projectOrganizationPath);
relation relation
.filter( .filter(
@ -143,7 +143,7 @@ public class PrepareInfo implements Serializable {
relation relation
.filter( .filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() && (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equals(ModelConstants.IS_PARTICIPANT)) r.getRelClass().equals(ModelConstants.HAS_PARTICIPANT))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")

View File

@ -39,7 +39,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
.toString( .toString(
SparkResultToOrganizationFromIstRepoJob.class SparkResultToOrganizationFromIstRepoJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -129,7 +129,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
notReachedFirstParent); notReachedFirstParent);
doPropagate( doPropagate(
spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath,
workingPath, outputPath, propagationCounter); workingPath, outputPath, propagationCounter);
} }
@ -142,20 +142,20 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
StepActions StepActions
.execStep( .execStep(
spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH, spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH,
leavesPath, childParentPath, resultOrganizationPath); leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION);
addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath);
StepActions StepActions
.execStep( .execStep(
spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH, spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH,
leavesPath, childParentPath, projectOrganizationPath); leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT);
addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath); addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath);
} }
private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath,
String resultOrganizationPath, String graphPath, String workingPath, String outputPath, String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath, String outputPath,
PropagationCounter propagationCounter) { PropagationCounter propagationCounter) {
int iteration = 0; int iteration = 0;
long leavesCount; long leavesCount;
@ -164,13 +164,18 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
iteration++; iteration++;
StepActions StepActions
.execStep( .execStep(
spark, graphPath, workingPath + NEW_RESULT_RELATION_PATH, spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH,
leavesPath, childParentPath, resultOrganizationPath); leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION);
StepActions
.execStep(
spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH,
leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT);
StepActions StepActions
.prepareForNextStep( .prepareForNextStep(
spark, workingPath + NEW_RESULT_RELATION_PATH, resultOrganizationPath, leavesPath, spark, workingPath , resultOrganizationPath, projectOrganizationPath, leavesPath,
childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); childParentPath, workingPath + "/leaves", workingPath + "/resOrg", workingPath + "/projOrg");
moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); moveOutput(spark, workingPath, leavesPath, resultOrganizationPath, projectOrganizationPath);
leavesCount = readPath(spark, leavesPath, Leaves.class).count(); leavesCount = readPath(spark, leavesPath, Leaves.class).count();
} while (leavesCount > 0 && iteration < MAX_ITERATION); } while (leavesCount > 0 && iteration < MAX_ITERATION);
@ -199,6 +204,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
} }
addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath);
addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath);
} }
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
@ -217,6 +223,28 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
} }
/**
 * Copies the three datasets found under {@code workingPath} onto their target locations,
 * overwriting what is there and writing gzip-compressed JSON.
 *
 * @param spark the active Spark session
 * @param workingPath directory holding the {@code /leaves}, {@code /resOrg} and {@code /projOrg} datasets
 * @param leavesPath destination of the {@code /leaves} dataset
 * @param resultOrganizationPath destination of the {@code /resOrg} dataset
 * @param projectOrganizationPath destination of the {@code /projOrg} dataset
 */
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
    String resultOrganizationPath, String projectOrganizationPath) {

    // each dataset is read and written before the next one is touched
    final Dataset<Leaves> leaves = readPath(spark, workingPath + "/leaves", Leaves.class);
    leaves.write().mode(SaveMode.Overwrite).option("compression", "gzip").json(leavesPath);

    final Dataset<KeyValueSet> resultOrganization = readPath(spark, workingPath + "/resOrg", KeyValueSet.class);
    resultOrganization.write().mode(SaveMode.Overwrite).option("compression", "gzip").json(resultOrganizationPath);

    final Dataset<KeyValueSet> projectOrganization = readPath(spark, workingPath + "/projOrg", KeyValueSet.class);
    projectOrganization.write().mode(SaveMode.Overwrite).option("compression", "gzip").json(projectOrganizationPath);
}
private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) {
Dataset<Relation> relation = readPath(spark, newRelationPath, Relation.class); Dataset<Relation> relation = readPath(spark, newRelationPath, Relation.class);
@ -237,7 +265,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
return Arrays return Arrays
.asList( .asList(
r, getParticipantRelation( r, getParticipantRelation(
r.getTarget(), r.getSource(), ModelConstants.HAS_PARTICIPANT)) r.getTarget(), r.getSource(), ModelConstants.IS_PARTICIPANT))
.iterator(); .iterator();
} }
} }

View File

@ -3,6 +3,8 @@ package eu.dnetlib.dhp.entitytoorganizationfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.PropagationConstant.readPath; import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH;
import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
@ -26,13 +28,14 @@ public class StepActions implements Serializable {
public static void execStep(SparkSession spark, public static void execStep(SparkSession spark,
String graphPath, String newRelationPath, String graphPath, String newRelationPath,
String leavesPath, String chldParentOrgPath, String entityOrgPath) { String leavesPath, String chldParentOrgPath, String entityOrgPath, String rel_class) {
Dataset<Relation> relationGraph = readPath(spark, graphPath, Relation.class); Dataset<Relation> relationGraph = readPath(spark, graphPath, Relation.class);
// select only the relation source target among those proposed by propagation that are not already existent // select only the relation source target among those proposed by propagation that are not already existent
getNewRels( getNewRels(
newRelationPath, relationGraph, newRelationPath, relationGraph,
getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, ModelConstants.HAS_AUTHOR_INSTITUTION)); getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, rel_class));
} }
@ -43,16 +46,30 @@ public class StepActions implements Serializable {
changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath);
// add the new relations obtained from propagation to the keyvalueset result organization // add the new relations obtained from propagation to the keyvalueset result organization
updateResultOrganization( updateEntityOrganization(
spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath); spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath);
} }
private static void updateResultOrganization(SparkSession spark, String resultOrgPath, public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath, String projectOrgPath,
Dataset<Relation> selectedRels, String outputPath) { String leavesPath, String chldParentOrgPath, String leavesOutputPath,
Dataset<KeyValueSet> resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); String orgOutputPath, String outputProjectPath) {
resultOrg // use of the parents as new leaves set
changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath);
// add the new relations obtained from propagation to the keyvalueset result organization
updateEntityOrganization(
spark, resultOrgPath, readPath(spark, selectedRelsPath + NEW_RESULT_RELATION_PATH, Relation.class), orgOutputPath);
updateEntityOrganization(
spark, projectOrgPath, readPath(spark, selectedRelsPath + NEW_PROJECT_RELATION_PATH, Relation.class), outputProjectPath);
}
private static void updateEntityOrganization(SparkSession spark, String entityOrgPath,
Dataset<Relation> selectedRels, String outputPath) {
Dataset<KeyValueSet> entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class);
entityOrg
.joinWith( .joinWith(
selectedRels, resultOrg selectedRels, entityOrg
.col("key") .col("key")
.equalTo(selectedRels.col("source")), .equalTo(selectedRels.col("source")),
"left") "left")
@ -111,38 +128,45 @@ public class StepActions implements Serializable {
// construction of the set) // construction of the set)
// if at least one relation in the set was not produced by propagation no new relation will be returned // if at least one relation in the set was not produced by propagation no new relation will be returned
relationDataset relationDataset
.union(newRels) .union(newRels)
.groupByKey((MapFunction<Relation, String>) r -> r.getSource() + r.getTarget(), Encoders.STRING()) .groupByKey((MapFunction<Relation, String>) r -> r.getSource() + r.getTarget(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Relation, String>) (k, it) -> { .mapGroups((MapGroupsFunction<String, Relation, String>) (k, it) -> {
ArrayList<Relation> relationList = new ArrayList<>(); ArrayList<Relation> relationList = new ArrayList<>();
relationList.add(it.next()); relationList.add(it.next());
it.forEachRemaining(rel -> relationList.add(rel)); it.forEachRemaining(rel -> relationList.add(rel));
if (relationList if (relationList
.stream() .stream()
.filter( .filter(
rel -> !rel rel -> !rel
.getDataInfo() .getDataInfo()
.getProvenanceaction() .getProvenanceaction()
.getClassid() .getClassid()
.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID) && !rel
.count() > 0) { .getDataInfo()
return null; .getProvenanceaction()
} .getClassid()
.equals(PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID))
.count() > 0) {
return null;
}
return new ObjectMapper().writeValueAsString(relationList.get(0));
}, Encoders.STRING())
.filter(Objects::nonNull)
.map(
(MapFunction<String, Relation>) r -> new ObjectMapper().readValue(r, Relation.class),
Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(newRelationPath);
return new ObjectMapper().writeValueAsString(relationList.get(0));
}, Encoders.STRING())
.filter(Objects::nonNull)
.map(
(MapFunction<String, Relation>) r -> new ObjectMapper().readValue(r, Relation.class),
Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(newRelationPath);
} }
@ -172,20 +196,21 @@ public class StepActions implements Serializable {
"ON leaves.value = cp.child " + "ON leaves.value = cp.child " +
"JOIN (" + "JOIN (" +
"SELECT key as entityId, org " + "SELECT key as entityId, org " +
"FROM resultOrg " + "FROM entityOrg " +
"LATERAL VIEW explode (valueSet) ks as org ) as ro " + "LATERAL VIEW explode (valueSet) ks as org ) as ro " +
"ON leaves.value = ro.org " + "ON leaves.value = ro.org " +
"GROUP BY resId") "GROUP BY entityId")
.as(Encoders.bean(KeyValueSet.class)); .as(Encoders.bean(KeyValueSet.class));
// create new relations from result to organization for each result linked to a leaf
// create new relations from entity to organization for each entity linked to a leaf
return resultParent return resultParent
.flatMap( .flatMap(
(FlatMapFunction<KeyValueSet, Relation>) v -> v (FlatMapFunction<KeyValueSet, Relation>) v -> v
.getValueSet() .getValueSet()
.stream() .stream()
.map( .map(
orgId -> getAffiliationRelation( orgId -> getRelation(
v.getKey(), v.getKey(),
orgId, orgId,
semantics)) semantics))

View File

@ -361,6 +361,37 @@ public class PrepareInfoJobTest {
} }
/**
 * Runs {@code PrepareInfo} on the {@code projectorganizationtest} fixture and checks that
 * seven relations are written under {@code <workingDir>/relation/project}.
 */
@Test
public void relationProjectTest() throws Exception {
    final String base = workingDir.toString();
    final String graphPath = getClass()
        .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest")
        .getPath();

    final String[] args = {
        "-isSparkSessionManaged", Boolean.FALSE.toString(),
        "-graphPath", graphPath,
        "-hive_metastore_uris", "",
        "-leavesPath", base + "/currentIteration/",
        "-resultOrgPath", base + "/resultOrganization/",
        "-projectOrganizationPath", base + "/projectOrganization/",
        "-childParentPath", base + "/childParentOrg/",
        "-relationPath", base + "/relation"
    };
    PrepareInfo.main(args);

    // read back the relations written for projects, one JSON document per line
    final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    final JavaRDD<Relation> parsed = sc
        .textFile(base + "/relation/project")
        .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

    final Dataset<Relation> verificationDs = spark.createDataset(parsed.rdd(), Encoders.bean(Relation.class));

    Assertions.assertEquals(7, verificationDs.count());
}
@Test @Test
public void resultOrganizationTest1() throws Exception { public void resultOrganizationTest1() throws Exception {
@ -496,6 +527,141 @@ public class PrepareInfoJobTest {
} }
/**
 * Runs {@code PrepareInfo} on the {@code projectorganizationtest} fixture and verifies the
 * key/value sets written under {@code <workingDir>/projectOrganization/}: five keys overall,
 * and for each key the expected size and members of its value set.
 */
@Test
public void projectOrganizationTest1() throws Exception {
    final String base = workingDir.toString();
    final String graphPath = getClass()
        .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest")
        .getPath();

    final String[] args = {
        "-isSparkSessionManaged", Boolean.FALSE.toString(),
        "-graphPath", graphPath,
        "-hive_metastore_uris", "",
        "-leavesPath", base + "/currentIteration/",
        "-resultOrgPath", base + "/resultOrganization/",
        "-projectOrganizationPath", base + "/projectOrganization/",
        "-childParentPath", base + "/childParentOrg/",
        "-relationPath", base + "/relation"
    };
    PrepareInfo.main(args);

    final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    final JavaRDD<KeyValueSet> parsed = sc
        .textFile(base + "/projectOrganization/")
        .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class));

    final Dataset<KeyValueSet> verificationDs = spark.createDataset(parsed.rdd(), Encoders.bean(KeyValueSet.class));

    Assertions.assertEquals(5, verificationDs.count());

    // first entry stored for a key; evaluated anew on every call, exactly like the inlined chains it replaces
    final java.util.function.Function<String, KeyValueSet> entryFor = key -> verificationDs
        .filter("key = '" + key + "'")
        .collectAsList()
        .get(0);

    final String project2baa = "40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f";
    Assertions.assertEquals(2, entryFor.apply(project2baa).getValueSet().size());
    Assertions
        .assertTrue(
            entryFor.apply(project2baa).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
    Assertions
        .assertTrue(
            entryFor.apply(project2baa).getValueSet().contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d"));

    final String projectDedup = "40|dedup_wf_001::2899e571609779168222fdeb59cb916d";
    Assertions.assertEquals(2, entryFor.apply(projectDedup).getValueSet().size());
    Assertions
        .assertTrue(
            entryFor.apply(projectDedup).getValueSet().contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"));
    Assertions
        .assertTrue(
            entryFor.apply(projectDedup).getValueSet().contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d"));

    final String project0374 = "40|doajarticles::03748bcb5d754c951efec9700e18a56d";
    Assertions.assertEquals(1, entryFor.apply(project0374).getValueSet().size());
    Assertions
        .assertTrue(
            entryFor.apply(project0374).getValueSet().contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));

    final String projectOpenaire = "40|openaire____::ec653e804967133b9436fdd30d3ff51d";
    Assertions.assertEquals(1, entryFor.apply(projectOpenaire).getValueSet().size());
    Assertions
        .assertTrue(
            entryFor.apply(projectOpenaire).getValueSet().contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));

    final String project1cae = "40|doajarticles::1cae0b82b56ccd97c2db1f698def7074";
    Assertions.assertEquals(1, entryFor.apply(project1cae).getValueSet().size());
    Assertions
        .assertTrue(
            entryFor.apply(project1cae).getValueSet().contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"));

    // dump every entry as JSON on standard output
    verificationDs
        .foreach((ForeachFunction<KeyValueSet>) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v)));
}
@Test @Test
public void foundLeavesTest1() throws Exception { public void foundLeavesTest1() throws Exception {

View File

@ -65,25 +65,30 @@ public class SparkJobTest {
} }
@Test @Test
public void completeExecution() throws Exception { public void completeResultExecution() throws Exception {
final String graphPath = getClass() final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep") .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
.getPath(); .getPath();
final String leavesPath = getClass() final String leavesPath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(); .getPath();
final String childParentPath = getClass() final String childParentPath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(); .getPath();
final String resultOrgPath = getClass() final String resultOrgPath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath(); .getPath();
final String projectOrgPath = getClass()
.getResource(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
.getPath();
readPath(spark, leavesPath, Leaves.class) readPath(spark, leavesPath, Leaves.class)
.write() .write()
.option("compression", "gzip") .option("compression", "gzip")
@ -94,6 +99,11 @@ public class SparkJobTest {
.option("compression", "gzip") .option("compression", "gzip")
.json(workingDir.toString() + "/orgsInput"); .json(workingDir.toString() + "/orgsInput");
readPath(spark, projectOrgPath, KeyValueSet.class)
.write()
.option("compression", "gzip")
.json(workingDir.toString() + "/projectInput");
SparkResultToOrganizationFromSemRel SparkResultToOrganizationFromSemRel
.main( .main(
@ -104,95 +114,97 @@ public class SparkJobTest {
"-outputPath", workingDir.toString() + "/finalrelation", "-outputPath", workingDir.toString() + "/finalrelation",
"-leavesPath", workingDir.toString() + "/leavesInput", "-leavesPath", workingDir.toString() + "/leavesInput",
"-resultOrgPath", workingDir.toString() + "/orgsInput", "-resultOrgPath", workingDir.toString() + "/orgsInput",
"-projectOrganizationPath", workingDir.toString() + "/projectInput",
"-childParentPath", childParentPath, "-childParentPath", childParentPath,
"-workingDir", workingDir.toString() "-workingDir", workingDir.toString()
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Relation> tmp = sc JavaRDD<Relation> temp = sc
.textFile(workingDir.toString() + "/finalrelation") .textFile(workingDir.toString() + "/finalrelation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); Assertions.assertEquals(36, temp.count());
Assertions.assertEquals(18, tmp.count()); JavaRDD<Relation> result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|"));
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); Assertions.assertEquals(18, result.count());
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
tmp result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
result
.foreach( .foreach(
r -> Assertions r -> Assertions
.assertEquals( .assertEquals(
PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance()));
tmp result
.foreach( .foreach(
r -> Assertions r -> Assertions
.assertEquals( .assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
r.getDataInfo().getProvenanceaction().getClassid())); r.getDataInfo().getProvenanceaction().getClassid()));
tmp result
.foreach( .foreach(
r -> Assertions r -> Assertions
.assertEquals( .assertEquals(
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME,
r.getDataInfo().getProvenanceaction().getClassname())); r.getDataInfo().getProvenanceaction().getClassname()));
tmp result
.foreach( .foreach(
r -> Assertions r -> Assertions
.assertEquals( .assertEquals(
"0.85", "0.85",
r.getDataInfo().getTrust())); r.getDataInfo().getTrust()));
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
tmp result
.filter(r -> r.getSource().substring(0, 3).equals("50|")) .filter(r -> r.getSource().substring(0, 3).equals("50|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
Assertions Assertions
.assertEquals( .assertEquals(
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); 2, result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions Assertions
.assertEquals( .assertEquals(
3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); 3, result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions Assertions
.assertEquals( .assertEquals(
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); 2, result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); 1, result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); 1, result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
tmp result
.filter(r -> r.getSource().substring(0, 3).equals("20|")) .filter(r -> r.getSource().substring(0, 3).equals("20|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass()));
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); 1, result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); 1, result.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count());
Assertions Assertions
.assertEquals( .assertEquals(
2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); 2, result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count());
Assertions Assertions
.assertEquals( .assertEquals(
2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); 2, result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count());
Assertions Assertions
.assertEquals( .assertEquals(
3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); 3, result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count());
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -200,14 +212,14 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -215,21 +227,21 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -237,7 +249,7 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -245,7 +257,7 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -253,14 +265,14 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -268,14 +280,14 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -283,21 +295,21 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"));
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -305,7 +317,7 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
@ -313,11 +325,555 @@ public class SparkJobTest {
Assertions Assertions
.assertTrue( .assertTrue(
tmp result
.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"))
.map(r -> r.getTarget()) .map(r -> r.getTarget())
.collect() .collect()
.contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"));
} }
/**
 * Runs {@link SparkResultToOrganizationFromSemRel} on the {@code execstep} fixtures (no {@code -iterations}
 * argument is passed) and checks the project/organization relations found in the output.
 *
 * <p>The test expects 36 relations overall, 18 of which involve a project ({@code 40|} prefix): 9 going from
 * project to organization ({@code hasParticipant}) and 9 in the opposite direction ({@code isParticipant}).
 * Every project relation must carry the propagation provenance and a trust of {@code "0.85"}.
 *
 * @throws Exception if the Spark job or the deserialization of its output fails
 */
@Test
public void completeProjectExecution() throws Exception {
	final String graphPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
		.getPath();
	final String leavesPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
		.getPath();
	final String childParentPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
		.getPath();
	final String resultOrgPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
		.getPath();
	final String projectOrgPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
		.getPath();

	final String workDir = workingDir.toString();

	// Copy the fixtures under the working directory: the job is pointed at these copies, not at the resources.
	readPath(spark, leavesPath, Leaves.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/leavesInput");
	readPath(spark, resultOrgPath, KeyValueSet.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/orgsInput");
	readPath(spark, projectOrgPath, KeyValueSet.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/projectInput");

	SparkResultToOrganizationFromSemRel
		.main(
			new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-relationPath", graphPath,
				"-hive_metastore_uris", "",
				"-outputPath", workDir + "/finalrelation",
				"-leavesPath", workDir + "/leavesInput",
				"-resultOrgPath", workDir + "/orgsInput",
				"-projectOrganizationPath", workDir + "/projectInput",
				"-childParentPath", childParentPath,
				"-workingDir", workDir
			});

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	final JavaRDD<Relation> temp = sc
		.textFile(workDir + "/finalrelation")
		.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

	Assertions.assertEquals(36, temp.count());

	// Cached because dozens of actions below would otherwise re-read and re-parse the output each time.
	final JavaRDD<Relation> project = temp
		.filter(r -> r.getSource().startsWith("40|") || r.getTarget().startsWith("40|"))
		.cache();
	Assertions.assertEquals(18, project.count());

	// Properties shared by every propagated project relation, checked in a single pass.
	project.foreach(r -> {
		Assertions.assertEquals(ModelConstants.PARTICIPATION, r.getSubRelType());
		Assertions.assertEquals(ModelConstants.PROJECT_ORGANIZATION, r.getRelType());
		Assertions
			.assertEquals(
				PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance());
		Assertions
			.assertEquals(
				PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID,
				r.getDataInfo().getProvenanceaction().getClassid());
		Assertions
			.assertEquals(
				PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME,
				r.getDataInfo().getProvenanceaction().getClassname());
		Assertions.assertEquals("0.85", r.getDataInfo().getTrust());
	});

	// Identifiers without the entity-type prefix; "40|" (project) or "20|" (organization) is prepended below.
	final String doaj1cae = "doajarticles::1cae0b82b56ccd97c2db1f698def7074";
	final String dedup2899 = "dedup_wf_001::2899e571609779168222fdeb59cb916d";
	final String doaj2baa = "doajarticles::2baa9032dc058d3c8ff780c426b0c19f";
	final String openaireEc65 = "openaire____::ec653e804967133b9436fdd30d3ff51d";
	final String doaj0374 = "doajarticles::03748bcb5d754c951efec9700e18a56d";

	// project -> organization direction
	final JavaRDD<Relation> fromProject = project.filter(r -> r.getSource().startsWith("40|"));
	Assertions.assertEquals(9, fromProject.count());
	fromProject.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_PARTICIPANT, r.getRelClass()));

	Assertions.assertEquals(2, countWithSource(project, "40|" + doaj1cae));
	Assertions.assertEquals(3, countWithSource(project, "40|" + dedup2899));
	Assertions.assertEquals(2, countWithSource(project, "40|" + doaj2baa));
	Assertions.assertEquals(1, countWithSource(project, "40|" + openaireEc65));
	Assertions.assertEquals(1, countWithSource(project, "40|" + doaj0374));

	// organization -> project direction
	final JavaRDD<Relation> fromOrganization = project.filter(r -> r.getSource().startsWith("20|"));
	Assertions.assertEquals(9, fromOrganization.count());
	fromOrganization.foreach(r -> Assertions.assertEquals(ModelConstants.IS_PARTICIPANT, r.getRelClass()));

	Assertions.assertEquals(1, countWithSource(project, "20|" + doaj1cae));
	Assertions.assertEquals(1, countWithSource(project, "20|" + dedup2899));
	Assertions.assertEquals(2, countWithSource(project, "20|" + doaj2baa));
	Assertions.assertEquals(2, countWithSource(project, "20|" + openaireEc65));
	Assertions.assertEquals(3, countWithSource(project, "20|" + doaj0374));

	// expected project -> organization pairs
	assertLinked(project, "40|" + doaj1cae, "20|" + doaj1cae);
	assertLinked(project, "40|" + doaj1cae, "20|" + openaireEc65);

	assertLinked(project, "40|" + doaj2baa, "20|" + doaj2baa);
	assertLinked(project, "40|" + doaj2baa, "20|" + doaj0374);

	assertLinked(project, "40|" + dedup2899, "20|" + dedup2899);
	assertLinked(project, "40|" + dedup2899, "20|" + doaj2baa);
	assertLinked(project, "40|" + dedup2899, "20|" + doaj0374);

	assertLinked(project, "40|" + openaireEc65, "20|" + openaireEc65);

	assertLinked(project, "40|" + doaj0374, "20|" + doaj0374);

	// expected organization -> project pairs
	assertLinked(project, "20|" + openaireEc65, "40|" + doaj1cae);
	assertLinked(project, "20|" + openaireEc65, "40|" + openaireEc65);

	assertLinked(project, "20|" + doaj2baa, "40|" + dedup2899);
	assertLinked(project, "20|" + doaj2baa, "40|" + doaj2baa);

	assertLinked(project, "20|" + doaj0374, "40|" + dedup2899);
	assertLinked(project, "20|" + doaj0374, "40|" + doaj2baa);
	assertLinked(project, "20|" + doaj0374, "40|" + doaj0374);

	assertLinked(project, "20|" + dedup2899, "40|" + dedup2899);

	assertLinked(project, "20|" + doaj1cae, "40|" + doaj1cae);
}

/** Counts the relations in {@code relations} whose source is exactly {@code source}. */
private static long countWithSource(final JavaRDD<Relation> relations, final String source) {
	return relations.filter(r -> r.getSource().equals(source)).count();
}

/** Fails unless {@code relations} holds at least one relation going from {@code source} to {@code target}. */
private static void assertLinked(final JavaRDD<Relation> relations, final String source, final String target) {
	Assertions
		.assertTrue(
			relations
				.filter(r -> r.getSource().equals(source))
				.map(r -> r.getTarget())
				.collect()
				.contains(target),
			"missing relation " + source + " -> " + target);
}
/**
 * Runs {@link SparkResultToOrganizationFromSemRel} on the {@code execstep} fixtures with {@code -iterations 1}
 * and checks how many relations are written for each entity-type prefix.
 *
 * <p>The test expects 16 relations: 4 with a result ({@code 50|}) source, 4 with a project ({@code 40|})
 * source and 8 with an organization ({@code 20|}) source, with the same split on the target side.
 *
 * @throws Exception if the Spark job or the deserialization of its output fails
 */
@Test
public void singleIterationExecution() throws Exception {
	final String graphPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph")
		.getPath();
	final String leavesPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
		.getPath();
	final String childParentPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
		.getPath();
	final String resultOrgPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
		.getPath();
	final String projectOrgPath = getClass()
		.getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/")
		.getPath();

	final String workDir = workingDir.toString();

	// Copy the fixtures under the working directory: the job is pointed at these copies, not at the resources.
	readPath(spark, leavesPath, Leaves.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/leavesInput");
	readPath(spark, resultOrgPath, KeyValueSet.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/orgsInput");
	readPath(spark, projectOrgPath, KeyValueSet.class)
		.write()
		.option("compression", "gzip")
		.json(workDir + "/projectInput");

	SparkResultToOrganizationFromSemRel
		.main(
			new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-relationPath", graphPath,
				"-hive_metastore_uris", "",
				"-outputPath", workDir + "/finalrelation",
				"-leavesPath", workDir + "/leavesInput",
				"-resultOrgPath", workDir + "/orgsInput",
				"-projectOrganizationPath", workDir + "/projectInput",
				"-childParentPath", childParentPath,
				"-workingDir", workDir,
				"-iterations", "1"
			});

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	final JavaRDD<Relation> temp = sc
		.textFile(workDir + "/finalrelation")
		.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

	Assertions.assertEquals(16, temp.count());

	Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("50|")).count());
	Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("50|")).count());

	Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("40|")).count());
	Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("40|")).count());

	// Organizations are checked on both sides: as source and as target of the produced relations.
	Assertions.assertEquals(8, temp.filter(r -> r.getSource().startsWith("20|")).count());
	Assertions.assertEquals(8, temp.filter(r -> r.getTarget().startsWith("20|")).count());

	// TODO: add per-relation assertions (relClass, provenance, expected source/target pairs) specific to a
	// single iteration. A commented-out block copied from the full-run test used to follow here; it expected
	// 18 result relations, which cannot hold together with the counts asserted above, so it was dropped.
}
} }

View File

@ -73,21 +73,21 @@ public class StepActionsTest {
.execStep( .execStep(
spark, getClass() spark, getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result")
.getPath(), .getPath(),
workingDir.toString() + "/newRelationPath", workingDir.toString() + "/newRelationPath",
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath()); .getPath(), ModelConstants.HAS_AUTHOR_INSTITUTION);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -203,19 +203,19 @@ public class StepActionsTest {
spark, spark,
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs");
@ -248,19 +248,19 @@ public class StepActionsTest {
spark, spark,
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/")
.getPath(), .getPath(),
getClass() getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/")
.getPath(), .getPath(),
workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs");

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","validated":false}

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}

View File

@ -0,0 +1,5 @@
{"key":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]}
{"key":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]}
{"key":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]}
{"key":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]}
{"key":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]}

View File

@ -1,14 +0,0 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}

View File

@ -0,0 +1,7 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}