Added parameter file. Fixed issues in path name
This commit is contained in:
parent
5a3f0d949c
commit
94656b6530
|
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.ForeachFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -19,6 +20,8 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.eosc.model.Affiliation;
|
import eu.dnetlib.dhp.eosc.model.Affiliation;
|
||||||
import eu.dnetlib.dhp.eosc.model.Country;
|
import eu.dnetlib.dhp.eosc.model.Country;
|
||||||
|
@ -72,7 +75,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation");
|
Utils.removeOutputDir(spark, workingPath + resultType + "extendedaffiliation");
|
||||||
addOrganizations(spark, inputPath, workingPath, outputPath, resultType);
|
addOrganizations(spark, inputPath, workingPath, outputPath, resultType);
|
||||||
dumpOrganizationAndRelations(spark, inputPath, workingPath, outputPath, resultType);
|
dumpOrganizationAndRelations(spark, inputPath, workingPath, outputPath, resultType);
|
||||||
});
|
});
|
||||||
|
@ -98,7 +101,9 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
|
||||||
.joinWith(relation, result.col("id").equalTo(relation.col("source")))
|
.joinWith(relation, result.col("id").equalTo(relation.col("source")))
|
||||||
.map((MapFunction<Tuple2<Result, Relation>, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class));
|
.map((MapFunction<Tuple2<Result, Relation>, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class));
|
||||||
|
|
||||||
System.out.println(eoscRelation.count());
|
eoscRelation
|
||||||
|
.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
|
||||||
|
|
||||||
// from eoscRelation select the organization
|
// from eoscRelation select the organization
|
||||||
eoscRelation
|
eoscRelation
|
||||||
.joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id")))
|
.joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id")))
|
||||||
|
@ -106,6 +111,11 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
|
||||||
(MapFunction<Tuple2<Relation, Organization>, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization(
|
(MapFunction<Tuple2<Relation, Organization>, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization(
|
||||||
t2._2()),
|
t2._2()),
|
||||||
Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class))
|
Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class))
|
||||||
|
.groupByKey((MapFunction<eu.dnetlib.dhp.eosc.model.Organization, String>) o -> o.getId(), Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, eu.dnetlib.dhp.eosc.model.Organization, eu.dnetlib.dhp.eosc.model.Organization>) (
|
||||||
|
k, v) -> v.next(),
|
||||||
|
Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
@ -136,7 +146,9 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
|
||||||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
!r.getDataInfo().getInvisible() && r.getSubRelType().equalsIgnoreCase(ModelConstants.AFFILIATION));
|
!r.getDataInfo().getInvisible() && r.getSubRelType().equalsIgnoreCase(ModelConstants.AFFILIATION));
|
||||||
|
|
||||||
Dataset<Organization> organizations = Utils.readPath(spark, inputPath + "/organization", Organization.class);
|
Dataset<Organization> organizations = Utils
|
||||||
|
.readPath(spark, inputPath + "/organization", Organization.class)
|
||||||
|
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference());
|
||||||
|
|
||||||
Dataset<ResultOrganizations> resultOrganization = relations
|
Dataset<ResultOrganizations> resultOrganization = relations
|
||||||
.joinWith(organizations, relations.col("source").equalTo(organizations.col("id")), "left")
|
.joinWith(organizations, relations.col("source").equalTo(organizations.col("id")), "left")
|
||||||
|
|
|
@ -68,6 +68,9 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultType = parser.get("resultType");
|
||||||
|
log.info("resultType: {}", resultType);
|
||||||
|
|
||||||
final String dumpType = Optional
|
final String dumpType = Optional
|
||||||
.ofNullable(parser.get("dumpType"))
|
.ofNullable(parser.get("dumpType"))
|
||||||
.orElse(Constants.DUMPTYPE.COMMUNITY.getType());
|
.orElse(Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
@ -79,8 +82,8 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, workingPath + "publicationextendedproject");
|
Utils.removeOutputDir(spark, workingPath + resultType + "extendedproject");
|
||||||
extend(spark, inputPath, workingPath, preparedInfoPath, outputPath);
|
extend(spark, inputPath, workingPath, preparedInfoPath, outputPath, resultType);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,9 +92,10 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
String inputPath,
|
String inputPath,
|
||||||
String workingPath,
|
String workingPath,
|
||||||
String preparedInfoPath,
|
String preparedInfoPath,
|
||||||
String outputPath) {
|
String outputPath,
|
||||||
|
String resultType) {
|
||||||
|
|
||||||
Dataset<Result> result = Utils.readPath(spark, workingPath + "publicationextendedaffiliation", Result.class);
|
Dataset<Result> result = Utils.readPath(spark, workingPath + resultType + "extendedaffiliation", Result.class);
|
||||||
|
|
||||||
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
||||||
result
|
result
|
||||||
|
@ -106,7 +110,7 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Append)
|
.mode(SaveMode.Append)
|
||||||
.json(workingPath + "publicationextendedproject");
|
.json(workingPath + resultType + "extendedproject");
|
||||||
|
|
||||||
Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
|
Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
|
||||||
|
|
||||||
|
|
|
@ -388,6 +388,7 @@
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
<arg>--dumpType</arg><arg>eosc</arg>
|
<arg>--dumpType</arg><arg>eosc</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dump/</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/dump/</arg>
|
||||||
|
<arg>--resultType</arg><arg>publication</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="join_extend"/>
|
<ok to="join_extend"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -410,10 +411,12 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/datasetextendedaffiliation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/dump/datasetextendedproject</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/dump/</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
<arg>--dumpType</arg><arg>eosc</arg>
|
<arg>--dumpType</arg><arg>eosc</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/dump/</arg>
|
||||||
|
<arg>--resultType</arg><arg>dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="join_extend"/>
|
<ok to="join_extend"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -436,10 +439,12 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproductextendedaffiliation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproductextendedproject</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/dump/</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
<arg>--dumpType</arg><arg>eosc</arg>
|
<arg>--dumpType</arg><arg>eosc</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/dump/</arg>
|
||||||
|
<arg>--resultType</arg><arg>otherresearchproduct</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="join_extend"/>
|
<ok to="join_extend"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -462,11 +467,12 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/softwareextendedaffiliation
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/dump/</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/dump/softwareextendedproject</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
<arg>--dumpType</arg><arg>eosc</arg>
|
<arg>--dumpType</arg><arg>eosc</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/dump/</arg>
|
||||||
|
<arg>--resultType</arg><arg>software</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="join_extend"/>
|
<ok to="join_extend"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -35,6 +35,12 @@
|
||||||
"paramLongName": "workingPath",
|
"paramLongName": "workingPath",
|
||||||
"paramDescription": "the working path",
|
"paramDescription": "the working path",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "rt",
|
||||||
|
"paramLongName": "resultType",
|
||||||
|
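"paramDescription": "the type of the result being processed (publication, dataset, otherresearchproduct or software), used to build the working path names",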
|
||||||
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
@ -21,7 +22,9 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.eosc.model.Affiliation;
|
||||||
import eu.dnetlib.dhp.eosc.model.Organization;
|
import eu.dnetlib.dhp.eosc.model.Organization;
|
||||||
|
import eu.dnetlib.dhp.eosc.model.Relation;
|
||||||
import eu.dnetlib.dhp.eosc.model.Result;
|
import eu.dnetlib.dhp.eosc.model.Result;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -70,6 +73,139 @@ public class ExtendAffiliationTest {
|
||||||
spark.stop();
|
spark.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void ExtendEoscResultWithOrganizationTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String workingPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.textFile("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/publication")
|
||||||
|
.write()
|
||||||
|
.text(workingDir.toString() + "/working/publication");
|
||||||
|
|
||||||
|
ExtendEoscResultWithOrganizationStep2.main(new String[] {
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-outputPath", workingDir.toString() + "/",
|
||||||
|
"-sourcePath", sourcePath,
|
||||||
|
"-resultType", "publication",
|
||||||
|
"-workingPath", workingDir.toString() + "/working"
|
||||||
|
});
|
||||||
|
|
||||||
|
JavaRDD<Result> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/working/publicationextendedaffiliation")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(3, tmp.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
2,
|
||||||
|
tmp
|
||||||
|
.filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0)
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
2,
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation()
|
||||||
|
.size());
|
||||||
|
|
||||||
|
List<Affiliation> affiliations = tmp
|
||||||
|
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation();
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)")));
|
||||||
|
Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE")));
|
||||||
|
|
||||||
|
Affiliation organization = affiliations
|
||||||
|
.stream()
|
||||||
|
.filter(a -> a.getId().equalsIgnoreCase("20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff"))
|
||||||
|
.findFirst()
|
||||||
|
.get();
|
||||||
|
Assertions.assertEquals("Doris Engineering (France)", organization.getName());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
organization
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
p -> p.getValue().equalsIgnoreCase("grid.432986.2") && p.getType().equalsIgnoreCase("grid")));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
organization
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
p -> p.getValue().equalsIgnoreCase("https://ror.org/03nd0ms94")
|
||||||
|
&& p.getType().equalsIgnoreCase("ror")));
|
||||||
|
Assertions.assertEquals(2, organization.getPid().size());
|
||||||
|
|
||||||
|
organization = affiliations
|
||||||
|
.stream()
|
||||||
|
.filter(a -> a.getId().equalsIgnoreCase("20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017"))
|
||||||
|
.findFirst()
|
||||||
|
.get();
|
||||||
|
Assertions.assertEquals("RENNES METROPOLE", organization.getName());
|
||||||
|
Assertions.assertEquals(1, organization.getPid().size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
organization.getPid().get(0).getValue().equalsIgnoreCase("892062829")
|
||||||
|
&& organization.getPid().get(0).getType().equalsIgnoreCase("pic"));
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation()
|
||||||
|
.size());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"MIKARE RESEARCH",
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation()
|
||||||
|
.get(0)
|
||||||
|
.getName());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
0,
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation()
|
||||||
|
.get(0)
|
||||||
|
.getPid()
|
||||||
|
.size());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(
|
||||||
|
tmp
|
||||||
|
.filter(
|
||||||
|
r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f"))
|
||||||
|
.first()
|
||||||
|
.getAffiliation())
|
||||||
|
.isPresent());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void selectEoscResults() throws Exception {
|
public void selectEoscResults() throws Exception {
|
||||||
|
|
||||||
|
@ -80,9 +216,6 @@ public class ExtendAffiliationTest {
|
||||||
final String workingPath = getClass()
|
final String workingPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/")
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String mdp = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/masterduplicate")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
ExtendEoscResultWithOrganizationStep2.main(new String[] {
|
ExtendEoscResultWithOrganizationStep2.main(new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -98,18 +231,18 @@ public class ExtendAffiliationTest {
|
||||||
.textFile(workingDir.toString() + "/organization")
|
.textFile(workingDir.toString() + "/organization")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
|
||||||
|
|
||||||
|
JavaRDD<Relation> rels = sc
|
||||||
|
.textFile(workingDir.toString() + "/resultOrganization")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
|
||||||
|
|
||||||
System.out.println(tmp.count());
|
System.out.println(tmp.count());
|
||||||
|
|
||||||
// Assertions.assertEquals(3, tmp.count());
|
Assertions.assertEquals(2, tmp.count());
|
||||||
//
|
|
||||||
// Assertions
|
Assertions.assertEquals(2, rels.count());
|
||||||
// .assertEquals(
|
|
||||||
// 0,
|
rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|")));
|
||||||
// tmp
|
rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|")));
|
||||||
// .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0)
|
|
||||||
// .count());
|
|
||||||
//
|
|
||||||
// tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -128,136 +128,6 @@ public class SelectEoscResultTest {
|
||||||
// legalname = MIKARE RESEARCH
|
// legalname = MIKARE RESEARCH
|
||||||
// pid = []
|
// pid = []
|
||||||
// for 50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f no affiliation relation is provided
|
// for 50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f no affiliation relation is provided
|
||||||
@Test
|
|
||||||
public void ExtendEoscResultWithOrganizationTest() throws Exception {
|
|
||||||
final String sourcePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
final String cmp = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
String resultPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/publication")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
ExtendEoscResultWithOrganizationStep2.main(new String[] {
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"-outputPath", workingDir.toString() + "/publication",
|
|
||||||
"-sourcePath", sourcePath,
|
|
||||||
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
|
||||||
"-resultPath", resultPath
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<Result> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/publication")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(3, tmp.count());
|
|
||||||
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
2,
|
|
||||||
tmp
|
|
||||||
.filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0)
|
|
||||||
.count());
|
|
||||||
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
2,
|
|
||||||
tmp
|
|
||||||
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation()
|
|
||||||
.size());
|
|
||||||
|
|
||||||
List<Affiliation> affiliations = tmp
|
|
||||||
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation();
|
|
||||||
|
|
||||||
Assertions
|
|
||||||
.assertTrue(
|
|
||||||
affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)")));
|
|
||||||
Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE")));
|
|
||||||
|
|
||||||
Affiliation organization = affiliations
|
|
||||||
.stream()
|
|
||||||
.filter(a -> a.getId().equalsIgnoreCase("20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff"))
|
|
||||||
.findFirst()
|
|
||||||
.get();
|
|
||||||
Assertions.assertEquals("Doris Engineering (France)", organization.getName());
|
|
||||||
Assertions
|
|
||||||
.assertTrue(
|
|
||||||
organization
|
|
||||||
.getPid()
|
|
||||||
.stream()
|
|
||||||
.anyMatch(
|
|
||||||
p -> p.getValue().equalsIgnoreCase("grid.432986.2") && p.getType().equalsIgnoreCase("grid")));
|
|
||||||
Assertions
|
|
||||||
.assertTrue(
|
|
||||||
organization
|
|
||||||
.getPid()
|
|
||||||
.stream()
|
|
||||||
.anyMatch(
|
|
||||||
p -> p.getValue().equalsIgnoreCase("https://ror.org/03nd0ms94")
|
|
||||||
&& p.getType().equalsIgnoreCase("ror")));
|
|
||||||
Assertions.assertEquals(2, organization.getPid().size());
|
|
||||||
|
|
||||||
organization = affiliations
|
|
||||||
.stream()
|
|
||||||
.filter(a -> a.getId().equalsIgnoreCase("20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017"))
|
|
||||||
.findFirst()
|
|
||||||
.get();
|
|
||||||
Assertions.assertEquals("RENNES METROPOLE", organization.getName());
|
|
||||||
Assertions.assertEquals(1, organization.getPid().size());
|
|
||||||
Assertions
|
|
||||||
.assertTrue(
|
|
||||||
organization.getPid().get(0).getValue().equalsIgnoreCase("892062829")
|
|
||||||
&& organization.getPid().get(0).getType().equalsIgnoreCase("pic"));
|
|
||||||
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
1,
|
|
||||||
tmp
|
|
||||||
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation()
|
|
||||||
.size());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
"MIKARE RESEARCH",
|
|
||||||
tmp
|
|
||||||
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation()
|
|
||||||
.get(0)
|
|
||||||
.getName());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
0,
|
|
||||||
tmp
|
|
||||||
.filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation()
|
|
||||||
.get(0)
|
|
||||||
.getPid()
|
|
||||||
.size());
|
|
||||||
|
|
||||||
Assertions
|
|
||||||
.assertFalse(
|
|
||||||
Optional
|
|
||||||
.ofNullable(
|
|
||||||
tmp
|
|
||||||
.filter(
|
|
||||||
r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f"))
|
|
||||||
.first()
|
|
||||||
.getAffiliation())
|
|
||||||
.isPresent());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void verifyIndicatorsTest() throws Exception {
|
public void verifyIndicatorsTest() throws Exception {
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -9,4 +9,8 @@
|
||||||
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false}
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false}
|
||||||
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|pmid________::3a5bb2b50c18e755cbe67b9ca7d821ee","validated":false}
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|pmid________::3a5bb2b50c18e755cbe67b9ca7d821ee","validated":false}
|
||||||
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","target":"50|nora_uio__no::01152f3e683765695bbad68fc692b85e","validated":false}
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","target":"50|nora_uio__no::01152f3e683765695bbad68fc692b85e","validated":false}
|
||||||
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::0838366fa1df3c1599ddefc2168ada5d","subRelType":"affiliation","target":"50|arXiv_______::abe2b16af6067994dda4beab6410b35d","validated":false}
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::0838366fa1df3c1599ddefc2168ada5d","subRelType":"affiliation","target":"50|arXiv_______::abe2b16af6067994dda4beab6410b35d","validated":false}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false}
|
Loading…
Reference in New Issue