[AffiliationFromPublisher] Added test, included new path parameter
This commit is contained in:
parent
7df492fa34
commit
a6195fb7de
|
@ -17,6 +17,7 @@ import org.apache.spark.api.java.JavaPairRDD;
|
|||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -76,8 +77,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
final String webcrawlInputPath = parser.get("webCrawlInputPath");
|
||||
log.info("webcrawlInputPath: {}", webcrawlInputPath);
|
||||
|
||||
final String publisherlInputPath = parser.get("publisherlInputPath");
|
||||
log.info("publisherlInputPath: {}", publisherlInputPath);
|
||||
final String publisherInputPath = parser.get("publisherInputPath");
|
||||
log.info("publisherInputPath: {}", publisherInputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
@ -89,7 +90,7 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Constants.removeOutputDir(spark, outputPath);
|
||||
createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherlInputPath, outputPath);
|
||||
createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherInputPath, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -136,9 +137,11 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
|
||||
private static JavaPairRDD<Text,Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,
|
||||
List<KeyValue> collectedfrom){
|
||||
|
||||
|
||||
Dataset<Row> df = spark
|
||||
.read()
|
||||
.schema("`DOI` STRING, `Organizations` ARRAY<STRUCT<RORid`:STRING,`Confidence`:DOUBLE>>")
|
||||
.schema("`DOI` STRING, `Organizations` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
|
||||
.json(inputPath)
|
||||
.where("DOI is not null");
|
||||
|
||||
|
|
|
@ -33,6 +33,11 @@
|
|||
"paramLongName": "webCrawlInputPath",
|
||||
"paramDescription": "the path to get the input data from Web Crawl",
|
||||
"paramRequired": true
|
||||
},{
|
||||
"paramName": "pip",
|
||||
"paramLongName": "publisherInputPath",
|
||||
"paramDescription": "the path to get the input data from publishers",
|
||||
"paramRequired": true
|
||||
}
|
||||
,
|
||||
{
|
||||
|
|
|
@ -78,6 +78,10 @@ public class PrepareAffiliationRelationsTest {
|
|||
.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
|
||||
.getPath();
|
||||
|
||||
String publisherAffiliationRelationPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/publishers")
|
||||
.getPath();
|
||||
|
||||
String outputPath = workingDir.toString() + "/actionSet";
|
||||
|
||||
PrepareAffiliationRelations
|
||||
|
@ -89,6 +93,7 @@ public class PrepareAffiliationRelationsTest {
|
|||
"-openapcInputPath", crossrefAffiliationRelationPath,
|
||||
"-dataciteInputPath", crossrefAffiliationRelationPath,
|
||||
"-webCrawlInputPath", crossrefAffiliationRelationPath,
|
||||
"-publisherInputPath", publisherAffiliationRelationPath,
|
||||
"-outputPath", outputPath
|
||||
});
|
||||
|
||||
|
@ -105,7 +110,7 @@ public class PrepareAffiliationRelationsTest {
|
|||
// );
|
||||
// }
|
||||
// count the number of relations
|
||||
assertEquals(120, tmp.count());
|
||||
assertEquals(138, tmp.count());
|
||||
|
||||
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
|
||||
dataset.createOrReplaceTempView("result");
|
||||
|
@ -116,7 +121,7 @@ public class PrepareAffiliationRelationsTest {
|
|||
// verify that we have equal number of bi-directional relations
|
||||
Assertions
|
||||
.assertEquals(
|
||||
60, execVerification
|
||||
69, execVerification
|
||||
.filter(
|
||||
"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
|
||||
.collectAsList()
|
||||
|
@ -124,7 +129,7 @@ public class PrepareAffiliationRelationsTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
60, execVerification
|
||||
69, execVerification
|
||||
.filter(
|
||||
"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
|
||||
.collectAsList()
|
||||
|
|
Loading…
Reference in New Issue