added test for the report generation

Miriam Baglioni 2020-11-04 13:20:16 +01:00
parent c694457acc
commit b610d08399
1 changed file with 94 additions and 2 deletions

@@ -2,15 +2,18 @@ package eu.dnetlib.dhp.oa.graph.clean;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.clean.authorpids.*;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
@@ -60,6 +63,60 @@ public class CleanOrcidTest {
        spark.stop();
    }

    @Test
    public void loadOrcid() {
        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // scan the ORCID sequence file and print the payload of one known author,
        // to eyeball the structure of the records in the fixture
        sc.sequenceFile(orcidInputPath, Text.class, Text.class)
            .foreach(pair -> {
                OrcidAuthotitative tmp = OBJECT_MAPPER
                    .readValue(pair._2().toString(), OrcidAuthotitative.class);
                if (tmp.getOid().equalsIgnoreCase("0000-0001-6689-5079")) {
                    System.out.println(pair._2().toString());
                }
            });
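
        // a stronger variant (sketch): count the matches instead of printing.
        // Assumptions, not verified against the fixture: Assertions comes from
        // org.junit.jupiter.api and exactly one record carries this ORCID iD
        long matches = sc
            .sequenceFile(orcidInputPath, Text.class, Text.class)
            .filter(pair -> {
                OrcidAuthotitative oa = OBJECT_MAPPER
                    .readValue(pair._2().toString(), OrcidAuthotitative.class);
                return oa.getOid() != null && oa.getOid().equalsIgnoreCase("0000-0001-6689-5079");
            })
            .count();
        Assertions.assertEquals(1, matches);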
    }

    @Test
    public void makeReportTest() throws Exception {
        final String inputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
            .getPath();

        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz")
            .getPath();

        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        MakeReportSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir + "/reports",
            "-inputPath", inputPath,
            "-preparedInfoPath", preparedInfoPath,
            "-orcidInputPath", orcidInputPath,
            "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // read the report back from the path the job writes to (/reports)
        JavaRDD<ResultInfo> tmp = sc
            .textFile(workingDir.toString() + "/reports")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
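
        // minimal sanity check (sketch): the mapping above is lazy and never runs on
        // its own. Assumptions: the report is written as plain-text part files under
        // /reports, its records deserialise as ResultInfo, and Assertions is imported
        Assertions.assertTrue(tmp.count() > 0);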
    }

    @Test
    public void prepareInfoTest() throws Exception {
@@ -80,7 +137,42 @@ public class CleanOrcidTest {
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));

        tmp.foreach(info -> System.out.println(OBJECT_MAPPER.writeValueAsString(info)));
    }

    @Test
    public void cleanAuthors() throws Exception {
        final String inputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
            .getPath();

        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz")
            .getPath();

        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        CleanAuthorPidsSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir + "/cleaned",
            "-inputPath", inputPath,
            "-preparedInfoPath", preparedInfoPath,
            "-orcidInputPath", orcidInputPath,
            "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // read back the cleaned records
        JavaRDD<ResultInfo> tmp = sc
            .textFile(workingDir.toString() + "/cleaned")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
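
        // minimal sanity check (sketch), under the same assumptions as in
        // makeReportTest: forcing the pipeline verifies the cleaned output
        // exists and deserialises
        Assertions.assertTrue(tmp.count() > 0);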
    }
}