forked from D-Net/dnet-hadoop
added test for the report generation
This commit is contained in:
parent
c694457acc
commit
b610d08399
|
@ -2,15 +2,18 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.PrepareResultsSparkJob;
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.ResultInfo;
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.*;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
|
@ -60,6 +63,60 @@ public class CleanOrcidTest {
|
|||
spark.stop();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void loadOrcid(){
|
||||
|
||||
final String orcidInputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq" )
|
||||
.getPath();
|
||||
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
sc.sequenceFile(orcidInputPath, Text.class, Text.class)
|
||||
.foreach(pair -> {
|
||||
OrcidAuthotitative tmp = OBJECT_MAPPER.readValue(pair._2().toString(), OrcidAuthotitative.class);
|
||||
if (tmp.getOid().equalsIgnoreCase("0000-0001-6689-5079")) {
|
||||
System.out.println(pair._2().toString());
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void makeReportTest() throws Exception {
|
||||
final String inputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz" )
|
||||
.getPath();
|
||||
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz" )
|
||||
.getPath();
|
||||
|
||||
final String orcidInputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq" )
|
||||
.getPath();
|
||||
|
||||
MakeReportSparkJob.main(new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-outputPath", workingDir + "/reports",
|
||||
"-inputPath", inputPath,
|
||||
"-preparedInfoPath", preparedInfoPath,
|
||||
"-orcidInputPath", orcidInputPath,
|
||||
"-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResultInfo> tmp = sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void prepareInfoTest() throws Exception {
|
||||
|
||||
|
@ -80,7 +137,42 @@ public class CleanOrcidTest {
|
|||
.textFile(workingDir.toString() + "/preparedInfo")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
|
||||
|
||||
tmp.foreach(rdd -> System.out.println(OBJECT_MAPPER.writeValueAsString(rdd)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void cleanAuthors() throws Exception {
|
||||
|
||||
final String inputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz" )
|
||||
.getPath();
|
||||
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz" )
|
||||
.getPath();
|
||||
|
||||
final String orcidInputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq" )
|
||||
.getPath();
|
||||
|
||||
CleanAuthorPidsSparkJob.main(new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-outputPath", workingDir + "/cleaned",
|
||||
"-inputPath", inputPath,
|
||||
"-preparedInfoPath", preparedInfoPath,
|
||||
"-orcidInputPath", orcidInputPath,
|
||||
"-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResultInfo> tmp = sc
|
||||
.textFile(workingDir.toString() + "/cleaned")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue