added test for the report generation

Miriam Baglioni 2020-11-04 13:20:16 +01:00
parent c694457acc
commit b610d08399
1 changed file with 94 additions and 2 deletions

@@ -2,15 +2,18 @@ package eu.dnetlib.dhp.oa.graph.clean;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.clean.authorpids.*;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
@@ -60,6 +63,60 @@ public class CleanOrcidTest {
        spark.stop();
    }

    @Test
    public void loadOrcid() {
        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // scan the ORCID sequence file and print the payload of one known author,
        // to eyeball the structure of the records in the fixture
        sc.sequenceFile(orcidInputPath, Text.class, Text.class)
            .foreach(pair -> {
                OrcidAuthotitative tmp = OBJECT_MAPPER
                    .readValue(pair._2().toString(), OrcidAuthotitative.class);
                if (tmp.getOid().equalsIgnoreCase("0000-0001-6689-5079")) {
                    System.out.println(pair._2().toString());
                }
            });
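
        // a stronger variant (sketch): count the matches instead of printing.
        // Assumptions, not verified against the fixture: Assertions comes from
        // org.junit.jupiter.api and exactly one record carries this ORCID iD
        long matches = sc
            .sequenceFile(orcidInputPath, Text.class, Text.class)
            .filter(pair -> {
                OrcidAuthotitative oa = OBJECT_MAPPER
                    .readValue(pair._2().toString(), OrcidAuthotitative.class);
                return oa.getOid() != null && oa.getOid().equalsIgnoreCase("0000-0001-6689-5079");
            })
            .count();
        Assertions.assertEquals(1, matches);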
    }

    @Test
    public void makeReportTest() throws Exception {
        final String inputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
            .getPath();

        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz")
            .getPath();

        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        MakeReportSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir + "/reports",
            "-inputPath", inputPath,
            "-preparedInfoPath", preparedInfoPath,
            "-orcidInputPath", orcidInputPath,
            "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // read the report back from the path the job writes to (/reports)
        JavaRDD<ResultInfo> tmp = sc
            .textFile(workingDir.toString() + "/reports")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
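
        // minimal sanity check (sketch): the mapping above is lazy and never runs on
        // its own. Assumptions: the report is written as plain-text part files under
        // /reports, its records deserialise as ResultInfo, and Assertions is imported
        Assertions.assertTrue(tmp.count() > 0);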
    }

    @Test
    public void prepareInfoTest() throws Exception {
@@ -80,7 +137,42 @@ public class CleanOrcidTest {
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));

        tmp.foreach(info -> System.out.println(OBJECT_MAPPER.writeValueAsString(info)));
    }

    @Test
    public void cleanAuthors() throws Exception {
        final String inputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
            .getPath();

        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz")
            .getPath();

        final String orcidInputPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/clean/authors.seq")
            .getPath();

        CleanAuthorPidsSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir + "/cleaned",
            "-inputPath", inputPath,
            "-preparedInfoPath", preparedInfoPath,
            "-orcidInputPath", orcidInputPath,
            "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // read back the cleaned records
        JavaRDD<ResultInfo> tmp = sc
            .textFile(workingDir.toString() + "/cleaned")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultInfo.class));
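
        // minimal sanity check (sketch), under the same assumptions as in
        // makeReportTest: forcing the pipeline verifies the cleaned output
        // exists and deserialises
        Assertions.assertTrue(tmp.count() > 0);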
    }
}