
changed due to changes in the model

Miriam Baglioni 2020-07-29 17:32:02 +02:00
parent 481585e9d3
commit a2f73ec2c7
3 changed files with 26 additions and 198 deletions

DumpJobTest.java

@@ -27,7 +27,6 @@ import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Software;
 
-//@ExtendWith(MockitoExtension.class)
 public class DumpJobTest {
 
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@@ -36,8 +35,6 @@ public class DumpJobTest {
 
 	private static Path workingDir;
 
-	private static String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
-
 	private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
 
 	private static CommunityMap map = new CommunityMap();
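The removed MOCK_IS_LOOK_UP_URL constant and Mockito wiring are no longer needed: after the model change the tests hand the community map to the jobs directly. A minimal sketch, assuming CommunityMap (which replaces HashMap<String, String> in the next file) behaves as a plain id-to-label map; the entries are taken from the test data below:

	// Sketch under an assumption: CommunityMap is used here as the
	// id -> label map that the replaced HashMap<String, String> was.
	CommunityMap map = new CommunityMap();
	map.put("egi", "EGI Federation");
	map.put("dh-ch", "Digital Humanities and Cultural Heritage");
	map.put("ni", "Neuroinformatics");
	map.put("science-innovation-policy", "Science and Innovation Policy Studies");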

SplitForCommunityTest.java

@@ -1,14 +1,14 @@
 package eu.dnetlib.dhp.oa.graph.dump;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
 
-import eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -23,7 +23,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.gson.Gson;
 
 import eu.dnetlib.dhp.schema.dump.oaf.Result;
@@ -35,11 +34,9 @@ public class SplitForCommunityTest {
 
 	private static Path workingDir;
 
-	private static String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
-
 	private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
 
-	private static HashMap<String, String> map = new HashMap<>();
+	private static CommunityMap map = new CommunityMap();
 
 	static {
 		map.put("egi", "EGI Federation");
@@ -68,49 +65,6 @@ public class SplitForCommunityTest {
 	}
 
-//	@Mock
-//	private SparkDumpCommunityProducts dumpCommunityProducts;
-
-//	private QueryInformationSystem queryInformationSystem;
-
-//	@Mock
-//	private ISLookUpService isLookUpService;
-
-	List<String> communityMap = Arrays
-		.asList(
-			"<community id=\"egi\" label=\"EGI Federation\"/>",
-			"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
-			"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
-			"<community id=\"clarin\" label=\"CLARIN\"/>",
-			"<community id=\"rda\" label=\"Research Data Alliance\"/>",
-			"<community id=\"ee\" label=\"SDSN - Greece\"/>",
-			"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
-			"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
-			"<community id=\"ni\" label=\"Neuroinformatics\"/>",
-			"<community id=\"mes\" label=\"European Marine Science\"/>",
-			"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
-			"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
-			"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
-			"<community id=\"dariah\" label=\"DARIAH EU\"/>",
-			"<community id=\"risis\" label=\"RISIS\"/>",
-			"<community id=\"epos\" label=\"EPOS\"/>",
-			"<community id=\"beopen\" label=\"Transport Research\"/>",
-			"<community id=\"euromarine\" label=\"EuroMarine\"/>",
-			"<community id=\"ifremer\" label=\"Ifremer\"/>",
-			"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
-			"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
-			"<community id=\"covid-19\" label=\"COVID-19\"/>",
-			"<community id=\"enermaps\" label=\"Energy Research\"/>");
-
-	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
-		+
-		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
-		" return " +
-		"<community> " +
-		"{$x//CONFIGURATION/context/@id}" +
-		"{$x//CONFIGURATION/context/@label}" +
-		"</community>";
-
 	@BeforeAll
 	public static void beforeAll() throws IOException {
 		workingDir = Files.createTempDirectory(SplitForCommunityTest.class.getSimpleName());
@@ -133,12 +87,6 @@ public class SplitForCommunityTest {
 			.getOrCreate();
 	}
 
-//	@BeforeEach
-//	public void setUp() throws ISLookUpException {
-//		lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap);
-//		lenient().when(dumpCommunityProducts.getIsLookUpService(MOCK_IS_LOOK_UP_URL)).thenReturn(isLookUpService);
-//
-//	}
 
 	@AfterAll
 	public static void afterAll() throws IOException {
@@ -147,28 +95,24 @@ public class SplitForCommunityTest {
 	}
 
 	@Test
-	public void test1() throws Exception {
+	public void test1() {
 
 		final String sourcePath = getClass()
 			.getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
 			.getPath();
 
-		SparkSplitForCommunity.main(new String[] {
-			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/split",
-			"-sourcePath", sourcePath,
-			"-communityMap", new Gson().toJson(map)
-		});
+		CommunitySplit split = new CommunitySplit();
+
+		split.run(false, sourcePath, workingDir.toString() + "/split", map);
 
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 
-		JavaRDD<Result> tmp = sc
+		JavaRDD<CommunityResult> tmp = sc
 			.textFile(workingDir.toString() + "/split/dh-ch")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 
-		org.apache.spark.sql.Dataset<Result> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
+		org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
 
 		Assertions.assertEquals(19, verificationDataset.count());
@@ -178,10 +122,10 @@ public class SplitForCommunityTest {
 		tmp = sc
 			.textFile(workingDir.toString() + "/split/egi")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 
 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
+			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
 
 		Assertions.assertEquals(1, verificationDataset.count());
@@ -191,10 +135,10 @@ public class SplitForCommunityTest {
 		tmp = sc
 			.textFile(workingDir.toString() + "/split/ni")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 
 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
+			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
 
 		Assertions.assertEquals(5, verificationDataset.count());
@@ -204,12 +148,12 @@ public class SplitForCommunityTest {
 		tmp = sc
 			.textFile(workingDir.toString() + "/split/science-innovation-policy")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 
 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
+			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
 
-		Assertions.assertEquals(5, verificationDataset.count());
+		Assertions.assertEquals(4, verificationDataset.count());
 		Assertions
 			.assertEquals(
@@ -224,119 +168,5 @@ public class SplitForCommunityTest {
 			.assertEquals(
 				1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/fet-fp7")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/fet-h2020")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/clarin")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/rda")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/ee")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/fam")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/mes")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/instruct")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/elixir-gr")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/aginfra")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/dariah")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/risis")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/epos")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/beopen")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/euromarine")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/ifremer")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/oa-pg")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/covid-19")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/enermaps")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(0, tmp.count());
 	}
 }
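The new call site above pins down the refactored entry point: where SparkSplitForCommunity.main parsed -isLookUpUrl, -isSparkSessionManaged, -sourcePath, -outputPath and a JSON-encoded community map from CLI arguments, CommunitySplit.run receives the same values as plain parameters. A hedged sketch of a signature consistent with that call; only the argument order and types come from the test, the parameter names are illustrative assumptions:

	public class CommunitySplit {

		// Inferred from: split.run(false, sourcePath, workingDir + "/split", map);
		// parameter names are assumptions, not taken from the source.
		public void run(Boolean isSparkSessionManaged, String inputPath,
			String outputPath, CommunityMap communityMap) {
			// splits the dump, writing the records of each community
			// under outputPath/<community id>
		}
	}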

UpdateProjectInfoTest.java

@@ -6,8 +6,8 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.HashMap;
 
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 import org.apache.commons.io.FileUtils;
-import org.apache.neethi.Assertion;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 
+import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo;
 import eu.dnetlib.dhp.schema.dump.oaf.Result;
 
 public class UpdateProjectInfoTest {
public class UpdateProjectInfoTest { public class UpdateProjectInfoTest {
@@ -82,12 +83,12 @@ public class UpdateProjectInfoTest {
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 
-		JavaRDD<Result> tmp = sc
+		JavaRDD<CommunityResult> tmp = sc
 			.textFile(workingDir.toString() + "/result")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 
-		org.apache.spark.sql.Dataset<Result> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
+		org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
 
 		verificationDataset.show(false);
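Both rewritten tests verify output through the same read-back pattern visible in the hunks above: the dump is a directory of JSON-lines files, so each line is deserialized with Jackson and the resulting RDD is wrapped into a typed Dataset via Encoders.bean for count/filter assertions. A self-contained sketch of that pattern (it assumes CommunityResult is a Java bean, which the use of Encoders.bean in the tests implies):

	import com.fasterxml.jackson.databind.ObjectMapper;
	import org.apache.spark.api.java.JavaRDD;
	import org.apache.spark.api.java.JavaSparkContext;
	import org.apache.spark.sql.Dataset;
	import org.apache.spark.sql.Encoders;
	import org.apache.spark.sql.SparkSession;

	import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;

	class DumpReadBack {

		private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

		// Reads a JSON-lines dump back as a typed Dataset, ready for assertions.
		static Dataset<CommunityResult> read(SparkSession spark, String path) {
			JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
			JavaRDD<CommunityResult> rdd = sc
				.textFile(path) // one serialized CommunityResult per line
				.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
			return spark.createDataset(rdd.rdd(), Encoders.bean(CommunityResult.class));
		}
	}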