adding test classes and modifying test for bulktag

This commit is contained in:
Miriam Baglioni 2023-10-20 15:47:03 +02:00
parent 69dac91659
commit 18bfff8af3
3 changed files with 236 additions and 96 deletions

View File

@ -1,9 +1,116 @@
package eu.dnetlib.dhp.api;/**
package eu.dnetlib.dhp.api;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.CommunityModel;
import eu.dnetlib.dhp.api.model.CommunitySummary;
import eu.dnetlib.dhp.api.model.DatasourceList;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
/**
* @author miriam.baglioni
* @Date 20/10/23
* @Date 06/10/23
*/
public class QueryCommunityAPITest {
@Test
void communityList() throws Exception {
String body = QueryCommunityAPI.communities(true);
new ObjectMapper()
.readValue(body, CommunitySummary.class)
.forEach(p -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(p));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
});
}
@Test
void community() throws Exception {
String id = "dh-ch";
String body = QueryCommunityAPI.community(id, true);
System.out
.println(
new ObjectMapper()
.writeValueAsString(
new ObjectMapper()
.readValue(body, CommunityModel.class)));
}
@Test
void communityDatasource() throws Exception {
String id = "dh-ch";
String body = QueryCommunityAPI.communityDatasource(id, true);
new ObjectMapper()
.readValue(body, DatasourceList.class)
.forEach(ds -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(ds));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
});
;
}
@Test
void validCommunities() throws Exception {
CommunityConfiguration cc = Utils.getCommunityConfiguration(true);
System.out.println(cc.getCommunities().keySet());
Community community = cc.getCommunities().get("aurora");
Assertions.assertEquals(0, community.getSubjects().size());
Assertions.assertEquals(null, community.getConstraints());
Assertions.assertEquals(null, community.getRemoveConstraints());
Assertions.assertEquals(2, community.getZenodoCommunities().size());
Assertions
.assertTrue(
community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
Assertions
.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
Assertions.assertEquals(35, community.getProviders().size());
Assertions
.assertEquals(
35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count());
}
@Test
void eutopiaCommunityConfiguration() throws Exception {
CommunityConfiguration cc = Utils.getCommunityConfiguration(true);
System.out.println(cc.getCommunities().keySet());
Community community = cc.getCommunities().get("eutopia");
community.getProviders().forEach(p -> System.out.println(p.getOpenaireId()));
}
@Test
void getCommunityProjects() throws Exception {
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);
Assertions
.assertTrue(
projectMap
.keySet()
.stream()
.allMatch(k -> k.startsWith("40|")));
System.out.println(projectMap);
}
@Test
void getCommunityOrganizations() throws Exception {
CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true);
Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|")));
}
}

View File

@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.io.FileUtils;
@ -98,14 +99,11 @@ public class BulkTagJobTest {
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -133,19 +131,16 @@ public class BulkTagJobTest {
@Test
void bulktagBySubjectNoPreviousContextTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
.getPath();
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -230,19 +225,19 @@ public class BulkTagJobTest {
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
.getPath();
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -311,18 +306,18 @@ public class BulkTagJobTest {
@Test
void bulktagByDatasourceTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/publication",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -384,25 +379,25 @@ public class BulkTagJobTest {
void bulktagByZenodoCommunityTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/orp",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<OtherResearchProduct> tmp = sc
.textFile(workingDir.toString() + "/orp")
.textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, tmp.count());
@ -505,18 +500,18 @@ public class BulkTagJobTest {
@Test
void bulktagBySubjectDatasourceTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -636,14 +631,14 @@ public class BulkTagJobTest {
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(),
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -732,18 +727,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -774,19 +769,19 @@ public class BulkTagJobTest {
void bulkTagOtherJupyter() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct")
"/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -829,18 +824,18 @@ public class BulkTagJobTest {
public void bulkTagDatasetJupyter() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/dataset")
"/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -878,18 +873,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/software")
"/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
void galaxyOtherTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct")
"/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
void galaxySoftwareTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/software")
"/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
void twitterDatasetTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/dataset")
"/eu/dnetlib/dhp/eosctag/twitter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
void twitterOtherTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct")
"/eu/dnetlib/dhp/eosctag/twitter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
void twitterSoftwareTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/eosctag/twitter/software")
"/eu/dnetlib/dhp/eosctag/twitter/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
void EoscContextTagTest() throws Exception {
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json")
"/eu/dnetlib/dhp/bulktag/eosc/dataset/")
.getPath();
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
SparkBulkTagJob
.main(
new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath(),
"-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1568,4 +1563,42 @@ public class BulkTagJobTest {
}
@Test
void newConfTest() throws Exception {
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-production", Boolean.TRUE.toString(),
"-pathMap", pathMap
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count());
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
verificationDataset.createOrReplaceTempView("dataset");
String query = "select id, MyT.id community "
+ "from dataset "
+ "lateral view explode(context) c as MyT "
+ "lateral view explode(MyT.datainfo) d as MyD "
+ "where MyD.inferenceprovenance = 'bulktagging'";
Assertions.assertEquals(0, spark.sql(query).count());
}
}

View File

@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
sc.setVerb("not_contains");
sc.setField("contributor");
sc.setValue("DARIAH");
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue()));
sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
Assertions.assertFalse(sc.verifyCriteria(metadata));
}