forked from D-Net/dnet-hadoop
adding test classes and modifying test for bulktag
This commit is contained in:
parent
69dac91659
commit
18bfff8af3
|
@ -1,9 +1,116 @@
|
||||||
package eu.dnetlib.dhp.api;/**
|
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.api;
|
||||||
|
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||||
|
import eu.dnetlib.dhp.api.model.CommunityModel;
|
||||||
|
import eu.dnetlib.dhp.api.model.CommunitySummary;
|
||||||
|
import eu.dnetlib.dhp.api.model.DatasourceList;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||||
|
|
||||||
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/10/23
|
||||||
* @Date 20/10/23
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
public class QueryCommunityAPITest {
|
public class QueryCommunityAPITest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void communityList() throws Exception {
|
||||||
|
String body = QueryCommunityAPI.communities(true);
|
||||||
|
new ObjectMapper()
|
||||||
|
.readValue(body, CommunitySummary.class)
|
||||||
|
.forEach(p -> {
|
||||||
|
try {
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(p));
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void community() throws Exception {
|
||||||
|
String id = "dh-ch";
|
||||||
|
String body = QueryCommunityAPI.community(id, true);
|
||||||
|
System.out
|
||||||
|
.println(
|
||||||
|
new ObjectMapper()
|
||||||
|
.writeValueAsString(
|
||||||
|
new ObjectMapper()
|
||||||
|
.readValue(body, CommunityModel.class)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void communityDatasource() throws Exception {
|
||||||
|
String id = "dh-ch";
|
||||||
|
String body = QueryCommunityAPI.communityDatasource(id, true);
|
||||||
|
new ObjectMapper()
|
||||||
|
.readValue(body, DatasourceList.class)
|
||||||
|
.forEach(ds -> {
|
||||||
|
try {
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(ds));
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validCommunities() throws Exception {
|
||||||
|
CommunityConfiguration cc = Utils.getCommunityConfiguration(true);
|
||||||
|
System.out.println(cc.getCommunities().keySet());
|
||||||
|
Community community = cc.getCommunities().get("aurora");
|
||||||
|
Assertions.assertEquals(0, community.getSubjects().size());
|
||||||
|
Assertions.assertEquals(null, community.getConstraints());
|
||||||
|
Assertions.assertEquals(null, community.getRemoveConstraints());
|
||||||
|
Assertions.assertEquals(2, community.getZenodoCommunities().size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
|
||||||
|
Assertions.assertEquals(35, community.getProviders().size());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void eutopiaCommunityConfiguration() throws Exception {
|
||||||
|
CommunityConfiguration cc = Utils.getCommunityConfiguration(true);
|
||||||
|
System.out.println(cc.getCommunities().keySet());
|
||||||
|
Community community = cc.getCommunities().get("eutopia");
|
||||||
|
community.getProviders().forEach(p -> System.out.println(p.getOpenaireId()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getCommunityProjects() throws Exception {
|
||||||
|
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
projectMap
|
||||||
|
.keySet()
|
||||||
|
.stream()
|
||||||
|
.allMatch(k -> k.startsWith("40|")));
|
||||||
|
|
||||||
|
System.out.println(projectMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getCommunityOrganizations() throws Exception {
|
||||||
|
CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true);
|
||||||
|
Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|")));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
@ -98,14 +99,11 @@ public class BulkTagJobTest {
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath",
|
"-sourcePath",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -133,19 +131,16 @@ public class BulkTagJobTest {
|
||||||
@Test
|
@Test
|
||||||
void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String pathMap = BulkTagJobTest.pathMap;
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -230,19 +225,19 @@ public class BulkTagJobTest {
|
||||||
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
|
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String pathMap = BulkTagJobTest.pathMap;
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -311,18 +306,18 @@ public class BulkTagJobTest {
|
||||||
@Test
|
@Test
|
||||||
void bulktagByDatasourceTest() throws Exception {
|
void bulktagByDatasourceTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
|
||||||
"-outputPath", workingDir.toString() + "/publication",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -384,25 +379,25 @@ public class BulkTagJobTest {
|
||||||
void bulktagByZenodoCommunityTest() throws Exception {
|
void bulktagByZenodoCommunityTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
|
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
|
||||||
"-outputPath", workingDir.toString() + "/orp",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<OtherResearchProduct> tmp = sc
|
JavaRDD<OtherResearchProduct> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/orp")
|
.textFile(workingDir.toString() + "/otherresearchproduct")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, tmp.count());
|
Assertions.assertEquals(10, tmp.count());
|
||||||
|
@ -505,18 +500,18 @@ public class BulkTagJobTest {
|
||||||
@Test
|
@Test
|
||||||
void bulktagBySubjectDatasourceTest() throws Exception {
|
void bulktagBySubjectDatasourceTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -636,14 +631,14 @@ public class BulkTagJobTest {
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath",
|
"-sourcePath",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
"-outputPath", workingDir.toString() + "/software",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -732,18 +727,18 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -774,19 +769,19 @@ public class BulkTagJobTest {
|
||||||
void bulkTagOtherJupyter() throws Exception {
|
void bulkTagOtherJupyter() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct")
|
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
|
||||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -829,18 +824,18 @@ public class BulkTagJobTest {
|
||||||
public void bulkTagDatasetJupyter() throws Exception {
|
public void bulkTagDatasetJupyter() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/jupyter/dataset")
|
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -878,18 +873,18 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/jupyter/software")
|
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
"-outputPath", workingDir.toString() + "/software",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
|
||||||
void galaxyOtherTest() throws Exception {
|
void galaxyOtherTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct")
|
"/eu/dnetlib/dhp/eosctag/galaxy/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
|
||||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
|
||||||
void galaxySoftwareTest() throws Exception {
|
void galaxySoftwareTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/galaxy/software")
|
"/eu/dnetlib/dhp/eosctag/galaxy/")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
"-outputPath", workingDir.toString() + "/software",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
|
||||||
void twitterDatasetTest() throws Exception {
|
void twitterDatasetTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/twitter/dataset")
|
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
|
||||||
void twitterOtherTest() throws Exception {
|
void twitterOtherTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct")
|
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
|
||||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
|
||||||
void twitterSoftwareTest() throws Exception {
|
void twitterSoftwareTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/eosctag/twitter/software")
|
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
"-outputPath", workingDir.toString() + "/software",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
|
||||||
void EoscContextTagTest() throws Exception {
|
void EoscContextTagTest() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json")
|
"/eu/dnetlib/dhp/bulktag/eosc/dataset/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath",
|
"-sourcePath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/",
|
||||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
|
||||||
"-pathMap", pathMap
|
"-pathMap", pathMap
|
||||||
});
|
});
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
@ -1568,4 +1563,42 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void newConfTest() throws Exception {
|
||||||
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
|
SparkBulkTagJob
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath",
|
||||||
|
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
|
||||||
|
"-taggingConf", taggingConf,
|
||||||
|
|
||||||
|
"-outputPath", workingDir.toString() + "/",
|
||||||
|
"-production", Boolean.TRUE.toString(),
|
||||||
|
"-pathMap", pathMap
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Dataset> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(10, tmp.count());
|
||||||
|
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
|
||||||
|
|
||||||
|
verificationDataset.createOrReplaceTempView("dataset");
|
||||||
|
|
||||||
|
String query = "select id, MyT.id community "
|
||||||
|
+ "from dataset "
|
||||||
|
+ "lateral view explode(context) c as MyT "
|
||||||
|
+ "lateral view explode(MyT.datainfo) d as MyD "
|
||||||
|
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||||
|
|
||||||
|
Assertions.assertEquals(0, spark.sql(query).count());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
|
||||||
sc.setVerb("not_contains");
|
sc.setVerb("not_contains");
|
||||||
sc.setField("contributor");
|
sc.setField("contributor");
|
||||||
sc.setValue("DARIAH");
|
sc.setValue("DARIAH");
|
||||||
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
||||||
String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
|
String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
|
||||||
Assertions.assertFalse(sc.verifyCriteria(metadata));
|
Assertions.assertFalse(sc.verifyCriteria(metadata));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue