[graph cleaning] applying coar based vocabularies in bulk

This commit is contained in:
Claudio Atzori 2023-11-22 12:22:14 +01:00
parent a24178cb93
commit 11a1207f9c
11 changed files with 294 additions and 109 deletions

View File

@ -21,10 +21,15 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
/**
@ -35,6 +40,12 @@ public class GroupEntitiesSparkJob {
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
private ArgumentApplicationParser parser;
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
this.parser = parser;
}
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -51,6 +62,17 @@ public class GroupEntitiesSparkJob {
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
}
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
throws ISLookUpException {
String graphInputPath = parser.get("graphInputPath");
log.info("graphInputPath: {}", graphInputPath);
@ -60,19 +82,21 @@ public class GroupEntitiesSparkJob {
String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible"));
boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
log.info("filterInvisible: {}", filterInvisible);
SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible);
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
});
}
@ -81,7 +105,7 @@ public class GroupEntitiesSparkJob {
String inputPath,
String checkpointPath,
String outputPath,
boolean filterInvisible) {
boolean filterInvisible, VocabularyGroup vocs) {
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
@ -106,10 +130,14 @@ public class GroupEntitiesSparkJob {
}
Dataset<?> groupedEntities = allEntities
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) (b, a) -> OafMapperUtils.mergeEntities(b, a))
.map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2(
(MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
.applyCoarVocabularies(entity, vocs),
OAFENTITY_KRYO_ENC)
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
.map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
t._2().getClass().getName(), t._2()),
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.time.LocalDate;
@ -784,4 +786,97 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s;
}
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
if (entity instanceof Result) {
final Result result = (Result) entity;
Optional
.ofNullable(result.getInstance())
.ifPresent(
instances -> instances
.forEach(
instance -> {
if (Objects.isNull(instance.getInstanceTypeMapping())) {
List<InstanceTypeMapping> mapping = Lists.newArrayList();
mapping
.add(
OafMapperUtils
.instanceTypeMapping(
instance.getInstancetype().getClassname(),
OPENAIRE_COAR_RESOURCE_TYPES_3_1));
instance.setInstanceTypeMapping(mapping);
}
Optional<InstanceTypeMapping> optionalItm = instance
.getInstanceTypeMapping()
.stream()
.filter(GraphCleaningFunctions::originalResourceType)
.findFirst();
if (optionalItm.isPresent()) {
InstanceTypeMapping coarItm = optionalItm.get();
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
.ifPresent(type -> {
coarItm.setTypeCode(type.getClassid());
coarItm.setTypeLabel(type.getClassname());
});
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
.ifPresent(
type -> mappings
.add(
OafMapperUtils
.instanceTypeMapping(coarItm.getTypeCode(), type)));
}
if (!mappings.isEmpty()) {
instance.getInstanceTypeMapping().addAll(mappings);
}
}
}));
result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
}
return entity;
}
private static boolean originalResourceType(InstanceTypeMapping itm) {
return StringUtils.isNotBlank(itm.getOriginalType()) &&
OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()) &&
StringUtils.isBlank(itm.getTypeCode()) &&
StringUtils.isBlank(itm.getTypeLabel());
}
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
Optional<InstanceTypeMapping> instanceTypeMapping = instances
.stream()
.flatMap(
i -> Optional.ofNullable(i.getInstanceTypeMapping()).map(Collection::stream).orElse(Stream.empty()))
.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
.findFirst();
if (!instanceTypeMapping.isPresent()) {
return null;
} else {
final String typeCode = instanceTypeMapping.get().getTypeCode();
return Optional
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
.orElseThrow(
() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
OPENAIRE_META_RESOURCE_TYPE));
}
} else {
throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
}
}
}

View File

@ -140,15 +140,28 @@ public class OafMapperUtils {
.collect(Collectors.toList());
}
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
String vocabularyName) {
final InstanceTypeMapping m = new InstanceTypeMapping();
m.setVocabularyName(term.getSchemeid());
m.setVocabularyName(vocabularyName);
m.setOriginalType(originalType);
m.setTypeCode(term.getClassid());
m.setTypeLabel(term.getClassname());
m.setTypeCode(code);
m.setTypeLabel(label);
return m;
}
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
return instanceTypeMapping(originalType, term.getClassid(), term.getClassname(), term.getSchemeid());
}
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
return instanceTypeMapping(originalType, null, null, null);
}
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
return instanceTypeMapping(originalType, null, null, vocabularyName);
}
public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
}

View File

@ -28,5 +28,11 @@
"paramLongName": "filterInvisible",
"paramDescription": "if true filters out invisible entities",
"paramRequired": true
},
{
"paramName": "isu",
"paramLongName": "isLookupUrl",
"paramDescription": "url to the ISLookup Service",
"paramRequired": true
}
]

View File

@ -125,9 +125,7 @@ public abstract class AbstractMdRecordToOafMapper {
final String type = getResultType(doc, instances);
final Qualifier metaResourceType = getMetaResourceType(instances);
return createOafs(doc, type, metaResourceType, instances, collectedFrom, entityInfo, lastUpdateTimestamp);
return createOafs(doc, type, instances, collectedFrom, entityInfo, lastUpdateTimestamp);
} catch (DocumentException e) {
log.error("Error with record:\n" + xml);
return Lists.newArrayList();
@ -153,30 +151,6 @@ public abstract class AbstractMdRecordToOafMapper {
return type;
}
protected Qualifier getMetaResourceType(final List<Instance> instances) {
if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
Optional<InstanceTypeMapping> instanceTypeMapping = instances
.stream()
.flatMap(i -> i.getInstanceTypeMapping().stream())
.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
.findFirst();
if (!instanceTypeMapping.isPresent()) {
return null;
} else {
final String typeCode = instanceTypeMapping.get().getTypeCode();
return Optional
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
.orElseThrow(
() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
OPENAIRE_META_RESOURCE_TYPE));
}
} else {
throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
}
}
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
final String dsId = doc.valueOf(xpathId);
final String dsName = doc.valueOf(xpathName);
@ -191,14 +165,13 @@ public abstract class AbstractMdRecordToOafMapper {
protected List<Oaf> createOafs(
final Document doc,
final String type,
final Qualifier metaResourceType,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final OafEntity entity = createEntity(
doc, type, metaResourceType, instances, collectedFrom, info, lastUpdateTimestamp);
doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final Set<String> originalId = Sets.newHashSet(entity.getOriginalId());
originalId.add(entity.getId());
@ -231,7 +204,6 @@ public abstract class AbstractMdRecordToOafMapper {
private OafEntity createEntity(final Document doc,
final String type,
final Qualifier metaResourceType,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
@ -239,12 +211,12 @@ public abstract class AbstractMdRecordToOafMapper {
switch (type.toLowerCase()) {
case "publication":
final Publication p = new Publication();
populateResultFields(p, metaResourceType, doc, instances, collectedFrom, info, lastUpdateTimestamp);
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setJournal(prepareJournal(doc, info));
return p;
case "dataset":
final Dataset d = new Dataset();
populateResultFields(d, metaResourceType, doc, instances, collectedFrom, info, lastUpdateTimestamp);
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info));
d.setSize(prepareDatasetSize(doc, info));
@ -255,7 +227,7 @@ public abstract class AbstractMdRecordToOafMapper {
return d;
case "software":
final Software s = new Software();
populateResultFields(s, metaResourceType, doc, instances, collectedFrom, info, lastUpdateTimestamp);
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
@ -265,7 +237,7 @@ public abstract class AbstractMdRecordToOafMapper {
case "otherresearchproducts":
default:
final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, metaResourceType, doc, instances, collectedFrom, info, lastUpdateTimestamp);
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
o.setTool(prepareOtherResearchProductTools(doc, info));
@ -402,13 +374,11 @@ public abstract class AbstractMdRecordToOafMapper {
private void populateResultFields(
final Result r,
final Qualifier metaResourceType,
final Document doc,
final List<Instance> instances,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
r.setMetaResourceType(metaResourceType);
r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
@ -555,26 +525,7 @@ public abstract class AbstractMdRecordToOafMapper {
.ofNullable(findOriginalType(doc))
.map(originalType -> {
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
if (vocs.vocabularyExists(OPENAIRE_COAR_RESOURCE_TYPES_3_1)) {
// TODO verify what the vocabs return when a synonym is not defined
Optional
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_COAR_RESOURCE_TYPES_3_1, originalType))
.ifPresent(coarTerm -> {
mappings.add(OafMapperUtils.instanceTypeMapping(originalType, coarTerm));
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
// TODO verify what the vocabs return when a synonym is not defined
Optional
.ofNullable(
vocs.lookupTermBySynonym(OPENAIRE_USER_RESOURCE_TYPES, coarTerm.getClassid()))
.ifPresent(
type -> mappings.add(OafMapperUtils.instanceTypeMapping(originalType, type)));
}
});
}
mappings.add(OafMapperUtils.instanceTypeMapping(originalType, OPENAIRE_COAR_RESOURCE_TYPES_3_1));
return mappings;
})
.orElse(new ArrayList<>());

View File

@ -1,15 +1,23 @@
package eu.dnetlib.dhp.oa.graph.group;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
@ -17,20 +25,36 @@ import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.InstanceTypeMapping;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class GroupEntitiesSparkJobTest {
@Mock
private ISLookUpService isLookUpService;
private VocabularyGroup vocabularies;
private static SparkSession spark;
private static ObjectMapper mapper = new ObjectMapper()
@ -45,10 +69,10 @@ public class GroupEntitiesSparkJobTest {
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
workingDir = Files.createTempDirectory(GroupEntitiesSparkJobTest.class.getSimpleName());
SparkConf conf = new SparkConf();
conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName());
conf.setAppName(GroupEntitiesSparkJobTest.class.getSimpleName());
conf.setMaster("local");
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
@ -56,10 +80,17 @@ public class GroupEntitiesSparkJobTest {
}
@BeforeEach
public void beforeEach() throws IOException, URISyntaxException {
public void beforeEach() throws IOException, URISyntaxException, ISLookUpException {
dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
checkpointPath = workingDir.resolve("grouped_entity");
outputPath = workingDir.resolve("dispatched_entity");
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
lenient()
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
.thenReturn(synonyms());
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
}
@AfterAll
@ -71,18 +102,17 @@ public class GroupEntitiesSparkJobTest {
@Test
@Order(1)
void testGroupEntities() throws Exception {
GroupEntitiesSparkJob.main(new String[] {
"-isSparkSessionManaged",
Boolean.FALSE.toString(),
"-graphInputPath",
dataInputPath.toString(),
"-checkpointPath",
checkpointPath.toString(),
"-outputPath",
outputPath.toString(),
"-filterInvisible",
Boolean.FALSE.toString()
});
new GroupEntitiesSparkJob(
args(
"/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json",
new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(),
"--graphInputPath", dataInputPath.toString(),
"--checkpointPath", checkpointPath.toString(),
"--outputPath", outputPath.toString(),
"--filterInvisible", Boolean.FALSE.toString(),
"--isLookupUrl", "lookupurl"
})).run(false, isLookUpService);
Dataset<OafEntity> checkpointTable = spark
.read()
@ -109,6 +139,14 @@ public class GroupEntitiesSparkJobTest {
.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
assertEquals(3, output.count());
List<String> resultTypes = output
.map((MapFunction<Result, String>) value -> value.getResulttype().getClassid(), Encoders.STRING())
.distinct()
.collectAsList();
assertEquals(2, resultTypes.size());
assertEquals(
2,
output
@ -121,5 +159,68 @@ public class GroupEntitiesSparkJobTest {
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
.filter((FilterFunction<String>) s -> s.equals("dataset"))
.count());
Result result = output
.filter("id = '50|doi_________::09821844208a5cd6300b2bfb13bca1b9'")
.first();
result.getInstance().forEach(instance -> {
Optional<InstanceTypeMapping> coarType = instance
.getInstanceTypeMapping()
.stream()
.filter(itm -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()))
.filter(itm -> "journal-article".equals(itm.getOriginalType()))
.findFirst();
assertTrue(coarType.isPresent());
assertEquals("http://purl.org/coar/resource_type/c_2df8fbb1", coarType.get().getTypeCode());
assertEquals("research article", coarType.get().getTypeLabel());
});
final Dataset<Result> filtered = output.filter("id = '50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c'");
assertEquals(1, filtered.count());
result = filtered.first();
result
.getInstance()
.stream()
.flatMap(instance -> instance.getInstanceTypeMapping().stream())
.filter(itm -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()))
.filter(itm -> "Patent".equals(itm.getOriginalType()))
.forEach(itm -> {
assertEquals("http://purl.org/coar/resource_type/c_15cd", itm.getTypeCode());
assertEquals("patent", itm.getTypeLabel());
});
}
private List<String> vocs() throws IOException {
return IOUtils
.readLines(
Objects
.requireNonNull(
getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")));
}
private List<String> synonyms() throws IOException {
return IOUtils
.readLines(
Objects
.requireNonNull(
getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")));
}
private ArgumentApplicationParser args(String paramSpecs, String[] args) throws IOException, ParseException {
ArgumentApplicationParser parser = new ArgumentApplicationParser(classPathResourceAsString(paramSpecs));
parser.parseArgument(args);
return parser;
}
private static String classPathResourceAsString(String path) throws IOException {
return IOUtils
.toString(
Objects
.requireNonNull(
GroupEntitiesSparkJobTest.class.getResourceAsStream(path)));
}
}

View File

@ -92,13 +92,14 @@ class GenerateEntitiesApplicationTest {
private List<String> vocs() throws IOException {
return IOUtils
.readLines(
GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
GenerateEntitiesApplicationTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
}
private List<String> synonyms() throws IOException {
return IOUtils
.readLines(
GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
GenerateEntitiesApplicationTest.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
}
}

View File

@ -122,7 +122,7 @@ class MappersTest {
assertTrue(instance.getPid().isEmpty());
assertNotNull(instance.getInstanceTypeMapping());
assertEquals(2, instance.getInstanceTypeMapping().size());
assertEquals(1, instance.getInstanceTypeMapping().size());
Optional<InstanceTypeMapping> coarType = instance
.getInstanceTypeMapping()
@ -131,8 +131,8 @@ class MappersTest {
.findFirst();
assertTrue(coarType.isPresent());
assertEquals("http://purl.org/coar/resource_type/c_5794", coarType.get().getTypeCode());
assertEquals("conference paper", coarType.get().getTypeLabel());
assertNull(coarType.get().getTypeCode());
assertNull(coarType.get().getTypeLabel());
Optional<InstanceTypeMapping> userType = instance
.getInstanceTypeMapping()
@ -140,9 +140,7 @@ class MappersTest {
.filter(itm -> ModelConstants.OPENAIRE_USER_RESOURCE_TYPES.equals(itm.getVocabularyName()))
.findFirst();
assertTrue(userType.isPresent());
assertEquals("Article", userType.get().getTypeCode());
assertEquals("Article", userType.get().getTypeLabel());
assertFalse(userType.isPresent());
assertFalse(instance.getAlternateIdentifier().isEmpty());
assertEquals("doi", instance.getAlternateIdentifier().get(0).getQualifier().getClassid());
@ -710,14 +708,10 @@ class MappersTest {
assertEquals("0001", p_cleaned.getInstance().get(0).getRefereed().getClassid());
assertEquals("peerReviewed", p_cleaned.getInstance().get(0).getRefereed().getClassname());
assertNotNull(p_cleaned.getMetaResourceType());
assertEquals("Research Literature", p_cleaned.getMetaResourceType().getClassid());
assertEquals("Research Literature", p_cleaned.getMetaResourceType().getClassname());
assertEquals(ModelConstants.OPENAIRE_META_RESOURCE_TYPE, p_cleaned.getMetaResourceType().getSchemeid());
assertEquals(ModelConstants.OPENAIRE_META_RESOURCE_TYPE, p_cleaned.getMetaResourceType().getSchemename());
assertNull(p_cleaned.getMetaResourceType());
assertNotNull(p_cleaned.getInstance().get(0).getInstanceTypeMapping());
assertEquals(2, p_cleaned.getInstance().get(0).getInstanceTypeMapping().size());
assertEquals(1, p_cleaned.getInstance().get(0).getInstanceTypeMapping().size());
assertTrue(
p_cleaned
@ -728,8 +722,7 @@ class MappersTest {
.anyMatch(
t -> "journal-article".equals(t.getOriginalType()) &&
ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()) &&
"http://purl.org/coar/resource_type/c_2df8fbb1".equals(t.getTypeCode()) &&
"research article".equals(t.getTypeLabel())));
Objects.isNull(t.getTypeCode()) && Objects.isNull(t.getTypeLabel())));
assertTrue(
p_cleaned
@ -737,11 +730,8 @@ class MappersTest {
.get(0)
.getInstanceTypeMapping()
.stream()
.anyMatch(
t -> "journal-article".equals(t.getOriginalType()) &&
ModelConstants.OPENAIRE_USER_RESOURCE_TYPES.equals(t.getVocabularyName()) &&
"Article".equals(t.getTypeCode()) &&
"Article".equals(t.getTypeLabel())));
.noneMatch(
t -> ModelConstants.OPENAIRE_USER_RESOURCE_TYPES.equals(t.getVocabularyName())));
}
@Test

View File

@ -888,7 +888,7 @@
<mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[4.17.2-SNAPSHOT]</dhp-schemas.version>
<dhp-schemas.version>[4.17.2]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>