[graph raw] let the mapping exceptions propagate

This commit is contained in:
Claudio Atzori 2021-08-12 11:32:26 +02:00
parent 9f4db73f30
commit 8cdce59e0e
4 changed files with 36 additions and 55 deletions

View File

@ -28,10 +28,7 @@ import java.util.Optional;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.dom4j.*;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper {
this.forceOriginalId = false;
}
public List<Oaf> processMdRecord(final String xml) {
try {
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
public List<Oaf> processMdRecord(final String xml) throws DocumentException {
final Document doc = DocumentHelper
.parseText(
xml
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
final Document doc = DocumentHelper
.parseText(
xml
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
if (collectedFrom == null) {
return Lists.newArrayList();
}
final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
if (hostedBy == null) {
return Lists.newArrayList();
}
final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (final Exception e) {
throw new RuntimeException(e);
if (collectedFrom == null) {
return Lists.newArrayList();
}
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
if (hostedBy == null) {
return Lists.newArrayList();
}
final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
}
protected String getResultType(final Document doc, final List<Instance> instances) {
@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper {
return vocs.getTermAsQualifier(schemeId, classId);
}
protected List<StructuredProperty> prepareListStructProps(
final Node node,
final String xpath,
final String xpathClassId,
final String schemeId,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
final String classId = n.valueOf(xpathClassId).trim();
res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info));
}
return res;
}
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
final Node node,
final String xpath,

View File

@ -17,6 +17,7 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -158,7 +159,7 @@ public class GenerateEntitiesApplication {
final String id,
final String s,
final boolean shouldHashId,
final VocabularyGroup vocs) {
final VocabularyGroup vocs) throws DocumentException {
final String type = StringUtils.substringAfter(id, ":");
switch (type.toLowerCase()) {

View File

@ -9,6 +9,7 @@ import java.io.IOException;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest {
}
@Test
void testMergeResult() throws IOException {
void testMergeResult() throws IOException, DocumentException {
Result publication = getResult("oaf_record.xml", Publication.class);
Result dataset = getResult("odf_dataset.xml", Dataset.class);
Result software = getResult("odf_software.xml", Software.class);
@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest {
assertEquals(resultType, merge.getResulttype().getClassid());
}
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz) throws IOException {
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)
throws IOException, DocumentException {
final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName));
return new OdfToOafMapper(vocs, false, true)
.processMdRecord(xml)