1
0
Fork 0

[graph raw] let the mapping exceptions propagate

This commit is contained in:
Claudio Atzori 2021-08-12 11:32:26 +02:00
parent 9f4db73f30
commit 8cdce59e0e
4 changed files with 36 additions and 55 deletions

View File

@ -28,10 +28,7 @@ import java.util.Optional;
import java.util.Set; import java.util.Set;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.*;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper {
this.forceOriginalId = false; this.forceOriginalId = false;
} }
public List<Oaf> processMdRecord(final String xml) { public List<Oaf> processMdRecord(final String xml) throws DocumentException {
try {
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
final Document doc = DocumentHelper DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
.parseText(
xml
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
final KeyValue collectedFrom = getProvenanceDatasource( final Document doc = DocumentHelper
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); .parseText(
xml
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
if (collectedFrom == null) { final KeyValue collectedFrom = getProvenanceDatasource(
return Lists.newArrayList(); doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
}
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) if (collectedFrom == null) {
? collectedFrom return Lists.newArrayList();
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
if (hostedBy == null) {
return Lists.newArrayList();
}
final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (final Exception e) {
throw new RuntimeException(e);
} }
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
if (hostedBy == null) {
return Lists.newArrayList();
}
final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} }
protected String getResultType(final Document doc, final List<Instance> instances) { protected String getResultType(final Document doc, final List<Instance> instances) {
@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper {
return vocs.getTermAsQualifier(schemeId, classId); return vocs.getTermAsQualifier(schemeId, classId);
} }
protected List<StructuredProperty> prepareListStructProps(
final Node node,
final String xpath,
final String xpathClassId,
final String schemeId,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o;
final String classId = n.valueOf(xpathClassId).trim();
res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info));
}
return res;
}
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier( protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
final Node node, final Node node,
final String xpath, final String xpath,

View File

@ -17,6 +17,7 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -158,7 +159,7 @@ public class GenerateEntitiesApplication {
final String id, final String id,
final String s, final String s,
final boolean shouldHashId, final boolean shouldHashId,
final VocabularyGroup vocs) { final VocabularyGroup vocs) throws DocumentException {
final String type = StringUtils.substringAfter(id, ":"); final String type = StringUtils.substringAfter(id, ":");
switch (type.toLowerCase()) { switch (type.toLowerCase()) {

View File

@ -9,6 +9,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest {
} }
@Test @Test
void testMergeResult() throws IOException { void testMergeResult() throws IOException, DocumentException {
Result publication = getResult("oaf_record.xml", Publication.class); Result publication = getResult("oaf_record.xml", Publication.class);
Result dataset = getResult("odf_dataset.xml", Dataset.class); Result dataset = getResult("odf_dataset.xml", Dataset.class);
Result software = getResult("odf_software.xml", Software.class); Result software = getResult("odf_software.xml", Software.class);
@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest {
assertEquals(resultType, merge.getResulttype().getClassid()); assertEquals(resultType, merge.getResulttype().getClassid());
} }
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz) throws IOException { protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)
throws IOException, DocumentException {
final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName));
return new OdfToOafMapper(vocs, false, true) return new OdfToOafMapper(vocs, false, true)
.processMdRecord(xml) .processMdRecord(xml)