forked from D-Net/dnet-hadoop
[graph raw] let the mapping exceptions propagate
This commit is contained in:
parent
9f4db73f30
commit
8cdce59e0e
|
@ -28,10 +28,7 @@ import java.util.Optional;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentFactory;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.*;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
|
@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
this.forceOriginalId = false;
|
||||
}
|
||||
|
||||
public List<Oaf> processMdRecord(final String xml) {
|
||||
try {
|
||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||
public List<Oaf> processMdRecord(final String xml) throws DocumentException {
|
||||
|
||||
final Document doc = DocumentHelper
|
||||
.parseText(
|
||||
xml
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||
|
||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||
final Document doc = DocumentHelper
|
||||
.parseText(
|
||||
xml
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
||||
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
||||
|
||||
if (collectedFrom == null) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||
|
||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||
? collectedFrom
|
||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||
|
||||
if (hostedBy == null) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||
final long lastUpdateTimestamp = new Date().getTime();
|
||||
|
||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||
|
||||
final String type = getResultType(doc, instances);
|
||||
|
||||
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
if (collectedFrom == null) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||
? collectedFrom
|
||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||
|
||||
if (hostedBy == null) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||
final long lastUpdateTimestamp = new Date().getTime();
|
||||
|
||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||
|
||||
final String type = getResultType(doc, instances);
|
||||
|
||||
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
}
|
||||
|
||||
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||
|
@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
return vocs.getTermAsQualifier(schemeId, classId);
|
||||
}
|
||||
|
||||
protected List<StructuredProperty> prepareListStructProps(
|
||||
final Node node,
|
||||
final String xpath,
|
||||
final String xpathClassId,
|
||||
final String schemeId,
|
||||
final DataInfo info) {
|
||||
final List<StructuredProperty> res = new ArrayList<>();
|
||||
|
||||
for (final Object o : node.selectNodes(xpath)) {
|
||||
final Node n = (Node) o;
|
||||
final String classId = n.valueOf(xpathClassId).trim();
|
||||
res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
|
||||
final Node node,
|
||||
final String xpath,
|
||||
|
|
|
@ -17,6 +17,7 @@ import org.apache.spark.SparkConf;
|
|||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -158,7 +159,7 @@ public class GenerateEntitiesApplication {
|
|||
final String id,
|
||||
final String s,
|
||||
final boolean shouldHashId,
|
||||
final VocabularyGroup vocs) {
|
||||
final VocabularyGroup vocs) throws DocumentException {
|
||||
final String type = StringUtils.substringAfter(id, ":");
|
||||
|
||||
switch (type.toLowerCase()) {
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
|
@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testMergeResult() throws IOException {
|
||||
void testMergeResult() throws IOException, DocumentException {
|
||||
Result publication = getResult("oaf_record.xml", Publication.class);
|
||||
Result dataset = getResult("odf_dataset.xml", Dataset.class);
|
||||
Result software = getResult("odf_software.xml", Software.class);
|
||||
|
@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest {
|
|||
assertEquals(resultType, merge.getResulttype().getClassid());
|
||||
}
|
||||
|
||||
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz) throws IOException {
|
||||
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)
|
||||
throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName));
|
||||
return new OdfToOafMapper(vocs, false, true)
|
||||
.processMdRecord(xml)
|
||||
|
|
Loading…
Reference in New Issue