forked from D-Net/dnet-hadoop
[graph raw] let the mapping exceptions propagate
This commit is contained in:
parent
9f4db73f30
commit
8cdce59e0e
|
@ -28,10 +28,7 @@ import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.*;
|
||||||
import org.dom4j.DocumentFactory;
|
|
||||||
import org.dom4j.DocumentHelper;
|
|
||||||
import org.dom4j.Node;
|
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
this.forceOriginalId = false;
|
this.forceOriginalId = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processMdRecord(final String xml) {
|
public List<Oaf> processMdRecord(final String xml) throws DocumentException {
|
||||||
try {
|
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
|
||||||
|
|
||||||
final Document doc = DocumentHelper
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
.parseText(
|
|
||||||
xml
|
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
|
||||||
|
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
final Document doc = DocumentHelper
|
||||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
.parseText(
|
||||||
|
xml
|
||||||
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
||||||
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
||||||
|
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
||||||
|
|
||||||
if (collectedFrom == null) {
|
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||||
return Lists.newArrayList();
|
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||||
}
|
|
||||||
|
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
if (collectedFrom == null) {
|
||||||
? collectedFrom
|
return Lists.newArrayList();
|
||||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
|
||||||
|
|
||||||
if (hostedBy == null) {
|
|
||||||
return Lists.newArrayList();
|
|
||||||
}
|
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(doc, invisible);
|
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
|
||||||
|
|
||||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
|
||||||
|
|
||||||
final String type = getResultType(doc, instances);
|
|
||||||
|
|
||||||
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||||
|
? collectedFrom
|
||||||
|
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||||
|
|
||||||
|
if (hostedBy == null) {
|
||||||
|
return Lists.newArrayList();
|
||||||
|
}
|
||||||
|
|
||||||
|
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||||
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
|
||||||
|
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||||
|
|
||||||
|
final String type = getResultType(doc, instances);
|
||||||
|
|
||||||
|
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getResultType(final Document doc, final List<Instance> instances) {
|
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||||
|
@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return vocs.getTermAsQualifier(schemeId, classId);
|
return vocs.getTermAsQualifier(schemeId, classId);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructProps(
|
|
||||||
final Node node,
|
|
||||||
final String xpath,
|
|
||||||
final String xpathClassId,
|
|
||||||
final String schemeId,
|
|
||||||
final DataInfo info) {
|
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
|
||||||
|
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
|
||||||
final Node n = (Node) o;
|
|
||||||
final String classId = n.valueOf(xpathClassId).trim();
|
|
||||||
res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info));
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
|
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
|
||||||
final Node node,
|
final Node node,
|
||||||
final String xpath,
|
final String xpath,
|
||||||
|
|
|
@ -17,6 +17,7 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -158,7 +159,7 @@ public class GenerateEntitiesApplication {
|
||||||
final String id,
|
final String id,
|
||||||
final String s,
|
final String s,
|
||||||
final boolean shouldHashId,
|
final boolean shouldHashId,
|
||||||
final VocabularyGroup vocs) {
|
final VocabularyGroup vocs) throws DocumentException {
|
||||||
final String type = StringUtils.substringAfter(id, ":");
|
final String type = StringUtils.substringAfter(id, ":");
|
||||||
|
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
|
|
|
@ -9,6 +9,7 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testMergeResult() throws IOException {
|
void testMergeResult() throws IOException, DocumentException {
|
||||||
Result publication = getResult("oaf_record.xml", Publication.class);
|
Result publication = getResult("oaf_record.xml", Publication.class);
|
||||||
Result dataset = getResult("odf_dataset.xml", Dataset.class);
|
Result dataset = getResult("odf_dataset.xml", Dataset.class);
|
||||||
Result software = getResult("odf_software.xml", Software.class);
|
Result software = getResult("odf_software.xml", Software.class);
|
||||||
|
@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest {
|
||||||
assertEquals(resultType, merge.getResulttype().getClassid());
|
assertEquals(resultType, merge.getResulttype().getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz) throws IOException {
|
protected <T extends Result> Result getResult(String xmlFileName, Class<T> clazz)
|
||||||
|
throws IOException, DocumentException {
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName));
|
||||||
return new OdfToOafMapper(vocs, false, true)
|
return new OdfToOafMapper(vocs, false, true)
|
||||||
.processMdRecord(xml)
|
.processMdRecord(xml)
|
||||||
|
|
Loading…
Reference in New Issue