1
0
Fork 0

making ODF record parsing namespace unaware (#6629)

This commit is contained in:
Claudio Atzori 2021-04-23 17:09:36 +02:00
parent 906d50563c
commit 99cfb027fa
1 changed file with 46 additions and 35 deletions

View File

@ -40,19 +40,20 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
/**
 * Builds the list of titles for the record by delegating to
 * {@code prepareListStructProps} with {@code MAIN_TITLE_QUALIFIER}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareListStructProps}
 * @return the list produced by {@code prepareListStructProps} for the selected nodes
 */
@Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info);
// Replacement lookup: matches by local name only, and only "title" elements
// that are direct children of a "titles" element.
return prepareListStructProps(
doc, "//*[local-name()='titles']/*[local-name()='title']", MAIN_TITLE_QUALIFIER, info);
}
@Override
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
final List<Author> res = new ArrayList<>();
int pos = 1;
for (final Object o : doc.selectNodes("//datacite:creator")) {
for (final Object o : doc.selectNodes("//*[local-name()='creator']")) {
final Node n = (Node) o;
final Author author = new Author();
final String fullname = n.valueOf("./datacite:creatorName");
final String name = n.valueOf("./datacite:givenName");
final String surname = n.valueOf("./datacite:familyName");
final String fullname = n.valueOf("./*[local-name()='creatorName']");
final String name = n.valueOf("./*[local-name()='givenName']");
final String surname = n.valueOf("./*[local-name()='familyName']");
if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) {
author.setFullname(fullname);
@ -74,7 +75,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
}
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
author.setPid(preparePids(n, info));
author.setRank(pos++);
res.add(author);
@ -85,7 +86,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) {
final String id = ((Node) o).getText();
final String type = ((Node) o)
@ -128,23 +129,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
for (final Object o : doc
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) {
url.add(((Node) o).getText().trim());
}
for (final Object o : doc
.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) {
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) {
url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) {
url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) {
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
for (final Object o : doc
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
if (!url.isEmpty()) {
@ -162,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:date")) {
for (final Object o : doc.selectNodes("//*[local-name()='date']")) {
final String dateType = ((Node) o).valueOf("@dateType");
if (StringUtils.isBlank(dateType)
|| (!dateType.equalsIgnoreCase("Accepted")
@ -192,32 +195,32 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
/**
 * Collects the contributor names found in the record by delegating to
 * {@code prepareListFields}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareListFields}
 * @return the list produced by {@code prepareListFields} for the selected nodes
 */
@Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareListFields(doc, "//datacite:contributorName", info);
// Replacement lookup: any element whose local name is "contributorName", whatever its namespace.
return prepareListFields(doc, "//*[local-name()='contributorName']", info);
}
/**
 * Collects the values of the {@code format} elements of the record by
 * delegating to {@code prepareListFields}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareListFields}
 * @return the list produced by {@code prepareListFields} for the selected nodes
 */
@Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareListFields(doc, "//datacite:format", info);
// Replacement lookup. local-name() returns the name without any prefix, so it can
// never equal ':format'; compare against 'format' as prepareSoftwareProgrammingLanguage does.
return prepareListFields(doc, "//*[local-name()='format']", info);
}
/**
 * Reads the publisher of the record by delegating to {@code prepareField}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareField}
 * @return the field produced by {@code prepareField} for the selected node
 */
@Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareField(doc, "//datacite:publisher", info);
// Replacement lookup. local-name() returns the name without any prefix, so it can
// never equal ':publisher'; the comparison value must be the bare element name.
return prepareField(doc, "//*[local-name()='publisher']", info);
}
/**
 * Collects the descriptions whose {@code descriptionType} attribute is
 * {@code Abstract} by delegating to {@code prepareListFields}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareListFields}
 * @return the list produced by {@code prepareListFields} for the selected nodes
 */
@Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info);
// Replacement lookup: same attribute filter, element matched by local name only.
return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info);
}
/**
 * Collects the subjects of the record by delegating to the three-argument
 * {@code prepareListStructProps}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareListStructProps}
 * @return the list produced by {@code prepareListStructProps} for the selected nodes
 */
@Override
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareListStructProps(doc, "//datacite:subject", info);
// Replacement lookup: any element whose local name is "subject", whatever its namespace.
return prepareListStructProps(doc, "//*[local-name()='subject']", info);
}
/**
 * Reads the language of the record by delegating to {@code prepareQualifier}
 * with {@code DNET_LANGUAGES}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @return the qualifier produced by {@code prepareQualifier} for the selected node
 */
@Override
protected Qualifier prepareLanguages(final Document doc) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES);
// Replacement lookup. local-name() returns the name without any prefix, so it can
// never equal ':language'; the comparison value must be the bare element name.
return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES);
}
@Override
@ -232,7 +235,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc,
final DataInfo info) {
return prepareListFields(
doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
doc,
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']",
info);
}
@Override
@ -240,12 +245,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc,
final DataInfo info) {
return prepareListFields(
doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
doc,
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']",
info);
}
/**
 * Reads the {@code format} element of the record and hands it to
 * {@code prepareQualifier} together with the literal
 * {@code "dnet:programming_languages"}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info not used by the statements visible in this method
 * @return the qualifier produced by {@code prepareQualifier} for the selected node
 */
@Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages");
// Replacement lookup: any element whose local name is "format", whatever its namespace.
return prepareQualifier(doc, "//*[local-name()='format']", "dnet:programming_languages");
}
@Override
@ -267,7 +274,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc,
final DataInfo info) {
return prepareListFields(
doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
doc,
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info);
}
// DATASETS
@ -276,11 +285,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
// Builds one GeoLocation per "geoLocation" element, copying its box, place and
// point child values as strings. The info argument is not used by the visible statements.
final List<GeoLocation> res = new ArrayList<>();
// Pre-change iteration shown by this diff: relies on the "datacite" prefix being bound.
for (final Object o : doc.selectNodes("//datacite:geoLocation")) {
// Replacement iteration: element matched by local name only.
for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
final GeoLocation loc = new GeoLocation();
// Pre-change child lookups shown by this diff.
loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox"));
loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace"));
loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint"));
// Replacement child lookups. local-name() returns the name without any prefix, so the
// comparison values must not start with ':' or every lookup yields an empty string.
loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']"));
loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']"));
loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']"));
res.add(loc);
}
return res;
@ -297,17 +306,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc,
final DataInfo info) {
// Reads the "date" element whose dateType attribute is "Updated" and wraps it
// via prepareField; info is passed through unchanged.
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
// Replacement lookup: same attribute filter, element matched by local name only.
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info);
}
/**
 * Reads the {@code version} element of the record by delegating to
 * {@code prepareField}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareField}
 * @return the field produced by {@code prepareField} for the selected node
 */
@Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareField(doc, "//datacite:version", info);
// Replacement lookup: any element whose local name is "version", whatever its namespace.
return prepareField(doc, "//*[local-name()='version']", info);
}
/**
 * Reads the {@code size} element of the record by delegating to
 * {@code prepareField}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareField}
 * @return the field produced by {@code prepareField} for the selected node
 */
@Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareField(doc, "//datacite:size", info);
// Replacement lookup: any element whose local name is "size", whatever its namespace.
return prepareField(doc, "//*[local-name()='size']", info);
}
@Override
@ -317,7 +326,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
/**
 * Reads the {@code date} element whose {@code dateType} attribute is
 * {@code Issued} by delegating to {@code prepareField}.
 *
 * @param doc the parsed record the XPath is evaluated against
 * @param info passed through unchanged to {@code prepareField}
 * @return the field produced by {@code prepareField} for the selected node
 */
@Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
// Pre-change lookup shown by this diff: relies on the "datacite" prefix being bound.
return prepareField(doc, "//datacite:date[@dateType='Issued']", info);
// Replacement lookup: same attribute filter, element matched by local name only.
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info);
}
@Override
@ -331,7 +340,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) {
for (final Object o : doc
.selectNodes("//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) {
final String originalId = ((Node) o).getText();
@ -385,13 +395,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res
.addAll(
prepareListStructPropsWithValidQualifier(
doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']",
doc,
"//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']",
"@identifierType", DNET_PID_TYPES, info));
res
.addAll(
prepareListStructPropsWithValidQualifier(
doc,
"//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']",
"//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']",
"@alternateIdentifierType", DNET_PID_TYPES, info));
return Lists.newArrayList(res);
}