making ODF record parsing namespace unaware (#6629)

This commit is contained in:
Claudio Atzori 2021-04-23 17:09:36 +02:00
parent 906d50563c
commit 99cfb027fa
1 changed file with 46 additions and 35 deletions

View File

@ -40,19 +40,20 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info); return prepareListStructProps(
doc, "//*[local-name()='titles']/*[local-name()='title']", MAIN_TITLE_QUALIFIER, info);
} }
@Override @Override
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) { protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
final List<Author> res = new ArrayList<>(); final List<Author> res = new ArrayList<>();
int pos = 1; int pos = 1;
for (final Object o : doc.selectNodes("//datacite:creator")) { for (final Object o : doc.selectNodes("//*[local-name()='creator']")) {
final Node n = (Node) o; final Node n = (Node) o;
final Author author = new Author(); final Author author = new Author();
final String fullname = n.valueOf("./datacite:creatorName"); final String fullname = n.valueOf("./*[local-name()='creatorName']");
final String name = n.valueOf("./datacite:givenName"); final String name = n.valueOf("./*[local-name()='givenName']");
final String surname = n.valueOf("./datacite:familyName"); final String surname = n.valueOf("./*[local-name()='familyName']");
if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) { if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) {
author.setFullname(fullname); author.setFullname(fullname);
@ -74,7 +75,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
} }
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
author.setPid(preparePids(n, info)); author.setPid(preparePids(n, info));
author.setRank(pos++); author.setRank(pos++);
res.add(author); res.add(author);
@ -85,7 +86,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) { private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) {
final String id = ((Node) o).getText(); final String id = ((Node) o).getText();
final String type = ((Node) o) final String type = ((Node) o)
@ -128,23 +129,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>(); final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { for (final Object o : doc
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) {
url.add(((Node) o).getText().trim()); url.add(((Node) o).getText().trim());
} }
for (final Object o : doc for (final Object o : doc
.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) { .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) {
url.add(((Node) o).getText().trim()); url.add(((Node) o).getText().trim());
} }
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) {
url.add(((Node) o).getText().trim()); url.add(((Node) o).getText().trim());
} }
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) { for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
url.add(((Node) o).getText().trim()); url.add(((Node) o).getText().trim());
} }
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { for (final Object o : doc
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
} }
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
} }
if (!url.isEmpty()) { if (!url.isEmpty()) {
@ -162,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:date")) { for (final Object o : doc.selectNodes("//*[local-name()='date']")) {
final String dateType = ((Node) o).valueOf("@dateType"); final String dateType = ((Node) o).valueOf("@dateType");
if (StringUtils.isBlank(dateType) if (StringUtils.isBlank(dateType)
|| (!dateType.equalsIgnoreCase("Accepted") || (!dateType.equalsIgnoreCase("Accepted")
@ -192,32 +195,32 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) { protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:contributorName", info); return prepareListFields(doc, "//*[local-name()='contributorName']", info);
} }
@Override @Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) { protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:format", info); return prepareListFields(doc, "//*[local-name()='format']", info);
} }
@Override @Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) { protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:publisher", info); return prepareField(doc, "//*[local-name()='publisher']", info);
} }
@Override @Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) { protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info); return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info);
} }
@Override @Override
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//datacite:subject", info); return prepareListStructProps(doc, "//*[local-name()='subject']", info);
} }
@Override @Override
protected Qualifier prepareLanguages(final Document doc) { protected Qualifier prepareLanguages(final Document doc) {
return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES); return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES);
} }
@Override @Override
@ -232,7 +235,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc, final Document doc,
final DataInfo info) { final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); doc,
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']",
info);
} }
@Override @Override
@ -240,12 +245,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc, final Document doc,
final DataInfo info) { final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); doc,
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']",
info);
} }
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages"); return prepareQualifier(doc, "//*[local-name()='format']", "dnet:programming_languages");
} }
@Override @Override
@ -267,7 +274,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Document doc, final Document doc,
final DataInfo info) { final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); doc,
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info);
} }
// DATASETS // DATASETS
@ -276,11 +285,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
final List<GeoLocation> res = new ArrayList<>(); final List<GeoLocation> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:geoLocation")) { for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
final GeoLocation loc = new GeoLocation(); final GeoLocation loc = new GeoLocation();
loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox")); loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']"));
loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace")); loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']"));
loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint")); loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']"));
res.add(loc); res.add(loc);
} }
return res; return res;
@ -297,17 +306,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final Document doc,
final DataInfo info) { final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Updated']", info); return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info);
} }
@Override @Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:version", info); return prepareField(doc, "//*[local-name()='version']", info);
} }
@Override @Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:size", info); return prepareField(doc, "//*[local-name()='size']", info);
} }
@Override @Override
@ -317,7 +326,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Issued']", info); return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info);
} }
@Override @Override
@ -331,7 +340,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) { for (final Object o : doc
.selectNodes("//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) {
final String originalId = ((Node) o).getText(); final String originalId = ((Node) o).getText();
@ -385,13 +395,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res res
.addAll( .addAll(
prepareListStructPropsWithValidQualifier( prepareListStructPropsWithValidQualifier(
doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']", doc,
"//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']",
"@identifierType", DNET_PID_TYPES, info)); "@identifierType", DNET_PID_TYPES, info));
res res
.addAll( .addAll(
prepareListStructPropsWithValidQualifier( prepareListStructPropsWithValidQualifier(
doc, doc,
"//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", "//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']",
"@alternateIdentifierType", DNET_PID_TYPES, info)); "@alternateIdentifierType", DNET_PID_TYPES, info));
return Lists.newArrayList(res); return Lists.newArrayList(res);
} }