2020-04-28 11:23:29 +02:00
|
|
|
|
2020-04-10 17:53:07 +02:00
|
|
|
package eu.dnetlib.dhp.oa.graph.raw;
|
2020-03-02 16:12:14 +01:00
|
|
|
|
2020-06-09 19:52:53 +02:00
|
|
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
2021-04-27 15:44:01 +02:00
|
|
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
2022-01-21 10:50:34 +01:00
|
|
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
2020-05-22 10:08:02 +02:00
|
|
|
|
2021-08-20 17:03:30 +02:00
|
|
|
import java.net.URLDecoder;
|
2020-11-19 14:34:54 +01:00
|
|
|
import java.util.*;
|
2020-10-30 10:56:42 +01:00
|
|
|
import java.util.stream.Collectors;
|
2020-04-28 11:23:29 +02:00
|
|
|
|
2023-10-11 16:09:19 +02:00
|
|
|
import org.apache.commons.lang3.ObjectUtils;
|
2020-04-28 11:23:29 +02:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.dom4j.Document;
|
2022-01-21 10:50:34 +01:00
|
|
|
import org.dom4j.Element;
|
2020-04-28 11:23:29 +02:00
|
|
|
import org.dom4j.Node;
|
|
|
|
|
2022-01-21 10:50:34 +01:00
|
|
|
import com.google.common.collect.Lists;
|
|
|
|
|
2020-05-25 10:35:39 +02:00
|
|
|
import eu.dnetlib.dhp.common.PacePerson;
|
2021-01-25 15:43:04 +01:00
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
2022-09-26 11:24:13 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
|
|
import eu.dnetlib.dhp.schema.common.RelationInverse;
|
2020-10-16 17:02:10 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.*;
|
2021-04-27 15:44:01 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
2021-03-16 14:19:32 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
2020-02-05 15:35:40 +01:00
|
|
|
|
2020-03-02 16:12:14 +01:00
|
|
|
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
2020-02-05 15:35:40 +01:00
|
|
|
|
2022-09-07 16:29:54 +02:00
|
|
|
/**
 * Resolver prefix concatenated with a DOI value to build a URL for the instance.
 * NOTE(review): the identifier is misspelled ("PREIFX") and says HTTP although the value is an
 * https URL; the name is public, so it is left unchanged.
 */
public static final String HTTP_DOI_PREIFX = "https://doi.org/";

/**
 * Resolver prefix concatenated with a Handle value to build a URL for the instance.
 * NOTE(review): same naming caveats as the DOI prefix constant of this class.
 */
public static final String HTTP_HANDLE_PREIFX = "https://hdl.handle.net/";
|
2020-05-06 13:20:02 +02:00
|
|
|
|
2021-07-19 17:43:52 +02:00
|
|
|
/**
 * Creates a mapper; every argument is handed over unchanged to the superclass constructor.
 *
 * @param vocs the vocabularies, forwarded to the superclass
 * @param invisible forwarded to the superclass
 * @param shouldHashId forwarded to the superclass
 * @param forceOrginalId forwarded to the superclass
 */
public OdfToOafMapper(
	final VocabularyGroup vocs,
	final boolean invisible,
	final boolean shouldHashId,
	final boolean forceOrginalId) {
	super(vocs, invisible, shouldHashId, forceOrginalId);
}
|
|
|
|
|
2020-11-30 12:00:38 +01:00
|
|
|
/**
 * Creates a mapper using the three-argument superclass constructor.
 *
 * @param vocs the vocabularies, forwarded to the superclass
 * @param invisible forwarded to the superclass
 * @param shouldHashId forwarded to the superclass
 */
public OdfToOafMapper(
	final VocabularyGroup vocs,
	final boolean invisible,
	final boolean shouldHashId) {
	super(vocs, invisible, shouldHashId);
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
2022-01-21 10:50:34 +01:00
|
|
|
|
|
|
|
final List<StructuredProperty> title = Lists.newArrayList();
|
|
|
|
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
|
|
|
|
|
|
|
|
for (Object o : doc.selectNodes(xpath)) {
|
|
|
|
Element e = (Element) o;
|
|
|
|
final String titleValue = e.getTextTrim();
|
|
|
|
final String titleType = e.attributeValue("titleType");
|
|
|
|
if (StringUtils.isNotBlank(titleType)) {
|
|
|
|
title
|
|
|
|
.add(
|
|
|
|
structuredProperty(
|
|
|
|
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
|
|
|
|
} else {
|
|
|
|
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return title;
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
|
|
|
|
final List<Author> res = new ArrayList<>();
|
|
|
|
int pos = 1;
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='creator']")) {
|
2020-04-28 11:23:29 +02:00
|
|
|
final Node n = (Node) o;
|
|
|
|
final Author author = new Author();
|
2021-04-23 17:09:36 +02:00
|
|
|
final String fullname = n.valueOf("./*[local-name()='creatorName']");
|
|
|
|
final String name = n.valueOf("./*[local-name()='givenName']");
|
|
|
|
final String surname = n.valueOf("./*[local-name()='familyName']");
|
2021-01-25 18:02:49 +01:00
|
|
|
if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) {
|
2021-01-25 17:57:51 +01:00
|
|
|
author.setFullname(fullname);
|
2020-05-14 15:07:24 +02:00
|
|
|
|
2021-01-25 17:57:51 +01:00
|
|
|
final PacePerson pp = new PacePerson(fullname, false);
|
2020-05-22 10:08:02 +02:00
|
|
|
|
2021-01-25 17:57:51 +01:00
|
|
|
if (StringUtils.isBlank(name) & pp.isAccurate()) {
|
|
|
|
author.setName(pp.getNormalisedFirstName());
|
|
|
|
} else {
|
|
|
|
author.setName(name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringUtils.isBlank(surname) & pp.isAccurate()) {
|
|
|
|
author.setSurname(pp.getNormalisedSurname());
|
|
|
|
} else {
|
|
|
|
author.setSurname(surname);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringUtils.isBlank(author.getFullname())) {
|
|
|
|
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
|
|
|
|
}
|
|
|
|
|
2021-04-23 17:09:36 +02:00
|
|
|
author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
|
2021-01-25 17:57:51 +01:00
|
|
|
author.setPid(preparePids(n, info));
|
|
|
|
author.setRank(pos++);
|
|
|
|
res.add(author);
|
|
|
|
}
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2020-05-15 12:26:16 +02:00
|
|
|
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
final List<StructuredProperty> res = new ArrayList<>();
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) {
|
2020-05-22 10:47:39 +02:00
|
|
|
|
|
|
|
final String id = ((Node) o).getText();
|
2020-05-22 12:34:00 +02:00
|
|
|
final String type = ((Node) o)
|
|
|
|
.valueOf("./@nameIdentifierScheme")
|
2020-05-22 10:47:39 +02:00
|
|
|
.trim()
|
|
|
|
.toUpperCase()
|
2021-08-11 12:13:22 +02:00
|
|
|
.replace(" ", "")
|
|
|
|
.replace("_", "");
|
2020-05-22 10:47:39 +02:00
|
|
|
|
2021-05-26 18:20:23 +02:00
|
|
|
if (type.toLowerCase().startsWith(ORCID)) {
|
2021-08-11 12:13:22 +02:00
|
|
|
final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", "");
|
2020-05-22 10:47:39 +02:00
|
|
|
res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
|
|
|
|
} else if (type.startsWith("MAGID")) {
|
|
|
|
res.add(structuredProperty(id, MAG_PID_TYPE, info));
|
|
|
|
}
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Instance> prepareInstances(
|
|
|
|
final Document doc,
|
|
|
|
final DataInfo info,
|
|
|
|
final KeyValue collectedfrom,
|
|
|
|
final KeyValue hostedby) {
|
|
|
|
|
|
|
|
final Instance instance = new Instance();
|
|
|
|
instance
|
2020-05-27 11:34:13 +02:00
|
|
|
.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE));
|
2020-04-28 11:23:29 +02:00
|
|
|
instance.setCollectedfrom(collectedfrom);
|
|
|
|
instance.setHostedby(hostedby);
|
2021-03-16 14:19:32 +01:00
|
|
|
|
|
|
|
final List<StructuredProperty> alternateIdentifier = prepareResultPids(doc, info);
|
|
|
|
final List<StructuredProperty> pid = IdentifierFactory.getPids(alternateIdentifier, collectedfrom);
|
|
|
|
|
2023-10-11 16:09:19 +02:00
|
|
|
instance.setInstanceTypeMapping(prepareInstanceTypeMapping(doc));
|
|
|
|
|
2023-05-26 11:33:42 +02:00
|
|
|
final Set<StructuredProperty> pids = new HashSet<>(pid);
|
2021-03-16 14:19:32 +01:00
|
|
|
|
|
|
|
instance
|
|
|
|
.setAlternateIdentifier(
|
|
|
|
alternateIdentifier.stream().filter(i -> !pids.contains(i)).collect(Collectors.toList()));
|
|
|
|
instance.setPid(pid);
|
|
|
|
|
2020-04-28 11:23:29 +02:00
|
|
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
2021-05-05 16:36:15 +02:00
|
|
|
final String distributionlocation = doc.valueOf("//oaf:distributionlocation");
|
|
|
|
instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null);
|
2020-04-28 11:23:29 +02:00
|
|
|
instance
|
2021-01-12 15:36:38 +01:00
|
|
|
.setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
2020-04-28 11:23:29 +02:00
|
|
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
2020-06-09 19:52:53 +02:00
|
|
|
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
|
2020-04-28 11:23:29 +02:00
|
|
|
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
|
|
|
instance
|
2020-05-22 10:08:02 +02:00
|
|
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
2023-05-26 11:33:42 +02:00
|
|
|
prepareListURL(doc, "//oaf:fulltext", info)
|
|
|
|
.stream()
|
|
|
|
.findFirst()
|
|
|
|
.map(Field::getValue)
|
|
|
|
.ifPresent(instance::setFulltext);
|
2020-04-28 11:23:29 +02:00
|
|
|
|
2020-05-14 15:07:24 +02:00
|
|
|
final Set<String> url = new HashSet<>();
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc
|
|
|
|
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) {
|
2021-08-20 17:03:30 +02:00
|
|
|
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
2020-06-11 12:28:34 +02:00
|
|
|
for (final Object o : doc
|
2021-04-23 17:09:36 +02:00
|
|
|
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) {
|
2021-08-20 17:03:30 +02:00
|
|
|
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
2020-06-11 12:28:34 +02:00
|
|
|
}
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) {
|
2021-08-20 17:03:30 +02:00
|
|
|
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
|
2021-08-20 17:03:30 +02:00
|
|
|
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
2020-06-11 12:28:34 +02:00
|
|
|
}
|
2022-09-28 14:16:39 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='w3id']")) {
|
|
|
|
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
|
|
|
}
|
2022-09-19 11:19:10 +02:00
|
|
|
|
|
|
|
Set<String> validUrl = validateUrl(url);
|
|
|
|
|
|
|
|
if (validUrl.stream().noneMatch(s -> s.contains("doi.org"))) {
|
|
|
|
for (final Object o : doc
|
|
|
|
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) {
|
|
|
|
validUrl.add(HTTP_DOI_PREIFX + ((Node) o).getText().trim());
|
|
|
|
}
|
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']")) {
|
|
|
|
validUrl.add(HTTP_DOI_PREIFX + ((Node) o).getText().trim());
|
|
|
|
}
|
2022-09-07 16:29:54 +02:00
|
|
|
}
|
2022-09-19 11:19:10 +02:00
|
|
|
if (validUrl.stream().noneMatch(s -> s.contains("hdl.handle.net"))) {
|
|
|
|
for (final Object o : doc
|
|
|
|
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='Handle']")) {
|
|
|
|
validUrl.add(HTTP_HANDLE_PREIFX + ((Node) o).getText().trim());
|
|
|
|
}
|
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='Handle']")) {
|
|
|
|
validUrl.add(HTTP_HANDLE_PREIFX + ((Node) o).getText().trim());
|
|
|
|
}
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
2022-09-19 11:19:10 +02:00
|
|
|
|
|
|
|
if (!validUrl.isEmpty()) {
|
2020-05-14 15:07:24 +02:00
|
|
|
instance.setUrl(new ArrayList<>());
|
2022-09-19 11:19:10 +02:00
|
|
|
instance.getUrl().addAll(validUrl);
|
2020-05-14 15:07:24 +02:00
|
|
|
}
|
2020-04-28 11:23:29 +02:00
|
|
|
return Arrays.asList(instance);
|
|
|
|
}
|
|
|
|
|
2021-08-23 11:57:21 +02:00
|
|
|
protected String trimAndDecodeUrl(String url) {
|
2021-08-20 17:03:30 +02:00
|
|
|
try {
|
|
|
|
return URLDecoder.decode(url.trim(), "UTF-8");
|
2021-08-25 10:07:58 +02:00
|
|
|
} catch (Throwable t) {
|
2021-08-20 17:03:30 +02:00
|
|
|
return url;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-11 16:09:19 +02:00
|
|
|
/**
|
2024-01-11 16:28:26 +01:00
|
|
|
* Extracts the resource type from The Datacite element
|
2023-10-11 16:09:19 +02:00
|
|
|
*
|
2024-01-11 16:28:26 +01:00
|
|
|
* <datacite:resourceType
|
|
|
|
* anyURI="http://purl.org/coar/resource_type/c_6501"
|
|
|
|
* uri="http://purl.org/coar/resource_type/c_6501"
|
|
|
|
* resourceTypeGeneral="Dataset">journal article</datacite:resourceType>
|
2023-10-11 16:09:19 +02:00
|
|
|
*
|
|
|
|
* @param doc the input document
|
|
|
|
* @return the chosen resource type
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
protected String findOriginalType(Document doc) {
|
2024-01-11 16:28:26 +01:00
|
|
|
final String resourceType = Optional
|
2023-10-16 12:57:18 +02:00
|
|
|
.ofNullable(
|
|
|
|
(Element) doc
|
|
|
|
.selectSingleNode(
|
2023-10-16 08:57:47 +02:00
|
|
|
"//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']"))
|
2024-02-09 10:19:53 +01:00
|
|
|
.map(e -> {
|
|
|
|
final String resourceTypeURI = Optional
|
|
|
|
.ofNullable(e.attributeValue("uri"))
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.orElse(null);
|
|
|
|
final String resourceTypeAnyURI = Optional
|
|
|
|
.ofNullable(e.attributeValue("anyURI"))
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.orElse(null);
|
|
|
|
final String resourceTypeTxt = Optional
|
|
|
|
.ofNullable(e.getText())
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.orElse(null);
|
|
|
|
final String resourceTypeGeneral = Optional
|
|
|
|
.ofNullable(e.attributeValue("resourceTypeGeneral"))
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.orElse(null);
|
2023-10-11 16:09:19 +02:00
|
|
|
|
2024-01-11 16:28:26 +01:00
|
|
|
return ObjectUtils
|
|
|
|
.firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral);
|
2023-10-16 12:57:18 +02:00
|
|
|
})
|
|
|
|
.orElse(null);
|
2024-01-11 16:28:26 +01:00
|
|
|
|
|
|
|
final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()");
|
|
|
|
return ObjectUtils.firstNonNull(resourceType, drCobjCategory);
|
2023-10-11 16:09:19 +02:00
|
|
|
}
|
|
|
|
|
2020-04-28 11:23:29 +02:00
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
|
|
|
|
return new ArrayList<>(); // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
|
|
|
|
final List<StructuredProperty> res = new ArrayList<>();
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='date']")) {
|
2020-04-28 11:23:29 +02:00
|
|
|
final String dateType = ((Node) o).valueOf("@dateType");
|
|
|
|
if (StringUtils.isBlank(dateType)
|
2021-01-12 15:36:38 +01:00
|
|
|
|| (!dateType.equalsIgnoreCase("Accepted")
|
|
|
|
&& !dateType.equalsIgnoreCase("Issued")
|
|
|
|
&& !dateType.equalsIgnoreCase("Updated")
|
|
|
|
&& !dateType.equalsIgnoreCase("Available"))) {
|
2020-04-28 11:23:29 +02:00
|
|
|
res
|
2020-05-26 13:11:09 +02:00
|
|
|
.add(
|
|
|
|
structuredProperty(
|
2021-03-31 18:33:57 +02:00
|
|
|
((Node) o).getText(), UNKNOWN, UNKNOWN, DNET_DATACITE_DATE, DNET_DATACITE_DATE,
|
2021-01-12 15:36:38 +01:00
|
|
|
info));
|
|
|
|
} else {
|
|
|
|
res
|
|
|
|
.add(
|
|
|
|
structuredProperty(
|
|
|
|
((Node) o).getText(), dateType, dateType, DNET_DATACITE_DATE, DNET_DATACITE_DATE,
|
2020-05-26 13:11:09 +02:00
|
|
|
info));
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
|
|
|
|
return new ArrayList<>(); // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareListFields(doc, "//*[local-name()='contributorName']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
|
2021-05-05 16:36:15 +02:00
|
|
|
return prepareListFields(doc, "//*[local-name()='format']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
|
2021-05-05 16:36:15 +02:00
|
|
|
return prepareField(doc, "//*[local-name()='publisher']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-08-04 11:39:39 +02:00
|
|
|
protected List<Subject> prepareSubjects(final Document doc, final DataInfo info) {
|
|
|
|
return prepareSubjectList(doc, "//*[local-name()='subject']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Qualifier prepareLanguages(final Document doc) {
|
2021-05-05 16:36:15 +02:00
|
|
|
return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareOtherResearchProductTools(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
return new ArrayList<>(); // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-05-26 13:11:09 +02:00
|
|
|
return prepareListFields(
|
2021-04-23 17:09:36 +02:00
|
|
|
doc,
|
|
|
|
"//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']",
|
|
|
|
info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-05-26 13:11:09 +02:00
|
|
|
return prepareListFields(
|
2021-04-23 17:09:36 +02:00
|
|
|
doc,
|
|
|
|
"//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']",
|
|
|
|
info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareQualifier(doc, "//*[local-name()='format']", DNET_PROGRAMMING_LANGUAGES);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
return null; // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<StructuredProperty> prepareSoftwareLicenses(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
return new ArrayList<>(); // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-05-26 13:11:09 +02:00
|
|
|
return prepareListFields(
|
2021-04-23 17:09:36 +02:00
|
|
|
doc,
|
|
|
|
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
|
|
|
|
info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// DATASETS
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
|
|
|
|
final List<GeoLocation> res = new ArrayList<>();
|
|
|
|
|
2021-04-23 17:09:36 +02:00
|
|
|
for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
|
2020-04-28 11:23:29 +02:00
|
|
|
final GeoLocation loc = new GeoLocation();
|
2021-05-05 16:36:15 +02:00
|
|
|
loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']"));
|
|
|
|
loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']"));
|
|
|
|
loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']"));
|
2020-04-28 11:23:29 +02:00
|
|
|
res.add(loc);
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetMetadataVersionNumber(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
return null; // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetLastMetadataUpdate(
|
2020-05-22 10:08:02 +02:00
|
|
|
final Document doc,
|
|
|
|
final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareField(doc, "//*[local-name()='version']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareField(doc, "//*[local-name()='size']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
|
|
|
|
return null; // Not present in ODF ???
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
|
2021-04-23 17:09:36 +02:00
|
|
|
return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Oaf> addOtherResultRels(
|
2020-10-16 17:02:10 +02:00
|
|
|
final Document doc,
|
2023-09-04 15:15:24 +02:00
|
|
|
final OafEntity entity, DataInfo info) {
|
2020-04-28 11:23:29 +02:00
|
|
|
|
2020-10-16 16:00:19 +02:00
|
|
|
final String docId = entity.getId();
|
2020-04-28 11:23:29 +02:00
|
|
|
|
|
|
|
final List<Oaf> res = new ArrayList<>();
|
2022-09-23 12:06:06 +02:00
|
|
|
|
|
|
|
for (final Object o : doc
|
2022-09-23 15:17:13 +02:00
|
|
|
.selectNodes("//*[local-name()='relatedIdentifier']")) {
|
2022-09-23 12:06:06 +02:00
|
|
|
|
2022-09-23 15:17:13 +02:00
|
|
|
final String originalId = ((Node) o).getText().trim();
|
2022-09-23 12:06:06 +02:00
|
|
|
|
|
|
|
if (StringUtils.isNotBlank(originalId)) {
|
2022-09-23 15:17:13 +02:00
|
|
|
final String idType = ((Node) o).valueOf("@relatedIdentifierType");
|
2022-09-26 11:24:13 +02:00
|
|
|
final String relType = ((Node) o).valueOf("@relationType");
|
2022-09-23 15:17:13 +02:00
|
|
|
String otherId = guessRelatedIdentifier(idType, originalId);
|
|
|
|
if (StringUtils.isNotBlank(otherId)) {
|
2023-09-04 15:15:24 +02:00
|
|
|
res.addAll(getRelations(relType, docId, otherId, entity, info));
|
2022-09-23 12:06:06 +02:00
|
|
|
}
|
2022-09-26 11:24:13 +02:00
|
|
|
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2022-09-23 15:17:13 +02:00
|
|
|
protected String guessRelatedIdentifier(final String idType, final String value) {
|
|
|
|
if (StringUtils.isBlank(idType) || StringUtils.isBlank(value))
|
|
|
|
return null;
|
2022-09-26 11:24:13 +02:00
|
|
|
if (idType.equalsIgnoreCase("OPENAIRE"))
|
|
|
|
return createOpenaireId(50, value, false);
|
|
|
|
if (pidTypeWithAuthority.containsKey(idType.toLowerCase())) {
|
|
|
|
return IdentifierFactory.idFromPid("50", pidTypeWithAuthority.get(idType.toLowerCase()), value, true);
|
|
|
|
}
|
2022-09-23 15:47:05 +02:00
|
|
|
return null;
|
|
|
|
|
2022-09-23 15:17:13 +02:00
|
|
|
}
|
|
|
|
|
2022-09-26 11:24:13 +02:00
|
|
|
protected List<Oaf> getRelations(final String reltype, final String entityId, final String otherId,
|
2023-09-04 15:15:24 +02:00
|
|
|
final OafEntity entity, DataInfo info) {
|
2022-09-26 11:24:13 +02:00
|
|
|
final List<Oaf> res = new ArrayList<>();
|
|
|
|
RelationInverse rel = ModelSupport.findRelation(reltype);
|
|
|
|
if (rel != null) {
|
|
|
|
res
|
|
|
|
.add(
|
|
|
|
getRelation(
|
2023-09-04 15:15:24 +02:00
|
|
|
entityId, otherId, rel.getRelType(), rel.getSubReltype(), rel.getRelClass(),
|
|
|
|
entity.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null, null));
|
2022-09-26 11:24:13 +02:00
|
|
|
res
|
|
|
|
.add(
|
|
|
|
getRelation(
|
2023-09-04 15:15:24 +02:00
|
|
|
otherId, entityId, rel.getRelType(), rel.getSubReltype(), rel.getInverseRelClass(),
|
|
|
|
entity.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null, null));
|
2022-09-26 11:24:13 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2020-04-28 11:23:29 +02:00
|
|
|
@Override
|
|
|
|
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
|
2020-05-26 13:11:09 +02:00
|
|
|
return prepareQualifier(
|
2020-05-27 11:34:13 +02:00
|
|
|
doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE);
|
2020-04-28 11:23:29 +02:00
|
|
|
}
|
2020-05-26 13:06:55 +02:00
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<StructuredProperty> prepareResultPids(final Document doc, final DataInfo info) {
|
2021-08-11 12:13:22 +02:00
|
|
|
final Set<StructuredProperty> res = new HashSet<>();
|
2020-05-26 13:11:09 +02:00
|
|
|
res
|
|
|
|
.addAll(
|
2020-05-27 11:34:13 +02:00
|
|
|
prepareListStructPropsWithValidQualifier(
|
|
|
|
doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info));
|
2020-05-26 13:11:09 +02:00
|
|
|
res
|
|
|
|
.addAll(
|
2020-05-27 11:34:13 +02:00
|
|
|
prepareListStructPropsWithValidQualifier(
|
2021-04-23 17:09:36 +02:00
|
|
|
doc,
|
|
|
|
"//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']",
|
2020-06-11 12:28:34 +02:00
|
|
|
"@identifierType", DNET_PID_TYPES, info));
|
2020-05-26 13:11:09 +02:00
|
|
|
res
|
|
|
|
.addAll(
|
2020-05-27 11:34:13 +02:00
|
|
|
prepareListStructPropsWithValidQualifier(
|
2020-06-11 12:28:34 +02:00
|
|
|
doc,
|
2021-04-23 17:09:36 +02:00
|
|
|
"//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']",
|
2020-05-27 11:34:13 +02:00
|
|
|
"@alternateIdentifierType", DNET_PID_TYPES, info));
|
2020-10-30 10:56:42 +01:00
|
|
|
|
|
|
|
return res
|
|
|
|
.stream()
|
2020-11-03 12:19:46 +01:00
|
|
|
.map(CleaningFunctions::normalizePidValue)
|
2020-10-30 10:56:42 +01:00
|
|
|
.collect(Collectors.toList());
|
2020-05-26 13:06:55 +02:00
|
|
|
}
|
|
|
|
|
2020-02-05 15:35:40 +01:00
|
|
|
}
|