2020-04-27 14:52:31 +02:00
|
|
|
|
2020-03-27 10:42:17 +01:00
|
|
|
package eu.dnetlib.dhp.oa.provision.utils;
|
2020-02-13 16:53:27 +01:00
|
|
|
|
2021-12-13 13:27:20 +01:00
|
|
|
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes;
|
|
|
|
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor;
|
|
|
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
|
|
|
import static org.apache.commons.lang3.StringUtils.substringBefore;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.Serializable;
|
|
|
|
import java.io.StringReader;
|
|
|
|
import java.io.StringWriter;
|
2021-12-23 12:33:53 +01:00
|
|
|
import java.net.MalformedURLException;
|
2021-12-13 13:27:20 +01:00
|
|
|
import java.net.URL;
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
|
|
|
import javax.xml.transform.*;
|
|
|
|
import javax.xml.transform.dom.DOMSource;
|
|
|
|
import javax.xml.transform.stream.StreamResult;
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
|
|
import org.apache.spark.util.LongAccumulator;
|
|
|
|
import org.dom4j.Document;
|
|
|
|
import org.dom4j.DocumentException;
|
|
|
|
import org.dom4j.Element;
|
|
|
|
import org.dom4j.Node;
|
|
|
|
import org.dom4j.io.OutputFormat;
|
|
|
|
import org.dom4j.io.SAXReader;
|
|
|
|
import org.dom4j.io.XMLWriter;
|
|
|
|
|
2020-04-28 11:23:29 +02:00
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import com.google.common.base.Joiner;
|
|
|
|
import com.google.common.base.Splitter;
|
|
|
|
import com.google.common.collect.Lists;
|
|
|
|
import com.google.common.collect.Maps;
|
|
|
|
import com.google.common.collect.Sets;
|
|
|
|
import com.mycila.xmltool.XMLDoc;
|
|
|
|
import com.mycila.xmltool.XMLTag;
|
2021-12-13 13:27:20 +01:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
|
|
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
|
|
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
2021-12-02 17:20:33 +01:00
|
|
|
import eu.dnetlib.dhp.oa.provision.model.XmlInstance;
|
|
|
|
import eu.dnetlib.dhp.schema.common.*;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.*;
|
2021-12-13 13:27:20 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
2021-07-28 10:23:00 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
2022-02-23 15:54:18 +01:00
|
|
|
import scala.Tuple2;
|
2020-04-28 11:23:29 +02:00
|
|
|
|
2020-02-13 16:53:27 +01:00
|
|
|
public class XmlRecordFactory implements Serializable {
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
/** Serialization version identifier; this class implements {@link Serializable}. */
|
|
|
|
private static final long serialVersionUID = 2912912999272373172L;
|
2021-12-23 12:33:53 +01:00
|
|
|
public static final String DOI_ORG_AUTHORITY = "doi.org";
|
|
|
|
public static final String HTTPS = "https";
|
2020-04-27 14:52:31 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private final Map<String, LongAccumulator> accumulators;
|
2020-04-27 14:52:31 +02:00
|
|
|
|
2020-04-29 19:09:07 +02:00
|
|
|
private final ContextMapper contextMapper;
|
2020-04-27 14:52:31 +02:00
|
|
|
|
2020-04-29 19:09:07 +02:00
|
|
|
private final String schemaLocation;
|
2020-04-27 14:52:31 +02:00
|
|
|
|
|
|
|
private boolean indent = false;
|
|
|
|
|
|
|
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
|
|
|
|
|
|
/**
 * Creates a factory that starts with a fresh, empty accumulator map and otherwise
 * behaves like the four-argument constructor.
 *
 * @param contextMapper  context mapper kept by the factory for later use
 * @param indent         {@code true} to pretty-print the generated XML, {@code false} for the compact format
 * @param schemaLocation schema location handed to the record template when a record is built
 */
public XmlRecordFactory(
    final ContextMapper contextMapper,
    final boolean indent,
    final String schemaLocation) {

    this(new HashMap<>(), contextMapper, indent, schemaLocation);
}
|
|
|
|
|
|
|
|
/**
 * Creates a factory from explicitly supplied collaborators. All arguments are stored
 * as given; no validation or defensive copy is performed.
 *
 * @param accumulators   accumulators keyed by string, kept by reference
 * @param contextMapper  context mapper kept by the factory for later use
 * @param indent         {@code true} to pretty-print the generated XML, {@code false} for the compact format
 * @param schemaLocation schema location handed to the record template when a record is built
 */
public XmlRecordFactory(
    final Map<String, LongAccumulator> accumulators,
    final ContextMapper contextMapper,
    final boolean indent,
    final String schemaLocation) {

    this.indent = indent;
    this.schemaLocation = schemaLocation;
    this.contextMapper = contextMapper;
    this.accumulators = accumulators;
}
|
|
|
|
|
|
|
|
public String build(final JoinedEntity je) {
|
|
|
|
|
|
|
|
final Set<String> contexts = Sets.newHashSet();
|
|
|
|
|
2020-06-01 00:32:42 +02:00
|
|
|
// final OafEntity entity = toOafEntity(je.getEntity());
|
2021-07-28 10:23:00 +02:00
|
|
|
final OafEntity entity = je.getEntity();
|
|
|
|
final TemplateFactory templateFactory = new TemplateFactory();
|
2020-04-27 14:52:31 +02:00
|
|
|
try {
|
2020-06-01 00:32:42 +02:00
|
|
|
|
|
|
|
final EntityType type = EntityType.fromClass(entity.getClass());
|
2020-04-27 14:52:31 +02:00
|
|
|
final List<String> metadata = metadata(type, entity, contexts);
|
|
|
|
|
|
|
|
// rels has to be processed before the contexts because they enrich the contextMap with
|
|
|
|
// the
|
|
|
|
// funding info.
|
2020-06-01 00:32:42 +02:00
|
|
|
final List<RelatedEntityWrapper> links = je.getLinks();
|
|
|
|
final List<String> relations = links
|
2020-04-27 14:52:31 +02:00
|
|
|
.stream()
|
2020-05-04 11:51:17 +02:00
|
|
|
.filter(link -> !isDuplicate(link))
|
|
|
|
.map(link -> mapRelation(contexts, templateFactory, type, link))
|
2020-04-27 14:52:31 +02:00
|
|
|
.collect(Collectors.toCollection(ArrayList::new));
|
|
|
|
|
|
|
|
final String mainType = ModelSupport.getMainType(type);
|
|
|
|
metadata.addAll(buildContexts(mainType, contexts));
|
|
|
|
metadata.add(XmlSerializationUtils.parseDataInfo(entity.getDataInfo()));
|
|
|
|
|
|
|
|
final String body = templateFactory
|
|
|
|
.buildBody(
|
2021-07-28 10:23:00 +02:00
|
|
|
mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity));
|
2020-04-27 14:52:31 +02:00
|
|
|
|
|
|
|
return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent);
|
|
|
|
} catch (final Throwable e) {
|
|
|
|
throw new RuntimeException(String.format("error building record '%s'", entity.getId()), e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private static OafEntity parseOaf(final String json, final String type) {
|
|
|
|
try {
|
|
|
|
switch (EntityType.valueOf(type)) {
|
|
|
|
case publication:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Publication.class);
|
|
|
|
case dataset:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Dataset.class);
|
|
|
|
case otherresearchproduct:
|
|
|
|
return OBJECT_MAPPER.readValue(json, OtherResearchProduct.class);
|
|
|
|
case software:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Software.class);
|
|
|
|
case datasource:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Datasource.class);
|
|
|
|
case organization:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Organization.class);
|
|
|
|
case project:
|
|
|
|
return OBJECT_MAPPER.readValue(json, Project.class);
|
|
|
|
default:
|
|
|
|
throw new IllegalArgumentException("invalid type: " + type);
|
|
|
|
}
|
2021-07-28 10:23:00 +02:00
|
|
|
} catch (final IOException e) {
|
2020-04-27 14:52:31 +02:00
|
|
|
throw new IllegalArgumentException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private String printXML(final String xml, final boolean indent) {
|
2020-04-27 14:52:31 +02:00
|
|
|
try {
|
|
|
|
final Document doc = new SAXReader().read(new StringReader(xml));
|
2021-07-28 10:23:00 +02:00
|
|
|
final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat();
|
2020-04-27 14:52:31 +02:00
|
|
|
format.setExpandEmptyElements(false);
|
|
|
|
format.setSuppressDeclaration(true);
|
2021-07-28 10:23:00 +02:00
|
|
|
final StringWriter sw = new StringWriter();
|
|
|
|
final XMLWriter writer = new XMLWriter(sw, format);
|
2020-04-27 14:52:31 +02:00
|
|
|
writer.write(doc);
|
|
|
|
return sw.toString();
|
|
|
|
} catch (IOException | DocumentException e) {
|
|
|
|
throw new IllegalArgumentException("Unable to indent XML. Invalid record:\n" + xml, e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private List<String> metadata(
|
2021-07-28 10:23:00 +02:00
|
|
|
final EntityType type,
|
|
|
|
final OafEntity entity,
|
|
|
|
final Set<String> contexts) {
|
2020-04-27 14:52:31 +02:00
|
|
|
|
|
|
|
final List<String> metadata = Lists.newArrayList();
|
|
|
|
|
|
|
|
if (entity.getCollectedfrom() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
entity
|
|
|
|
.getCollectedfrom()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(XmlRecordFactory::kvNotBlank)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (entity.getOriginalId() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
entity
|
|
|
|
.getOriginalId()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2021-07-20 11:51:33 +02:00
|
|
|
.filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR))
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (entity.getPid() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
entity
|
|
|
|
.getPid()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ModelSupport.isResult(type)) {
|
|
|
|
final Result r = (Result) entity;
|
|
|
|
|
2022-02-23 15:54:18 +01:00
|
|
|
if (r.getMeasures() != null) {
|
|
|
|
metadata.addAll(measuresAsXml(r.getMeasures()));
|
|
|
|
}
|
|
|
|
|
2020-04-27 14:52:31 +02:00
|
|
|
if (r.getContext() != null) {
|
|
|
|
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
|
|
|
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
|
|
|
if (contexts.contains("dh-ch::subcommunity::2")) {
|
|
|
|
contexts.add("clarin");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (r.getTitle() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getTitle()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(t -> XmlSerializationUtils.mapStructuredProperty("title", t))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getBestaccessright() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("bestaccessright", r.getBestaccessright()));
|
|
|
|
}
|
|
|
|
if (r.getAuthor() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getAuthor()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2021-07-28 10:23:00 +02:00
|
|
|
.map(a -> {
|
|
|
|
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
|
|
|
|
if (isNotBlank(a.getName())) {
|
|
|
|
sb.append(" name=\"" + XmlSerializationUtils.escapeXml(a.getName()) + "\"");
|
|
|
|
}
|
|
|
|
if (isNotBlank(a.getSurname())) {
|
2020-04-27 14:52:31 +02:00
|
|
|
sb
|
2021-07-28 10:23:00 +02:00
|
|
|
.append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\"");
|
|
|
|
}
|
|
|
|
if (a.getPid() != null) {
|
|
|
|
a
|
|
|
|
.getPid()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.filter(
|
|
|
|
sp -> isNotBlank(sp.getQualifier().getClassid())
|
|
|
|
&& isNotBlank(sp.getValue()))
|
|
|
|
.collect(
|
|
|
|
Collectors
|
|
|
|
.toMap(
|
|
|
|
p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p,
|
|
|
|
(p1, p2) -> p1))
|
|
|
|
.values()
|
|
|
|
.stream()
|
|
|
|
.collect(
|
|
|
|
Collectors
|
|
|
|
.groupingBy(
|
|
|
|
p -> p.getValue(), Collectors
|
|
|
|
.mapping(
|
|
|
|
p -> p, Collectors.minBy(new AuthorPidTypeComparator()))))
|
|
|
|
.values()
|
|
|
|
.stream()
|
|
|
|
.map(op -> op.get())
|
|
|
|
.forEach(sp -> {
|
|
|
|
final String pidType = getAuthorPidType(sp.getQualifier().getClassid());
|
|
|
|
final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue());
|
|
|
|
|
|
|
|
// ugly hack: some records provide swapped pidtype and pidvalue
|
|
|
|
if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
|
|
|
|
sb.append(String.format(" %s=\"%s\"", pidValue, pidType));
|
|
|
|
} else {
|
|
|
|
if (isNotBlank(pidType)) {
|
|
|
|
sb
|
|
|
|
.append(
|
|
|
|
String
|
|
|
|
.format(
|
|
|
|
" %s=\"%s\"", pidType, pidValue
|
|
|
|
.toLowerCase()
|
|
|
|
.replaceAll("^.*orcid\\.org\\/", "")));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
sb
|
|
|
|
.append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + "</creator>");
|
|
|
|
return sb.toString();
|
|
|
|
})
|
2020-04-27 14:52:31 +02:00
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getContributor() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getContributor()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getCountry() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getCountry()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.mapQualifier("country", c))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getCoverage() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getCoverage()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getDateofacceptance() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (r.getDescription() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getDescription()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
|
2020-07-16 13:45:53 +02:00
|
|
|
.collect(Collectors.toCollection(HashSet::new)));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (r.getEmbargoenddate() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (r.getSubject() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getSubject()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getLanguage() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("language", r.getLanguage()));
|
|
|
|
}
|
|
|
|
if (r.getRelevantdate() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getRelevantdate()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(s -> XmlSerializationUtils.mapStructuredProperty("relevantdate", s))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getPublisher() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getValue()));
|
|
|
|
}
|
|
|
|
if (r.getSource() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getSource()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue()))
|
2020-07-16 13:45:53 +02:00
|
|
|
.collect(Collectors.toCollection(HashSet::new)));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (r.getFormat() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
r
|
|
|
|
.getFormat()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (r.getResulttype() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", r.getResulttype()));
|
|
|
|
}
|
|
|
|
if (r.getResourcetype() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
|
|
|
|
}
|
2022-02-11 11:00:20 +01:00
|
|
|
if (r.getProcessingchargeamount() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue()));
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue()));
|
|
|
|
}
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case publication:
|
|
|
|
final Publication pub = (Publication) entity;
|
|
|
|
|
|
|
|
if (pub.getJournal() != null) {
|
|
|
|
final Journal j = pub.getJournal();
|
|
|
|
metadata.add(XmlSerializationUtils.mapJournal(j));
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
case dataset:
|
|
|
|
final Dataset d = (Dataset) entity;
|
|
|
|
if (d.getDevice() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice().getValue()));
|
|
|
|
}
|
|
|
|
if (d.getLastmetadataupdate() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (d.getMetadataversionnumber() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (d.getSize() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue()));
|
|
|
|
}
|
|
|
|
if (d.getStoragedate() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (d.getVersion() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue()));
|
|
|
|
}
|
|
|
|
// TODO d.getGeolocation()
|
|
|
|
|
|
|
|
break;
|
|
|
|
case otherresearchproduct:
|
|
|
|
final OtherResearchProduct orp = (OtherResearchProduct) entity;
|
|
|
|
|
|
|
|
if (orp.getContactperson() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
orp
|
|
|
|
.getContactperson()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (orp.getContactgroup() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
orp
|
|
|
|
.getContactgroup()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (orp.getTool() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
orp
|
|
|
|
.getTool()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case software:
|
|
|
|
final Software s = (Software) entity;
|
|
|
|
|
|
|
|
if (s.getDocumentationUrl() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
s
|
|
|
|
.getDocumentationUrl()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (s.getLicense() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
s
|
|
|
|
.getLicense()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(l -> XmlSerializationUtils.mapStructuredProperty("license", l))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (s.getCodeRepositoryUrl() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (s.getProgrammingLanguage() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.mapQualifier("programmingLanguage", s.getProgrammingLanguage()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case datasource:
|
|
|
|
final Datasource ds = (Datasource) entity;
|
|
|
|
|
|
|
|
if (ds.getDatasourcetype() != null) {
|
2021-07-28 10:23:00 +02:00
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype()));
|
|
|
|
}
|
|
|
|
if (ds.getDatasourcetypeui() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOpenairecompatibility() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.mapQualifier("openairecompatibility", ds.getOpenairecompatibility()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOfficialname() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getEnglishname() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getWebsiteurl() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getLogourl() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue()));
|
|
|
|
}
|
|
|
|
if (ds.getContactemail() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getNamespaceprefix() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getLatitude() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue()));
|
|
|
|
}
|
|
|
|
if (ds.getLongitude() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDateofvalidation() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDescription() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOdnumberofitems() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOdnumberofitemsdate() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOdpolicies() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getOdlanguages() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getOdlanguages()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (ds.getOdcontenttypes() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getOdcontenttypes()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (ds.getAccessinfopackage() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getAccessinfopackage()
|
|
|
|
.stream()
|
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (ds.getReleaseenddate() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getReleaseenddate() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getMissionstatementurl() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDataprovider() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getServiceprovider() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDatabaseaccesstype() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDatauploadtype() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getDatabaseaccessrestriction() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement(
|
|
|
|
"databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
|
|
|
|
}
|
|
|
|
if (ds.getDatauploadrestriction() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getVersioning() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getCitationguidelineurl() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getQualitymanagementkind() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getPidsystems() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getCertificates() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (ds.getPolicies() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getPolicies()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(XmlRecordFactory::kvNotBlank)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(kv -> XmlSerializationUtils.mapKeyValue("policies", kv))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (ds.getJournal() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
|
|
|
|
}
|
|
|
|
if (ds.getSubjects() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getSubjects()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
|
2021-07-28 11:56:55 +02:00
|
|
|
if (ds.getJurisdiction() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ds.getThematic() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ds.getKnowledgegraph() != null) {
|
|
|
|
metadata
|
|
|
|
.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ds.getContentpolicies() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
ds
|
|
|
|
.getContentpolicies()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
|
2020-04-27 14:52:31 +02:00
|
|
|
break;
|
|
|
|
case organization:
|
|
|
|
final Organization o = (Organization) entity;
|
|
|
|
|
|
|
|
if (o.getLegalshortname() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("legalshortname", o.getLegalshortname().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getLegalname() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getAlternativeNames() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
o
|
|
|
|
.getAlternativeNames()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue()))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (o.getWebsiteurl() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getLogourl() != null) {
|
2020-05-26 10:30:09 +02:00
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (o.getEclegalbody() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEclegalperson() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEcnonprofit() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEcresearchorganization() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEchighereducation() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("echighereducation", o.getEchighereducation().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2020-05-26 10:30:09 +02:00
|
|
|
if (o.getEcinternationalorganizationeurinterests() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement(
|
|
|
|
"ecinternationalorganizationeurinterests",
|
2020-05-26 10:30:09 +02:00
|
|
|
o.getEcinternationalorganizationeurinterests().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEcinternationalorganization() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement(
|
|
|
|
"ecinternationalorganization", o.getEcinternationalorganization().getValue()));
|
|
|
|
}
|
|
|
|
if (o.getEcenterprise() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEcsmevalidated() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getEcnutscode() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (o.getCountry() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
case project:
|
|
|
|
final Project p = (Project) entity;
|
|
|
|
|
|
|
|
if (p.getWebsiteurl() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getCode() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getAcronym() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getTitle() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getStartdate() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getEnddate() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getCallidentifier() != null) {
|
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.asXmlElement("callidentifier", p.getCallidentifier().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getKeywords() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getDuration() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getEcarticle29_3() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getSubjects() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
p
|
|
|
|
.getSubjects()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(Objects::nonNull)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subject", sp))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (p.getContracttype() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", p.getContracttype()));
|
|
|
|
}
|
2020-06-11 12:49:31 +02:00
|
|
|
if (p.getOamandatepublications() != null) {
|
2020-06-12 12:03:25 +02:00
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement("oamandatepublications", p.getOamandatepublications().getValue()));
|
2020-06-11 12:49:31 +02:00
|
|
|
}
|
2020-04-27 14:52:31 +02:00
|
|
|
if (p.getEcsc39() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getSummary() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getCurrency() != null) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency().getValue()));
|
|
|
|
}
|
|
|
|
if (p.getTotalcost() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getFundedamount() != null) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (p.getFundingtree() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
2020-05-22 08:50:44 +02:00
|
|
|
p
|
|
|
|
.getFundingtree()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.map(ft -> ft.getValue())
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new IllegalArgumentException("invalid entity type: " + type);
|
|
|
|
}
|
|
|
|
|
|
|
|
return metadata;
|
|
|
|
}
|
|
|
|
|
2022-02-23 15:54:18 +01:00
|
|
|
private List<String> measuresAsXml(List<Measure> measures) {
|
|
|
|
return measures
|
|
|
|
.stream()
|
|
|
|
.flatMap(
|
|
|
|
m -> m
|
|
|
|
.getUnit()
|
|
|
|
.stream()
|
|
|
|
.map(
|
|
|
|
u -> Lists
|
|
|
|
.newArrayList(
|
|
|
|
new Tuple2<>("id", m.getId()),
|
|
|
|
new Tuple2<>("key", u.getKey()),
|
|
|
|
new Tuple2<>("value", u.getValue())))
|
|
|
|
.map(l -> XmlSerializationUtils.asXmlElement("measure", l)))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private String getAuthorPidType(final String s) {
|
2020-05-22 08:50:44 +02:00
|
|
|
return XmlSerializationUtils
|
|
|
|
.escapeXml(s)
|
|
|
|
.replaceAll("\\W", "")
|
|
|
|
.replaceAll("\\d", "");
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private static boolean kvNotBlank(final KeyValue kv) {
|
2020-05-22 08:50:44 +02:00
|
|
|
return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue());
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private List<String> mapFields(final RelatedEntityWrapper link, final Set<String> contexts) {
|
2020-04-27 14:52:31 +02:00
|
|
|
final Relation rel = link.getRelation();
|
2020-06-01 00:32:42 +02:00
|
|
|
final RelatedEntity re = link.getTarget();
|
|
|
|
final String targetType = link.getTarget().getType();
|
2020-04-27 14:52:31 +02:00
|
|
|
|
|
|
|
final List<String> metadata = Lists.newArrayList();
|
|
|
|
switch (EntityType.valueOf(targetType)) {
|
|
|
|
case publication:
|
|
|
|
case dataset:
|
|
|
|
case otherresearchproduct:
|
|
|
|
case software:
|
|
|
|
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapStructuredProperty("title", re.getTitle()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(re.getDateofacceptance())) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (isNotBlank(re.getPublisher())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(re.getCodeRepositoryUrl())) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getResulttype() != null && re.getResulttype().isBlank()) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
|
|
|
|
}
|
|
|
|
if (re.getCollectedfrom() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
re
|
|
|
|
.getCollectedfrom()
|
|
|
|
.stream()
|
2020-05-22 08:50:44 +02:00
|
|
|
.filter(XmlRecordFactory::kvNotBlank)
|
2020-04-27 14:52:31 +02:00
|
|
|
.map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (re.getPid() != null) {
|
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
re
|
|
|
|
.getPid()
|
|
|
|
.stream()
|
|
|
|
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case datasource:
|
|
|
|
if (isNotBlank(re.getOfficialname())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
|
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) {
|
2021-07-28 10:23:00 +02:00
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype()));
|
|
|
|
}
|
|
|
|
if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) {
|
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
2021-07-28 10:23:00 +02:00
|
|
|
.mapQualifier("openairecompatibility", re.getOpenairecompatibility()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case organization:
|
|
|
|
if (isNotBlank(re.getLegalname())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("legalname", re.getLegalname()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(re.getLegalshortname())) {
|
|
|
|
metadata
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getCountry() != null && !re.getCountry().isBlank()) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case project:
|
|
|
|
if (isNotBlank(re.getProjectTitle())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("title", re.getProjectTitle()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(re.getCode())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(re.getAcronym())) {
|
|
|
|
metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
|
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getContracttype() != null && !re.getContracttype().isBlank()) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype()));
|
|
|
|
}
|
2020-05-22 08:50:44 +02:00
|
|
|
if (re.getFundingtree() != null && contexts != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
metadata
|
|
|
|
.addAll(
|
|
|
|
re
|
|
|
|
.getFundingtree()
|
|
|
|
.stream()
|
|
|
|
.peek(ft -> fillContextMap(ft, contexts))
|
|
|
|
.map(ft -> getRelFundingTree(ft))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new IllegalArgumentException("invalid target type: " + targetType);
|
|
|
|
}
|
|
|
|
|
|
|
|
final String accumulatorName = getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass());
|
|
|
|
if (accumulators.containsKey(accumulatorName)) {
|
|
|
|
accumulators.get(accumulatorName).add(1);
|
|
|
|
}
|
|
|
|
|
2020-05-04 11:51:17 +02:00
|
|
|
return metadata;
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private String mapRelation(final Set<String> contexts,
|
|
|
|
final TemplateFactory templateFactory,
|
|
|
|
final EntityType type,
|
|
|
|
final RelatedEntityWrapper link) {
|
2020-05-04 11:51:17 +02:00
|
|
|
final Relation rel = link.getRelation();
|
2020-06-01 00:32:42 +02:00
|
|
|
final String targetType = link.getTarget().getType();
|
2020-05-04 11:51:17 +02:00
|
|
|
final String scheme = ModelSupport.getScheme(type.toString(), targetType);
|
|
|
|
|
|
|
|
if (StringUtils.isBlank(scheme)) {
|
|
|
|
throw new IllegalArgumentException(
|
2021-05-14 10:58:12 +02:00
|
|
|
String.format("missing scheme for: <%s - %s>", type, targetType));
|
2020-05-04 11:51:17 +02:00
|
|
|
}
|
|
|
|
final HashSet<String> fields = Sets.newHashSet(mapFields(link, contexts));
|
2021-07-28 10:23:00 +02:00
|
|
|
if (rel.getValidated() == null) {
|
2021-02-16 11:01:42 +01:00
|
|
|
rel.setValidated(false);
|
2021-07-28 10:23:00 +02:00
|
|
|
}
|
2020-04-27 14:52:31 +02:00
|
|
|
return templateFactory
|
|
|
|
.getRel(
|
2021-02-10 11:22:09 +01:00
|
|
|
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(),
|
|
|
|
rel.getValidationDate());
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private List<String> listChildren(
|
2021-07-28 10:23:00 +02:00
|
|
|
final OafEntity entity,
|
|
|
|
final JoinedEntity je,
|
|
|
|
final TemplateFactory templateFactory) {
|
2020-04-27 14:52:31 +02:00
|
|
|
|
2020-06-01 00:32:42 +02:00
|
|
|
final EntityType entityType = EntityType.fromClass(je.getEntity().getClass());
|
2020-04-27 14:52:31 +02:00
|
|
|
|
2020-06-01 00:32:42 +02:00
|
|
|
final List<RelatedEntityWrapper> links = je.getLinks();
|
2021-07-28 10:23:00 +02:00
|
|
|
final List<String> children = links
|
2020-05-04 11:51:17 +02:00
|
|
|
.stream()
|
|
|
|
.filter(link -> isDuplicate(link))
|
|
|
|
.map(link -> {
|
2020-06-01 00:32:42 +02:00
|
|
|
final String targetType = link.getTarget().getType();
|
2020-05-04 11:51:17 +02:00
|
|
|
final String name = ModelSupport.getMainType(EntityType.valueOf(targetType));
|
|
|
|
final HashSet<String> fields = Sets.newHashSet(mapFields(link, null));
|
|
|
|
return templateFactory
|
2020-06-01 00:32:42 +02:00
|
|
|
.getChild(name, link.getTarget().getId(), Lists.newArrayList(fields));
|
2020-05-04 11:51:17 +02:00
|
|
|
})
|
|
|
|
.collect(Collectors.toCollection(ArrayList::new));
|
2020-04-27 14:52:31 +02:00
|
|
|
|
|
|
|
if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) {
|
|
|
|
final List<Instance> instances = ((Result) entity).getInstance();
|
|
|
|
if (instances != null) {
|
|
|
|
|
2021-12-02 17:20:33 +01:00
|
|
|
groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> {
|
2020-04-27 14:52:31 +02:00
|
|
|
final List<String> fields = Lists.newArrayList();
|
|
|
|
|
|
|
|
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
|
|
|
|
fields
|
2021-07-28 10:23:00 +02:00
|
|
|
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-12-02 17:20:33 +01:00
|
|
|
if (instance.getCollectedfrom() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
2021-12-02 17:20:33 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getCollectedfrom()
|
|
|
|
.stream()
|
|
|
|
.filter(cf -> kvNotBlank(cf))
|
|
|
|
.map(cf -> XmlSerializationUtils.mapKeyValue("collectedfrom", cf))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-12-02 17:20:33 +01:00
|
|
|
|
|
|
|
if (instance.getHostedby() != null) {
|
|
|
|
fields
|
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getHostedby()
|
|
|
|
.stream()
|
|
|
|
.filter(hb -> kvNotBlank(hb))
|
|
|
|
.map(hb -> XmlSerializationUtils.mapKeyValue("hostedby", hb))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-12-02 17:20:33 +01:00
|
|
|
if (instance.getDateofacceptance() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
2021-12-02 17:20:33 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getDateofacceptance()
|
|
|
|
.stream()
|
|
|
|
.filter(d -> isNotBlank(d))
|
|
|
|
.map(d -> XmlSerializationUtils.asXmlElement("dateofacceptance", d))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-12-02 17:20:33 +01:00
|
|
|
if (instance.getInstancetype() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
2021-12-02 17:20:33 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getInstancetype()
|
|
|
|
.stream()
|
|
|
|
.filter(t -> !t.isBlank())
|
|
|
|
.map(t -> XmlSerializationUtils.mapQualifier("instancetype", t))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-12-02 17:20:33 +01:00
|
|
|
if (instance.getDistributionlocation() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
2021-12-02 17:20:33 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getDistributionlocation()
|
|
|
|
.stream()
|
|
|
|
.filter(d -> isNotBlank(d))
|
|
|
|
.map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
2021-05-28 16:57:30 +02:00
|
|
|
if (instance.getPid() != null) {
|
|
|
|
fields
|
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getPid()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
if (instance.getAlternateIdentifier() != null) {
|
|
|
|
fields
|
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getAlternateIdentifier()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.map(p -> XmlSerializationUtils.mapStructuredProperty("alternateidentifier", p))
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
|
|
|
|
2021-12-02 17:20:33 +01:00
|
|
|
if (instance.getRefereed() != null) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
2021-12-02 17:20:33 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getRefereed()
|
|
|
|
.stream()
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.filter(r -> !r.isBlank())
|
|
|
|
.map(r -> XmlSerializationUtils.mapQualifier("refereed", r))
|
|
|
|
.collect(Collectors.toList()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (instance.getProcessingchargeamount() != null
|
2021-12-02 17:20:33 +01:00
|
|
|
&& isNotBlank(instance.getProcessingchargeamount())) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement(
|
2021-12-02 17:20:33 +01:00
|
|
|
"processingchargeamount", instance.getProcessingchargeamount()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
if (instance.getProcessingchargecurrency() != null
|
2021-12-02 17:20:33 +01:00
|
|
|
&& isNotBlank(instance.getProcessingchargecurrency())) {
|
2020-04-27 14:52:31 +02:00
|
|
|
fields
|
|
|
|
.add(
|
|
|
|
XmlSerializationUtils
|
|
|
|
.asXmlElement(
|
2021-12-02 17:20:33 +01:00
|
|
|
"processingchargecurrency", instance.getProcessingchargecurrency()));
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
2021-12-09 13:46:48 +01:00
|
|
|
if (instance.getLicense() != null) {
|
|
|
|
fields
|
2021-12-09 15:46:22 +01:00
|
|
|
.addAll(
|
|
|
|
instance
|
|
|
|
.getLicense()
|
|
|
|
.stream()
|
|
|
|
.filter(d -> isNotBlank(d))
|
|
|
|
.map(d -> XmlSerializationUtils.asXmlElement("license", d))
|
|
|
|
.collect(Collectors.toList()));
|
2021-12-09 13:46:48 +01:00
|
|
|
}
|
|
|
|
|
2020-04-27 14:52:31 +02:00
|
|
|
children
|
|
|
|
.add(
|
|
|
|
templateFactory
|
2021-12-02 17:20:33 +01:00
|
|
|
.getInstance(fields, instance.getUrl()));
|
|
|
|
});
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
final List<ExternalReference> ext = ((Result) entity).getExternalReference();
|
|
|
|
if (ext != null) {
|
|
|
|
for (final ExternalReference er : ((Result) entity).getExternalReference()) {
|
|
|
|
|
|
|
|
final List<String> fields = Lists.newArrayList();
|
|
|
|
|
|
|
|
if (isNotBlank(er.getSitename())) {
|
|
|
|
fields.add(XmlSerializationUtils.asXmlElement("sitename", er.getSitename()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(er.getLabel())) {
|
|
|
|
fields.add(XmlSerializationUtils.asXmlElement("label", er.getLabel()));
|
|
|
|
}
|
2021-04-02 12:32:12 +02:00
|
|
|
Optional
|
|
|
|
.ofNullable(er.getAlternateLabel())
|
|
|
|
.map(
|
|
|
|
altLabel -> altLabel
|
|
|
|
.stream()
|
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
|
.collect(Collectors.toList()))
|
|
|
|
.orElse(Lists.newArrayList())
|
|
|
|
.forEach(alt -> fields.add(XmlSerializationUtils.asXmlElement("alternatelabel", alt)));
|
2020-04-27 14:52:31 +02:00
|
|
|
if (isNotBlank(er.getUrl())) {
|
|
|
|
fields.add(XmlSerializationUtils.asXmlElement("url", er.getUrl()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(er.getUrl())) {
|
|
|
|
fields.add(XmlSerializationUtils.mapQualifier("qualifier", er.getQualifier()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(er.getRefidentifier())) {
|
|
|
|
fields.add(XmlSerializationUtils.asXmlElement("refidentifier", er.getRefidentifier()));
|
|
|
|
}
|
|
|
|
if (isNotBlank(er.getQuery())) {
|
|
|
|
fields.add(XmlSerializationUtils.asXmlElement("query", er.getQuery()));
|
|
|
|
}
|
|
|
|
|
|
|
|
children.add(templateFactory.getChild("externalreference", null, fields));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return children;
|
|
|
|
}
|
|
|
|
|
2021-12-02 17:20:33 +01:00
|
|
|
private Stream<XmlInstance> groupInstancesByUrl(List<Instance> instance) {
|
|
|
|
return instance
|
|
|
|
.stream()
|
2021-12-13 11:48:40 +01:00
|
|
|
.filter(i -> Objects.nonNull(i.getUrl()))
|
2021-12-02 17:20:33 +01:00
|
|
|
.map(i -> {
|
|
|
|
i
|
|
|
|
.setUrl(
|
|
|
|
i
|
|
|
|
.getUrl()
|
|
|
|
.stream()
|
|
|
|
.filter(this::isValidUrl)
|
2021-12-23 12:33:53 +01:00
|
|
|
.map(XmlRecordFactory::normalizeDoiUrl)
|
2021-12-02 17:20:33 +01:00
|
|
|
.collect(Collectors.toList()));
|
|
|
|
return i;
|
|
|
|
})
|
|
|
|
.filter(
|
|
|
|
i -> Optional
|
|
|
|
.ofNullable(i.getUrl())
|
|
|
|
.map(u -> !u.isEmpty())
|
|
|
|
.orElse(false))
|
|
|
|
.map(this::pickByUrl)
|
|
|
|
.collect(Collectors.groupingBy(ImmutablePair::getLeft))
|
|
|
|
.values()
|
|
|
|
.stream()
|
2021-12-12 12:37:38 +01:00
|
|
|
.filter(Objects::nonNull)
|
2021-12-02 17:20:33 +01:00
|
|
|
.map(this::mergeInstances);
|
|
|
|
}
|
|
|
|
|
2021-12-23 12:33:53 +01:00
|
|
|
public static String normalizeDoiUrl(String url) {
|
|
|
|
if (url.contains(DOI_ORG_AUTHORITY)) {
|
|
|
|
try {
|
|
|
|
URL u = new URL(url);
|
|
|
|
return new URL(HTTPS, DOI_ORG_AUTHORITY, u.getFile()).toString();
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
2021-12-02 17:20:33 +01:00
|
|
|
private boolean isValidUrl(String url) {
|
|
|
|
try {
|
|
|
|
new URL(url).toURI();
|
|
|
|
return true;
|
|
|
|
} catch (Exception e) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private ImmutablePair<String, Instance> pickByUrl(Instance i) {
|
|
|
|
return new ImmutablePair<>(i.getUrl().get(0), i);
|
|
|
|
}
|
|
|
|
|
|
|
|
private XmlInstance mergeInstances(List<ImmutablePair<String, Instance>> instances) {
|
|
|
|
|
|
|
|
final XmlInstance instance = new XmlInstance();
|
|
|
|
|
|
|
|
instance.setUrl(instances.get(0).getLeft());
|
|
|
|
instance
|
|
|
|
.setAccessright(
|
|
|
|
instances
|
|
|
|
.stream()
|
|
|
|
.map(Pair::getValue)
|
|
|
|
.map(Instance::getAccessright)
|
|
|
|
.min(new AccessRightComparator<AccessRight>())
|
|
|
|
.orElse(XmlInstance.UNKNOWN_ACCESS_RIGHT));
|
|
|
|
|
|
|
|
instances.forEach(p -> {
|
|
|
|
final Instance i = p.getRight();
|
|
|
|
instance.getCollectedfrom().add(i.getCollectedfrom());
|
|
|
|
instance.getHostedby().add(i.getHostedby());
|
|
|
|
instance.getInstancetype().add(i.getInstancetype());
|
2021-12-13 11:48:40 +01:00
|
|
|
instance.getRefereed().add(i.getRefereed());
|
2021-12-02 17:20:33 +01:00
|
|
|
instance
|
|
|
|
.setProcessingchargeamount(
|
|
|
|
Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null));
|
|
|
|
instance
|
|
|
|
.setProcessingchargecurrency(
|
|
|
|
Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null));
|
2021-12-13 13:27:20 +01:00
|
|
|
Optional
|
|
|
|
.ofNullable(i.getPid())
|
|
|
|
.ifPresent(pid -> instance.getPid().addAll(pid));
|
|
|
|
Optional
|
|
|
|
.ofNullable(i.getAlternateIdentifier())
|
|
|
|
.ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId));
|
2021-12-13 11:48:40 +01:00
|
|
|
Optional
|
|
|
|
.ofNullable(i.getDateofacceptance())
|
|
|
|
.ifPresent(d -> instance.getDateofacceptance().add(d.getValue()));
|
|
|
|
Optional
|
|
|
|
.ofNullable(i.getLicense())
|
|
|
|
.ifPresent(license -> instance.getLicense().add(license.getValue()));
|
|
|
|
Optional
|
|
|
|
.ofNullable(i.getDistributionlocation())
|
|
|
|
.ifPresent(dl -> instance.getDistributionlocation().add(dl));
|
2021-12-02 17:20:33 +01:00
|
|
|
});
|
|
|
|
|
|
|
|
if (instance.getHostedby().size() > 1
|
|
|
|
&& instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) {
|
|
|
|
instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY);
|
|
|
|
}
|
|
|
|
|
|
|
|
return instance;
|
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private boolean isDuplicate(final RelatedEntityWrapper link) {
|
2021-03-31 17:07:13 +02:00
|
|
|
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
|
2020-05-04 11:51:17 +02:00
|
|
|
}
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private List<String> listExtraInfo(final OafEntity entity) {
|
2020-04-27 14:52:31 +02:00
|
|
|
final List<ExtraInfo> extraInfo = entity.getExtraInfo();
|
|
|
|
return extraInfo != null
|
|
|
|
? extraInfo
|
|
|
|
.stream()
|
|
|
|
.map(e -> XmlSerializationUtils.mapExtraInfo(e))
|
|
|
|
.collect(Collectors.toList())
|
|
|
|
: Lists.newArrayList();
|
|
|
|
}
|
|
|
|
|
|
|
|
private List<String> buildContexts(final String type, final Set<String> contexts) {
|
|
|
|
final List<String> res = Lists.newArrayList();
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
if (contextMapper != null
|
2020-04-27 14:52:31 +02:00
|
|
|
&& !contextMapper.isEmpty()
|
|
|
|
&& MainEntityType.result.toString().equals(type)) {
|
|
|
|
|
|
|
|
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
|
|
|
|
|
|
|
for (final String context : contexts) {
|
|
|
|
|
|
|
|
String id = "";
|
|
|
|
for (final String token : Splitter.on("::").split(context)) {
|
|
|
|
id += token;
|
|
|
|
|
|
|
|
final ContextDef def = contextMapper.get(id);
|
|
|
|
|
|
|
|
if (def == null) {
|
|
|
|
continue;
|
|
|
|
// throw new IllegalStateException(String.format("cannot find context for id
|
|
|
|
// '%s'",
|
|
|
|
// id));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (def.getName().equals("context")) {
|
|
|
|
final String xpath = "//context/@id='" + def.getId() + "'";
|
|
|
|
if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
|
|
|
|
document = addContextDef(document.gotoRoot(), def);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (def.getName().equals("category")) {
|
|
|
|
final String rootId = substringBefore(def.getId(), "::");
|
|
|
|
document = addContextDef(
|
2021-07-28 10:23:00 +02:00
|
|
|
document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
|
2020-04-27 14:52:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (def.getName().equals("concept")) {
|
|
|
|
document = addContextDef(document, def).gotoParent();
|
|
|
|
}
|
|
|
|
id += "::";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
final Transformer transformer = getTransformer();
|
|
|
|
for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
|
|
|
|
try {
|
|
|
|
res.add(asStringElement(x, transformer));
|
|
|
|
} catch (final TransformerException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
private Transformer getTransformer() {
|
|
|
|
try {
|
2021-07-28 10:23:00 +02:00
|
|
|
final Transformer transformer = TransformerFactory.newInstance().newTransformer();
|
2020-04-27 14:52:31 +02:00
|
|
|
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
|
|
|
|
return transformer;
|
2021-07-28 10:23:00 +02:00
|
|
|
} catch (final TransformerConfigurationException e) {
|
2020-04-27 14:52:31 +02:00
|
|
|
throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
|
|
|
|
tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
|
2021-07-28 10:23:00 +02:00
|
|
|
if (def.getType() != null && !def.getType().isEmpty()) {
|
2020-04-27 14:52:31 +02:00
|
|
|
tag.addAttribute("type", def.getType());
|
|
|
|
}
|
|
|
|
return tag;
|
|
|
|
}
|
|
|
|
|
|
|
|
private String asStringElement(final org.w3c.dom.Element element, final Transformer transformer)
|
|
|
|
throws TransformerException {
|
|
|
|
final StringWriter buffer = new StringWriter();
|
|
|
|
transformer.transform(new DOMSource(element), new StreamResult(buffer));
|
|
|
|
return buffer.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
private void fillContextMap(final String xmlTree, final Set<String> contexts) {
|
|
|
|
|
|
|
|
Document fundingPath;
|
|
|
|
try {
|
|
|
|
fundingPath = new SAXReader().read(new StringReader(xmlTree));
|
|
|
|
} catch (final DocumentException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
final Node funder = fundingPath.selectSingleNode("//funder");
|
|
|
|
|
|
|
|
if (funder != null) {
|
|
|
|
|
|
|
|
final String funderShortName = funder.valueOf("./shortname");
|
|
|
|
contexts.add(funderShortName);
|
|
|
|
|
|
|
|
contextMapper
|
|
|
|
.put(
|
|
|
|
funderShortName,
|
|
|
|
new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
|
|
|
|
final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
|
|
|
|
if (level0 != null) {
|
|
|
|
final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
|
|
|
|
contextMapper
|
2021-07-28 10:23:00 +02:00
|
|
|
.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
|
2020-04-27 14:52:31 +02:00
|
|
|
final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
|
|
|
|
if (level1 == null) {
|
|
|
|
contexts.add(level0Id);
|
|
|
|
} else {
|
|
|
|
final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
|
|
|
|
contextMapper
|
2021-07-28 10:23:00 +02:00
|
|
|
.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
|
2020-04-27 14:52:31 +02:00
|
|
|
final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
|
|
|
|
if (level2 == null) {
|
|
|
|
contexts.add(level1Id);
|
|
|
|
} else {
|
|
|
|
final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
|
|
|
|
contextMapper
|
|
|
|
.put(
|
2021-07-28 10:23:00 +02:00
|
|
|
level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
|
2020-04-27 14:52:31 +02:00
|
|
|
contexts.add(level2Id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (final NullPointerException e) {
|
|
|
|
throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@SuppressWarnings("unchecked")
|
|
|
|
protected static String getRelFundingTree(final String xmlTree) {
|
|
|
|
String funding = "<funding>";
|
|
|
|
try {
|
|
|
|
final Document ftree = new SAXReader().read(new StringReader(xmlTree));
|
|
|
|
funding = "<funding>";
|
|
|
|
|
|
|
|
funding += getFunderElement(ftree);
|
|
|
|
|
|
|
|
for (final Object o : Lists
|
2021-07-28 10:23:00 +02:00
|
|
|
.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
|
2020-04-27 14:52:31 +02:00
|
|
|
final Element e = (Element) o;
|
|
|
|
final String _id = e.valueOf("./id");
|
|
|
|
funding += "<"
|
|
|
|
+ e.getName()
|
|
|
|
+ " name=\""
|
|
|
|
+ XmlSerializationUtils.escapeXml(e.valueOf("./name"))
|
|
|
|
+ "\">"
|
|
|
|
+ XmlSerializationUtils.escapeXml(_id)
|
|
|
|
+ "</"
|
|
|
|
+ e.getName()
|
|
|
|
+ ">";
|
|
|
|
}
|
|
|
|
} catch (final DocumentException e) {
|
|
|
|
throw new IllegalArgumentException(
|
|
|
|
"unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
|
|
|
|
} finally {
|
|
|
|
funding += "</funding>";
|
|
|
|
}
|
|
|
|
return funding;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String getFunderElement(final Document ftree) {
|
|
|
|
final String funderId = ftree.valueOf("//fundingtree/funder/id");
|
|
|
|
final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname");
|
|
|
|
final String funderName = ftree.valueOf("//fundingtree/funder/name");
|
|
|
|
final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction");
|
|
|
|
|
|
|
|
return "<funder id=\""
|
|
|
|
+ XmlSerializationUtils.escapeXml(funderId)
|
|
|
|
+ "\" shortname=\""
|
|
|
|
+ XmlSerializationUtils.escapeXml(funderShortName)
|
|
|
|
+ "\" name=\""
|
|
|
|
+ XmlSerializationUtils.escapeXml(funderName)
|
|
|
|
+ "\" jurisdiction=\""
|
|
|
|
+ XmlSerializationUtils.escapeXml(funderJurisdiction)
|
|
|
|
+ "\" />";
|
|
|
|
}
|
2020-04-18 12:42:58 +02:00
|
|
|
}
|