dnet-hadoop/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/graph/utils/XmlRecordFactory.java

962 lines
45 KiB
Java

package eu.dnetlib.dhp.graph.utils;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.mycila.xmltool.XMLDoc;
import com.mycila.xmltool.XMLTag;
import eu.dnetlib.dhp.graph.model.JoinedEntity;
import eu.dnetlib.dhp.graph.model.RelatedEntity;
import eu.dnetlib.dhp.graph.model.Tuple2;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.*;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.*;
import static eu.dnetlib.dhp.graph.utils.XmlSerializationUtils.*;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.StringUtils.substringBefore;
public class XmlRecordFactory implements Serializable {
private Set<String> specialDatasourceTypes;
private ContextMapper contextMapper;
private String schemaLocation;
private Set<String> contextes = Sets.newHashSet();
private boolean indent = false;
public XmlRecordFactory(
final ContextMapper contextMapper, final boolean indent,
final String schemaLocation, final Set<String> otherDatasourceTypesUForUI) {
this.contextMapper = contextMapper;
this.schemaLocation = schemaLocation;
this.specialDatasourceTypes = otherDatasourceTypesUForUI;
this.indent = indent;
}
public String build(final JoinedEntity je) {
final OafEntity entity = je.getEntity();
TemplateFactory templateFactory = new TemplateFactory();
try {
final List<String> metadata = metadata(je.getType(), entity);
// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
final List<String> relations = listRelations(je, templateFactory);
metadata.addAll(buildContexts(getMainType(je.getType())));
metadata.add(parseDataInfo(entity.getDataInfo()));
final String body = templateFactory.buildBody(
getMainType(je.getType()),
metadata,
relations,
listChildren(je, templateFactory), listExtraInfo(je));
return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent);
} catch (final Throwable e) {
throw new RuntimeException(String.format("error building record '%s'", entity.getId()), e);
}
}
private String printXML(String xml, boolean indent) {
try {
final Document doc = new SAXReader().read(new StringReader(xml));
OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat();
format.setExpandEmptyElements(false);
format.setSuppressDeclaration(true);
StringWriter sw = new StringWriter();
XMLWriter writer = new XMLWriter(sw, format);
writer.write(doc);
return sw.toString();
} catch (IOException | DocumentException e) {
throw new IllegalArgumentException("Unable to indent XML. Invalid record:\n" + xml, e);
}
}
private List<String> metadata(final String type, final OafEntity entity) {
final List<String> metadata = Lists.newArrayList();
if (entity.getCollectedfrom() != null) {
metadata.addAll(entity.getCollectedfrom()
.stream()
.map(kv -> mapKeyValue("collectedfrom", kv))
.collect(Collectors.toList()));
}
if (entity.getOriginalId() != null) {
metadata.addAll(entity.getOriginalId()
.stream()
.map(s -> asXmlElement("originalId", s))
.collect(Collectors.toList()));
}
if (entity.getPid() != null) {
metadata.addAll(entity.getPid()
.stream()
.map(p -> mapStructuredProperty("pid", p))
.collect(Collectors.toList()));
}
if (GraphMappingUtils.isResult(type)) {
final Result r = (Result) entity;
if (r.getTitle() != null) {
metadata.addAll(r.getTitle()
.stream()
.map(t -> mapStructuredProperty("title", t))
.collect(Collectors.toList()));
}
if (r.getBestaccessright() != null) {
metadata.add(mapQualifier("bestaccessright", r.getBestaccessright()));
}
if (r.getAuthor() != null) {
metadata.addAll(r.getAuthor()
.stream()
.map(a -> {
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
if (isNotBlank(a.getName())) {
sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
}
if (isNotBlank(a.getSurname())) {
sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
}
if (a.getPid() != null) {
a.getPid().stream()
.filter(sp -> isNotBlank(sp.getQualifier().getClassid()) && isNotBlank(sp.getValue()))
.forEach(sp -> {
String pidType = escapeXml(sp.getQualifier().getClassid()).replaceAll("\\W", "");
String pidValue = escapeXml(sp.getValue());
// ugly hack: some records provide swapped pidtype and pidvalue
if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
sb.append(String.format(" %s=\"%s\"", pidValue, pidType));
} else {
pidType = pidType.replaceAll("\\W", "").replaceAll("\\d", "");
if (isNotBlank(pidType)) {
sb.append(String.format(" %s=\"%s\"",
pidType,
pidValue.toLowerCase().replaceAll("orcid", "")));
}
}
});
}
sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
return sb.toString();
}).collect(Collectors.toList()));
}
if (r.getContributor() != null) {
metadata.addAll(r.getContributor()
.stream()
.map(c -> asXmlElement("contributor", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getCountry() != null) {
metadata.addAll(r.getCountry()
.stream()
.map(c -> mapQualifier("country", c))
.collect(Collectors.toList()));
}
if (r.getCoverage() != null) {
metadata.addAll(r.getCoverage()
.stream()
.map(c -> asXmlElement("coverage", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getDateofacceptance() != null) {
metadata.add(asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
}
if (r.getDescription() != null) {
metadata.addAll(r.getDescription()
.stream()
.map(c -> asXmlElement("description", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getEmbargoenddate() != null) {
metadata.add(asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
}
if (r.getSubject() != null) {
metadata.addAll(r.getSubject()
.stream()
.map(s -> mapStructuredProperty("subject", s))
.collect(Collectors.toList()));
}
if (r.getLanguage() != null) {
metadata.add(mapQualifier("language", r.getLanguage()));
}
if (r.getRelevantdate() != null) {
metadata.addAll(r.getRelevantdate()
.stream()
.map(s -> mapStructuredProperty("relevantdate", s))
.collect(Collectors.toList()));
}
if (r.getPublisher() != null) {
metadata.add(asXmlElement("publisher", r.getPublisher().getValue()));
}
if (r.getSource() != null) {
metadata.addAll(r.getSource()
.stream()
.map(c -> asXmlElement("source", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getFormat() != null) {
metadata.addAll(r.getFormat()
.stream()
.map(c -> asXmlElement("format", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getResulttype() != null) {
metadata.add(mapQualifier("resulttype", r.getResulttype()));
}
if (r.getResourcetype() != null) {
metadata.add(mapQualifier("resourcetype", r.getResourcetype()));
}
metadata.add(mapQualifier("bestaccessright", getBestAccessright(r)));
if (r.getContext() != null) {
contextes.addAll(r.getContext()
.stream()
.map(c -> c.getId())
.collect(Collectors.toList()));
if (contextes.contains("dh-ch::subcommunity::2")) {
contextes.add("clarin");
}
}
}
switch (EntityType.valueOf(type)) {
case publication:
final Publication pub = (Publication) entity;
if (pub.getJournal() != null) {
final Journal j = pub.getJournal();
metadata.add(mapJournal(j));
}
break;
case dataset:
final Dataset d = (Dataset) entity;
if (d.getDevice() != null) {
metadata.add(asXmlElement("device", d.getDevice().getValue()));
}
if (d.getLastmetadataupdate() != null) {
metadata.add(asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
}
if (d.getMetadataversionnumber() != null) {
metadata.add(asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
}
if (d.getSize() != null) {
metadata.add(asXmlElement("size", d.getSize().getValue()));
}
if (d.getStoragedate() != null) {
metadata.add(asXmlElement("storagedate", d.getStoragedate().getValue()));
}
if (d.getVersion() != null) {
metadata.add(asXmlElement("version", d.getVersion().getValue()));
}
//TODO d.getGeolocation()
break;
case otherresearchproduct:
final OtherResearchProduct orp = (OtherResearchProduct) entity;
if (orp.getContactperson() != null) {
metadata.addAll(orp.getContactperson()
.stream()
.map(c -> asXmlElement("contactperson", c.getValue()))
.collect(Collectors.toList()));
}
if (orp.getContactgroup() != null) {
metadata.addAll(orp.getContactgroup()
.stream()
.map(c -> asXmlElement("contactgroup", c.getValue()))
.collect(Collectors.toList()));
}
if (orp.getTool() != null) {
metadata.addAll(orp.getTool()
.stream()
.map(c -> asXmlElement("tool", c.getValue()))
.collect(Collectors.toList()));
}
break;
case software:
final Software s = (Software) entity;
if (s.getDocumentationUrl() != null) {
metadata.addAll(s.getDocumentationUrl()
.stream()
.map(c -> asXmlElement("documentationUrl", c.getValue()))
.collect(Collectors.toList()));
}
if (s.getLicense() != null) {
metadata.addAll(s.getLicense()
.stream()
.map(l -> mapStructuredProperty("license", l))
.collect(Collectors.toList()));
}
if (s.getCodeRepositoryUrl() != null) {
metadata.add(asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
}
if (s.getProgrammingLanguage() != null) {
metadata.add(mapQualifier("programmingLanguage", s.getProgrammingLanguage()));
}
break;
case datasource:
final Datasource ds = (Datasource) entity;
if (ds.getDatasourcetype() != null) {
mapDatasourceType(metadata, ds.getDatasourcetype());
}
if (ds.getOpenairecompatibility() != null) {
metadata.add(mapQualifier("openairecompatibility", ds.getOpenairecompatibility()));
}
if (ds.getOfficialname() != null) {
metadata.add(asXmlElement("officialname", ds.getOfficialname().getValue()));
}
if (ds.getEnglishname() != null) {
metadata.add(asXmlElement("englishname", ds.getEnglishname().getValue()));
}
if (ds.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
}
if (ds.getLogourl() != null) {
metadata.add(asXmlElement("logourl", ds.getLogourl().getValue()));
}
if (ds.getContactemail() != null) {
metadata.add(asXmlElement("contactemail", ds.getContactemail().getValue()));
}
if (ds.getNamespaceprefix() != null) {
metadata.add(asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
}
if (ds.getLatitude() != null) {
metadata.add(asXmlElement("latitude", ds.getLatitude().getValue()));
}
if (ds.getLongitude() != null) {
metadata.add(asXmlElement("longitude", ds.getLongitude().getValue()));
}
if (ds.getDateofvalidation() != null) {
metadata.add(asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
}
if (ds.getDescription() != null) {
metadata.add(asXmlElement("description", ds.getDescription().getValue()));
}
if (ds.getOdnumberofitems() != null) {
metadata.add(asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
}
if (ds.getOdnumberofitemsdate() != null) {
metadata.add(asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
}
if (ds.getOdpolicies() != null) {
metadata.add(asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
}
if (ds.getOdlanguages() != null) {
metadata.addAll(ds.getOdlanguages()
.stream()
.map(c -> asXmlElement("odlanguages", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getOdcontenttypes() != null) {
metadata.addAll(ds.getOdcontenttypes()
.stream()
.map(c -> asXmlElement("odcontenttypes", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getAccessinfopackage() != null) {
metadata.addAll(ds.getAccessinfopackage()
.stream()
.map(c -> asXmlElement("accessinfopackage", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getReleaseenddate() != null) {
metadata.add(asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
}
if (ds.getReleaseenddate() != null) {
metadata.add(asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
}
if (ds.getMissionstatementurl() != null) {
metadata.add(asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
}
if (ds.getDataprovider() != null) {
metadata.add(asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
}
if (ds.getServiceprovider() != null) {
metadata.add(asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
}
if (ds.getDatabaseaccesstype() != null) {
metadata.add(asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
}
if (ds.getDatauploadtype() != null) {
metadata.add(asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
}
if (ds.getDatabaseaccessrestriction() != null) {
metadata.add(asXmlElement("databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
}
if (ds.getDatauploadrestriction() != null) {
metadata.add(asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
}
if (ds.getVersioning() != null) {
metadata.add(asXmlElement("versioning", ds.getVersioning().getValue().toString()));
}
if (ds.getCitationguidelineurl() != null) {
metadata.add(asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
}
if (ds.getQualitymanagementkind() != null) {
metadata.add(asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
}
if (ds.getPidsystems() != null) {
metadata.add(asXmlElement("pidsystems", ds.getPidsystems().getValue()));
}
if (ds.getCertificates() != null) {
metadata.add(asXmlElement("certificates", ds.getCertificates().getValue()));
}
if (ds.getPolicies() != null) {
metadata.addAll(ds.getPolicies()
.stream()
.map(kv -> mapKeyValue("policies", kv))
.collect(Collectors.toList()));
}
if (ds.getJournal() != null) {
metadata.add(mapJournal(ds.getJournal()));
}
if (ds.getSubjects() != null) {
metadata.addAll(ds.getSubjects()
.stream()
.map(sp -> mapStructuredProperty("subject", sp))
.collect(Collectors.toList()));
}
break;
case organization:
final Organization o = (Organization) entity;
if (o.getLegalshortname() != null) {
metadata.add(asXmlElement("legalshortname", o.getLegalshortname().getValue()));
}
if (o.getLegalname() != null) {
metadata.add(asXmlElement("legalname", o.getLegalname().getValue()));
}
if (o.getAlternativeNames() != null) {
metadata.addAll(o.getAlternativeNames()
.stream()
.map(c -> asXmlElement("alternativeNames", c.getValue()))
.collect(Collectors.toList()));
}
if (o.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
}
if (o.getLogourl() != null) {
metadata.add(asXmlElement("websiteurl", o.getLogourl().getValue()));
}
if (o.getEclegalbody() != null) {
metadata.add(asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
}
if (o.getEclegalperson() != null) {
metadata.add(asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
}
if (o.getEcnonprofit() != null) {
metadata.add(asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
}
if (o.getEcresearchorganization() != null) {
metadata.add(asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
}
if (o.getEchighereducation() != null) {
metadata.add(asXmlElement("echighereducation", o.getEchighereducation().getValue()));
}
if (o.getEcinternationalorganization() != null) {
metadata.add(asXmlElement("ecinternationalorganizationeurinterests", o.getEcinternationalorganization().getValue()));
}
if (o.getEcinternationalorganization() != null) {
metadata.add(asXmlElement("ecinternationalorganization", o.getEcinternationalorganization().getValue()));
}
if (o.getEcenterprise() != null) {
metadata.add(asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
}
if (o.getEcsmevalidated() != null) {
metadata.add(asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
}
if (o.getEcnutscode() != null) {
metadata.add(asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
}
if (o.getCountry() != null) {
metadata.add(mapQualifier("country", o.getCountry()));
}
break;
case project:
final Project p = (Project) entity;
if (p.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
}
if (p.getCode() != null) {
metadata.add(asXmlElement("code", p.getCode().getValue()));
}
if (p.getAcronym() != null) {
metadata.add(asXmlElement("acronym", p.getAcronym().getValue()));
}
if (p.getTitle() != null) {
metadata.add(asXmlElement("title", p.getTitle().getValue()));
}
if (p.getStartdate() != null) {
metadata.add(asXmlElement("startdate", p.getStartdate().getValue()));
}
if (p.getEnddate() != null) {
metadata.add(asXmlElement("enddate", p.getEnddate().getValue()));
}
if (p.getCallidentifier() != null) {
metadata.add(asXmlElement("callidentifier", p.getCallidentifier().getValue()));
}
if (p.getKeywords() != null) {
metadata.add(asXmlElement("keywords", p.getKeywords().getValue()));
}
if (p.getDuration() != null) {
metadata.add(asXmlElement("duration", p.getDuration().getValue()));
}
if (p.getEcarticle29_3() != null) {
metadata.add(asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
}
if (p.getSubjects() != null) {
metadata.addAll(p.getSubjects()
.stream()
.map(sp -> mapStructuredProperty("subject", sp))
.collect(Collectors.toList()));
}
if (p.getContracttype() != null) {
metadata.add(mapQualifier("contracttype", p.getContracttype()));
}
if (p.getEcsc39() != null) {
metadata.add(asXmlElement("ecsc39", p.getEcsc39().getValue()));
}
if (p.getContactfullname() != null) {
metadata.add(asXmlElement("contactfullname", p.getContactfullname().getValue()));
}
if (p.getContactfax() != null) {
metadata.add(asXmlElement("contactfax", p.getContactfax().getValue()));
}
if (p.getContactphone() != null) {
metadata.add(asXmlElement("contactphone", p.getContactphone().getValue()));
}
if (p.getContactemail() != null) {
metadata.add(asXmlElement("contactemail", p.getContactemail().getValue()));
}
if (p.getSummary() != null) {
metadata.add(asXmlElement("summary", p.getSummary().getValue()));
}
if (p.getCurrency() != null) {
metadata.add(asXmlElement("currency", p.getCurrency().getValue()));
}
if (p.getTotalcost() != null) {
metadata.add(asXmlElement("totalcost", p.getTotalcost().toString()));
}
if (p.getFundedamount() != null) {
metadata.add(asXmlElement("fundedamount", p.getFundedamount().toString()));
}
if (p.getFundingtree() != null) {
metadata.addAll(p.getFundingtree()
.stream()
.map(ft -> asXmlElement("fundingtree", ft.getValue()))
.collect(Collectors.toList()));
}
break;
default:
throw new IllegalArgumentException("invalid entity type: " + type);
}
return metadata;
}
private void mapDatasourceType(List<String> metadata, final Qualifier dsType) {
metadata.add(mapQualifier("datasourcetype", dsType));
if (specialDatasourceTypes.contains(dsType.getClassid())) {
dsType.setClassid("other");
dsType.setClassname("other");
}
metadata.add(mapQualifier("datasourcetypeui", dsType));
}
private Qualifier getBestAccessright(final Result r) {
Qualifier bestAccessRight = new Qualifier();
bestAccessRight.setClassid("UNKNOWN");
bestAccessRight.setClassname("not available");
bestAccessRight.setSchemeid("dnet:access_modes");
bestAccessRight.setSchemename("dnet:access_modes");
final LicenseComparator lc = new LicenseComparator();
for (final Instance instance : r.getInstance()) {
if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
bestAccessRight = instance.getAccessright();
}
}
return bestAccessRight;
}
private List<String> listRelations(final JoinedEntity je, TemplateFactory templateFactory) {
final List<String> rels = Lists.newArrayList();
for (final Tuple2 link : je.getLinks()) {
final Relation rel = link.getRelation();
final RelatedEntity re = link.getRelatedEntity();
final String targetType = link.getRelatedEntity().getType();
final List<String> metadata = Lists.newArrayList();
switch (EntityType.valueOf(targetType)) {
case publication:
case dataset:
case otherresearchproduct:
case software:
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
metadata.add(mapStructuredProperty("title", re.getTitle()));
}
if (isNotBlank(re.getDateofacceptance())) {
metadata.add(asXmlElement("dateofacceptance", re.getDateofacceptance()));
}
if (isNotBlank(re.getPublisher())) {
metadata.add(asXmlElement("publisher", re.getPublisher()));
}
if (isNotBlank(re.getCodeRepositoryUrl())) {
metadata.add(asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null & !re.getResulttype().isBlank()) {
metadata.add(mapQualifier("resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) {
metadata.addAll(re.getCollectedfrom()
.stream()
.map(kv -> mapKeyValue("collectedfrom", kv))
.collect(Collectors.toList()));
}
if (re.getPid() != null) {
metadata.addAll(re.getPid()
.stream()
.map(p -> mapStructuredProperty("pid", p))
.collect(Collectors.toList()));
}
break;
case datasource:
if (isNotBlank(re.getOfficialname())) {
metadata.add(asXmlElement("officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) {
mapDatasourceType(metadata, re.getDatasourcetype());
}
if (re.getOpenairecompatibility() != null & !re.getOpenairecompatibility().isBlank()) {
metadata.add(mapQualifier("openairecompatibility", re.getOpenairecompatibility()));
}
break;
case organization:
if (isNotBlank(re.getLegalname())) {
metadata.add(asXmlElement("legalname", re.getLegalname()));
}
if (isNotBlank(re.getLegalshortname())) {
metadata.add(asXmlElement("legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null & !re.getCountry().isBlank()) {
metadata.add(mapQualifier("country", re.getCountry()));
}
break;
case project:
if (isNotBlank(re.getProjectTitle())) {
metadata.add(asXmlElement("title", re.getProjectTitle()));
}
if (isNotBlank(re.getCode())) {
metadata.add(asXmlElement("code", re.getCode()));
}
if (isNotBlank(re.getAcronym())) {
metadata.add(asXmlElement("acronym", re.getAcronym()));
}
if (re.getContracttype() != null & !re.getContracttype().isBlank()) {
metadata.add(mapQualifier("contracttype", re.getContracttype()));
}
if (re.getFundingtree() != null) {
metadata.addAll(re.getFundingtree()
.stream()
.peek(ft -> fillContextMap(ft))
.map(ft -> getRelFundingTree(ft))
.collect(Collectors.toList()));
}
break;
default:
throw new IllegalArgumentException("invalid target type: " + targetType);
}
final DataInfo info = rel.getDataInfo();
rels.add(templateFactory.getRel(
targetType,
rel.getTarget(),
Sets.newHashSet(metadata),
getInverseRelClass(rel.getRelClass()),
getScheme(targetType, re.getType()),
info));
}
return rels;
}
private List<String> listChildren(final JoinedEntity je, TemplateFactory templateFactory) {
final List<String> children = Lists.newArrayList();
if (MainEntityType.result.toString().equals(getMainType(je.getType()))) {
final List<Instance> instances = ((Result) je.getEntity()).getInstance();
if (instances != null) {
for (final Instance instance : ((Result) je.getEntity()).getInstance()) {
final List<String> fields = Lists.newArrayList();
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
fields.add(mapQualifier("accessright", instance.getAccessright()));
}
if (instance.getCollectedfrom() != null) {
fields.add(mapKeyValue("collectedfrom", instance.getCollectedfrom()));
}
if (instance.getHostedby() != null) {
fields.add(mapKeyValue("hostedby", instance.getHostedby()));
}
if (instance.getDateofacceptance() != null && isNotBlank(instance.getDateofacceptance().getValue())) {
fields.add(asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue()));
}
if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) {
fields.add(mapQualifier("instancetype", instance.getInstancetype()));
}
if (isNotBlank(instance.getDistributionlocation())) {
fields.add(asXmlElement("distributionlocation", instance.getDistributionlocation()));
}
if (instance.getRefereed() != null && isNotBlank(instance.getRefereed().getValue())) {
fields.add(asXmlElement("refereed", instance.getRefereed().getValue()));
}
if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) {
fields.add(asXmlElement("processingchargeamount", instance.getProcessingchargeamount().getValue()));
}
if (instance.getProcessingchargecurrency() != null && isNotBlank(instance.getProcessingchargecurrency().getValue())) {
fields.add(asXmlElement("processingchargecurrency", instance.getProcessingchargecurrency().getValue()));
}
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), fields, instance.getUrl()));
}
}
final List<ExternalReference> ext = ((Result) je.getEntity()).getExternalReference();
if (ext != null) {
for (final ExternalReference er : ((Result) je.getEntity()).getExternalReference()) {
final List<String> fields = Lists.newArrayList();
if (isNotBlank(er.getSitename())) {
fields.add(asXmlElement("sitename", er.getSitename()));
}
if (isNotBlank(er.getLabel())) {
fields.add(asXmlElement("label", er.getLabel()));
}
if (isNotBlank(er.getUrl())) {
fields.add(asXmlElement("url", er.getUrl()));
}
if (isNotBlank(er.getDescription())) {
fields.add(asXmlElement("description", er.getDescription()));
}
if (isNotBlank(er.getUrl())) {
fields.add(mapQualifier("qualifier", er.getQualifier()));
}
if (isNotBlank(er.getRefidentifier())) {
fields.add(asXmlElement("refidentifier", er.getRefidentifier()));
}
if (isNotBlank(er.getQuery())) {
fields.add(asXmlElement("query", er.getQuery()));
}
children.add(templateFactory.getChild("externalreference", null, fields));
}
}
}
return children;
}
private List<String> listExtraInfo(JoinedEntity je) {
final List<ExtraInfo> extraInfo = je.getEntity().getExtraInfo();
return extraInfo != null ? extraInfo
.stream()
.map(e -> mapExtraInfo(e))
.collect(Collectors.toList()) : Lists.newArrayList();
}
private List<String> buildContexts(final String type) {
final List<String> res = Lists.newArrayList();
if ((contextMapper != null) && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) {
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
for (final String context : contextes) {
String id = "";
for (final String token : Splitter.on("::").split(context)) {
id += token;
final ContextDef def = contextMapper.get(id);
if (def == null) {
continue;
// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
}
if (def.getName().equals("context")) {
final String xpath = "//context/@id='" + def.getId() + "'";
if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
document = addContextDef(document.gotoRoot(), def);
}
}
if (def.getName().equals("category")) {
final String rootId = substringBefore(def.getId(), "::");
document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
}
if (def.getName().equals("concept")) {
document = addContextDef(document, def).gotoParent();
}
id += "::";
}
}
final Transformer transformer = getTransformer();
for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
try {
res.add(asStringElement(x, transformer));
} catch (final TransformerException e) {
throw new RuntimeException(e);
}
}
}
return res;
}
private Transformer getTransformer() {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
return transformer;
} catch (TransformerConfigurationException e) {
throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e);
}
}
private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
if ((def.getType() != null) && !def.getType().isEmpty()) {
tag.addAttribute("type", def.getType());
}
return tag;
}
private String asStringElement(final org.w3c.dom.Element element, final Transformer transformer) throws TransformerException {
final StringWriter buffer = new StringWriter();
transformer.transform(new DOMSource(element), new StreamResult(buffer));
return buffer.toString();
}
private void fillContextMap(final String xmlTree) {
Document fundingPath;
try {
fundingPath = new SAXReader().read(new StringReader(xmlTree));
} catch (final DocumentException e) {
throw new RuntimeException(e);
}
try {
final Node funder = fundingPath.selectSingleNode("//funder");
if (funder != null) {
final String funderShortName = funder.valueOf("./shortname");
contextes.add(funderShortName);
contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
if (level0 != null) {
final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
if (level1 == null) {
contextes.add(level0Id);
} else {
final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
if (level2 == null) {
contextes.add(level1Id);
} else {
final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
contextes.add(level2Id);
}
}
}
}
} catch (final NullPointerException e) {
throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
}
}
@SuppressWarnings("unchecked")
private String getRelFundingTree(final String xmlTree) {
String funding = "<funding>";
try {
final Document ftree = new SAXReader().read(new StringReader(xmlTree));
funding = "<funding>";
funding += getFunderElement(ftree);
for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
final Element e = (Element) o;
final String _id = e.valueOf("./id");
funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
}
} catch (final DocumentException e) {
throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
} finally {
funding += "</funding>";
}
return funding;
}
private String getFunderElement(final Document ftree) {
final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
}
}