From 6540ad2f4e4a7f1729440f13fe7a0e129b98f90c Mon Sep 17 00:00:00 2001
From: "michele.artini"
Date: Thu, 5 Dec 2024 15:39:13 +0100
Subject: [PATCH] partial implementation of xml serialization

---
 .../directindex/mapping/SolrRecordMapper.java |   4 +-
 .../directindex/solr/XMLSolrSerializer.java   | 160 +++++++++++++++++-
 2 files changed, 160 insertions(+), 4 deletions(-)

diff --git a/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java b/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java
index 12a297b..87e4daf 100644
--- a/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java
+++ b/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java
@@ -215,7 +215,7 @@ public class SolrRecordMapper {
 		final SolrRecordHeader header = new SolrRecordHeader();
 		header.setId(re.getOpenaireId());
 		header.setOriginalId(Arrays.asList(re.getOriginalId()));
-		header.setRecordType(RecordType.valueOf(re.getType()));
+		header.setRecordType(StringUtils.isNotBlank(re.getType()) ? RecordType.valueOf(re.getType()) : RecordType.publication);
 		header.setStatus(Status.UNDER_CURATION);
 		header.setDeletedbyinference(false);
 		return header;
@@ -227,7 +227,7 @@ public class SolrRecordMapper {
 
 		// String getAnyId() {
 
-		r.setResulttype(re.getType());
+		r.setResulttype(StringUtils.firstNonBlank(re.getType(), "publication"));
 		r.setMaintitle(re.getTitle());
 
 		if (re.getAuthors() != null) {
diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/XMLSolrSerializer.java b/src/main/java/eu/dnetlib/app/directindex/solr/XMLSolrSerializer.java
index 60155f3..7f9b498 100644
--- a/src/main/java/eu/dnetlib/app/directindex/solr/XMLSolrSerializer.java
+++ b/src/main/java/eu/dnetlib/app/directindex/solr/XMLSolrSerializer.java
@@ -1,18 +1,174 @@
 package eu.dnetlib.app.directindex.solr;
 
+import org.apache.commons.lang3.StringUtils;
 import org.dom4j.DocumentHelper;
 import org.dom4j.Element;
+import org.dom4j.Namespace;
+import org.dom4j.QName;
 
+import eu.dnetlib.dhp.schema.solr.BestAccessRight;
+import eu.dnetlib.dhp.schema.solr.Language;
 import eu.dnetlib.dhp.schema.solr.SolrRecord;
 
+// @formatter:off
+/**
+ * Serializes a {@link SolrRecord} as an XML string built with dom4j.
+ * Partial implementation: project relations and instances are still TODO.
+ */
 public class XMLSolrSerializer {
 
-	public static String generateXML(final SolrRecord record) {
+	private static final Namespace DRI_NS = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri");
+	private static final Namespace OAF_NS = new Namespace("oaf", "http://namespace.openaire.eu/oaf");
+	private static final Namespace XSI_NS = new Namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
+
+	/**
+	 * Generates the XML text for the given record.
+	 *
+	 * @param sr the record to serialize; its header is dereferenced and must not be null
+	 * @return the XML of a "record" root holding a "result" element with "header" and "metadata" children
+	 */
+	public static String generateXML(final SolrRecord sr) {
 
 		final Element root = DocumentHelper.createElement("record");
 
-		// TODO
+		final Element result = root.addElement("result");
+
+		populateHeader(sr, result.addElement("header"));
+		populateMetadata(sr, result.addElement("metadata"));
 
 		return DocumentHelper.createDocument(root).asXML();
 	}
+
+	/** Writes the dri:* header attributes: the record id, two empty dates and the fixed status "under curation". */
+	private static void populateHeader(final SolrRecord sr, final Element header) {
+		header.addAttribute(new QName("objIdentifier", DRI_NS), sr.getHeader().getId());
+		header.addAttribute(new QName("dateOfCollection", DRI_NS), "");
+		header.addAttribute(new QName("dateOfTransformation", DRI_NS), "");
+		header.addAttribute(new QName("status", DRI_NS), "under curation");
+	}
+
+	/** Adds the oaf:entity wrapper and, only when the record has a result, its oaf:result child. */
+	private static void populateMetadata(final SolrRecord sr, final Element metadata) {
+		final Element entity = metadata.addElement(new QName("entity", OAF_NS));
+		entity.addAttribute(new QName("schemaLocation", XSI_NS), "http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd");
+		if (sr.getResult() != null) {
+			populateResultFields(sr, entity.addElement(new QName("result", OAF_NS)));
+		}
+	}
+
+	/**
+	 * Fills the given element with the result fields of the record; sr.getResult() is dereferenced without a check, so the caller must have verified it.
+	 */
+	private static void populateResultFields(final SolrRecord sr, final Element fields) {
+
+		if (sr.getCollectedfrom() != null) {
+			sr.getCollectedfrom().forEach(p -> {
+				final Element collectedFrom = fields.addElement("collectedfrom");
+				collectedFrom.addAttribute("id", p.getDsId());
+				collectedFrom.addAttribute("name", p.getDsName());
+			});
+		}
+
+		if (sr.getHeader().getOriginalId() != null) {
+			// NOTE(review): null ids are skipped because dom4j is expected to reject null text - confirm against the dom4j version in use
+			sr.getHeader().getOriginalId().stream().filter(id -> id != null).forEach(id -> fields.addElement("originalId").addText(id));
+		}
+
+		addStructuredField(fields, "title", "main title", "dnet:dataCite_title", sr.getResult().getMaintitle());
+
+		if (sr.getResult().getBestaccessright() != null) {
+			final BestAccessRight r = sr.getResult().getBestaccessright();
+			addStructuredField(fields, "bestaccessright", r.getCode(), r.getLabel(), "dnet:access_modes", null);
+		}
+
+		if (sr.getResult().getAuthor() != null) {
+			sr.getResult().getAuthor().forEach(a -> {
+				final Element creator = fields.addElement("creator");
+				creator.addAttribute("rank", "" + a.getRank());
+				creator.setText(StringUtils.firstNonBlank(a.getFullname(), ""));
+			});
+		}
+
+		if (sr.getResult().getDescription() != null) {
+			sr.getResult().getDescription().forEach(s -> fields.addElement("description").setText(StringUtils.firstNonBlank(s, "")));
+		}
+
+		if (sr.getResult().getLanguage() != null) {
+			final Language l = sr.getResult().getLanguage();
+			addStructuredField(fields, "language", l.getCode(), l.getLabel(), "dnet:languages", null);
+		}
+
+		if (sr.getResult().getResulttype() != null) {
+			addStructuredField(fields, "resulttype", sr.getResult().getResulttype(), "dnet:result_typologies", null);
+		}
+
+		if (sr.getContext() != null) {
+			sr.getContext().forEach(ctx -> {
+				final Element ctxNode = fields.addElement("context");
+				ctxNode.addAttribute("id", ctx.getId());
+				ctxNode.addAttribute("label", ctx.getLabel());
+				ctxNode.addAttribute("type", ctx.getType());
+				if (ctx.getCategory() != null) {
+					ctx.getCategory().forEach(cat -> {
+						final Element catNode = ctxNode.addElement("category");
+						catNode.addAttribute("id", cat.getId());
+						catNode.addAttribute("label", cat.getLabel());
+						if (cat.getConcept() != null) {
+							cat.getConcept().forEach(cpt -> {
+								final Element cptNode = catNode.addElement("concept");
+								cptNode.addAttribute("id", cpt.getId());
+								cptNode.addAttribute("label", cpt.getLabel());
+							});
+						}
+					});
+				}
+			});
+		}
+
+		final Element datainfo = fields.addElement("datainfo");
+		datainfo.addElement("inferred").setText("false");
+		datainfo.addElement("deletedbyinference").setText("false");
+		datainfo.addElement("trust").setText("0.9");
+		datainfo.addElement("inferenceprovenance").setText("");
+		addStructuredField(datainfo, "provenanceaction", "user:insert", "dnet:provenanceActions", null);
+
+		//TODO add project rels
+		/*
+		#foreach($link in $!pub.linksToProjects)
+			#set( $info = $!util.calculateProjectInfo($!link) )
+			$!esc.evaluate($!info.id)
+			$!esc.evaluate($!info.code)
+			$!esc.evaluate($!info.acronym)
+			$!esc.evaluate($!info.title)
+			#if($!info.fundingId)
+				$!esc.evaluate($!info.fundingId)
+			#end
+		#end
+		*/
+
+		//TODO add instances
+		/*
+		2013-01-01
+		https://www.bib.irb.hr/683750
+		*/
+	}
+
+	/** Shortcut for structured fields whose classname is the same as the classid. */
+	private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String scheme, final String value) {
+		addStructuredField(parent, fieldName, classid, classid, scheme, value);
+	}
+
+	/**
+	 * Appends a child named fieldName carrying classid, classname, schemeid and schemename attributes
+	 * (scheme is written to both scheme attributes); a null or blank value yields empty text.
+	 */
+	private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String classname, final String scheme, final String value) {
+		final Element field = parent.addElement(fieldName);
+		field.addAttribute("classid", classid);
+		field.addAttribute("classname", classname);
+		field.addAttribute("schemeid", scheme);
+		field.addAttribute("schemename", scheme);
+		field.setText(StringUtils.firstNonBlank(value, ""));
+	}
+
 }