partial implementation of xml serialization

This commit is contained in:
Michele Artini 2024-12-05 15:39:13 +01:00
parent 541350e9e0
commit 6540ad2f4e
2 changed files with 170 additions and 4 deletions

View File

@ -215,7 +215,7 @@ public class SolrRecordMapper {
final SolrRecordHeader header = new SolrRecordHeader();
header.setId(re.getOpenaireId());
header.setOriginalId(Arrays.asList(re.getOriginalId()));
header.setRecordType(RecordType.valueOf(re.getType()));
header.setRecordType(StringUtils.isNotBlank(re.getType()) ? RecordType.valueOf(re.getType()) : RecordType.publication);
header.setStatus(Status.UNDER_CURATION);
header.setDeletedbyinference(false);
return header;
@ -227,7 +227,7 @@ public class SolrRecordMapper {
// String getAnyId() {
r.setResulttype(re.getType());
r.setResulttype(StringUtils.firstNonBlank(re.getType(), "publication"));
r.setMaintitle(re.getTitle());
if (re.getAuthors() != null) {

View File

@ -1,18 +1,184 @@
package eu.dnetlib.app.directindex.solr;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
import eu.dnetlib.dhp.schema.solr.BestAccessRight;
import eu.dnetlib.dhp.schema.solr.Language;
import eu.dnetlib.dhp.schema.solr.SolrRecord;
// @formatter:off
public class XMLSolrSerializer {
public static String generateXML(final SolrRecord record) {
/** Namespace bound to the {@code dri} prefix; qualifies the attributes written on the header element. */
private static final Namespace DRI_NS = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri");
/** Namespace bound to the {@code oaf} prefix; qualifies the {@code entity} and {@code result} elements under metadata. */
private static final Namespace OAF_NS = new Namespace("oaf", "http://namespace.openaire.eu/oaf");
/** Namespace bound to the {@code xsi} prefix; qualifies the {@code schemaLocation} attribute of the entity element. */
private static final Namespace XSI_NS = new Namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
/**
 * Builds the XML representation of a Solr record.
 *
 * The produced document has the shape
 * {@code <record><result><header/><metadata/></result></record>}: the header
 * element is filled by {@code populateHeader} and the metadata element by
 * {@code poulateMetadata}.
 *
 * @param sr the record to serialize
 * @return the complete document rendered as an XML string
 */
public static String generateXML(final SolrRecord sr) {
    final Element recordNode = DocumentHelper.createElement("record");
    final Element resultNode = recordNode.addElement("result");

    // The header is created and filled before the metadata so that it comes first in the output.
    final Element headerNode = resultNode.addElement("header");
    populateHeader(sr, headerNode);

    final Element metadataNode = resultNode.addElement("metadata");
    poulateMetadata(sr, metadataNode);

    return DocumentHelper.createDocument(recordNode).asXML();
}
/**
 * Writes the {@code dri}-qualified header information onto the given header element.
 *
 * Only the object identifier comes from the record; both dates are written as
 * empty strings and the status is the fixed text "under curation".
 *
 * NOTE(review): the values are emitted as attributes of the header element;
 * confirm that consumers of this XML do not expect child elements instead.
 *
 * @param sr     the record whose header id is written
 * @param header the element receiving the attributes
 */
private static void populateHeader(final SolrRecord sr, final Element header) {
    final String recordId = sr.getHeader().getId();

    final QName objIdentifier = new QName("objIdentifier", DRI_NS);
    final QName dateOfCollection = new QName("dateOfCollection", DRI_NS);
    final QName dateOfTransformation = new QName("dateOfTransformation", DRI_NS);
    final QName status = new QName("status", DRI_NS);

    header.addAttribute(objIdentifier, recordId);
    header.addAttribute(dateOfCollection, "");
    header.addAttribute(dateOfTransformation, "");
    header.addAttribute(status, "under curation");
}
/**
 * Adds the {@code oaf:entity} element (with its {@code xsi:schemaLocation}
 * attribute) to the metadata element and, when the record carries a result,
 * an {@code oaf:result} child filled by {@code populateResultFields}.
 *
 * @param sr       the record being serialized
 * @param metadata the metadata element to populate
 */
private static void poulateMetadata(final SolrRecord sr, final Element metadata) {
    final QName entityName = new QName("entity", OAF_NS);
    final Element entityNode = metadata.addElement(entityName);

    final QName schemaLocation = new QName("schemaLocation", XSI_NS);
    entityNode.addAttribute(schemaLocation, "http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd");

    // Without a result there is nothing to put inside the entity element.
    if (sr.getResult() == null) {
        return;
    }

    final Element resultNode = entityNode.addElement(new QName("result", OAF_NS));
    populateResultFields(sr, resultNode);
}
/**
 * Fills the result element with the fields of the record: provenance
 * ({@code collectedfrom}), original ids, main title, best access right,
 * creators, descriptions, language, result type, contexts (with their
 * categories and concepts) and a fixed {@code datainfo} section.
 *
 * The caller is expected to have checked that {@code sr.getResult()} is not
 * null. Text values that may be null (original id, author full name,
 * description) are written as empty text: dom4j rejects null text, so a single
 * missing value would otherwise abort the whole serialization.
 *
 * Project relations and instances are not serialized yet (see the TODO notes).
 *
 * @param sr     the record being serialized
 * @param fields the result element that receives the child elements
 */
private static void populateResultFields(final SolrRecord sr, final Element fields) {
    if (sr.getCollectedfrom() != null) {
        sr.getCollectedfrom().forEach(p -> {
            final Element collectedFrom = fields.addElement("collectedfrom");
            collectedFrom.addAttribute("id", p.getDsId());
            collectedFrom.addAttribute("name", p.getDsName());
        });
    }

    if (sr.getHeader().getOriginalId() != null) {
        sr.getHeader().getOriginalId().forEach(id -> fields.addElement("originalId").addText(nullToEmpty(id)));
    }

    addStructuredField(fields, "title", "main title", "dnet:dataCite_title", sr.getResult().getMaintitle());

    if (sr.getResult().getBestaccessright() != null) {
        final BestAccessRight r = sr.getResult().getBestaccessright();
        addStructuredField(fields, "bestaccessright", r.getCode(), r.getLabel(), "dnet:access_modes", null);
    }

    if (sr.getResult().getAuthor() != null) {
        sr.getResult().getAuthor().forEach(a -> {
            final Element creator = fields.addElement("creator");
            creator.addAttribute("rank", "" + a.getRank());
            // A missing full name must not break the serialization of the record.
            creator.setText(nullToEmpty(a.getFullname()));
        });
    }

    if (sr.getResult().getDescription() != null) {
        sr.getResult().getDescription().forEach(s -> fields.addElement("description").setText(nullToEmpty(s)));
    }

    if (sr.getResult().getLanguage() != null) {
        final Language l = sr.getResult().getLanguage();
        addStructuredField(fields, "language", l.getCode(), l.getLabel(), "dnet:languages", null);
    }

    if (sr.getResult().getResulttype() != null) {
        addStructuredField(fields, "resulttype", sr.getResult().getResulttype(), "dnet:result_typologies", null);
    }

    if (sr.getContext() != null) {
        sr.getContext().forEach(ctx -> {
            final Element ctxNode = fields.addElement("context");
            ctxNode.addAttribute("id", ctx.getId());
            ctxNode.addAttribute("label", ctx.getLabel());
            ctxNode.addAttribute("type", ctx.getType());
            if (ctx.getCategory() != null) {
                ctx.getCategory().forEach(cat -> {
                    final Element catNode = ctxNode.addElement("category");
                    catNode.addAttribute("id", cat.getId());
                    catNode.addAttribute("label", cat.getLabel());
                    if (cat.getConcept() != null) {
                        cat.getConcept().forEach(cpt -> {
                            final Element cptNode = catNode.addElement("concept");
                            cptNode.addAttribute("id", cpt.getId());
                            cptNode.addAttribute("label", cpt.getLabel());
                        });
                    }
                });
            }
        });
    }

    // Fixed data-info section: the values below are constants, not read from the record.
    final Element datainfo = fields.addElement("datainfo");
    datainfo.addElement("inferred").setText("false");
    datainfo.addElement("deletedbyinference").setText("false");
    datainfo.addElement("trust").setText("0.9");
    datainfo.addElement("inferenceprovenance").setText("");
    addStructuredField(datainfo, "provenanceaction", "user:insert", "dnet:provenanceActions", null);

    // TODO add project rels (template fragment kept as a reference for the expected output)
    /*
    <rels>
    #foreach($link in $!pub.linksToProjects)
    #set( $info = $!util.calculateProjectInfo($!link) )
    <rel inferred="false" trust="0.9" inferenceprovenance="" provenanceaction="user:claim">
    <to class="isProducedBy" scheme="dnet:result_project_relations" type="project">$!esc.evaluate($!info.id)</to>
    <code>$!esc.evaluate($!info.code)</code>
    <acronym>$!esc.evaluate($!info.acronym)</acronym>
    <title>$!esc.evaluate($!info.title)</title>
    <contracttype classid="" classname="" schemeid="" schemename=""/>
    <funding>
    <funder id="$!esc.evaluate($!info.funderId)"
    shortname="$!esc.evaluate($!info.funderShortName)"
    name="$!esc.evaluate($!info.funderName)"
    jurisdiction="$!esc.evaluate($!info.jurisdiction)"/>
    #if($!info.fundingId)
    <funding_level_0 name="$!esc.evaluate($!info.fundingName)">$!esc.evaluate($!info.fundingId)</funding_level_0>
    #end
    </funding>
    <websiteurl/>
    </rel>
    #end
    </rels>
    */

    // TODO add instances (sample fragment kept as a reference for the expected output)
    /*
    <children>
    <instance>
    <accessright classid="CLOSED" classname="Closed Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
    <collectedfrom name="Croatian Scientific Bibliography - CROSBI" id="openaire____::345c9d171ef3c5d706d08041d506428c" />
    <hostedby name="Croatian Scientific Bibliography - CROSBI" id="openaire____::345c9d171ef3c5d706d08041d506428c" />
    <dateofacceptance>2013-01-01</dateofacceptance>
    <instancetype classid="0004" classname="Conference object" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
    <refereed classid="0002" classname="nonPeerReviewed" schemeid="dnet:review_levels" schemename="dnet:review_levels" />
    <webresource>
    <url>https://www.bib.irb.hr/683750</url>
    </webresource>
    </instance>
    </children>
    </result>
    */
}

/** Returns the given text unchanged, or an empty string when it is null. */
private static String nullToEmpty(final String text) {
    return text != null ? text : "";
}
/**
 * Adds a structured field whose class name is the same as its class id.
 *
 * @param parent    the element receiving the new child
 * @param fieldName the name of the child element
 * @param classid   the value used for both the {@code classid} and {@code classname} attributes
 * @param scheme    the value used for both the {@code schemeid} and {@code schemename} attributes
 * @param value     the text of the field
 */
private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String scheme, final String value) {
    // No separate label is supplied to this overload, so the id doubles as the class name.
    final String classname = classid;
    addStructuredField(parent, fieldName, classid, classname, scheme, value);
}
/**
 * Adds a child element carrying the four classification attributes
 * ({@code classid}, {@code classname}, {@code schemeid}, {@code schemename})
 * and the given text.
 *
 * @param parent    the element receiving the new child
 * @param fieldName the name of the child element
 * @param classid   the value of the {@code classid} attribute
 * @param classname the value of the {@code classname} attribute
 * @param scheme    the value used for both {@code schemeid} and {@code schemename}
 * @param value     the text of the field; a null or blank value is written as empty text
 */
private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String classname, final String scheme, final String value) {
    final Element field = parent.addElement(fieldName);
    field.addAttribute("classid", classid);
    field.addAttribute("classname", classname);
    field.addAttribute("schemeid", scheme);
    field.addAttribute("schemename", scheme);
    // firstNonBlank(value, "") yields null for a blank value because the "" fallback is itself
    // blank, and dom4j does not accept null text; defaultIfBlank really falls back to "".
    field.setText(StringUtils.defaultIfBlank(value, ""));
}
}