This commit is contained in:
Michele Artini 2024-12-06 11:08:42 +01:00
parent 37d0cec118
commit 42a1db6bde
4 changed files with 52 additions and 31 deletions

View File

@ -1,4 +1,4 @@
package eu.dnetlib.app.directindex.solr;
package eu.dnetlib.app.directindex.mapping;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentHelper;
@ -6,7 +6,6 @@ import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
import eu.dnetlib.app.directindex.mapping.InstanceWithTypeCode;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.solr.BestAccessRight;
import eu.dnetlib.dhp.schema.solr.Instance;
@ -15,7 +14,6 @@ import eu.dnetlib.dhp.schema.solr.RecordType;
import eu.dnetlib.dhp.schema.solr.RelatedRecord;
import eu.dnetlib.dhp.schema.solr.SolrRecord;
// @formatter:off
public class XMLSolrSerializer {
private static final Namespace DRI_NS = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri");
@ -67,19 +65,19 @@ public class XMLSolrSerializer {
if (sr.getResult().getBestaccessright() != null) {
final BestAccessRight r = sr.getResult().getBestaccessright();
addStructuredField(fields, "bestaccessright", r.getCode() , r.getLabel(), ModelConstants.DNET_ACCESS_MODES, null);
addStructuredField(fields, "bestaccessright", r.getCode(), r.getLabel(), ModelConstants.DNET_ACCESS_MODES, null);
}
if (sr.getResult().getAuthor() != null) {
sr.getResult().getAuthor().forEach(a -> {
final Element creator = fields.addElement("creator");
creator.addAttribute("rank", "" + a.getRank());
creator.setText(a.getFullname());
creator.addText(a.getFullname());
});
}
if (sr.getResult().getDescription() != null) {
sr.getResult().getDescription().forEach(s -> fields.addElement("description").setText(s));
sr.getResult().getDescription().forEach(s -> fields.addElement("description").addText(s));
}
if (sr.getResult().getLanguage() != null) {
@ -105,8 +103,8 @@ public class XMLSolrSerializer {
if (cat.getConcept() != null) {
cat.getConcept().forEach(cpt -> {
final Element cptNode = catNode.addElement("concept");
cptNode.addAttribute("id",cpt.getId());
cptNode.addAttribute("label",cpt.getLabel());
cptNode.addAttribute("id", cpt.getId());
cptNode.addAttribute("label", cpt.getLabel());
});
}
});
@ -115,13 +113,12 @@ public class XMLSolrSerializer {
}
final Element datainfo = fields.addElement("datainfo");
datainfo.addElement("inferred").setText("false");
datainfo.addElement("deletedbyinference").setText("false");
datainfo.addElement("trust").setText("0.9");
datainfo.addElement("inferenceprovenance").setText("");
datainfo.addElement("inferred").addText("false");
datainfo.addElement("deletedbyinference").addText("false");
datainfo.addElement("trust").addText("0.9");
datainfo.addElement("inferenceprovenance").addText("");
addStructuredField(datainfo, "provenanceaction", "user:insert", ModelConstants.DNET_PROVENANCE_ACTIONS, null);
final Element relsNode = fields.addElement("rels");
if (sr.getLinks() != null) {
sr.getLinks().forEach(p -> addRelProject(relsNode, p));
@ -137,7 +134,8 @@ public class XMLSolrSerializer {
final Element instanceNode = childrenNode.addElement("instance");
if (i instanceof InstanceWithTypeCode) {
addStructuredField(instanceNode, "instancetype", ((InstanceWithTypeCode) i).getInstancetypeCode(), i.getInstancetype(), ModelConstants.DNET_PUBLICATION_RESOURCE, null);
addStructuredField(instanceNode, "instancetype", ((InstanceWithTypeCode) i).getInstancetypeCode(), i
.getInstancetype(), ModelConstants.DNET_PUBLICATION_RESOURCE, null);
}
addStructuredField(instanceNode, "accessright", i.getAccessright().getCode(), i.getAccessright().getLabel(), ModelConstants.DNET_ACCESS_MODES, null);
@ -158,9 +156,7 @@ public class XMLSolrSerializer {
private static void addRelProject(final Element relsNode, final RelatedRecord p) {
if (p.getHeader().getRelatedRecordType() != RecordType.project) {
return;
}
if (p.getHeader().getRelatedRecordType() != RecordType.project) { return; }
final Element rel = relsNode.addElement("rel");
@ -171,7 +167,7 @@ public class XMLSolrSerializer {
final Element to = rel.addElement("to");
to.addAttribute("class", ModelConstants.IS_PRODUCED_BY);
to.addAttribute("scheme", "dnet:result_project_relations");
to.addAttribute("scheme", "dnet:result_project_relations");
to.addAttribute("type", RecordType.project.name());
to.addText(p.getHeader().getRelatedIdentifier());
@ -192,7 +188,6 @@ public class XMLSolrSerializer {
}
}
if (p.getFunding().getLevel0() != null) {
final Element level0 = funding.addElement("funding_level_0");
level0.addAttribute("name", p.getFunding().getLevel0().getName());
@ -205,13 +200,21 @@ public class XMLSolrSerializer {
addStructuredField(parent, fieldName, classid, classid, scheme, value);
}
private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String classname, final String scheme, final String value) {
final Element title = parent.addElement(fieldName);
title.addAttribute("classid", classid);
title.addAttribute("classname", classname);
title.addAttribute("schemeid", scheme);
title.addAttribute("schemename", scheme);
title.setText(StringUtils.firstNonBlank(value, ""));
/**
 * Appends a child element named {@code fieldName} to {@code parent} and decorates it
 * with the classification attributes {@code classid}, {@code classname},
 * {@code schemeid} and {@code schemename}.
 *
 * @param parent    element that receives the new child
 * @param fieldName name of the child element to create
 * @param classid   value of the {@code classid} attribute
 * @param classname value of the {@code classname} attribute
 * @param scheme    value written to both {@code schemeid} and {@code schemename}
 * @param value     text content of the new element; skipped when null, empty or whitespace-only
 */
private static void addStructuredField(final Element parent,
		final String fieldName,
		final String classid,
		final String classname,
		final String scheme,
		final String value) {

	// addAttribute returns the element it was invoked on, so the calls can be chained.
	final Element node = parent.addElement(fieldName)
			.addAttribute("classid", classid)
			.addAttribute("classname", classname)
			.addAttribute("schemeid", scheme)
			.addAttribute("schemename", scheme);

	// No text node is created for a blank value.
	if (StringUtils.isBlank(value)) { return; }

	node.addText(value);
}
}

View File

@ -20,6 +20,7 @@ import org.apache.solr.common.params.SolrParams;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.mapping.XMLSolrSerializer;
import eu.dnetlib.dhp.schema.solr.SolrRecord;
import eu.dnetlib.dhp.solr.mapping.SolrInputDocumentMapper;

View File

@ -53,13 +53,14 @@ class SolrRecordMapperTest {
lenient().when(dsmClient.findDatasource(COLLECTED_FROM_ID)).thenReturn(new DatasourceEntry(COLLECTED_FROM_ID, "TEST AGGREGATOR", "test________"));
lenient().when(dsmClient.findDatasource(HOSTED_BY_ID)).thenReturn(new DatasourceEntry(HOSTED_BY_ID, "TEST AGGREGATOR", "test________"));
lenient().when(vocClient.findTermLabel("dnet:languages", "eng")).thenReturn("English");
lenient().when(vocClient.findTermLabel("dnet:access_modes", "OPEN")).thenReturn("Open Access");
lenient().when(vocClient.findTermLabel("dnet:countries", "EU")).thenReturn("Europe");
lenient().when(vocClient.findTermLabel("dnet:pid_types", "doi")).thenReturn("doi");
lenient().when(vocClient.findTermLabel("dnet:publication_resource", "0001")).thenReturn("Article");
final ProjectInfo project = new ProjectInfo();
project.setId("corda_______::283595");
project.setCode("283595");
project.setAcronym("OPENAIREPLUS");
project.setTitle("OpenAIREplus");
@ -74,7 +75,7 @@ class SolrRecordMapperTest {
}
@Test
void testToSolrRecord() throws Exception {
public void testToSolrRecord() throws Exception {
final ResultEntry result = new ObjectMapper().readValue(getClass().getResourceAsStream("sample-result-01.json"), ResultEntry.class);
@ -113,15 +114,31 @@ class SolrRecordMapperTest {
assertEquals("European Commission", solrRecord.getLinks().get(0).getFunding().getFunder().getName());
assertEquals("ec__________::EC::FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getId());
assertEquals("FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getName());
assertEquals(1, solrRecord.getResult().getInstance().size());
assertEquals("Article", solrRecord.getResult().getInstance().get(0).getInstancetype());
assertTrue(solrRecord.getResult().getInstance().get(0) instanceof InstanceWithTypeCode);
assertEquals("0001", ((InstanceWithTypeCode) solrRecord.getResult().getInstance().get(0)).getInstancetypeCode());
}
@Test
void testToResultEntry() {
public void testToResultEntry() {
// Placeholder: there are no executable statements here, so this test always passes.
// TODO: implement the test; the fail(...) call below is intentionally left disabled.
// fail("Not yet implemented");
}
@Test
void testCalculateOpenaireId() throws DirectIndexApiException {
public void testToXmlSolrRecord() throws Exception {
	// Load the sample input, map it to a SolrRecord and serialize that record to XML.
	final ResultEntry result = new ObjectMapper().readValue(getClass().getResourceAsStream("sample-result-01.json"), ResultEntry.class);
	final SolrRecord solrRecord = solrRecordMapper.toSolrRecord(result);
	final String xml = XMLSolrSerializer.generateXML(solrRecord);

	// Kept for manual inspection of the generated document.
	System.out.println(xml);

	// Without an assertion the test could only fail on an exception; require at least
	// that the serializer produced some non-blank output.
	assertTrue(xml != null && !xml.trim().isEmpty());
}
@Test
public void testCalculateOpenaireId() throws DirectIndexApiException {
final String openaireId = solrRecordMapper.calculateOpenaireId("record-oai-12345", COLLECTED_FROM_ID);

View File

@ -18,7 +18,7 @@
"accessRightCode": "OPEN",
"embargoEndDate" : null,
"type": "publication",
"resourceType": "001",
"resourceType": "0001",
"url" : "http//cnr.it/test/record01.pdf",
"collectedFromId" : "test_aggr_00::12344567890",
"hostedById" : "test_repo_01::12344567890",