This commit is contained in:
Michele Artini 2024-12-06 11:08:42 +01:00
parent 37d0cec118
commit 42a1db6bde
4 changed files with 52 additions and 31 deletions

View File

@ -1,4 +1,4 @@
package eu.dnetlib.app.directindex.solr; package eu.dnetlib.app.directindex.mapping;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
@ -6,7 +6,6 @@ import org.dom4j.Element;
import org.dom4j.Namespace; import org.dom4j.Namespace;
import org.dom4j.QName; import org.dom4j.QName;
import eu.dnetlib.app.directindex.mapping.InstanceWithTypeCode;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.solr.BestAccessRight; import eu.dnetlib.dhp.schema.solr.BestAccessRight;
import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Instance;
@ -15,7 +14,6 @@ import eu.dnetlib.dhp.schema.solr.RecordType;
import eu.dnetlib.dhp.schema.solr.RelatedRecord; import eu.dnetlib.dhp.schema.solr.RelatedRecord;
import eu.dnetlib.dhp.schema.solr.SolrRecord; import eu.dnetlib.dhp.schema.solr.SolrRecord;
// @formatter:off
public class XMLSolrSerializer { public class XMLSolrSerializer {
private static final Namespace DRI_NS = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri"); private static final Namespace DRI_NS = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri");
@ -67,19 +65,19 @@ public class XMLSolrSerializer {
if (sr.getResult().getBestaccessright() != null) { if (sr.getResult().getBestaccessright() != null) {
final BestAccessRight r = sr.getResult().getBestaccessright(); final BestAccessRight r = sr.getResult().getBestaccessright();
addStructuredField(fields, "bestaccessright", r.getCode() , r.getLabel(), ModelConstants.DNET_ACCESS_MODES, null); addStructuredField(fields, "bestaccessright", r.getCode(), r.getLabel(), ModelConstants.DNET_ACCESS_MODES, null);
} }
if (sr.getResult().getAuthor() != null) { if (sr.getResult().getAuthor() != null) {
sr.getResult().getAuthor().forEach(a -> { sr.getResult().getAuthor().forEach(a -> {
final Element creator = fields.addElement("creator"); final Element creator = fields.addElement("creator");
creator.addAttribute("rank", "" + a.getRank()); creator.addAttribute("rank", "" + a.getRank());
creator.setText(a.getFullname()); creator.addText(a.getFullname());
}); });
} }
if (sr.getResult().getDescription() != null) { if (sr.getResult().getDescription() != null) {
sr.getResult().getDescription().forEach(s -> fields.addElement("description").setText(s)); sr.getResult().getDescription().forEach(s -> fields.addElement("description").addText(s));
} }
if (sr.getResult().getLanguage() != null) { if (sr.getResult().getLanguage() != null) {
@ -105,8 +103,8 @@ public class XMLSolrSerializer {
if (cat.getConcept() != null) { if (cat.getConcept() != null) {
cat.getConcept().forEach(cpt -> { cat.getConcept().forEach(cpt -> {
final Element cptNode = catNode.addElement("concept"); final Element cptNode = catNode.addElement("concept");
cptNode.addAttribute("id",cpt.getId()); cptNode.addAttribute("id", cpt.getId());
cptNode.addAttribute("label",cpt.getLabel()); cptNode.addAttribute("label", cpt.getLabel());
}); });
} }
}); });
@ -115,13 +113,12 @@ public class XMLSolrSerializer {
} }
final Element datainfo = fields.addElement("datainfo"); final Element datainfo = fields.addElement("datainfo");
datainfo.addElement("inferred").setText("false"); datainfo.addElement("inferred").addText("false");
datainfo.addElement("deletedbyinference").setText("false"); datainfo.addElement("deletedbyinference").addText("false");
datainfo.addElement("trust").setText("0.9"); datainfo.addElement("trust").addText("0.9");
datainfo.addElement("inferenceprovenance").setText(""); datainfo.addElement("inferenceprovenance").addText("");
addStructuredField(datainfo, "provenanceaction", "user:insert", ModelConstants.DNET_PROVENANCE_ACTIONS, null); addStructuredField(datainfo, "provenanceaction", "user:insert", ModelConstants.DNET_PROVENANCE_ACTIONS, null);
final Element relsNode = fields.addElement("rels"); final Element relsNode = fields.addElement("rels");
if (sr.getLinks() != null) { if (sr.getLinks() != null) {
sr.getLinks().forEach(p -> addRelProject(relsNode, p)); sr.getLinks().forEach(p -> addRelProject(relsNode, p));
@ -137,7 +134,8 @@ public class XMLSolrSerializer {
final Element instanceNode = childrenNode.addElement("instance"); final Element instanceNode = childrenNode.addElement("instance");
if (i instanceof InstanceWithTypeCode) { if (i instanceof InstanceWithTypeCode) {
addStructuredField(instanceNode, "instancetype", ((InstanceWithTypeCode) i).getInstancetypeCode(), i.getInstancetype(), ModelConstants.DNET_PUBLICATION_RESOURCE, null); addStructuredField(instanceNode, "instancetype", ((InstanceWithTypeCode) i).getInstancetypeCode(), i
.getInstancetype(), ModelConstants.DNET_PUBLICATION_RESOURCE, null);
} }
addStructuredField(instanceNode, "accessright", i.getAccessright().getCode(), i.getAccessright().getLabel(), ModelConstants.DNET_ACCESS_MODES, null); addStructuredField(instanceNode, "accessright", i.getAccessright().getCode(), i.getAccessright().getLabel(), ModelConstants.DNET_ACCESS_MODES, null);
@ -158,9 +156,7 @@ public class XMLSolrSerializer {
private static void addRelProject(final Element relsNode, final RelatedRecord p) { private static void addRelProject(final Element relsNode, final RelatedRecord p) {
if (p.getHeader().getRelatedRecordType() != RecordType.project) { if (p.getHeader().getRelatedRecordType() != RecordType.project) { return; }
return;
}
final Element rel = relsNode.addElement("rel"); final Element rel = relsNode.addElement("rel");
@ -171,7 +167,7 @@ public class XMLSolrSerializer {
final Element to = rel.addElement("to"); final Element to = rel.addElement("to");
to.addAttribute("class", ModelConstants.IS_PRODUCED_BY); to.addAttribute("class", ModelConstants.IS_PRODUCED_BY);
to.addAttribute("scheme", "dnet:result_project_relations"); to.addAttribute("scheme", "dnet:result_project_relations");
to.addAttribute("type", RecordType.project.name()); to.addAttribute("type", RecordType.project.name());
to.addText(p.getHeader().getRelatedIdentifier()); to.addText(p.getHeader().getRelatedIdentifier());
@ -192,7 +188,6 @@ public class XMLSolrSerializer {
} }
} }
if (p.getFunding().getLevel0() != null) { if (p.getFunding().getLevel0() != null) {
final Element level0 = funding.addElement("funding_level_0"); final Element level0 = funding.addElement("funding_level_0");
level0.addAttribute("name", p.getFunding().getLevel0().getName()); level0.addAttribute("name", p.getFunding().getLevel0().getName());
@ -205,13 +200,21 @@ public class XMLSolrSerializer {
addStructuredField(parent, fieldName, classid, classid, scheme, value); addStructuredField(parent, fieldName, classid, classid, scheme, value);
} }
private static void addStructuredField(final Element parent, final String fieldName, final String classid, final String classname, final String scheme, final String value) { private static void addStructuredField(final Element parent,
final Element title = parent.addElement(fieldName); final String fieldName,
title.addAttribute("classid", classid); final String classid,
title.addAttribute("classname", classname); final String classname,
title.addAttribute("schemeid", scheme); final String scheme,
title.addAttribute("schemename", scheme); final String value) {
title.setText(StringUtils.firstNonBlank(value, "")); final Element elem = parent.addElement(fieldName);
elem.addAttribute("classid", classid);
elem.addAttribute("classname", classname);
elem.addAttribute("schemeid", scheme);
elem.addAttribute("schemename", scheme);
if (StringUtils.isNotBlank(value)) {
elem.addText(value);
}
} }
} }

View File

@ -20,6 +20,7 @@ import org.apache.solr.common.params.SolrParams;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException; import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.mapping.XMLSolrSerializer;
import eu.dnetlib.dhp.schema.solr.SolrRecord; import eu.dnetlib.dhp.schema.solr.SolrRecord;
import eu.dnetlib.dhp.solr.mapping.SolrInputDocumentMapper; import eu.dnetlib.dhp.solr.mapping.SolrInputDocumentMapper;

View File

@ -53,13 +53,14 @@ class SolrRecordMapperTest {
lenient().when(dsmClient.findDatasource(COLLECTED_FROM_ID)).thenReturn(new DatasourceEntry(COLLECTED_FROM_ID, "TEST AGGREGATOR", "test________")); lenient().when(dsmClient.findDatasource(COLLECTED_FROM_ID)).thenReturn(new DatasourceEntry(COLLECTED_FROM_ID, "TEST AGGREGATOR", "test________"));
lenient().when(dsmClient.findDatasource(HOSTED_BY_ID)).thenReturn(new DatasourceEntry(HOSTED_BY_ID, "TEST AGGREGATOR", "test________")); lenient().when(dsmClient.findDatasource(HOSTED_BY_ID)).thenReturn(new DatasourceEntry(HOSTED_BY_ID, "TEST AGGREGATOR", "test________"));
lenient().when(vocClient.findTermLabel("dnet:languages", "eng")).thenReturn("English"); lenient().when(vocClient.findTermLabel("dnet:languages", "eng")).thenReturn("English");
lenient().when(vocClient.findTermLabel("dnet:access_modes", "OPEN")).thenReturn("Open Access"); lenient().when(vocClient.findTermLabel("dnet:access_modes", "OPEN")).thenReturn("Open Access");
lenient().when(vocClient.findTermLabel("dnet:countries", "EU")).thenReturn("Europe"); lenient().when(vocClient.findTermLabel("dnet:countries", "EU")).thenReturn("Europe");
lenient().when(vocClient.findTermLabel("dnet:pid_types", "doi")).thenReturn("doi"); lenient().when(vocClient.findTermLabel("dnet:pid_types", "doi")).thenReturn("doi");
lenient().when(vocClient.findTermLabel("dnet:publication_resource", "0001")).thenReturn("Article");
final ProjectInfo project = new ProjectInfo(); final ProjectInfo project = new ProjectInfo();
project.setId("corda_______::283595");
project.setCode("283595"); project.setCode("283595");
project.setAcronym("OPENAIREPLUS"); project.setAcronym("OPENAIREPLUS");
project.setTitle("OpenAIREplus"); project.setTitle("OpenAIREplus");
@ -74,7 +75,7 @@ class SolrRecordMapperTest {
} }
@Test @Test
void testToSolrRecord() throws Exception { public void testToSolrRecord() throws Exception {
final ResultEntry result = new ObjectMapper().readValue(getClass().getResourceAsStream("sample-result-01.json"), ResultEntry.class); final ResultEntry result = new ObjectMapper().readValue(getClass().getResourceAsStream("sample-result-01.json"), ResultEntry.class);
@ -113,15 +114,31 @@ class SolrRecordMapperTest {
assertEquals("European Commission", solrRecord.getLinks().get(0).getFunding().getFunder().getName()); assertEquals("European Commission", solrRecord.getLinks().get(0).getFunding().getFunder().getName());
assertEquals("ec__________::EC::FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getId()); assertEquals("ec__________::EC::FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getId());
assertEquals("FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getName()); assertEquals("FP7", solrRecord.getLinks().get(0).getFunding().getLevel0().getName());
assertEquals(1, solrRecord.getResult().getInstance().size());
assertEquals("Article", solrRecord.getResult().getInstance().get(0).getInstancetype());
assertTrue(solrRecord.getResult().getInstance().get(0) instanceof InstanceWithTypeCode);
assertEquals("0001", ((InstanceWithTypeCode) solrRecord.getResult().getInstance().get(0)).getInstancetypeCode());
} }
@Test @Test
void testToResultEntry() { public void testToResultEntry() {
// fail("Not yet implemented"); // fail("Not yet implemented");
} }
@Test @Test
void testCalculateOpenaireId() throws DirectIndexApiException { public void testToXmlSolrRecord() throws Exception {
final ResultEntry result = new ObjectMapper().readValue(getClass().getResourceAsStream("sample-result-01.json"), ResultEntry.class);
final SolrRecord solrRecord = solrRecordMapper.toSolrRecord(result);
final String xml = XMLSolrSerializer.generateXML(solrRecord);
System.out.println(xml);
}
@Test
public void testCalculateOpenaireId() throws DirectIndexApiException {
final String openaireId = solrRecordMapper.calculateOpenaireId("record-oai-12345", COLLECTED_FROM_ID); final String openaireId = solrRecordMapper.calculateOpenaireId("record-oai-12345", COLLECTED_FROM_ID);

View File

@ -18,7 +18,7 @@
"accessRightCode": "OPEN", "accessRightCode": "OPEN",
"embargoEndDate" : null, "embargoEndDate" : null,
"type": "publication", "type": "publication",
"resourceType": "001", "resourceType": "0001",
"url" : "http//cnr.it/test/record01.pdf", "url" : "http//cnr.it/test/record01.pdf",
"collectedFromId" : "test_aggr_00::12344567890", "collectedFromId" : "test_aggr_00::12344567890",
"hostedById" : "test_repo_01::12344567890", "hostedById" : "test_repo_01::12344567890",