partial implementation

This commit is contained in:
Michele Artini 2024-11-18 13:10:07 +01:00
parent 99b8fbfdfb
commit 740a4f63a2
5 changed files with 87 additions and 285 deletions

View File

@ -20,6 +20,7 @@ import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.input.ResultEntry; import eu.dnetlib.app.directindex.input.ResultEntry;
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper; import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
import eu.dnetlib.app.directindex.service.DirectIndexService; import eu.dnetlib.app.directindex.service.DirectIndexService;
import jakarta.servlet.http.HttpServletRequest;
@RestController("/api") @RestController("/api")
@ConditionalOnProperty(value = "dnet.directindex.legacy.enabled", havingValue = "true", matchIfMissing = false) @ConditionalOnProperty(value = "dnet.directindex.legacy.enabled", havingValue = "true", matchIfMissing = false)
@ -34,18 +35,21 @@ public class LegacyApiController {
private SolrRecordMapper solrRecordMapper; private SolrRecordMapper solrRecordMapper;
@PostMapping("/results/feedObject") @PostMapping("/results/feedObject")
public String feedResult(@RequestBody final ResultEntry pub, @RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit) public String feedResult(@RequestBody final ResultEntry pub,
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
final HttpServletRequest req)
throws DirectIndexApiException { throws DirectIndexApiException {
return service.prepareMetadataInsertion(pub); return service.prepareMetadataInsertion(pub, req.getRemoteAddr());
} }
@DeleteMapping("/result/{openaireId}") @DeleteMapping("/result/{openaireId}")
public boolean deleteResultWithOpenaireId(@PathVariable(value = "openaireId") final String openaireId, public boolean deleteResultWithOpenaireId(@PathVariable(value = "openaireId") final String openaireId,
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit) throws DirectIndexApiException { @RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
final HttpServletRequest req) throws DirectIndexApiException {
service.prepareMetadataDeletion(openaireId); service.prepareMetadataDeletion(openaireId, req.getRemoteAddr());
return true; return true;
} }
@ -54,11 +58,12 @@ public class LegacyApiController {
public boolean deleteResultWithOriginalId( public boolean deleteResultWithOriginalId(
@RequestParam(value = "originalId", required = true) final String originalId, @RequestParam(value = "originalId", required = true) final String originalId,
@RequestParam(value = "collectedFromId", required = true) final String collectedFromId, @RequestParam(value = "collectedFromId", required = true) final String collectedFromId,
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit) throws DirectIndexApiException { @RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
final HttpServletRequest req) throws DirectIndexApiException {
final String openaireId = solrRecordMapper.calculateOpenaireId(originalId, collectedFromId); final String openaireId = solrRecordMapper.calculateOpenaireId(originalId, collectedFromId);
service.prepareMetadataDeletion(openaireId); service.prepareMetadataDeletion(openaireId, req.getRemoteAddr());
return true; return true;
} }

View File

@ -2,6 +2,7 @@ package eu.dnetlib.app.directindex.controllers;
import java.io.IOException; import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
@ -9,6 +10,7 @@ import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.security.core.Authentication;
import org.springframework.web.bind.annotation.DeleteMapping; import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.GetMapping;
@ -30,7 +32,6 @@ import eu.dnetlib.app.directindex.errors.SwordException;
import eu.dnetlib.app.directindex.input.ResultEntry; import eu.dnetlib.app.directindex.input.ResultEntry;
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper; import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
import eu.dnetlib.app.directindex.service.DirectIndexService; import eu.dnetlib.app.directindex.service.DirectIndexService;
import eu.dnetlib.app.directindex.solr.SolrIndexClient;
import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory; import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory;
import eu.dnetlib.app.directindex.sword.model.SwordServiceDocument; import eu.dnetlib.app.directindex.sword.model.SwordServiceDocument;
import eu.dnetlib.app.directindex.sword.model.SwordStatusDocument; import eu.dnetlib.app.directindex.sword.model.SwordStatusDocument;
@ -42,6 +43,8 @@ import jakarta.servlet.http.HttpServletResponse;
@ConditionalOnProperty(value = "dnet.directindex.sword.enabled", havingValue = "true", matchIfMissing = false) @ConditionalOnProperty(value = "dnet.directindex.sword.enabled", havingValue = "true", matchIfMissing = false)
public class SwordApiController { public class SwordApiController {
private static final String UNKNOWN_USER = "UNKNOWN";
@Autowired @Autowired
private DirectIndexService service; private DirectIndexService service;
@ -75,7 +78,8 @@ public class SwordApiController {
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat, @RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
@RequestHeader(value = "Packaging", defaultValue = "http://purl.org/net/sword/3.0/package/Binary") final String packaging, @RequestHeader(value = "Packaging", defaultValue = "http://purl.org/net/sword/3.0/package/Binary") final String packaging,
@RequestHeader("Slug") final String slug, @RequestHeader("Slug") final String slug,
@RequestBody final String json) throws SwordException { @RequestBody final String json,
final Authentication authentication) throws SwordException {
// TODO: DIGEST Evaluate if the digest (md5) of the json string // TODO: DIGEST Evaluate if the digest (md5) of the json string
@ -89,7 +93,7 @@ public class SwordApiController {
responseHeaders.setLocation(null); responseHeaders.setLocation(null);
try { try {
final String openaireId = service.prepareMetadataInsertion(parseMetadata(json)); final String openaireId = service.prepareMetadataInsertion(parseMetadata(json), StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
final SwordStatusDocument status = SwordStatusDocument.newInstance(baseUrl + "/api/sword/3.0", openaireId); final SwordStatusDocument status = SwordStatusDocument.newInstance(baseUrl + "/api/sword/3.0", openaireId);
@ -101,15 +105,17 @@ public class SwordApiController {
} }
@GetMapping("/objects/{id}/metadata") @GetMapping("/objects/{id}/metadata")
public ResponseEntity<ResultEntry> getMetadata(@PathVariable final String id) throws SwordException { public ResponseEntity<ResultEntry> getMetadata(@PathVariable final String id, final Authentication authentication) throws SwordException {
final SolrIndexClient solr = solrIndexClientFactory.getClient(); try {
final SolrRecord metadata = solrIndexClientFactory.getClient().findRecord(id);
final SolrRecord metadata = solr.findRecord(id);
if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); } if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); }
return new ResponseEntity<>(solRecordMapper.toResultEntry(metadata), HttpStatus.OK); return new ResponseEntity<>(solRecordMapper.toResultEntry(metadata), HttpStatus.OK);
} catch (final DirectIndexApiException e) {
throw new SwordException(SwordErrorType.ContentMalformed);
}
} }
@PutMapping("/objects/{id}/metadata") @PutMapping("/objects/{id}/metadata")
@ -120,21 +126,24 @@ public class SwordApiController {
@RequestHeader("Digest") final String digest, @RequestHeader("Digest") final String digest,
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat, @RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
@PathVariable final String id, @PathVariable final String id,
@RequestBody final ResultEntry result) throws SwordException { @RequestBody final ResultEntry result,
final Authentication authentication) throws SwordException {
final SolrIndexClient solr = solrIndexClientFactory.getClient(); try {
if (!solrIndexClientFactory.getClient().existsRecord(id)) { throw new SwordException(SwordErrorType.NotFound); }
if (!solr.existsRecord(id)) { throw new SwordException(SwordErrorType.NotFound); } service.prepareMetadataReplacement(id, result, StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
service.prepareMetadataReplacement(id, result);
return new ResponseEntity<>(HttpStatus.ACCEPTED); return new ResponseEntity<>(HttpStatus.ACCEPTED);
} catch (final DirectIndexApiException e) {
throw new SwordException(SwordErrorType.ContentMalformed);
}
} }
@DeleteMapping("/objects/{id}") @DeleteMapping("/objects/{id}")
public ResponseEntity<Void> deleteObject(@PathVariable final String id) { public ResponseEntity<Void> deleteObject(@PathVariable final String id, final Authentication authentication) {
service.prepareMetadataDeletion(id); service.prepareMetadataDeletion(id, StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
return new ResponseEntity<>(HttpStatus.ACCEPTED); return new ResponseEntity<>(HttpStatus.ACCEPTED);

View File

@ -26,24 +26,24 @@ public class DirectIndexService {
@Autowired @Autowired
private DatasourceManagerClient dsmClient; private DatasourceManagerClient dsmClient;
public void prepareMetadataDeletion(final String openaireId) { public void prepareMetadataDeletion(final String openaireId, final String createdBy) {
final PendingAction action = new PendingAction(); final PendingAction action = new PendingAction();
action.setId(openaireId); action.setId(openaireId);
action.setOperation("DELETE"); action.setOperation("DELETE");
action.setCreatedBy("TODO"); // TODO action.setCreatedBy(createdBy);
action.setCreationDate(OffsetDateTime.now()); action.setCreationDate(OffsetDateTime.now());
action.setExecutionDate(null); action.setExecutionDate(null);
pendingActionRepository.save(action); pendingActionRepository.save(action);
} }
public void prepareMetadataReplacement(final String id, final ResultEntry document) { public void prepareMetadataReplacement(final String id, final ResultEntry document, final String createdBy) {
// TODO Auto-generated method stub // TODO Auto-generated method stub
} }
public String prepareMetadataInsertion(final ResultEntry r) throws DirectIndexApiException { public String prepareMetadataInsertion(final ResultEntry r, final String createdBy) throws DirectIndexApiException {
final PendingAction info = new PendingAction(); final PendingAction info = new PendingAction();
if (StringUtils.isNotBlank(r.getOpenaireId())) { if (StringUtils.isNotBlank(r.getOpenaireId())) {
@ -71,7 +71,7 @@ public class DirectIndexService {
info.setId(r.getOpenaireId()); info.setId(r.getOpenaireId());
info.setBody(r.toJson()); info.setBody(r.toJson());
info.setType(r.getType()); info.setType(r.getType());
info.setCreatedBy("TODO"); // TODO info.setCreatedBy(createdBy);
info.setCreationDate(OffsetDateTime.now()); info.setCreationDate(OffsetDateTime.now());
info.setExecutionDate(null); info.setExecutionDate(null);

View File

@ -2,14 +2,23 @@ package eu.dnetlib.app.directindex.solr;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException; import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.dhp.schema.solr.SolrRecord; import eu.dnetlib.dhp.schema.solr.SolrRecord;
@ -58,19 +67,48 @@ public class SolrIndexClient {
} }
protected SolrInputDocument prepareSolrDocument(final SolrRecord record) { protected SolrInputDocument prepareSolrDocument(final SolrRecord record) {
// TODO (usare classe ccondivisa preparata da Claudio) // TODO (usare classe condivisa preparata da Claudio)
return null; return null;
} }
public SolrRecord findRecord(final String id) { public SolrRecord findRecord(final String id) throws DirectIndexApiException {
// TODO Auto-generated method stub try {
return null; final QueryResponse response = cloudSolrClient.query(queryParamsForId(id));
final SolrDocumentList documents = response.getResults();
if (documents.isEmpty()) { return null; }
final String json = (String) documents.get(0).getFirstValue("__json");
return new ObjectMapper().readValue(json, SolrRecord.class);
} catch (SolrServerException | IOException e) {
throw new DirectIndexApiException("Error executing solr query", e);
}
} }
public boolean existsRecord(final String id) { public boolean existsRecord(final String id) throws DirectIndexApiException {
// TODO Auto-generated method stub try {
return false; final QueryResponse response = cloudSolrClient.query(queryParamsForId(id));
final SolrDocumentList documents = response.getResults();
return !documents.isEmpty();
} catch (SolrServerException | IOException e) {
throw new DirectIndexApiException("Error executing solr query", e);
} }
} }
/**
 * Builds the Solr parameters for looking up documents by identifier.
 * <p>
 * The query matches the {@code objidentifier} field against the escaped {@code id}, asks only for the
 * {@code objidentifier}, {@code __date} and {@code __json} fields, and sorts by {@code __date} descending.
 *
 * @param id
 *            the identifier to search for; it is escaped before being placed in the query string
 * @return the parameters to pass to the Solr client
 */
private SolrParams queryParamsForId(final String id) {
    // TODO: check whether a __date field actually exists in the index
    final String byIdentifier = "objidentifier:" + ClientUtils.escapeQueryChars(id);

    final Map<String, String> solrParams = new HashMap<>();
    solrParams.put("q", byIdentifier);
    solrParams.put("fl", "objidentifier,__date,__json");
    solrParams.put("sort", "__date desc");

    return new MapSolrParams(solrParams);
}
}

View File

@ -1,250 +0,0 @@
package eu.dnetlib.app.directindex.solr;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.apache.solr.common.SolrInputDocument;
/**
 * Streaming parser that converts an {@code indexRecord} XML document into a {@link SolrInputDocument}.
 * Optimized version of the document parser, drop-in replacement of InputDocumentFactory.
 * <p>
 * Faster because:
 * <ul>
 * <li>It doesn't create a DOM for the full document
 * <li>It doesn't execute xpaths against the DOM
 * <li>It quickly serializes the 'result' element directly into a string
 * <li>It uses less memory: less pressure on the GC, and more threads can process documents in parallel
 * </ul>
 * <p>
 * The StAX factories are kept in {@link ThreadLocal}s and no per-document state is stored in fields, so a single
 * instance can be invoked from several threads.
 * <p>
 * The XML input factory is configured with DTD support and external entities disabled, so that parsing an
 * untrusted record cannot trigger XML External Entity (XXE) resolution.
 *
 * @author claudio
 */
public class StreamingInputDocumentFactory {

    private static final String INDEX_FIELD_PREFIX = "__";

    private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion";

    private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid";

    private static final String RESULT = "result";

    private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;

    private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";

    private static final String DEFAULTDNETRESULT = "dnetResult";

    private static final String TARGETFIELDS = "targetFields";

    private static final String INDEX_RECORD_ID_ELEMENT = "indexRecordIdentifier";

    private static final String ROOT_ELEMENT = "indexRecord";

    /** Maximum number of chars kept from a single text event; longer values are truncated. */
    private static final int MAX_FIELD_LENGTH = 25000;

    private final ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal
            .withInitial(StreamingInputDocumentFactory::newSecureInputFactory);

    private final ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal
            .withInitial(XMLOutputFactory::newInstance);

    private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
            .withInitial(XMLEventFactory::newInstance);

    private final String version;

    private final String resultName;

    /**
     * Creates a factory that copies the default {@code dnetResult} element.
     *
     * @param version
     *            value stored in the {@code __dsversion} field of every parsed document; may be null, in which case
     *            the field is not added
     */
    public StreamingInputDocumentFactory(final String version) {
        this(version, DEFAULTDNETRESULT);
    }

    /**
     * Creates a factory that copies the element with the given local name as the result payload.
     *
     * @param version
     *            value stored in the {@code __dsversion} field of every parsed document; may be null, in which case
     *            the field is not added
     * @param resultName
     *            local name of the element whose content is copied into the {@code __result} field
     */
    public StreamingInputDocumentFactory(final String version, final String resultName) {
        this.version = version;
        this.resultName = resultName;
    }

    /**
     * Creates an XML input factory hardened against XXE: DTD processing and external entity resolution are both
     * switched off.
     */
    private static XMLInputFactory newSecureInputFactory() {
        final XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return factory;
    }

    /**
     * Parses an index record and builds the corresponding Solr document.
     *
     * @param inputDocument
     *            the XML of the index record
     * @return the Solr document containing the record identifier, the target fields, the serialized result element
     *         and (when a version was configured) the {@code __dsversion} field
     * @throws IllegalStateException
     *             if the XML cannot be parsed, or if the resulting document has no record identifier (this also
     *             happens when the {@code targetFields} block contains no field elements, because the document is
     *             cleared in that case)
     */
    public SolrInputDocument parseDocument(final String inputDocument) {

        final StringWriter results = new StringWriter();
        final List<Namespace> nsList = new LinkedList<>();

        try {
            final XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument));

            final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>());

            while (parser.hasNext()) {
                final XMLEvent event = parser.nextEvent();
                if (event == null || !event.isStartElement()) {
                    continue;
                }

                final String localName = event.asStartElement().getName().getLocalPart();

                if (ROOT_ELEMENT.equals(localName)) {
                    // namespaces declared on the root are re-declared on the copied result element
                    nsList.addAll(getNamespaces(event));
                } else if (INDEX_RECORD_ID_ELEMENT.equals(localName)) {
                    final String recordId = getText(parser.nextEvent());
                    indexDocument.addField(INDEX_RECORD_ID, recordId);
                } else if (TARGETFIELDS.equals(localName)) {
                    parseTargetFields(indexDocument, parser);
                } else if (resultName.equals(localName)) {
                    copyResult(indexDocument, results, parser, nsList, resultName);
                }
            }

            if (version != null) {
                indexDocument.addField(DS_VERSION, version);
            }

            if (!indexDocument.containsKey(INDEX_RECORD_ID)) {
                throw new IllegalStateException("cannot extract record ID from: " + inputDocument);
            }

            return indexDocument;
        } catch (final XMLStreamException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Collects the namespace declarations carried by a start element.
     *
     * @param event
     *            an event that is a start element
     * @return the declared namespaces, in iteration order
     */
    private List<Namespace> getNamespaces(final XMLEvent event) {
        final List<Namespace> res = new LinkedList<>();
        final Iterator<Namespace> nsIter = event.asStartElement().getNamespaces();
        nsIter.forEachRemaining(res::add);
        return res;
    }

    /**
     * Parse the targetFields block and add fields to the solr document.
     * <p>
     * Every start element found before the closing {@code targetFields} tag is treated as a field whose value is
     * the event that immediately follows it. If the block contains no field element at all, the whole document is
     * cleared.
     *
     * @param indexDocument
     *            the document to populate
     * @param parser
     *            the event reader, positioned just after the {@code targetFields} start tag
     * @throws XMLStreamException
     *             if the underlying parser fails
     */
    protected void parseTargetFields(
            final SolrInputDocument indexDocument,
            final XMLEventReader parser)
            throws XMLStreamException {

        boolean hasFields = false;

        while (parser.hasNext()) {
            final XMLEvent targetEvent = parser.nextEvent();
            if (targetEvent.isEndElement()
                    && TARGETFIELDS.equals(targetEvent.asEndElement().getName().getLocalPart())) {
                break;
            }

            if (targetEvent.isStartElement()) {
                final String fieldName = targetEvent.asStartElement().getName().getLocalPart();
                // NOTE(review): only the first event after the start tag is read; a non-coalescing parser may split
                // one text node into several Characters events - confirm whether IS_COALESCING should be enabled.
                final String data = getText(parser.nextEvent());

                addField(indexDocument, fieldName, data);
                hasFields = true;
            }
        }

        if (!hasFields) {
            indexDocument.clear();
        }
    }

    /**
     * Copy the /indexRecord/result element and children, preserving namespace declarations etc.
     * <p>
     * The copy is wrapped in a new {@code result} root element that declares the namespaces in {@code nsList}. The
     * end of the source element is found by tracking the nesting depth, so a descendant that has the same local
     * name as the result element does not terminate the copy early.
     *
     * @param indexDocument
     *            the document receiving the {@code __result} field
     * @param results
     *            the writer the serialized XML is accumulated in
     * @param parser
     *            the event reader, positioned just after the start tag of the result element
     * @param nsList
     *            namespaces to declare on the new root element
     * @param dnetResult
     *            local name of the source result element; kept for signature compatibility, the end of the element
     *            is now detected by depth rather than by name
     * @throws XMLStreamException
     *             if reading or writing the XML events fails
     */
    protected void copyResult(
            final SolrInputDocument indexDocument,
            final StringWriter results,
            final XMLEventReader parser,
            final List<Namespace> nsList,
            final String dnetResult)
            throws XMLStreamException {

        final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results);
        final XMLEventFactory events = eventFactory.get();

        // new root record
        final StartElement newRecord = events.createStartElement("", null, RESULT, null, nsList.iterator());
        writer.add(newRecord);

        // copy the rest as it is; depth counts the elements opened (and not yet closed) inside the result element
        int depth = 0;
        while (parser.hasNext()) {
            final XMLEvent resultEvent = parser.nextEvent();

            if (resultEvent.isStartElement()) {
                depth++;
            } else if (resultEvent.isEndElement()) {
                if (depth == 0) {
                    // this is the closing tag of the result element itself
                    writer.add(events.createEndElement("", null, RESULT));
                    break;
                }
                depth--;
            }

            writer.add(resultEvent);
        }
        writer.close();
        indexDocument.addField(INDEX_RESULT, results.toString());
    }

    /**
     * Helper used to add a field to a solr doc. It avoids adding null or empty fields.
     * <p>
     * The field name is lower-cased with {@link java.util.Locale#ROOT} so that the result does not depend on the
     * default locale of the JVM.
     *
     * @param indexDocument
     *            the document to add the field to
     * @param field
     *            the field name
     * @param value
     *            the field value; it is trimmed, and ignored when null or empty after trimming
     */
    private final void addField(
            final SolrInputDocument indexDocument,
            final String field,
            final String value) {

        if (value == null) {
            return;
        }

        final String cleaned = value.trim();
        if (!cleaned.isEmpty()) {
            indexDocument.addField(field.toLowerCase(java.util.Locale.ROOT), cleaned);
        }
    }

    /**
     * Helper used to get the string from a text element.
     * <p>
     * Events that are not character data (for instance the end tag of an empty element, a nested start tag or a
     * comment) yield an empty string. Values longer than {@code MAX_FIELD_LENGTH} chars are truncated, taking care
     * not to cut a surrogate pair in half.
     *
     * @param text
     *            the event that follows the start tag of the element
     * @return the (possibly truncated) character data, or an empty string if the event carries no text
     */
    protected final String getText(final XMLEvent text) {
        if (!text.isCharacters()) {
            return "";
        }

        final String data = text.asCharacters().getData();
        if (data != null && data.length() > MAX_FIELD_LENGTH) {
            int end = MAX_FIELD_LENGTH;
            if (Character.isHighSurrogate(data.charAt(end - 1))) {
                end--;
            }
            return data.substring(0, end);
        }

        return data;
    }

}