partial implementation
This commit is contained in:
parent
99b8fbfdfb
commit
740a4f63a2
|
@ -20,6 +20,7 @@ import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
|
||||||
import eu.dnetlib.app.directindex.input.ResultEntry;
|
import eu.dnetlib.app.directindex.input.ResultEntry;
|
||||||
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
|
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
|
||||||
import eu.dnetlib.app.directindex.service.DirectIndexService;
|
import eu.dnetlib.app.directindex.service.DirectIndexService;
|
||||||
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
@RestController("/api")
|
@RestController("/api")
|
||||||
@ConditionalOnProperty(value = "dnet.directindex.legacy.enabled", havingValue = "true", matchIfMissing = false)
|
@ConditionalOnProperty(value = "dnet.directindex.legacy.enabled", havingValue = "true", matchIfMissing = false)
|
||||||
|
@ -34,18 +35,21 @@ public class LegacyApiController {
|
||||||
private SolrRecordMapper solrRecordMapper;
|
private SolrRecordMapper solrRecordMapper;
|
||||||
|
|
||||||
@PostMapping("/results/feedObject")
|
@PostMapping("/results/feedObject")
|
||||||
public String feedResult(@RequestBody final ResultEntry pub, @RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit)
|
public String feedResult(@RequestBody final ResultEntry pub,
|
||||||
|
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
|
||||||
|
final HttpServletRequest req)
|
||||||
throws DirectIndexApiException {
|
throws DirectIndexApiException {
|
||||||
|
|
||||||
return service.prepareMetadataInsertion(pub);
|
return service.prepareMetadataInsertion(pub, req.getRemoteAddr());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@DeleteMapping("/result/{openaireId}")
|
@DeleteMapping("/result/{openaireId}")
|
||||||
public boolean deleteResultWithOpenaireId(@PathVariable(value = "openaireId") final String openaireId,
|
public boolean deleteResultWithOpenaireId(@PathVariable(value = "openaireId") final String openaireId,
|
||||||
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit) throws DirectIndexApiException {
|
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
|
||||||
|
final HttpServletRequest req) throws DirectIndexApiException {
|
||||||
|
|
||||||
service.prepareMetadataDeletion(openaireId);
|
service.prepareMetadataDeletion(openaireId, req.getRemoteAddr());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -54,11 +58,12 @@ public class LegacyApiController {
|
||||||
public boolean deleteResultWithOriginalId(
|
public boolean deleteResultWithOriginalId(
|
||||||
@RequestParam(value = "originalId", required = true) final String originalId,
|
@RequestParam(value = "originalId", required = true) final String originalId,
|
||||||
@RequestParam(value = "collectedFromId", required = true) final String collectedFromId,
|
@RequestParam(value = "collectedFromId", required = true) final String collectedFromId,
|
||||||
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit) throws DirectIndexApiException {
|
@RequestParam(value = "commit", required = false, defaultValue = "true") final boolean commit,
|
||||||
|
final HttpServletRequest req) throws DirectIndexApiException {
|
||||||
|
|
||||||
final String openaireId = solrRecordMapper.calculateOpenaireId(originalId, collectedFromId);
|
final String openaireId = solrRecordMapper.calculateOpenaireId(originalId, collectedFromId);
|
||||||
|
|
||||||
service.prepareMetadataDeletion(openaireId);
|
service.prepareMetadataDeletion(openaireId, req.getRemoteAddr());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.app.directindex.controllers;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
@ -9,6 +10,7 @@ import org.springframework.http.HttpHeaders;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.security.core.Authentication;
|
||||||
import org.springframework.web.bind.annotation.DeleteMapping;
|
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||||
import org.springframework.web.bind.annotation.GetMapping;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
@ -30,7 +32,6 @@ import eu.dnetlib.app.directindex.errors.SwordException;
|
||||||
import eu.dnetlib.app.directindex.input.ResultEntry;
|
import eu.dnetlib.app.directindex.input.ResultEntry;
|
||||||
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
|
import eu.dnetlib.app.directindex.mapping.SolrRecordMapper;
|
||||||
import eu.dnetlib.app.directindex.service.DirectIndexService;
|
import eu.dnetlib.app.directindex.service.DirectIndexService;
|
||||||
import eu.dnetlib.app.directindex.solr.SolrIndexClient;
|
|
||||||
import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory;
|
import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory;
|
||||||
import eu.dnetlib.app.directindex.sword.model.SwordServiceDocument;
|
import eu.dnetlib.app.directindex.sword.model.SwordServiceDocument;
|
||||||
import eu.dnetlib.app.directindex.sword.model.SwordStatusDocument;
|
import eu.dnetlib.app.directindex.sword.model.SwordStatusDocument;
|
||||||
|
@ -42,6 +43,8 @@ import jakarta.servlet.http.HttpServletResponse;
|
||||||
@ConditionalOnProperty(value = "dnet.directindex.sword.enabled", havingValue = "true", matchIfMissing = false)
|
@ConditionalOnProperty(value = "dnet.directindex.sword.enabled", havingValue = "true", matchIfMissing = false)
|
||||||
public class SwordApiController {
|
public class SwordApiController {
|
||||||
|
|
||||||
|
private static final String UNKNOWN_USER = "UNKNOWN";
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private DirectIndexService service;
|
private DirectIndexService service;
|
||||||
|
|
||||||
|
@ -75,7 +78,8 @@ public class SwordApiController {
|
||||||
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
|
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
|
||||||
@RequestHeader(value = "Packaging", defaultValue = "http://purl.org/net/sword/3.0/package/Binary") final String packaging,
|
@RequestHeader(value = "Packaging", defaultValue = "http://purl.org/net/sword/3.0/package/Binary") final String packaging,
|
||||||
@RequestHeader("Slug") final String slug,
|
@RequestHeader("Slug") final String slug,
|
||||||
@RequestBody final String json) throws SwordException {
|
@RequestBody final String json,
|
||||||
|
final Authentication authentication) throws SwordException {
|
||||||
|
|
||||||
// TODO: DIGEST Evaluate if the digest (md5) of the json string
|
// TODO: DIGEST Evaluate if the digest (md5) of the json string
|
||||||
|
|
||||||
|
@ -89,7 +93,7 @@ public class SwordApiController {
|
||||||
responseHeaders.setLocation(null);
|
responseHeaders.setLocation(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final String openaireId = service.prepareMetadataInsertion(parseMetadata(json));
|
final String openaireId = service.prepareMetadataInsertion(parseMetadata(json), StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
|
||||||
|
|
||||||
final SwordStatusDocument status = SwordStatusDocument.newInstance(baseUrl + "/api/sword/3.0", openaireId);
|
final SwordStatusDocument status = SwordStatusDocument.newInstance(baseUrl + "/api/sword/3.0", openaireId);
|
||||||
|
|
||||||
|
@ -101,15 +105,17 @@ public class SwordApiController {
|
||||||
}
|
}
|
||||||
|
|
||||||
@GetMapping("/objects/{id}/metadata")
|
@GetMapping("/objects/{id}/metadata")
|
||||||
public ResponseEntity<ResultEntry> getMetadata(@PathVariable final String id) throws SwordException {
|
public ResponseEntity<ResultEntry> getMetadata(@PathVariable final String id, final Authentication authentication) throws SwordException {
|
||||||
|
|
||||||
final SolrIndexClient solr = solrIndexClientFactory.getClient();
|
try {
|
||||||
|
final SolrRecord metadata = solrIndexClientFactory.getClient().findRecord(id);
|
||||||
|
|
||||||
final SolrRecord metadata = solr.findRecord(id);
|
if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); }
|
||||||
|
|
||||||
if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); }
|
return new ResponseEntity<>(solRecordMapper.toResultEntry(metadata), HttpStatus.OK);
|
||||||
|
} catch (final DirectIndexApiException e) {
|
||||||
return new ResponseEntity<>(solRecordMapper.toResultEntry(metadata), HttpStatus.OK);
|
throw new SwordException(SwordErrorType.ContentMalformed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@PutMapping("/objects/{id}/metadata")
|
@PutMapping("/objects/{id}/metadata")
|
||||||
|
@ -120,21 +126,24 @@ public class SwordApiController {
|
||||||
@RequestHeader("Digest") final String digest,
|
@RequestHeader("Digest") final String digest,
|
||||||
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
|
@RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat,
|
||||||
@PathVariable final String id,
|
@PathVariable final String id,
|
||||||
@RequestBody final ResultEntry result) throws SwordException {
|
@RequestBody final ResultEntry result,
|
||||||
|
final Authentication authentication) throws SwordException {
|
||||||
|
|
||||||
final SolrIndexClient solr = solrIndexClientFactory.getClient();
|
try {
|
||||||
|
if (!solrIndexClientFactory.getClient().existsRecord(id)) { throw new SwordException(SwordErrorType.NotFound); }
|
||||||
|
|
||||||
if (!solr.existsRecord(id)) { throw new SwordException(SwordErrorType.NotFound); }
|
service.prepareMetadataReplacement(id, result, StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
|
||||||
|
|
||||||
service.prepareMetadataReplacement(id, result);
|
return new ResponseEntity<>(HttpStatus.ACCEPTED);
|
||||||
|
} catch (final DirectIndexApiException e) {
|
||||||
return new ResponseEntity<>(HttpStatus.ACCEPTED);
|
throw new SwordException(SwordErrorType.ContentMalformed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@DeleteMapping("/objects/{id}")
|
@DeleteMapping("/objects/{id}")
|
||||||
public ResponseEntity<Void> deleteObject(@PathVariable final String id) {
|
public ResponseEntity<Void> deleteObject(@PathVariable final String id, final Authentication authentication) {
|
||||||
|
|
||||||
service.prepareMetadataDeletion(id);
|
service.prepareMetadataDeletion(id, StringUtils.firstNonBlank(authentication.getName(), UNKNOWN_USER));
|
||||||
|
|
||||||
return new ResponseEntity<>(HttpStatus.ACCEPTED);
|
return new ResponseEntity<>(HttpStatus.ACCEPTED);
|
||||||
|
|
||||||
|
|
|
@ -26,24 +26,24 @@ public class DirectIndexService {
|
||||||
@Autowired
|
@Autowired
|
||||||
private DatasourceManagerClient dsmClient;
|
private DatasourceManagerClient dsmClient;
|
||||||
|
|
||||||
public void prepareMetadataDeletion(final String openaireId) {
|
public void prepareMetadataDeletion(final String openaireId, final String createdBy) {
|
||||||
final PendingAction action = new PendingAction();
|
final PendingAction action = new PendingAction();
|
||||||
|
|
||||||
action.setId(openaireId);
|
action.setId(openaireId);
|
||||||
action.setOperation("DELETE");
|
action.setOperation("DELETE");
|
||||||
action.setCreatedBy("TODO"); // TODO
|
action.setCreatedBy(createdBy);
|
||||||
action.setCreationDate(OffsetDateTime.now());
|
action.setCreationDate(OffsetDateTime.now());
|
||||||
action.setExecutionDate(null);
|
action.setExecutionDate(null);
|
||||||
|
|
||||||
pendingActionRepository.save(action);
|
pendingActionRepository.save(action);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void prepareMetadataReplacement(final String id, final ResultEntry document) {
|
public void prepareMetadataReplacement(final String id, final ResultEntry document, final String createdBy) {
|
||||||
// TODO Auto-generated method stub
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String prepareMetadataInsertion(final ResultEntry r) throws DirectIndexApiException {
|
public String prepareMetadataInsertion(final ResultEntry r, final String createdBy) throws DirectIndexApiException {
|
||||||
final PendingAction info = new PendingAction();
|
final PendingAction info = new PendingAction();
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(r.getOpenaireId())) {
|
if (StringUtils.isNotBlank(r.getOpenaireId())) {
|
||||||
|
@ -71,7 +71,7 @@ public class DirectIndexService {
|
||||||
info.setId(r.getOpenaireId());
|
info.setId(r.getOpenaireId());
|
||||||
info.setBody(r.toJson());
|
info.setBody(r.toJson());
|
||||||
info.setType(r.getType());
|
info.setType(r.getType());
|
||||||
info.setCreatedBy("TODO"); // TODO
|
info.setCreatedBy(createdBy);
|
||||||
info.setCreationDate(OffsetDateTime.now());
|
info.setCreationDate(OffsetDateTime.now());
|
||||||
info.setExecutionDate(null);
|
info.setExecutionDate(null);
|
||||||
|
|
||||||
|
|
|
@ -2,14 +2,23 @@ package eu.dnetlib.app.directindex.solr;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||||
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||||
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.params.MapSolrParams;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
|
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
|
||||||
import eu.dnetlib.dhp.schema.solr.SolrRecord;
|
import eu.dnetlib.dhp.schema.solr.SolrRecord;
|
||||||
|
@ -58,19 +67,48 @@ public class SolrIndexClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SolrInputDocument prepareSolrDocument(final SolrRecord record) {
|
protected SolrInputDocument prepareSolrDocument(final SolrRecord record) {
|
||||||
// TODO (usare classe ccondivisa preparata da Claudio)
|
// TODO (usare classe condivisa preparata da Claudio)
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public SolrRecord findRecord(final String id) {
|
public SolrRecord findRecord(final String id) throws DirectIndexApiException {
|
||||||
// TODO Auto-generated method stub
|
try {
|
||||||
return null;
|
final QueryResponse response = cloudSolrClient.query(queryParamsForId(id));
|
||||||
|
final SolrDocumentList documents = response.getResults();
|
||||||
|
|
||||||
|
if (documents.isEmpty()) { return null; }
|
||||||
|
|
||||||
|
final String json = (String) documents.get(0).getFirstValue("__json");
|
||||||
|
|
||||||
|
return new ObjectMapper().readValue(json, SolrRecord.class);
|
||||||
|
} catch (SolrServerException | IOException e) {
|
||||||
|
throw new DirectIndexApiException("Error executing solr query", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean existsRecord(final String id) {
|
public boolean existsRecord(final String id) throws DirectIndexApiException {
|
||||||
// TODO Auto-generated method stub
|
try {
|
||||||
return false;
|
final QueryResponse response = cloudSolrClient.query(queryParamsForId(id));
|
||||||
|
final SolrDocumentList documents = response.getResults();
|
||||||
|
return !documents.isEmpty();
|
||||||
|
} catch (SolrServerException | IOException e) {
|
||||||
|
throw new DirectIndexApiException("Error executing solr query", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SolrParams queryParamsForId(final String id) {
|
||||||
|
|
||||||
|
// TODO: verificare se esiste un campo __date
|
||||||
|
|
||||||
|
final Map<String, String> params = new HashMap<String, String>();
|
||||||
|
|
||||||
|
params.put("q", "objidentifier:" + ClientUtils.escapeQueryChars(id));
|
||||||
|
params.put("fl", "objidentifier,__date,__json");
|
||||||
|
params.put("sort", "__date desc");
|
||||||
|
|
||||||
|
return new MapSolrParams(params);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,250 +0,0 @@
|
||||||
package eu.dnetlib.app.directindex.solr;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import javax.xml.stream.XMLEventFactory;
|
|
||||||
import javax.xml.stream.XMLEventReader;
|
|
||||||
import javax.xml.stream.XMLEventWriter;
|
|
||||||
import javax.xml.stream.XMLInputFactory;
|
|
||||||
import javax.xml.stream.XMLOutputFactory;
|
|
||||||
import javax.xml.stream.XMLStreamException;
|
|
||||||
import javax.xml.stream.events.Namespace;
|
|
||||||
import javax.xml.stream.events.StartElement;
|
|
||||||
import javax.xml.stream.events.XMLEvent;
|
|
||||||
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Optimized version of the document parser, drop in replacement of InputDocumentFactory.
|
|
||||||
* <p>
|
|
||||||
* Faster because:
|
|
||||||
* <ul>
|
|
||||||
* <li>Doesn't create a DOM for the full document
|
|
||||||
* <li>Doesn't execute xpaths agains the DOM
|
|
||||||
* <li>Quickly serialize the 'result' element directly in a string.
|
|
||||||
* <li>Uses less memory: less pressure on GC and allows more threads to process this in parallel
|
|
||||||
* </ul>
|
|
||||||
* <p>
|
|
||||||
* This class is fully reentrant and can be invoked in parallel.
|
|
||||||
*
|
|
||||||
* @author claudio
|
|
||||||
*/
|
|
||||||
public class StreamingInputDocumentFactory {
|
|
||||||
|
|
||||||
private static final String INDEX_FIELD_PREFIX = "__";
|
|
||||||
|
|
||||||
private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion";
|
|
||||||
|
|
||||||
private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid";
|
|
||||||
|
|
||||||
private static final String RESULT = "result";
|
|
||||||
|
|
||||||
private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT;
|
|
||||||
|
|
||||||
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
|
|
||||||
|
|
||||||
private static final String DEFAULTDNETRESULT = "dnetResult";
|
|
||||||
|
|
||||||
private static final String TARGETFIELDS = "targetFields";
|
|
||||||
|
|
||||||
private static final String INDEX_RECORD_ID_ELEMENT = "indexRecordIdentifier";
|
|
||||||
|
|
||||||
private static final String ROOT_ELEMENT = "indexRecord";
|
|
||||||
|
|
||||||
private static final int MAX_FIELD_LENGTH = 25000;
|
|
||||||
|
|
||||||
private final ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal
|
|
||||||
.withInitial(XMLInputFactory::newInstance);
|
|
||||||
|
|
||||||
private final ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal
|
|
||||||
.withInitial(XMLOutputFactory::newInstance);
|
|
||||||
|
|
||||||
private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
|
|
||||||
.withInitial(XMLEventFactory::newInstance);
|
|
||||||
|
|
||||||
private final String version;
|
|
||||||
|
|
||||||
private String resultName = DEFAULTDNETRESULT;
|
|
||||||
|
|
||||||
public StreamingInputDocumentFactory(final String version) {
|
|
||||||
this(version, DEFAULTDNETRESULT);
|
|
||||||
}
|
|
||||||
|
|
||||||
public StreamingInputDocumentFactory(final String version, final String resultName) {
|
|
||||||
this.version = version;
|
|
||||||
this.resultName = resultName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SolrInputDocument parseDocument(final String inputDocument) {
|
|
||||||
|
|
||||||
final StringWriter results = new StringWriter();
|
|
||||||
final List<Namespace> nsList = new LinkedList<>();
|
|
||||||
try {
|
|
||||||
|
|
||||||
final XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument));
|
|
||||||
|
|
||||||
final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>());
|
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
|
||||||
final XMLEvent event = parser.nextEvent();
|
|
||||||
if (event != null && event.isStartElement()) {
|
|
||||||
final String localName = event.asStartElement().getName().getLocalPart();
|
|
||||||
|
|
||||||
if (ROOT_ELEMENT.equals(localName)) {
|
|
||||||
nsList.addAll(getNamespaces(event));
|
|
||||||
} else if (INDEX_RECORD_ID_ELEMENT.equals(localName)) {
|
|
||||||
final XMLEvent text = parser.nextEvent();
|
|
||||||
final String recordId = getText(text);
|
|
||||||
indexDocument.addField(INDEX_RECORD_ID, recordId);
|
|
||||||
} else if (TARGETFIELDS.equals(localName)) {
|
|
||||||
parseTargetFields(indexDocument, parser);
|
|
||||||
} else if (resultName.equals(localName)) {
|
|
||||||
copyResult(indexDocument, results, parser, nsList, resultName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (version != null) {
|
|
||||||
indexDocument.addField(DS_VERSION, version);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!indexDocument.containsKey(INDEX_RECORD_ID)) { throw new IllegalStateException("cannot extract record ID from: " + inputDocument); }
|
|
||||||
|
|
||||||
return indexDocument;
|
|
||||||
} catch (final XMLStreamException e) {
|
|
||||||
throw new IllegalStateException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<Namespace> getNamespaces(final XMLEvent event) {
|
|
||||||
final List<Namespace> res = new LinkedList<>();
|
|
||||||
final Iterator<Namespace> nsIter = event.asStartElement().getNamespaces();
|
|
||||||
while (nsIter.hasNext()) {
|
|
||||||
final Namespace ns = nsIter.next();
|
|
||||||
res.add(ns);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parse the targetFields block and add fields to the solr document.
|
|
||||||
*
|
|
||||||
* @param indexDocument
|
|
||||||
* @param parser
|
|
||||||
* @throws XMLStreamException
|
|
||||||
*/
|
|
||||||
protected void parseTargetFields(
|
|
||||||
final SolrInputDocument indexDocument,
|
|
||||||
final XMLEventReader parser)
|
|
||||||
throws XMLStreamException {
|
|
||||||
|
|
||||||
boolean hasFields = false;
|
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
|
||||||
final XMLEvent targetEvent = parser.nextEvent();
|
|
||||||
if (targetEvent.isEndElement()
|
|
||||||
&& TARGETFIELDS.equals(targetEvent.asEndElement().getName().getLocalPart())) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (targetEvent.isStartElement()) {
|
|
||||||
final String fieldName = targetEvent.asStartElement().getName().getLocalPart();
|
|
||||||
final XMLEvent text = parser.nextEvent();
|
|
||||||
|
|
||||||
final String data = getText(text);
|
|
||||||
|
|
||||||
addField(indexDocument, fieldName, data);
|
|
||||||
hasFields = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hasFields) {
|
|
||||||
indexDocument.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copy the /indexRecord/result element and children, preserving namespace declarations etc.
|
|
||||||
*
|
|
||||||
* @param indexDocument
|
|
||||||
* @param results
|
|
||||||
* @param parser
|
|
||||||
* @param nsList
|
|
||||||
* @throws XMLStreamException
|
|
||||||
*/
|
|
||||||
protected void copyResult(
|
|
||||||
final SolrInputDocument indexDocument,
|
|
||||||
final StringWriter results,
|
|
||||||
final XMLEventReader parser,
|
|
||||||
final List<Namespace> nsList,
|
|
||||||
final String dnetResult)
|
|
||||||
throws XMLStreamException {
|
|
||||||
final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results);
|
|
||||||
|
|
||||||
for (final Namespace ns : nsList) {
|
|
||||||
eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI());
|
|
||||||
}
|
|
||||||
|
|
||||||
final StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator());
|
|
||||||
|
|
||||||
// new root record
|
|
||||||
writer.add(newRecord);
|
|
||||||
|
|
||||||
// copy the rest as it is
|
|
||||||
while (parser.hasNext()) {
|
|
||||||
final XMLEvent resultEvent = parser.nextEvent();
|
|
||||||
|
|
||||||
// TODO: replace with depth tracking instead of close tag tracking.
|
|
||||||
if (resultEvent.isEndElement()
|
|
||||||
&& resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) {
|
|
||||||
writer.add(eventFactory.get().createEndElement("", null, RESULT));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.add(resultEvent);
|
|
||||||
}
|
|
||||||
writer.close();
|
|
||||||
indexDocument.addField(INDEX_RESULT, results.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper used to add a field to a solr doc. It avoids to add empy fields
|
|
||||||
*
|
|
||||||
* @param indexDocument
|
|
||||||
* @param field
|
|
||||||
* @param value
|
|
||||||
*/
|
|
||||||
private final void addField(
|
|
||||||
final SolrInputDocument indexDocument,
|
|
||||||
final String field,
|
|
||||||
final String value) {
|
|
||||||
final String cleaned = value.trim();
|
|
||||||
if (!cleaned.isEmpty()) {
|
|
||||||
// log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n");
|
|
||||||
indexDocument.addField(field.toLowerCase(), cleaned);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper used to get the string from a text element.
|
|
||||||
*
|
|
||||||
* @param text
|
|
||||||
* @return the
|
|
||||||
*/
|
|
||||||
protected final String getText(final XMLEvent text) {
|
|
||||||
if (text.isEndElement()) {
|
|
||||||
// text.asEndElement().getName().getLocalPart());
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
final String data = text.asCharacters().getData();
|
|
||||||
if (data != null && data.length() > MAX_FIELD_LENGTH) { return data.substring(0, MAX_FIELD_LENGTH); }
|
|
||||||
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue