diff --git a/pom.xml b/pom.xml index 11a40fc..08f2a9e 100644 --- a/pom.xml +++ b/pom.xml @@ -143,6 +143,31 @@ org.apache.commons commons-pool2 + + + + eu.dnetlib + cnr-rmi-api + ${cnr-rmi-api.version} + + + org.apache.cxf + cxf-rt-transports-http + 3.1.5 + + + + + org.apache.solr + solr-solrj + ${apache.solr.version} + + + + + org.springdoc + springdoc-openapi-ui + org.junit.jupiter @@ -150,6 +175,22 @@ test + + + org.springframework.boot + spring-boot-starter-actuator + + + io.micrometer + micrometer-registry-prometheus + + + + org.apache.maven + maven-model + + + org.mockito mockito-core @@ -446,8 +487,9 @@ UTF-8 3.6.0 17 + 2.6.1 2.14.0 - 7.1.0 + 9.7.0 3.4.2 0.10.0 1.71.0 diff --git a/src/main/java/eu/dnetlib/app/directindex/DirectIndexApplication.java b/src/main/java/eu/dnetlib/app/directindex/DirectIndexApplication.java index 120bb96..15142f6 100644 --- a/src/main/java/eu/dnetlib/app/directindex/DirectIndexApplication.java +++ b/src/main/java/eu/dnetlib/app/directindex/DirectIndexApplication.java @@ -1,13 +1,125 @@ package eu.dnetlib.app.directindex; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; +import org.apache.maven.model.Model; +import org.apache.maven.model.io.xpp3.MavenXpp3Reader; +import org.codehaus.plexus.util.xml.pull.XmlPullParserException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springdoc.core.GroupedOpenApi; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.cache.annotation.EnableCaching; +import org.springframework.context.annotation.Bean; +import org.springframework.core.io.ClassPathResource; +import org.springframework.scheduling.annotation.EnableScheduling; + +import eu.dnetlib.app.directindex.is.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import io.micrometer.core.instrument.ImmutableTag; +import io.micrometer.core.instrument.Metrics; +import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.info.Info; +import io.swagger.v3.oas.models.info.License; +import io.swagger.v3.oas.models.servers.Server; +import jakarta.annotation.PostConstruct; @SpringBootApplication +@EnableCaching +@EnableScheduling public class DirectIndexApplication { + private static final Logger log = LoggerFactory.getLogger(DirectIndexApplication.class); + + private static final String DIRECT_INDEX_DESC = "APIs documentation"; + + private static final License AGPL_3_LICENSE = + new License().name("GNU Affero General Public License v3.0 or later").url("https://www.gnu.org/licenses/agpl-3.0.txt"); + + @Value("${maven.pom.path}") + private ClassPathResource pom; + + @Value("${server.public_url}") + private String serverPublicUrl; + + @Value("${server.public_desc}") + private String serverPublicDesc; + + @Value("${openaire.service.islookup.wsdl}") + private String isLookupUrl; + public static void main(final String[] args) { SpringApplication.run(DirectIndexApplication.class, args); } + @PostConstruct + public void init() { + final MavenXpp3Reader reader = new MavenXpp3Reader(); + try { + final Model model = reader.read(new InputStreamReader(pom.getInputStream())); + + log.info(String.format("registering metric for %s", model.getArtifactId())); + + final ImmutableTag tag1 = new ImmutableTag("component", model.getGroupId() + ":" + model.getArtifactId()); + final ImmutableTag tag2 = new ImmutableTag("version", model.getVersion()); + final ImmutableTag tag3 = new ImmutableTag("scmtag", model.getScm().getTag()); + + Metrics.gauge("micrometer_info", Arrays.asList(tag1, tag2, tag3), 1); + } catch (IOException | XmlPullParserException e) { + log.error("Error registering metric", e); + } + } + + @Bean + public OpenAPI newSwaggerDocket() { + final List servers = new ArrayList<>(); + if (StringUtils.isNotBlank(serverPublicUrl)) { + final Server server = new Server(); + server.setUrl(serverPublicUrl); + server.setDescription(serverPublicDesc); + servers.add(server); + } + + return new OpenAPI() + .servers(servers) + .info(new Info() + .title(swaggerTitle()) + .description(DIRECT_INDEX_DESC) + .version(swaggerVersion()) + .license(AGPL_3_LICENSE)) + .tags(new ArrayList<>()); + } + + private String swaggerVersion() { + try { + return new MavenXpp3Reader().read(new InputStreamReader(pom.getInputStream())).getVersion(); + } catch (IOException | XmlPullParserException e) { + return "UNKNOWN"; + } + } + + @Bean + public GroupedOpenApi publicApi() { + return GroupedOpenApi.builder() + .group("D-Net DirectIndex API") + .pathsToMatch("/api/**") + .build(); + } + + @Bean + public ISLookUpService lookupServiceStub() { + return ISLookupClientFactory.getLookUpService(isLookupUrl); + } + + protected String swaggerTitle() { + return "OpenAIRE DirectIndex API"; + } + } diff --git a/src/main/java/eu/dnetlib/app/directindex/controllers/AdminController.java b/src/main/java/eu/dnetlib/app/directindex/controllers/AdminController.java new file mode 100644 index 0000000..337b8be --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/controllers/AdminController.java @@ -0,0 +1,24 @@ +package eu.dnetlib.app.directindex.controllers; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpStatus; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ResponseStatus; +import org.springframework.web.bind.annotation.RestController; + +import eu.dnetlib.app.directindex.is.ISLookupClient; + +@RestController +@RequestMapping("/api/admin") +public class AdminController { + + @Autowired + private ISLookupClient isLookupClient; + + @GetMapping("/evictCache") + @ResponseStatus(HttpStatus.OK) + public void evictCache() { + isLookupClient.evictCache(); + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/sword/SwordServiceUrlController.java b/src/main/java/eu/dnetlib/app/directindex/controllers/SwordApiController.java similarity index 69% rename from src/main/java/eu/dnetlib/app/directindex/sword/SwordServiceUrlController.java rename to src/main/java/eu/dnetlib/app/directindex/controllers/SwordApiController.java index 6d2e4f0..5703e07 100644 --- a/src/main/java/eu/dnetlib/app/directindex/sword/SwordServiceUrlController.java +++ b/src/main/java/eu/dnetlib/app/directindex/controllers/SwordApiController.java @@ -1,7 +1,8 @@ -package eu.dnetlib.app.directindex.sword; +package eu.dnetlib.app.directindex.controllers; import java.io.IOException; +import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; @@ -21,24 +22,33 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.app.directindex.errors.DirectIndexApiException; +import eu.dnetlib.app.directindex.errors.SwordError; +import eu.dnetlib.app.directindex.errors.SwordException; +import eu.dnetlib.app.directindex.input.ResultEntry; +import eu.dnetlib.app.directindex.is.ISLookupClient; +import eu.dnetlib.app.directindex.is.IndexDsInfo; import eu.dnetlib.app.directindex.service.DirectIndexService; -import eu.dnetlib.app.directindex.service.DnetSolrClient; -import eu.dnetlib.app.directindex.sword.model.SwordError; +import eu.dnetlib.app.directindex.solr.SolrIndexClient; +import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory; import eu.dnetlib.app.directindex.sword.model.SwordErrorType; -import eu.dnetlib.app.directindex.sword.model.SwordMetadataDocument; import eu.dnetlib.app.directindex.sword.model.SwordService; import eu.dnetlib.app.directindex.sword.model.SwordStatusDocument; +import eu.dnetlib.app.directindex.tasks.OafMapper; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; -@RestController("/api/directindex/sword/3.0") -public class SwordServiceUrlController { +@RestController("/api/sword/3.0") +public class SwordApiController { @Autowired private DirectIndexService service; @Autowired - private DnetSolrClient dnetSolrClient; + ISLookupClient isLookupClient; + + @Autowired + private SolrIndexClientFactory solrIndexClientFactory; @GetMapping("/") public SwordService getServiceDocument() { @@ -73,7 +83,7 @@ public class SwordServiceUrlController { try { service.prepareMetadataInsertion(parseMetadata(json)); - } catch (final JsonProcessingException e) { + } catch (final JsonProcessingException | DirectIndexApiException e) { throw new SwordException(SwordErrorType.ContentMalformed); } @@ -82,12 +92,16 @@ public class SwordServiceUrlController { } @GetMapping("/{id}/metadata") - public ResponseEntity getMetadata(@PathVariable final String id) throws SwordException { - final SwordMetadataDocument metadata = dnetSolrClient.findDocument(id); + public ResponseEntity getMetadata(@PathVariable final String id) throws SwordException { - if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); } + final IndexDsInfo info = isLookupClient.currentIndexDsInfo(); + final SolrIndexClient solr = solrIndexClientFactory.getClient(info); - return new ResponseEntity<>(metadata, HttpStatus.OK); + final String metadata = solr.findRecord(id); + + if (StringUtils.isBlank(metadata)) { throw new SwordException(SwordErrorType.NotFound); } + + return new ResponseEntity<>(OafMapper.toResultEntry(metadata), HttpStatus.OK); } @PutMapping("/{id}/metadata") @@ -98,13 +112,14 @@ public class SwordServiceUrlController { @RequestHeader("Digest") final String digest, @RequestHeader(value = "Metadata-Format", defaultValue = "http://purl.org/net/sword/3.0/types/Metadata") final String mdFormat, @PathVariable final String id, - @RequestBody final SwordMetadataDocument document) throws SwordException { + @RequestBody final ResultEntry result) throws SwordException { - final SwordMetadataDocument metadata = dnetSolrClient.findDocument(id); + final IndexDsInfo info = isLookupClient.currentIndexDsInfo(); + final SolrIndexClient solr = solrIndexClientFactory.getClient(info); - if (metadata == null) { throw new SwordException(SwordErrorType.NotFound); } + if (!solr.existsRecord(id)) { throw new SwordException(SwordErrorType.NotFound); } - service.prepareMetadataReplacement(id, document); + service.prepareMetadataReplacement(id, result); return new ResponseEntity<>(HttpStatus.ACCEPTED); } @@ -118,8 +133,8 @@ public class SwordServiceUrlController { } - private SwordMetadataDocument parseMetadata(final String json) throws JsonProcessingException, JsonMappingException { - return new ObjectMapper().readValue(json, SwordMetadataDocument.class); + private ResultEntry parseMetadata(final String json) throws JsonProcessingException, JsonMappingException { + return new ObjectMapper().readValue(json, ResultEntry.class); } @ExceptionHandler(Throwable.class) diff --git a/src/main/java/eu/dnetlib/app/directindex/errors/DirectIndexApiException.java b/src/main/java/eu/dnetlib/app/directindex/errors/DirectIndexApiException.java new file mode 100644 index 0000000..9914459 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/errors/DirectIndexApiException.java @@ -0,0 +1,23 @@ +package eu.dnetlib.app.directindex.errors; + +public class DirectIndexApiException extends Exception { + + private static final long serialVersionUID = -117781753592959439L; + + public DirectIndexApiException() { + super(); + } + + public DirectIndexApiException(final String message, final Throwable cause) { + super(message, cause); + } + + public DirectIndexApiException(final String message) { + super(message); + } + + public DirectIndexApiException(final Throwable cause) { + super(cause); + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/sword/model/SwordError.java b/src/main/java/eu/dnetlib/app/directindex/errors/SwordError.java similarity index 92% rename from src/main/java/eu/dnetlib/app/directindex/sword/model/SwordError.java rename to src/main/java/eu/dnetlib/app/directindex/errors/SwordError.java index 6db7bb1..b3ff2df 100644 --- a/src/main/java/eu/dnetlib/app/directindex/sword/model/SwordError.java +++ b/src/main/java/eu/dnetlib/app/directindex/errors/SwordError.java @@ -1,4 +1,4 @@ -package eu.dnetlib.app.directindex.sword.model; +package eu.dnetlib.app.directindex.errors; import java.time.LocalDateTime; @@ -6,7 +6,7 @@ import org.apache.commons.lang3.exception.ExceptionUtils; import com.fasterxml.jackson.annotation.JsonProperty; -import eu.dnetlib.app.directindex.sword.SwordException; +import eu.dnetlib.app.directindex.sword.model.SwordErrorType; import jakarta.servlet.http.HttpServletRequest; public class SwordError { diff --git a/src/main/java/eu/dnetlib/app/directindex/sword/SwordException.java b/src/main/java/eu/dnetlib/app/directindex/errors/SwordException.java similarity index 90% rename from src/main/java/eu/dnetlib/app/directindex/sword/SwordException.java rename to src/main/java/eu/dnetlib/app/directindex/errors/SwordException.java index 275966a..76db3b0 100644 --- a/src/main/java/eu/dnetlib/app/directindex/sword/SwordException.java +++ b/src/main/java/eu/dnetlib/app/directindex/errors/SwordException.java @@ -1,4 +1,4 @@ -package eu.dnetlib.app.directindex.sword; +package eu.dnetlib.app.directindex.errors; import eu.dnetlib.app.directindex.sword.model.SwordErrorType; diff --git a/src/main/java/eu/dnetlib/app/directindex/input/DatasourceEntry.java b/src/main/java/eu/dnetlib/app/directindex/input/DatasourceEntry.java new file mode 100644 index 0000000..cf8fc02 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/input/DatasourceEntry.java @@ -0,0 +1,51 @@ +package eu.dnetlib.app.directindex.input; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.lang3.StringUtils; + +public class DatasourceEntry { + + private String id; + private String name; + private String prefix; + + public DatasourceEntry() {} + + public DatasourceEntry(final String id, final String name, final String prefix) { + this.id = id; + this.name = name; + this.prefix = prefix; + } + + public String getId() { + return id; + } + + public void setId(final String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(final String name) { + this.name = name; + } + + public String getPrefix() { + return prefix; + } + + public void setPrefix(final String prefix) { + this.prefix = prefix; + } + + public String calculateOpenaireId() { + if (StringUtils.isNotBlank(id)) { + final String[] arr = id.split("::"); + if (arr.length == 2) { return String.format("%s::%s", arr[0], DigestUtils.md5Hex(arr[1])); } + } + return ""; + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/input/PidEntry.java b/src/main/java/eu/dnetlib/app/directindex/input/PidEntry.java new file mode 100644 index 0000000..a741d09 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/input/PidEntry.java @@ -0,0 +1,35 @@ +package eu.dnetlib.app.directindex.input; + +import io.swagger.v3.oas.annotations.media.Schema; + +public class PidEntry { + + @Schema(required = true, description = "E.g. doi, pmc, urn. See http://api.openaire.eu/vocabularies/dnet:pid_types") + private String type; + + @Schema(required = true) + private String value; + + public PidEntry() {} + + public PidEntry(final String type, final String value) { + this.type = type; + this.value = value; + } + + public String getType() { + return type; + } + + public void setType(final String type) { + this.type = type; + } + + public String getValue() { + return value; + } + + public void setValue(final String value) { + this.value = value; + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/input/ResultEntry.java b/src/main/java/eu/dnetlib/app/directindex/input/ResultEntry.java new file mode 100644 index 0000000..3cf41d0 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/input/ResultEntry.java @@ -0,0 +1,226 @@ +package eu.dnetlib.app.directindex.input; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import io.swagger.v3.oas.annotations.media.Schema; + +public class ResultEntry { + + private String openaireId; + private String originalId; + + @Schema(required = true) + private String title; + private List authors = new ArrayList<>(); + private String publisher; + private String description; + + @Schema(description = "ISO Alpha-3 code. E.g. 'eng', 'ita'") + private String language; + private List pids = new ArrayList<>(); + + @Schema(required = false, allowableValues = { + "OPEN", "CLOSED", "RESTRICTED", "EMBARGO", "UNKNOWN", "OTHER", "OPEN SOURCE" + }) + private String accessRightCode; + private String embargoEndDate; + + /** + * One of publication, dataset, software, other. Default value is publication. + */ + @Schema(allowableValues = { + "publication", "dataset", "software", "other" + }) + private String type = "publication"; + + @Schema(required = true, description = "Use 001 for articles, 021 for datasets, 0029 for software. See: http://api.openaire.eu/vocabularies/dnet:publication_resource for all the available resource types.") + private String resourceType; + + @Schema(required = true) + private String url; + + @Schema(required = true, description = "Use opendoar___::2659 for Zenodo Publications; re3data_____::r3d100010468 for Zenodo datasets; infrastruct::openaire for OpenAIRE portal.") + private String collectedFromId; + private String hostedById; + + // String according to the EGI context profile, example: egi::classification::natsc::math + + @Schema(description = "E.g. fet, egi::classification::natsc::math::pure, egi::projects::EMI") + private List contexts = new ArrayList<>(); + + // String according to openaire guidelines: + // info:eu-repo/grantAgreement/Funder/FundingProgram/ProjectID/[Jurisdiction]/[ProjectName]/[ProjectAcronym] + @Schema(description = "E.g. info:eu-repo/grantAgreement/EC/FP7/283595/EU//OpenAIREplus") + private List linksToProjects = new ArrayList<>(); + + private static final Log log = LogFactory.getLog(ResultEntry.class); + + public ResultEntry() {} + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + + public String getOriginalId() { + return originalId; + } + + public void setOriginalId(final String originalId) { + this.originalId = originalId; + } + + public String getTitle() { + return title; + } + + public void setTitle(final String title) { + this.title = title; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(final List authors) { + this.authors = authors; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(final String publisher) { + this.publisher = publisher; + } + + public String getDescription() { + return description; + } + + public void setDescription(final String description) { + this.description = description; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(final String language) { + this.language = language; + } + + public List getPids() { + return pids; + } + + public void setPids(final List pids) { + this.pids = pids; + } + + /** + * Set required = true when the deprecated licenseCode is not used anymore by our client and it is removed + * + * @return access rights code + */ + public String getAccessRightCode() { + return accessRightCode; + } + + public void setAccessRightCode(final String accessRightCode) { + this.accessRightCode = accessRightCode; + } + + public String getResourceType() { + return resourceType; + } + + public void setResourceType(final String resourceType) { + this.resourceType = resourceType; + } + + public String getUrl() { + return url; + } + + public void setUrl(final String url) { + this.url = url; + } + + public String getCollectedFromId() { + return collectedFromId; + } + + public void setCollectedFromId(final String collectedFromId) { + this.collectedFromId = collectedFromId; + } + + public String getHostedById() { + return hostedById; + } + + public void setHostedById(final String hostedById) { + this.hostedById = hostedById; + } + + public List getContexts() { + return contexts; + } + + public void setContexts(final List contexts) { + this.contexts = contexts; + } + + public List getLinksToProjects() { + return linksToProjects; + } + + public void setLinksToProjects(final List linksToProjects) { + this.linksToProjects = linksToProjects; + } + + public String getType() { + return type; + } + + public void setType(final String type) { + this.type = type; + } + + public String getEmbargoEndDate() { + return embargoEndDate; + } + + public void setEmbargoEndDate(final String embargoEndDate) { + this.embargoEndDate = embargoEndDate; + } + + @Override + public String toString() { + return toJson(); + } + + public String getAnyId() { + return StringUtils.isNotBlank(openaireId) ? openaireId : originalId; + } + + public String toJson() { + try { + return new ObjectMapper().writeValueAsString(this); + } catch (final JsonProcessingException e) { + log.error("Error converting object in json", e); + throw new RuntimeException("Error converting object in json", e); + } + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/input/ZenodoContextList.java b/src/main/java/eu/dnetlib/app/directindex/input/ZenodoContextList.java new file mode 100644 index 0000000..81c64e7 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/input/ZenodoContextList.java @@ -0,0 +1,32 @@ +package eu.dnetlib.app.directindex.input; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class ZenodoContextList { + + private String zenodoid; + + private List openAirecommunitylist; + + public List getOpenAirecommunitylist() { + return openAirecommunitylist; + } + + public void setOpenAirecommunitylist(final List openAirecommunitylist) { + this.openAirecommunitylist = openAirecommunitylist; + + } + + public String getZenodoid() { + return zenodoid; + } + + public void setZenodoid(final String zenodoid) { + this.zenodoid = zenodoid; + + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClient.java b/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClient.java new file mode 100644 index 0000000..12f3040 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClient.java @@ -0,0 +1,126 @@ +package eu.dnetlib.app.directindex.is; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.codec.Charsets; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cache.annotation.CacheEvict; +import org.springframework.cache.annotation.Cacheable; +import org.springframework.stereotype.Component; + +import eu.dnetlib.app.directindex.errors.DirectIndexApiException; +import eu.dnetlib.app.directindex.input.DatasourceEntry; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +@Component +public class ISLookupClient { + + private static final Log log = LogFactory.getLog(ISLookupClient.class); + + @Autowired + private ISLookUpService lookupService; + + @Cacheable("indexDsInfo") + public IndexDsInfo currentIndexDsInfo() { + try { + log.info("Not using cache"); + + final String queryUrl = IOUtils.toString(getClass().getResourceAsStream("/xquery/findSolrIndexUrl.xquery"), Charsets.UTF_8); + final String queryDs = IOUtils.toString(getClass().getResourceAsStream("/xquery/findIndexDsInfo.xquery"), Charsets.UTF_8); + + final String indexBaseUrl = findOne(queryUrl); + final String idxDs = findOne(queryDs); + + if (idxDs.isEmpty()) { throw new IllegalStateException(queryDs + "\n\nreturned no results, check IS profiles"); } + + final String[] arr = idxDs.split("@@@"); + return new IndexDsInfo(indexBaseUrl, arr[0].trim(), arr[1].trim(), arr[2].trim()); + } catch (final Exception e) { + log.error(e.getMessage()); + throw new RuntimeException(e); + } + } + + @Cacheable("datasources") + public DatasourceEntry findDatasource(final String dsId) throws DirectIndexApiException { + final String query = + "collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType')//CONFIGURATION[./DATASOURCE_ORIGINAL_ID='" + dsId + + "']/concat(./OFFICIAL_NAME, ' @@@ ', .//FIELD/value[../key='NamespacePrefix'])"; + final String s = findOne(query); + final String[] arr = s.split("@@@"); + + final DatasourceEntry ds = new DatasourceEntry(dsId, arr[0].trim(), arr[1].trim()); + + if (StringUtils.isBlank(ds.getName()) || StringUtils.isBlank(ds.getPrefix())) { + log.error("Invalid datasource id: " + dsId); + throw new DirectIndexApiException("Invalid datasource id: " + dsId); + } + return ds; + } + + @Cacheable("vocabularies") + public Map findVocabulary(final String voc) throws DirectIndexApiException { + + final String query = "collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')[.//VOCABULARY_NAME/@code='" + voc + + "']//TERM/concat(@code, ' @@@ ', @english_name)"; + + final Map map = new HashMap<>(); + for (final String s : find(query)) { + final String[] arr = s.split("@@@"); + map.put(arr[0].trim(), arr[1].trim()); + } + + return map; + } + + @Cacheable("contexts") + public Map findContexts() throws DirectIndexApiException { + final String query = + "collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')[.//context/@type='community' or .//context/@type='ri']//*[name()='context' or name()='category' or name()='concept']/concat(@id, ' @@@ ', @label)"; + + final Map map = new HashMap<>(); + for (final String s : find(query)) { + final String[] arr = s.split("@@@"); + map.put(arr[0].trim(), arr[1].trim()); + } + return map; + } + + @Cacheable("layouts") + public String findLayoutForFormat(final String format) throws Exception { + return findOne("collection('/db/DRIVER/MDFormatDSResources/MDFormatDSResourceType')[.//NAME='" + format + "']//LAYOUT[@name='index']"); + } + + @CacheEvict(allEntries = true, value = { + "indexDsInfo", "datasources", "vocabularies", "contexts", "layouts" + }) + public void evictCache() { + log.info("Evicting indexDsInfo cache"); + } + + private String findOne(final String query) throws DirectIndexApiException { + try { + return lookupService.getResourceProfileByQuery(query); + } catch (final ISLookUpException e) { + log.error("Error executing xquery: " + query, e); + throw new DirectIndexApiException("Error executing xquery: " + query, e); + } + } + + private List find(final String query) throws DirectIndexApiException { + try { + return lookupService.quickSearchProfile(query); + } catch (final ISLookUpException e) { + log.error("Error executing xquery: " + query, e); + throw new DirectIndexApiException("Error executing xquery: " + query, e); + } + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClientFactory.java b/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClientFactory.java new file mode 100644 index 0000000..a310267 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/is/ISLookupClientFactory.java @@ -0,0 +1,50 @@ +package eu.dnetlib.app.directindex.is; + +import org.apache.cxf.endpoint.Client; +import org.apache.cxf.frontend.ClientProxy; +import org.apache.cxf.jaxws.JaxWsProxyFactoryBean; +import org.apache.cxf.transport.http.HTTPConduit; +import org.apache.cxf.transports.http.configuration.HTTPClientPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +public class ISLookupClientFactory { + + private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class); + + private static final int requestTimeout = 60000 * 10; + private static final int connectTimeout = 60000 * 10; + + public static ISLookUpService getLookUpService(final String isLookupUrl) { + return getServiceStub(ISLookUpService.class, isLookupUrl); + } + + @SuppressWarnings("unchecked") + private static T getServiceStub(final Class clazz, final String endpoint) { + log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); + final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); + jaxWsProxyFactory.setServiceClass(clazz); + jaxWsProxyFactory.setAddress(endpoint); + + final T service = (T) jaxWsProxyFactory.create(); + + final Client client = ClientProxy.getClient(service); + if (client != null) { + final HTTPConduit conduit = (HTTPConduit) client.getConduit(); + final HTTPClientPolicy policy = new HTTPClientPolicy(); + + log + .info(String + .format("setting connectTimeout to %s, requestTimeout to %s for service %s", connectTimeout, requestTimeout, clazz + .getCanonicalName())); + + policy.setConnectionTimeout(connectTimeout); + policy.setReceiveTimeout(requestTimeout); + conduit.setClient(policy); + } + + return service; + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/is/IndexDsInfo.java b/src/main/java/eu/dnetlib/app/directindex/is/IndexDsInfo.java new file mode 100644 index 0000000..13bf7fd --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/is/IndexDsInfo.java @@ -0,0 +1,44 @@ +package eu.dnetlib.app.directindex.is; + +public class IndexDsInfo { + + private final String indexBaseUrl; + private final String indexDsId; + private final String format; + private final String coll; + + public IndexDsInfo(final String indexBaseUrl, final String indexDsId, final String format, final String coll) { + this.indexBaseUrl = indexBaseUrl; + this.indexDsId = indexDsId; + this.format = format; + this.coll = coll; + } + + public String getIndexBaseUrl() { + return indexBaseUrl; + } + + public String getIndexDsId() { + return indexDsId; + } + + public String getFormat() { + return format; + } + + public String getColl() { + return coll; + } + + @Override + public int hashCode() { + return getColl().hashCode(); + } + + @Override + public boolean equals(final Object other) { + if (!(other instanceof IndexDsInfo)) { return false; } + + return getColl().equals(((IndexDsInfo) other).getColl()); + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/repo/PendingAction.java b/src/main/java/eu/dnetlib/app/directindex/repo/PendingAction.java new file mode 100644 index 0000000..8198d82 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/repo/PendingAction.java @@ -0,0 +1,95 @@ +package eu.dnetlib.app.directindex.repo; + +import java.time.OffsetDateTime; + +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.Id; +import jakarta.persistence.Table; + +@Entity +@Table(name = "pending_actions") +public class PendingAction { + + @Id + @Column(name = "id") + private String id; + + @Column(name = "type") + private String type; + + /** + * One of: DELETE, INSERT, UPDATE + */ + @Column(name = "operation") + private String operation; + + @Column(name = "body") + private String body; + + @Column(name = "created_by") + private String createdBy; + + @Column(name = "creation_date") + private OffsetDateTime creationDate; + + @Column(name = "execution_date") + private OffsetDateTime executionDate; + + public String getId() { + return id; + } + + public void setId(final String id) { + this.id = id; + } + + public String getType() { + return type; + } + + public void setType(final String type) { + this.type = type; + } + + public String getOperation() { + return operation; + } + + public void setOperation(final String operation) { + this.operation = operation; + } + + public String getBody() { + return body; + } + + public void setBody(final String body) { + this.body = body; + } + + public String getCreatedBy() { + return createdBy; + } + + public void setCreatedBy(final String createdBy) { + this.createdBy = createdBy; + } + + public OffsetDateTime getCreationDate() { + return creationDate; + } + + public void setCreationDate(final OffsetDateTime creationDate) { + this.creationDate = creationDate; + } + + public OffsetDateTime getExecutionDate() { + return executionDate; + } + + public void setExecutionDate(final OffsetDateTime executionDate) { + this.executionDate = executionDate; + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/repo/PendingActionRepository.java b/src/main/java/eu/dnetlib/app/directindex/repo/PendingActionRepository.java new file mode 100644 index 0000000..5189a1d --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/repo/PendingActionRepository.java @@ -0,0 +1,18 @@ +package eu.dnetlib.app.directindex.repo; + +import java.util.List; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.stereotype.Repository; + +@Repository +public interface PendingActionRepository extends JpaRepository { + + @Query(value = "select * from pending_actions where execution_date is null and (upper(operation) = 'INSERT' or upper(operation) = 'UPDATE')", nativeQuery = true) + List recentActions(); + + @Query(value = "select * from pending_actions where execution_date is null and upper(operation) = 'DELETE'", nativeQuery = true) + List toDeleteRecords(); + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/service/DirectIndexService.java b/src/main/java/eu/dnetlib/app/directindex/service/DirectIndexService.java index f203375..a4e131d 100644 --- a/src/main/java/eu/dnetlib/app/directindex/service/DirectIndexService.java +++ b/src/main/java/eu/dnetlib/app/directindex/service/DirectIndexService.java @@ -1,32 +1,91 @@ package eu.dnetlib.app.directindex.service; +import java.time.OffsetDateTime; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import eu.dnetlib.app.directindex.sword.model.SwordMetadataDocument; +import eu.dnetlib.app.directindex.errors.DirectIndexApiException; +import eu.dnetlib.app.directindex.input.DatasourceEntry; +import eu.dnetlib.app.directindex.input.ResultEntry; +import eu.dnetlib.app.directindex.is.ISLookupClient; +import eu.dnetlib.app.directindex.repo.PendingAction; +import eu.dnetlib.app.directindex.repo.PendingActionRepository; @Service public class DirectIndexService { + private static final DatasourceEntry UNKNOWN_REPO = + new DatasourceEntry("openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18", "Unknown Repository", "unknown_____"); + @Autowired - private DnetSolrClient dnetSolrClient; + private PendingActionRepository pendingActionRepository; - public void prepareMetadataDeletion(final String id) { + @Autowired + private ISLookupClient isLookupClient; + + public void prepareMetadataDeletion(final String openaireId) { + final PendingAction action = new PendingAction(); + + action.setId(openaireId); + action.setOperation("DELETE"); + action.setCreatedBy("TODO"); // TODO + action.setCreationDate(OffsetDateTime.now()); + action.setExecutionDate(null); + + pendingActionRepository.save(action); + } + + public void prepareMetadataReplacement(final String id, final ResultEntry document) { // TODO Auto-generated method stub } - public void prepareMetadataReplacement(final String id, final SwordMetadataDocument document) { - // TODO Auto-generated method stub + public String prepareMetadataInsertion(final ResultEntry r) throws DirectIndexApiException { + final PendingAction info = new PendingAction(); + if (StringUtils.isNotBlank(r.getOpenaireId())) { + + // TODO THE UPDATE SHOULD BE PERFORMED IN THE PREVIOUS METHOD + + if (!r.getOpenaireId().matches("^\\w{12}::\\w{32}$")) { + throw new DirectIndexApiException("Invalid openaireId: " + r.getOpenaireId() + " - regex ^\\w{12}::\\w{32}$ not matched"); + } + info.setOperation("UPDATE"); + } else if (StringUtils.isNoneBlank(r.getOriginalId(), r.getCollectedFromId())) { + fixOpenaireId(r); + info.setOperation("INSERT"); + } else { + throw new DirectIndexApiException("Missing identifier fields: [openaireId] or [originalId, collectedFromId]"); + } + + if (StringUtils.isBlank(r.getTitle())) { throw new DirectIndexApiException("A required field is missing: title"); } + if (StringUtils.isBlank(r.getUrl())) { throw new DirectIndexApiException("A required field is missing: url"); } + if (StringUtils.isBlank(r.getAccessRightCode())) { throw new DirectIndexApiException("A required field is missing: accessRightCode"); } + if (StringUtils.isBlank(r.getResourceType())) { throw new DirectIndexApiException("A required field is missing: resourceType"); } + if (StringUtils.isBlank(r.getCollectedFromId())) { throw new DirectIndexApiException("A required field is missing: collectedFromId"); } + if (StringUtils.isBlank(r.getType())) { throw new DirectIndexApiException("A required field is missing: type"); } + + info.setId(r.getOpenaireId()); + info.setBody(r.toJson()); + info.setType(r.getType()); + info.setCreatedBy("TODO"); // TODO + info.setCreationDate(OffsetDateTime.now()); + info.setExecutionDate(null); + + pendingActionRepository.save(info); + + return info.getId(); } - public String prepareMetadataInsertion(final SwordMetadataDocument body) { - // TODO Auto-generated method stub + private void fixOpenaireId(final ResultEntry r) throws DirectIndexApiException { + final DatasourceEntry ds = StringUtils.isNotBlank(r.getCollectedFromId()) ? isLookupClient.findDatasource(r.getCollectedFromId()) : UNKNOWN_REPO; + final String openaireId = ds.getPrefix() + "::" + DigestUtils.md5Hex(r.getOpenaireId()); - final String id = null; + r.setOpenaireId(openaireId); - return id; } } diff --git a/src/main/java/eu/dnetlib/app/directindex/service/DnetSolrClient.java b/src/main/java/eu/dnetlib/app/directindex/service/DnetSolrClient.java deleted file mode 100644 index f203026..0000000 --- a/src/main/java/eu/dnetlib/app/directindex/service/DnetSolrClient.java +++ /dev/null @@ -1,68 +0,0 @@ -package eu.dnetlib.app.directindex.service; - -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Service; - -import eu.dnetlib.app.directindex.sword.model.SwordMetadataDocument; - -@Service -public class DnetSolrClient { - - // TODO: implements an API to change the indexName (TMF / DMF) - - @Value("${dnet.solr.baseurl}") - private String baseUrl; - - private String indexName; - - public SwordMetadataDocument findDocument(final String id) { - // TODO - - final String solrUrl = currentUrl(); - - return null; - } - - public String saveDocument(final String id, final SwordMetadataDocument doc) { - // TODO - - final String solrUrl = currentUrl(); - - return null; - } - - public void deleteDocument(final String id) { - // TODO - - final String solrUrl = currentUrl(); - - } - - public void commit() { - // TODO - - final String solrUrl = currentUrl(); - - } - - private String currentUrl() { - return baseUrl + "/" + indexName; - } - - public String getBaseUrl() { - return baseUrl; - } - - public void setBaseUrl(final String baseUrl) { - this.baseUrl = baseUrl; - } - - public synchronized String getIndexName() { - return indexName; - } - - public synchronized void setIndexName(final String indexName) { - this.indexName = indexName; - } - -} diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java new file mode 100644 index 0000000..e268b07 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java @@ -0,0 +1,83 @@ +package eu.dnetlib.app.directindex.solr; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.Iterator; +import java.util.stream.Stream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.common.SolrInputDocument; + +import eu.dnetlib.app.directindex.errors.DirectIndexApiException; + +public class SolrIndexClient { + + private static final Log log = LogFactory.getLog(SolrIndexClient.class); + + private final CloudSolrClient cloudSolrClient; + + public SolrIndexClient(final CloudSolrClient cloudSolrClient) { + this.cloudSolrClient = cloudSolrClient; + } + + public void commit() throws DirectIndexApiException { + try { + cloudSolrClient.commit(); + } catch (SolrServerException | IOException e) { + throw new DirectIndexApiException(e); + } + } + + public void deleteByQuery(final String query) throws DirectIndexApiException { + try { + cloudSolrClient.deleteByQuery(query); + cloudSolrClient.commit(); + } catch (SolrServerException | IOException e) { + throw new DirectIndexApiException(e); + } + } + + public void addRecords(final Stream records) throws DirectIndexApiException { + try { + final Iterator iter = records + .map(this::prepareSolrDocument) + .iterator(); + + cloudSolrClient.add(iter); + } catch (final Throwable e) { + throw new DirectIndexApiException("Error creating solr document", e); + } + } + + public void addRecords(final String... xmlRecords) throws Exception { + addRecords(Arrays.stream(xmlRecords)); + } + + protected SolrInputDocument prepareSolrDocument(final String record) { + + final String version = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss'Z'").format(new Date()); + final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory(version); + final String indexRecord = SolrRecordMappper.toSolr(record); + if (log.isDebugEnabled()) { + log.debug("***************************************\nSubmitting index record:\n" + indexRecord + + "\n***************************************\n"); + } + return documentFactory.parseDocument(indexRecord); + + } + + public String findRecord(final String id) { + // TODO Auto-generated method stub + return null; + } + + public boolean existsRecord(final String id) { + // TODO Auto-generated method stub + return false; + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClientFactory.java b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClientFactory.java new file mode 100644 index 0000000..1b8b8d6 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClientFactory.java @@ -0,0 +1,41 @@ +package eu.dnetlib.app.directindex.solr; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.springframework.stereotype.Component; + +import eu.dnetlib.app.directindex.is.IndexDsInfo; + +@Component +public class SolrIndexClientFactory { + + private static final Log log = LogFactory.getLog(SolrIndexClientFactory.class); + + public static final String CHROOT_SEPARATOR = "/"; + + public SolrIndexClient getClient(final IndexDsInfo info) { + log.info(String.format("Initializing solr client (%s) with collection %s", info.getIndexBaseUrl(), info.getColl())); + + // Example: quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire + + final String s = StringUtils.substringAfterLast(info.getIndexBaseUrl(), CHROOT_SEPARATOR); + final String chroot = StringUtils.isNotBlank(s) ? CHROOT_SEPARATOR + s : null; + final String urls = chroot != null ? info.getIndexBaseUrl().replace(chroot, "") : info.getIndexBaseUrl(); + + final List urlList = Arrays.stream(urls.split(",")).map(String::trim).filter(StringUtils::isNotBlank).toList(); + + final CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(urlList, Optional.of(chroot)) + .withParallelUpdates(true) + .withDefaultCollection(info.getColl()) + .build(); + + return new SolrIndexClient(cloudSolrClient); + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/SolrRecordMappper.java b/src/main/java/eu/dnetlib/app/directindex/solr/SolrRecordMappper.java new file mode 100644 index 0000000..e190f3f --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/solr/SolrRecordMappper.java @@ -0,0 +1,10 @@ +package eu.dnetlib.app.directindex.solr; + +public class SolrRecordMappper { + + public static String toSolr(final String record) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/StreamingInputDocumentFactory.java b/src/main/java/eu/dnetlib/app/directindex/solr/StreamingInputDocumentFactory.java new file mode 100644 index 0000000..185e264 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/solr/StreamingInputDocumentFactory.java @@ -0,0 +1,250 @@ +package eu.dnetlib.app.directindex.solr; + +import java.io.StringReader; +import java.io.StringWriter; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import javax.xml.stream.XMLEventFactory; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLEventWriter; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.Namespace; +import javax.xml.stream.events.StartElement; +import javax.xml.stream.events.XMLEvent; + +import org.apache.solr.common.SolrInputDocument; + +/** + * Optimized version of the document parser, drop in replacement of InputDocumentFactory. + *

+ * Faster because: + *

    + *
  • Doesn't create a DOM for the full document + *
  • Doesn't execute xpaths agains the DOM + *
  • Quickly serialize the 'result' element directly in a string. + *
  • Uses less memory: less pressure on GC and allows more threads to process this in parallel + *
+ *

+ * This class is fully reentrant and can be invoked in parallel. + * + * @author claudio + */ +public class StreamingInputDocumentFactory { + + private static final String INDEX_FIELD_PREFIX = "__"; + + private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion"; + + private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid"; + + private static final String RESULT = "result"; + + private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT; + + private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier"; + + private static final String DEFAULTDNETRESULT = "dnetResult"; + + private static final String TARGETFIELDS = "targetFields"; + + private static final String INDEX_RECORD_ID_ELEMENT = "indexRecordIdentifier"; + + private static final String ROOT_ELEMENT = "indexRecord"; + + private static final int MAX_FIELD_LENGTH = 25000; + + private final ThreadLocal inputFactory = ThreadLocal + .withInitial(XMLInputFactory::newInstance); + + private final ThreadLocal outputFactory = ThreadLocal + .withInitial(XMLOutputFactory::newInstance); + + private final ThreadLocal eventFactory = ThreadLocal + .withInitial(XMLEventFactory::newInstance); + + private final String version; + + private String resultName = DEFAULTDNETRESULT; + + public StreamingInputDocumentFactory(final String version) { + this(version, DEFAULTDNETRESULT); + } + + public StreamingInputDocumentFactory(final String version, final String resultName) { + this.version = version; + this.resultName = resultName; + } + + public SolrInputDocument parseDocument(final String inputDocument) { + + final StringWriter results = new StringWriter(); + final List nsList = new LinkedList<>(); + try { + + final XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument)); + + final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>()); + + while (parser.hasNext()) { + final XMLEvent event = parser.nextEvent(); + if (event != null && event.isStartElement()) { + final String localName = event.asStartElement().getName().getLocalPart(); + + if (ROOT_ELEMENT.equals(localName)) { + nsList.addAll(getNamespaces(event)); + } else if (INDEX_RECORD_ID_ELEMENT.equals(localName)) { + final XMLEvent text = parser.nextEvent(); + final String recordId = getText(text); + indexDocument.addField(INDEX_RECORD_ID, recordId); + } else if (TARGETFIELDS.equals(localName)) { + parseTargetFields(indexDocument, parser); + } else if (resultName.equals(localName)) { + copyResult(indexDocument, results, parser, nsList, resultName); + } + } + } + + if (version != null) { + indexDocument.addField(DS_VERSION, version); + } + + if (!indexDocument.containsKey(INDEX_RECORD_ID)) { throw new IllegalStateException("cannot extract record ID from: " + inputDocument); } + + return indexDocument; + } catch (final XMLStreamException e) { + throw new IllegalStateException(e); + } + } + + private List getNamespaces(final XMLEvent event) { + final List res = new LinkedList<>(); + final Iterator nsIter = event.asStartElement().getNamespaces(); + while (nsIter.hasNext()) { + final Namespace ns = nsIter.next(); + res.add(ns); + } + return res; + } + + /** + * Parse the targetFields block and add fields to the solr document. + * + * @param indexDocument + * @param parser + * @throws XMLStreamException + */ + protected void parseTargetFields( + final SolrInputDocument indexDocument, + final XMLEventReader parser) + throws XMLStreamException { + + boolean hasFields = false; + + while (parser.hasNext()) { + final XMLEvent targetEvent = parser.nextEvent(); + if (targetEvent.isEndElement() + && TARGETFIELDS.equals(targetEvent.asEndElement().getName().getLocalPart())) { + break; + } + + if (targetEvent.isStartElement()) { + final String fieldName = targetEvent.asStartElement().getName().getLocalPart(); + final XMLEvent text = parser.nextEvent(); + + final String data = getText(text); + + addField(indexDocument, fieldName, data); + hasFields = true; + } + } + + if (!hasFields) { + indexDocument.clear(); + } + } + + /** + * Copy the /indexRecord/result element and children, preserving namespace declarations etc. + * + * @param indexDocument + * @param results + * @param parser + * @param nsList + * @throws XMLStreamException + */ + protected void copyResult( + final SolrInputDocument indexDocument, + final StringWriter results, + final XMLEventReader parser, + final List nsList, + final String dnetResult) + throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + + for (final Namespace ns : nsList) { + eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); + } + + final StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(eventFactory.get().createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } + + /** + * Helper used to add a field to a solr doc. It avoids to add empy fields + * + * @param indexDocument + * @param field + * @param value + */ + private final void addField( + final SolrInputDocument indexDocument, + final String field, + final String value) { + final String cleaned = value.trim(); + if (!cleaned.isEmpty()) { + // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); + indexDocument.addField(field.toLowerCase(), cleaned); + } + } + + /** + * Helper used to get the string from a text element. + * + * @param text + * @return the + */ + protected final String getText(final XMLEvent text) { + if (text.isEndElement()) { + // text.asEndElement().getName().getLocalPart()); + return ""; + } + + final String data = text.asCharacters().getData(); + if (data != null && data.length() > MAX_FIELD_LENGTH) { return data.substring(0, MAX_FIELD_LENGTH); } + + return data; + } +} diff --git a/src/main/java/eu/dnetlib/app/directindex/sword/model/SwordMetadataDocument.java b/src/main/java/eu/dnetlib/app/directindex/sword/model/SwordMetadataDocument.java deleted file mode 100644 index ea3c5e5..0000000 --- a/src/main/java/eu/dnetlib/app/directindex/sword/model/SwordMetadataDocument.java +++ /dev/null @@ -1,6 +0,0 @@ -package eu.dnetlib.app.directindex.sword.model; - - -public class SwordMetadataDocument { - -} diff --git a/src/main/java/eu/dnetlib/app/directindex/tasks/OafMapper.java b/src/main/java/eu/dnetlib/app/directindex/tasks/OafMapper.java new file mode 100644 index 0000000..28e3579 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/tasks/OafMapper.java @@ -0,0 +1,18 @@ +package eu.dnetlib.app.directindex.tasks; + +import eu.dnetlib.app.directindex.input.ResultEntry; + +public class OafMapper { + + public static String toOAF(final ResultEntry result) { + // TODO + + return null; + } + + public static ResultEntry toResultEntry(final String oaf) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/src/main/java/eu/dnetlib/app/directindex/tasks/ScheduledActions.java b/src/main/java/eu/dnetlib/app/directindex/tasks/ScheduledActions.java new file mode 100644 index 0000000..b4a3e42 --- /dev/null +++ b/src/main/java/eu/dnetlib/app/directindex/tasks/ScheduledActions.java @@ -0,0 +1,113 @@ +package eu.dnetlib.app.directindex.tasks; + +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Objects; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.app.directindex.errors.DirectIndexApiException; +import eu.dnetlib.app.directindex.input.ResultEntry; +import eu.dnetlib.app.directindex.is.ISLookupClient; +import eu.dnetlib.app.directindex.is.IndexDsInfo; +import eu.dnetlib.app.directindex.repo.PendingAction; +import eu.dnetlib.app.directindex.repo.PendingActionRepository; +import eu.dnetlib.app.directindex.solr.SolrIndexClient; +import eu.dnetlib.app.directindex.solr.SolrIndexClientFactory; + +@Component +@ConditionalOnProperty(value = "directindex.scheduling.enabled", havingValue = "true", matchIfMissing = false) +public class ScheduledActions { + + private static final Log log = LogFactory.getLog(ScheduledActions.class); + + @Autowired + private ISLookupClient isLookupClient; + + @Autowired + private SolrIndexClientFactory solrIndexClientFactory; + + @Autowired + private PendingActionRepository pendingActionRepository; + + @Scheduled(fixedDelay = 5 * 60 * 1000) // 5 minutes + public void indexNewRecords() throws DirectIndexApiException { + log.info("Indexing new records..."); + + final List list = pendingActionRepository.recentActions(); + + if (list.size() > 0) { + final IndexDsInfo info = isLookupClient.currentIndexDsInfo(); + final SolrIndexClient solr = solrIndexClientFactory.getClient(info); + + final ObjectMapper objectMapper = new ObjectMapper(); + + solr.addRecords(list.stream() + .map(PendingAction::getBody) + .map(s -> { + try { + return objectMapper.readValue(s, ResultEntry.class); + } catch (final Exception e) { + log.error(e); + return null; + } + }) + .filter(Objects::nonNull) + .map(OafMapper::toOAF) + .filter(StringUtils::isNotBlank)); + + solr.commit(); + + updateExecutionDate(list); + } + + log.info(String.format("done (indexed records: %s)", list.size())); + + } + + @Scheduled(fixedDelay = 30 * 60 * 1000) // 30 minutes + public void deleteRecords() { + log.info("Deleting records from index..."); + + final List list = pendingActionRepository.toDeleteRecords(); + + if (list.size() > 0) { + final IndexDsInfo info = isLookupClient.currentIndexDsInfo(); + final SolrIndexClient solr = solrIndexClientFactory.getClient(info); + + list.stream().map(PendingAction::getId).forEach(id -> { + try { + final String query = String.format("objidentifier:\"%s\" OR resultdupid:\"%s\"", id, id); + solr.deleteByQuery(query); + } catch (final DirectIndexApiException e) { + log.error(e); + } + }); + + updateExecutionDate(list); + } + + log.info(String.format("done (deleted records: %s)", list.size())); + } + + private void updateExecutionDate(final List list) { + final OffsetDateTime now = OffsetDateTime.now(); + list.forEach(r -> r.setExecutionDate(now)); + pendingActionRepository.saveAll(list); + } + + @Scheduled(fixedDelay = 24 * 60 * 60 * 1000) // 24 hours + public void removeOldRecordsFromDB() { + // TODO + + } + +}