From 2df47908e11cec3ec97205b0787f3894d948a4f0 Mon Sep 17 00:00:00 2001 From: Alexios Symeonidis Date: Mon, 29 Jul 2024 14:42:35 +0300 Subject: [PATCH] 9719: cursor_based pagination impl. --- .../api/dto/request/DataSourceRequest.java | 14 ++++++- .../api/dto/request/OrganizationRequest.java | 19 ++++++--- .../api/dto/request/PaginatedRequest.java | 3 +- .../api/dto/request/ProjectRequest.java | 19 ++++++--- .../dto/request/ResearchProductsRequest.java | 14 ++++++- .../validators/PaginationValidator.java | 2 +- .../api/dto/response/SearchHeader.java | 21 +++++----- .../api/errors/ServiceExceptionHandler.java | 15 ++++++- .../java/eu/openaire/api/mappers/Utils.java | 25 ++++++++++++ .../query/DataSourceRequestMapper.java | 1 + .../response/ResponseHeaderMapper.java | 12 +++++- .../api/repositories/SolrRepository.java | 39 ++++++++++++------- .../eu/openaire/api/solr/SolrQueryParams.java | 24 ++++-------- 13 files changed, 144 insertions(+), 64 deletions(-) diff --git a/src/main/java/eu/openaire/api/dto/request/DataSourceRequest.java b/src/main/java/eu/openaire/api/dto/request/DataSourceRequest.java index f81feed..d29ebca 100644 --- a/src/main/java/eu/openaire/api/dto/request/DataSourceRequest.java +++ b/src/main/java/eu/openaire/api/dto/request/DataSourceRequest.java @@ -10,6 +10,10 @@ import lombok.Data; import lombok.Getter; import lombok.Setter; +import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC; + @Getter @Setter @Data @@ -92,7 +96,7 @@ public class DataSourceRequest implements PaginatedRequest { @Min(value = 1) @Parameter( - description = "Page number of the results", + description = API_PAGE_DESC, schema = @Schema(defaultValue = "1", type = "integer") ) private int page = 1; @@ -100,11 +104,17 @@ public class DataSourceRequest implements PaginatedRequest { @Min(value = 1, message = "Page size must be at least 1") @Max(value = 
100, message = "Page size must be at most 100") @Parameter( - description = "Number of results per page", + description = API_PAGE_SIZE_DESC, schema = @Schema(defaultValue = "10", type = "integer") ) private int pageSize = 10; + @Parameter( + description = API_CURSOR_DESC, + schema = @Schema(type = "string") + ) + private String cursor; + @Parameter( description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." , schema = @Schema(defaultValue = "relevance DESC") diff --git a/src/main/java/eu/openaire/api/dto/request/OrganizationRequest.java b/src/main/java/eu/openaire/api/dto/request/OrganizationRequest.java index 6a24214..12fe1e4 100644 --- a/src/main/java/eu/openaire/api/dto/request/OrganizationRequest.java +++ b/src/main/java/eu/openaire/api/dto/request/OrganizationRequest.java @@ -10,6 +10,10 @@ import lombok.Data; import lombok.Getter; import lombok.Setter; +import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC; + @Getter @Setter @Data @@ -71,19 +75,22 @@ public class OrganizationRequest implements PaginatedRequest { @Min(value = 1) @Parameter( - description = "Page number of the results", - schema = @Schema(defaultValue = "1", type = "integer") - ) + description = API_PAGE_DESC, + schema = @Schema(defaultValue = "1", type = "integer")) private int page = 1; @Min(value = 1, message = "Page size must be at least 1") @Max(value = 100, message = "Page size must be at most 100") @Parameter( - description = "Number of results per page", - schema = @Schema(defaultValue = "10", type = "integer") - ) + description = API_PAGE_SIZE_DESC, + schema = @Schema(defaultValue = "10", type = "integer")) private int pageSize = 10; + @Parameter( + description = API_CURSOR_DESC, + schema = @Schema(type = "string")) + private String 
cursor; + @Parameter( description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." , schema = @Schema(defaultValue = "relevance DESC") diff --git a/src/main/java/eu/openaire/api/dto/request/PaginatedRequest.java b/src/main/java/eu/openaire/api/dto/request/PaginatedRequest.java index 37740c0..f81d07e 100644 --- a/src/main/java/eu/openaire/api/dto/request/PaginatedRequest.java +++ b/src/main/java/eu/openaire/api/dto/request/PaginatedRequest.java @@ -1,8 +1,7 @@ package eu.openaire.api.dto.request; public interface PaginatedRequest { - int getPage(); - int getPageSize(); + String getCursor(); } diff --git a/src/main/java/eu/openaire/api/dto/request/ProjectRequest.java b/src/main/java/eu/openaire/api/dto/request/ProjectRequest.java index b7e192c..8ce7f86 100644 --- a/src/main/java/eu/openaire/api/dto/request/ProjectRequest.java +++ b/src/main/java/eu/openaire/api/dto/request/ProjectRequest.java @@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat; import java.time.LocalDate; +import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC; + @Getter @Setter @Data @@ -138,19 +142,22 @@ public class ProjectRequest implements PaginatedRequest { @Min(value = 1) @Parameter( - description = "Page number of the results", - schema = @Schema(defaultValue = "1", type = "integer") - ) + description = API_PAGE_DESC, + schema = @Schema(defaultValue = "1", type = "integer")) private int page = 1; @Min(value = 1, message = "Page size must be at least 1") @Max(value = 100, message = "Page size must be at most 100") @Parameter( - description = "Number of results per page", - schema = @Schema(defaultValue = "10", type = "integer") - ) + description = API_PAGE_SIZE_DESC, + schema = @Schema(defaultValue = "10", type = 
"integer")) private int pageSize = 10; + @Parameter( + description = API_CURSOR_DESC, + schema = @Schema(type = "string")) + private String cursor; + @Parameter( description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'startDate', 'endDate'. Multiple sorting parameters should be comma-separated." , schema = @Schema(defaultValue = "relevance DESC") diff --git a/src/main/java/eu/openaire/api/dto/request/ResearchProductsRequest.java b/src/main/java/eu/openaire/api/dto/request/ResearchProductsRequest.java index f7cc4cb..1a3b55e 100644 --- a/src/main/java/eu/openaire/api/dto/request/ResearchProductsRequest.java +++ b/src/main/java/eu/openaire/api/dto/request/ResearchProductsRequest.java @@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat; import java.time.LocalDate; +import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_DESC; +import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC; + @Getter @Setter @Data @@ -294,7 +298,7 @@ public class ResearchProductsRequest implements PaginatedRequest { @Min(value = 1) @Parameter( - description = "Page number of the results", + description = API_PAGE_DESC, schema = @Schema(defaultValue = "1", type = "integer") ) private int page = 1; @@ -302,11 +306,17 @@ public class ResearchProductsRequest implements PaginatedRequest { @Min(value = 1, message = "Page size must be at least 1") @Max(value = 100, message = "Page size must be at most 100") @Parameter( - description = "Number of results per page", + description = API_PAGE_SIZE_DESC, schema = @Schema(defaultValue = "10", type = "integer") ) private int pageSize = 10; + @Parameter( + description = API_CURSOR_DESC, + schema = @Schema(type = "string") + ) + private String cursor; + @Parameter( description = "The field to sort the results by and the sort direction. 
The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'publicationDate', 'dateOfCollection', 'influence', 'popularity', 'citationCount', 'impulse'. Multiple sorting parameters should be comma-separated.", schema = @Schema(defaultValue = "relevance DESC") diff --git a/src/main/java/eu/openaire/api/dto/request/validators/PaginationValidator.java b/src/main/java/eu/openaire/api/dto/request/validators/PaginationValidator.java index 68f7872..4342039 100644 --- a/src/main/java/eu/openaire/api/dto/request/validators/PaginationValidator.java +++ b/src/main/java/eu/openaire/api/dto/request/validators/PaginationValidator.java @@ -11,7 +11,7 @@ public class PaginationValidator implements Validator { private final HttpServletRequest request; - private final int MAX_RESULTS = 10000; + private static final int MAX_RESULTS = 10000; public PaginationValidator(HttpServletRequest request) { this.request = request; diff --git a/src/main/java/eu/openaire/api/dto/response/SearchHeader.java b/src/main/java/eu/openaire/api/dto/response/SearchHeader.java index 67bbfdd..17a261e 100644 --- a/src/main/java/eu/openaire/api/dto/response/SearchHeader.java +++ b/src/main/java/eu/openaire/api/dto/response/SearchHeader.java @@ -1,22 +1,21 @@ package eu.openaire.api.dto.response; import com.fasterxml.jackson.annotation.JsonInclude; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.Data; +import static eu.openaire.api.mappers.Utils.API_NEXT_CURSOR_DESC; + @Data @JsonInclude(JsonInclude.Include.NON_NULL) public class SearchHeader { - private SearchHeaderDebug debug; + private Long numFound; + private Float maxScore; + private Integer queryTime; + private Integer page; + private Integer pageSize; - private long numFound; - - private float maxScore; - - private int queryTime; - - private int page; - - private int pageSize; - + @Schema(description = API_NEXT_CURSOR_DESC) + private String nextCursor; } diff --git 
a/src/main/java/eu/openaire/api/errors/ServiceExceptionHandler.java b/src/main/java/eu/openaire/api/errors/ServiceExceptionHandler.java index c01c7e2..3616da5 100644 --- a/src/main/java/eu/openaire/api/errors/ServiceExceptionHandler.java +++ b/src/main/java/eu/openaire/api/errors/ServiceExceptionHandler.java @@ -14,6 +14,8 @@ import org.springframework.web.context.request.WebRequest; import org.springframework.web.servlet.resource.NoResourceFoundException; import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; @RestControllerAdvice @@ -21,6 +23,9 @@ public class ServiceExceptionHandler { private final Logger log = LogManager.getLogger(this.getClass()); + private static final String URL_REGEX = "https?://\\S*"; + private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX); + @ExceptionHandler(NotFoundException.class) public ResponseEntity handleNotFoundException(NotFoundException e, WebRequest request) { return this.handleException(e.getMessage(), request, HttpStatus.NOT_FOUND); @@ -50,8 +55,9 @@ public class ServiceExceptionHandler { @ExceptionHandler(Exception.class) public ResponseEntity handleAllOtherExceptions(Exception e, WebRequest request) { + //todo: log4j2.xml - add error appender e.printStackTrace(); - return this.handleException("An internal server error occurred", request, HttpStatus.INTERNAL_SERVER_ERROR); + return this.handleException(e.getMessage(), request, HttpStatus.INTERNAL_SERVER_ERROR); } private ResponseEntity handleException(String message, WebRequest request, HttpStatus httpStatus) { @@ -59,7 +65,7 @@ public class ServiceExceptionHandler { String path = String.format("%s?%s", req.getRequestURI(), req.getQueryString()); ErrorResponse response = ErrorResponse.builder() - .message(message) + .message(obfuscateUrlsInMessage(message)) .error(httpStatus.getReasonPhrase()) .code(httpStatus.value()) .timestamp(new Date()) @@ -70,4 +76,9 @@ public class 
ServiceExceptionHandler { .status(httpStatus) .body(response); } + + private static String obfuscateUrlsInMessage(String message) { + Matcher matcher = URL_PATTERN.matcher(message == null ? "" : message); // exceptions may carry no message; Pattern.matcher(null) would throw NPE + return message == null ? null : matcher.replaceAll("[https://***]."); // a null message is passed through unchanged + } } \ No newline at end of file diff --git a/src/main/java/eu/openaire/api/mappers/Utils.java b/src/main/java/eu/openaire/api/mappers/Utils.java index b1606aa..314569a 100644 --- a/src/main/java/eu/openaire/api/mappers/Utils.java +++ b/src/main/java/eu/openaire/api/mappers/Utils.java @@ -13,6 +13,31 @@ import java.util.*; public class Utils { + private Utils() {} + + public static final String API_PAGE_DESC = """ + Page number of the results,\s + used for basic start/rows pagination.\s + Max dataset to retrieve - 10000 records.\s + To get more than that, use cursor-based pagination."""; + + public static final String API_PAGE_SIZE_DESC = "Number of results per page"; + + /* todo: maybe mention that if a big dataset is required, then download directly the compressed data file + like this, we avoid high load on this microservice */ + public static final String API_CURSOR_DESC = """ + Cursor-based pagination. Initial value: `cursor=*`.\s + Cursor should be used when it is required to retrieve a big dataset (more than 10000 records).\s + To get the next page of results, use nextCursor returned in the response. + """; + + public static final String API_NEXT_CURSOR_DESC = """ + nextCursor - to be used in the next request to get the next page of results.\s + You can repeat this process until you’ve fetched as many results as you want,\s + or until the nextCursor returned matches the current cursor you’ve already specified,\s + indicating that there are no more results.
+ """; + static public String escapeAndJoin(String[] tokens, String predicate, boolean addQuotes, String suffix) { tokens = Arrays.stream(tokens) diff --git a/src/main/java/eu/openaire/api/mappers/query/DataSourceRequestMapper.java b/src/main/java/eu/openaire/api/mappers/query/DataSourceRequestMapper.java index ad0ff17..c3a353d 100644 --- a/src/main/java/eu/openaire/api/mappers/query/DataSourceRequestMapper.java +++ b/src/main/java/eu/openaire/api/mappers/query/DataSourceRequestMapper.java @@ -14,6 +14,7 @@ public interface DataSourceRequestMapper { @Mapping(target = "start", expression = "java( calculateStart(src.getPage(), src.getPageSize()) )") @Mapping(target = "rows", source = "pageSize") @Mapping(target = "debugQuery", source = "debugQuery") + @Mapping(target = "cursor", source = "cursor") @Mapping(target = "sort", expression = "java( eu.openaire.api.mappers.Utils.formatSortByParam(src.getSortBy(), SolrFieldsMapper.dataSourceSortMapping) )") SolrQueryParams toSolrQuery(DataSourceRequest src); diff --git a/src/main/java/eu/openaire/api/mappers/response/ResponseHeaderMapper.java b/src/main/java/eu/openaire/api/mappers/response/ResponseHeaderMapper.java index 59fba52..a3ca25b 100644 --- a/src/main/java/eu/openaire/api/mappers/response/ResponseHeaderMapper.java +++ b/src/main/java/eu/openaire/api/mappers/response/ResponseHeaderMapper.java @@ -4,23 +4,33 @@ import eu.openaire.api.dto.response.SearchHeader; import eu.openaire.api.dto.response.SearchHeaderDebug; import eu.openaire.api.solr.SolrQueryParams; import org.apache.solr.client.solrj.response.QueryResponse; +import org.mapstruct.AfterMapping; import org.mapstruct.Mapper; import org.mapstruct.Mapping; +import org.mapstruct.MappingTarget; import java.util.Optional; @Mapper(componentModel = "spring") public interface ResponseHeaderMapper { - @Mapping(target = "numFound", source = "queryResponse.results.numFound") + @Mapping(target = "numFound", expression = "java( 
Long.valueOf(queryResponse.getResults().getNumFound()) )") @Mapping(target = "maxScore", source = "queryResponse.results.maxScore") @Mapping(target = "page", source = "page") @Mapping(target = "pageSize", source = "pageSize") + @Mapping(target = "nextCursor", source = "queryResponse.nextCursorMark") @Mapping(target = "queryTime", expression = "java( (int) queryResponse.getHeader().get(\"QTime\") )") @Mapping(target = "debug", expression = "java( mapDebug(queryResponse, solrQueryParams, debugQuery) )") SearchHeader toSearchHeader(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery, int page, int pageSize); + @AfterMapping + default void removePage(@MappingTarget SearchHeader searchHeader) { + if (searchHeader.getNextCursor() != null) { + searchHeader.setPage(null); + } + } + default SearchHeaderDebug mapDebug(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery) { if (!debugQuery) { return null; diff --git a/src/main/java/eu/openaire/api/repositories/SolrRepository.java b/src/main/java/eu/openaire/api/repositories/SolrRepository.java index dc16795..b3b2452 100644 --- a/src/main/java/eu/openaire/api/repositories/SolrRepository.java +++ b/src/main/java/eu/openaire/api/repositories/SolrRepository.java @@ -10,6 +10,7 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.CursorMarkParams; import org.springframework.stereotype.Repository; import java.io.IOException; @@ -19,31 +20,33 @@ import java.io.IOException; public class SolrRepository { private final SolrConnectionManager solrConnectionManager; - private final Logger log = LogManager.getLogger(this.getClass()); + private static final String UNIQUE_KEY = "__indexrecordidentifier"; public SolrDocument getById(String id) throws SolrServerException, 
IOException { return solrConnectionManager.getSolrClient().getById(id); } public QueryResponse query(SolrQueryParams queryParams) throws SolrServerException, IOException { - SolrQuery query = new SolrQuery(); + query.setQuery(queryParams.getQueryString()); // add Q - // add Q - query.setQuery(queryParams.getQueryString()); - - // add FQ - for (String fq : queryParams.getFilterQueries()) { + for (String fq : queryParams.getFilterQueries()) { // add FQ query.addFilterQuery(fq); } - // add FL - query.addField(queryParams.getFieldList()); + query.addField(queryParams.getFieldList()); // add FL - // set pagination parameters - query.setStart(queryParams.getStart()); + // set pagination query.setRows(queryParams.getRows()); + String cursor = queryParams.getCursor(); + + if (cursor != null && !cursor.isEmpty()) { // set cursor-based pagination + query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursor); + // the uniqueKey tiebreaker sort is appended once the requested sort clauses are set + } else { // set basic page/page-size pagination + query.setStart(queryParams.getStart()); + } // set sorting for (var sortClause : queryParams.getSort()) { @@ -55,10 +58,16 @@ public class SolrRepository { query.set("debugQuery", "on"); } + if (cursor != null && !cursor.isEmpty()) { // cursorMark requires a uniqueKey tiebreaker; appended last so the requested sort stays primary + query.addSort(UNIQUE_KEY, SolrQuery.ORDER.asc); + } - log.info(query); - - return solrConnectionManager.getSolrClient().query(query); - + try { + log.info(query); + return solrConnectionManager.getSolrClient().query(query); + } catch (SolrServerException | IOException e) { + log.error(e.getMessage(), e); // keep the stack trace in the log + throw e; // rethrow the original exception instead of re-wrapping it + } } public SolrPingResponse ping() throws SolrServerException, IOException { diff --git a/src/main/java/eu/openaire/api/solr/SolrQueryParams.java b/src/main/java/eu/openaire/api/solr/SolrQueryParams.java index a5c9802..0b15f93 100644 --- a/src/main/java/eu/openaire/api/solr/SolrQueryParams.java +++ b/src/main/java/eu/openaire/api/solr/SolrQueryParams.java @@ -2,24 +2,16 @@ package eu.openaire.api.solr; import lombok.Data; import
org.apache.solr.client.solrj.SolrQuery; - import java.util.List; @Data public class SolrQueryParams { - - String queryString = "*:*"; - - List<String> filterQueries; - - String fieldList = "__json"; - - Boolean debugQuery = false; - - int start; - - int rows; - - List<SolrQuery.SortClause> sort; - + private String queryString = "*:*"; + private List<String> filterQueries; + private String fieldList = "__json"; + private Boolean debugQuery = false; + private int start; + private int rows; + private List<SolrQuery.SortClause> sort; + private String cursor; }