9719: cursor_based pagination impl.

This commit is contained in:
Alexios Symeonidis 2024-07-29 14:42:35 +03:00
parent 05d60af01e
commit 2df47908e1
13 changed files with 144 additions and 64 deletions

View File

@ -10,6 +10,10 @@ import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -92,7 +96,7 @@ public class DataSourceRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer")
)
private int page = 1;
@ -100,11 +104,17 @@ public class DataSourceRequest implements PaginatedRequest {
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer")
)
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string")
)
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -10,6 +10,10 @@ import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -71,19 +75,22 @@ public class OrganizationRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
schema = @Schema(defaultValue = "1", type = "integer")
)
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer"))
private int page = 1;
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
schema = @Schema(defaultValue = "10", type = "integer")
)
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer"))
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string"))
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -1,8 +1,7 @@
package eu.openaire.api.dto.request;
/**
 * Contract for request DTOs that carry pagination parameters.
 * Implemented in this code base by DataSourceRequest, OrganizationRequest,
 * ProjectRequest and ResearchProductsRequest.
 */
public interface PaginatedRequest {
/** Returns the requested page number (page/page-size pagination). */
int getPage();
/** Returns the requested number of results per page. */
int getPageSize();
/**
 * Returns the cursor value for cursor-based pagination.
 * NOTE(review): presumably null when the client did not send a cursor - confirm against callers.
 */
String getCursor();
}

View File

@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDate;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -138,19 +142,22 @@ public class ProjectRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
schema = @Schema(defaultValue = "1", type = "integer")
)
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer"))
private int page = 1;
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
schema = @Schema(defaultValue = "10", type = "integer")
)
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer"))
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string"))
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'startDate', 'endDate'. Multiple sorting parameters should be comma-separated." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDate;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -294,7 +298,7 @@ public class ResearchProductsRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer")
)
private int page = 1;
@ -302,11 +306,17 @@ public class ResearchProductsRequest implements PaginatedRequest {
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer")
)
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string")
)
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'publicationDate', 'dateOfCollection', 'influence', 'popularity', 'citationCount', 'impulse'. Multiple sorting parameters should be comma-separated.",
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -11,7 +11,7 @@ public class PaginationValidator implements Validator {
private final HttpServletRequest request;
private final int MAX_RESULTS = 10000;
private static final int MAX_RESULTS = 10000;
public PaginationValidator(HttpServletRequest request) {
this.request = request;

View File

@ -1,22 +1,21 @@
package eu.openaire.api.dto.response;
import com.fasterxml.jackson.annotation.JsonInclude;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import static eu.openaire.api.mappers.Utils.API_NEXT_CURSOR_DESC;
/**
 * Response DTO holding search metadata: hit count, maximum score, query time,
 * paging values, optional debug details and the cursor for the next page.
 * Instances are produced by ResponseHeaderMapper.toSearchHeader; properties whose
 * value is null are left out of the JSON output (JsonInclude.Include.NON_NULL).
 *
 * NOTE(review): numFound, maxScore, queryTime, page and pageSize are declared twice
 * in this listing (boxed and primitive), which looks like the before/after sides of
 * a diff. ResponseHeaderMapper.removePage calls setPage(null), which only compiles
 * against the boxed Integer declaration - confirm which set is the live one.
 */
@Data
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SearchHeader {
// Debug details; only mapped when the request asked for debugQuery.
private SearchHeaderDebug debug;
private Long numFound;
private Float maxScore;
private Integer queryTime;
private Integer page;
private Integer pageSize;
private long numFound;
private float maxScore;
private int queryTime;
private int page;
private int pageSize;
// Cursor to send with the next request when cursor-based pagination is used.
@Schema(description = API_NEXT_CURSOR_DESC)
private String nextCursor;
}

View File

@ -14,6 +14,8 @@ import org.springframework.web.context.request.WebRequest;
import org.springframework.web.servlet.resource.NoResourceFoundException;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@RestControllerAdvice
@ -21,6 +23,9 @@ public class ServiceExceptionHandler {
private final Logger log = LogManager.getLogger(this.getClass());
private static final String URL_REGEX = "https?://\\S*";
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
@ExceptionHandler(NotFoundException.class)
public ResponseEntity<ErrorResponse> handleNotFoundException(NotFoundException e, WebRequest request) {
return this.handleException(e.getMessage(), request, HttpStatus.NOT_FOUND);
@ -50,8 +55,9 @@ public class ServiceExceptionHandler {
/**
 * Handler registered for Exception.class; answers with HTTP 500
 * (INTERNAL_SERVER_ERROR) through handleException.
 *
 * NOTE(review): two return statements are listed back to back below (a fixed generic
 * message versus e.getMessage()); this looks like the before/after sides of a diff -
 * confirm which one is live. If e.getMessage() is the one returned to clients, be
 * aware that it can be null and can expose internal details.
 *
 * @param e       the exception that was raised
 * @param request the current web request, used to build the error response
 * @return the error response with status 500
 */
@ExceptionHandler(Exception.class)
public ResponseEntity<ErrorResponse> handleAllOtherExceptions(Exception e, WebRequest request) {
//todo: log4j2.xml - add error appender
// Stack trace goes to stderr only; the class-level logger is not used here.
e.printStackTrace();
return this.handleException("An internal server error occurred", request, HttpStatus.INTERNAL_SERVER_ERROR);
return this.handleException(e.getMessage(), request, HttpStatus.INTERNAL_SERVER_ERROR);
}
private ResponseEntity<ErrorResponse> handleException(String message, WebRequest request, HttpStatus httpStatus) {
@ -59,7 +65,7 @@ public class ServiceExceptionHandler {
String path = String.format("%s?%s", req.getRequestURI(), req.getQueryString());
ErrorResponse response = ErrorResponse.builder()
.message(message)
.message(obfuscateUrlsInMessage(message))
.error(httpStatus.getReasonPhrase())
.code(httpStatus.value())
.timestamp(new Date())
@ -70,4 +76,9 @@ public class ServiceExceptionHandler {
.status(httpStatus)
.body(response);
}
/**
 * Replaces every http/https URL found in an error message with a fixed placeholder
 * before the message is placed in the error response body.
 *
 * @param message the error message to sanitize; may be {@code null}, because callers
 *                pass {@link Throwable#getMessage()}, which is allowed to return {@code null}
 * @return the message with each URL replaced by {@code [https://***].}, or {@code null}
 *         when {@code message} is {@code null}
 */
private static String obfuscateUrlsInMessage(String message) {
    if (message == null) {
        // Pattern.matcher(null) throws NullPointerException, which would make the
        // exception handler itself fail while building the error response.
        return null;
    }
    return URL_PATTERN.matcher(message).replaceAll("[https://***].");
}
}

View File

@ -13,6 +13,31 @@ import java.util.*;
public class Utils {
private Utils() {}
public static final String API_PAGE_DESC = """
Page number of the results,\s
used for basic start/rows pagination.\s
Max dataset to retrieve - 10000 records.\s
To get more than that, use cursor-based pagination.""";
public static final String API_PAGE_SIZE_DESC = "Number of results per page";
/* TODO: consider mentioning that users who need a very large dataset should download
the compressed data file directly; that would avoid putting a high load on this microservice. */
public static final String API_CURSOR_DESC = """
Cursor-based pagination. Initial value: `cursor=*`.\s
Cursor should be used when it is required to retrieve a big dataset (more than 10000 records).\s
To get the next page of results, use nextCursor returned in the response.
""";
/**
 * API documentation text for the {@code nextCursor} field of the search response header.
 */
public static final String API_NEXT_CURSOR_DESC = """
nextCursor - to be used in the next request to get the next page of results.\s
You can repeat this process until you've fetched as many results as you want,\s
or until the nextCursor returned matches the current cursor you've already specified,\s
indicating that there are no more results.
""";
static public String escapeAndJoin(String[] tokens, String predicate, boolean addQuotes, String suffix) {
tokens = Arrays.stream(tokens)

View File

@ -14,6 +14,7 @@ public interface DataSourceRequestMapper {
@Mapping(target = "start", expression = "java( calculateStart(src.getPage(), src.getPageSize()) )")
@Mapping(target = "rows", source = "pageSize")
@Mapping(target = "debugQuery", source = "debugQuery")
@Mapping(target = "cursor", source = "cursor")
@Mapping(target = "sort", expression = "java( eu.openaire.api.mappers.Utils.formatSortByParam(src.getSortBy(), SolrFieldsMapper.dataSourceSortMapping) )")
SolrQueryParams toSolrQuery(DataSourceRequest src);

View File

@ -4,23 +4,33 @@ import eu.openaire.api.dto.response.SearchHeader;
import eu.openaire.api.dto.response.SearchHeaderDebug;
import eu.openaire.api.solr.SolrQueryParams;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.mapstruct.AfterMapping;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.MappingTarget;
import java.util.Optional;
@Mapper(componentModel = "spring")
public interface ResponseHeaderMapper {
/**
 * Builds the SearchHeader of a response from the Solr QueryResponse and the
 * paging values of the request.
 * numFound and maxScore are taken from the response results, nextCursor from the
 * response's nextCursorMark, queryTime from the "QTime" entry of the response header,
 * and debug from mapDebug(...).
 *
 * NOTE(review): two mappings for target "numFound" are listed below (a source path and
 * a Java expression); this looks like the before/after sides of a diff - confirm which
 * one is live, since only one mapping per target can be declared.
 *
 * @param queryResponse   the Solr response to read the header values from
 * @param solrQueryParams the parameters the query was built from (passed to mapDebug)
 * @param debugQuery      whether debug details should be mapped
 * @param page            the requested page number
 * @param pageSize        the requested page size
 * @return the populated search header
 */
@Mapping(target = "numFound", source = "queryResponse.results.numFound")
@Mapping(target = "numFound", expression = "java( Long.valueOf(queryResponse.getResults().getNumFound()) )")
@Mapping(target = "maxScore", source = "queryResponse.results.maxScore")
@Mapping(target = "page", source = "page")
@Mapping(target = "pageSize", source = "pageSize")
@Mapping(target = "nextCursor", source = "queryResponse.nextCursorMark")
@Mapping(target = "queryTime", expression = "java( (int) queryResponse.getHeader().get(\"QTime\") )")
@Mapping(target = "debug", expression = "java( mapDebug(queryResponse, solrQueryParams, debugQuery) )")
SearchHeader toSearchHeader(QueryResponse queryResponse, SolrQueryParams solrQueryParams,
boolean debugQuery, int page, int pageSize);
/**
 * Post-mapping hook: when the mapped header carries a next-cursor value, the page
 * number is cleared by setting it to {@code null}; otherwise the header is left as is.
 *
 * @param searchHeader the freshly mapped header to adjust in place
 */
@AfterMapping
default void removePage(@MappingTarget SearchHeader searchHeader) {
    final boolean cursorPresent = searchHeader.getNextCursor() != null;
    if (!cursorPresent) {
        return;
    }
    searchHeader.setPage(null);
}
default SearchHeaderDebug mapDebug(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery) {
if (!debugQuery) {
return null;

View File

@ -10,6 +10,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CursorMarkParams;
import org.springframework.stereotype.Repository;
import java.io.IOException;
@ -19,31 +20,33 @@ import java.io.IOException;
public class SolrRepository {
private final SolrConnectionManager solrConnectionManager;
private final Logger log = LogManager.getLogger(this.getClass());
private static final String UNIQUE_KEY = "__indexrecordidentifier";
public SolrDocument getById(String id) throws SolrServerException, IOException {
return solrConnectionManager.getSolrClient().getById(id);
}
public QueryResponse query(SolrQueryParams queryParams) throws SolrServerException, IOException {
SolrQuery query = new SolrQuery();
query.setQuery(queryParams.getQueryString()); // add Q
// add Q
query.setQuery(queryParams.getQueryString());
// add FQ
for (String fq : queryParams.getFilterQueries()) {
for (String fq : queryParams.getFilterQueries()) { // add FQ
query.addFilterQuery(fq);
}
// add FL
query.addField(queryParams.getFieldList());
query.addField(queryParams.getFieldList()); // add FL
// set pagination parameters
query.setStart(queryParams.getStart());
// set pagination
query.setRows(queryParams.getRows());
String cursor = queryParams.getCursor();
if (cursor != null && !cursor.isEmpty()) { // set cursor-based pagination
query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursor);
query.addSort(UNIQUE_KEY, SolrQuery.ORDER.asc);
} else { // set basic page/page-size pagination
query.setStart(queryParams.getStart());
}
// set sorting
for (var sortClause : queryParams.getSort()) {
@ -55,10 +58,16 @@ public class SolrRepository {
query.set("debugQuery", "on");
}
try {
log.info(query);
return solrConnectionManager.getSolrClient().query(query);
} catch (SolrServerException e) {
log.error(e.getMessage());
throw new SolrServerException(e);
} catch (IOException e) {
log.error(e.getMessage());
throw new IOException(e);
}
}
public SolrPingResponse ping() throws SolrServerException, IOException {

View File

@ -2,24 +2,16 @@ package eu.openaire.api.solr;
import lombok.Data;
import org.apache.solr.client.solrj.SolrQuery;
import java.util.List;
/**
 * Parameter holder from which SolrRepository.query(...) builds a SolrQuery:
 * main query string, filter queries, field list, debug flag, paging values,
 * sort clauses and an optional cursor.
 *
 * NOTE(review): every field except cursor is declared twice in this listing
 * (package-private and private), which looks like the before/after sides of a diff -
 * confirm which set is the live one.
 */
@Data
public class SolrQueryParams {
String queryString = "*:*";
List<String> filterQueries;
String fieldList = "__json";
Boolean debugQuery = false;
int start;
int rows;
List<SolrQuery.SortClause> sort;
// Main query (Solr "q"); defaults to match-all.
private String queryString = "*:*";
// Filter queries, each added to the SolrQuery as a separate "fq".
private List<String> filterQueries;
// Field list passed to SolrQuery.addField.
private String fieldList = "__json";
private Boolean debugQuery = false;
// Offset for page/page-size pagination; SolrRepository.query applies it only when no cursor is set.
private int start;
private int rows;
private List<SolrQuery.SortClause> sort;
// Cursor for cursor-based pagination; when non-empty, SolrRepository.query sends it as the cursor mark.
private String cursor;
}