Merge pull request '9719: cursor_based implementation' (#6) from 9719_cursor-based_pagination into master
Reviewed-on: #6
This commit is contained in:
commit
f23acfff89
|
@ -10,6 +10,10 @@ import lombok.Data;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -92,7 +96,7 @@ public class DataSourceRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
private int page = 1;
|
||||
|
@ -100,11 +104,17 @@ public class DataSourceRequest implements PaginatedRequest {
|
|||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string")
|
||||
)
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -10,6 +10,10 @@ import lombok.Data;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -71,19 +75,22 @@ public class OrganizationRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer"))
|
||||
private int page = 1;
|
||||
|
||||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer"))
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string"))
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
package eu.openaire.api.dto.request;
|
||||
|
||||
/**
 * Contract shared by request DTOs that carry pagination settings.
 *
 * <p>Exposes both the page/page-size pair and the cursor value, so that callers can read
 * pagination input without knowing the concrete request type.
 */
public interface PaginatedRequest {

    /**
     * @return the requested page number
     */
    int getPage();

    /**
     * @return the requested number of results per page
     */
    int getPageSize();

    /**
     * @return the cursor value supplied with the request
     */
    String getCursor();
}
|
||||
|
|
|
@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
|
|||
|
||||
import java.time.LocalDate;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -138,19 +142,22 @@ public class ProjectRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer"))
|
||||
private int page = 1;
|
||||
|
||||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer"))
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string"))
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'startDate', 'endDate'. Multiple sorting parameters should be comma-separated." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
|
|||
|
||||
import java.time.LocalDate;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -294,7 +298,7 @@ public class ResearchProductsRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
private int page = 1;
|
||||
|
@ -302,11 +306,17 @@ public class ResearchProductsRequest implements PaginatedRequest {
|
|||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string")
|
||||
)
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'publicationDate', 'dateOfCollection', 'influence', 'popularity', 'citationCount', 'impulse'. Multiple sorting parameters should be comma-separated.",
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -11,7 +11,7 @@ public class PaginationValidator implements Validator {
|
|||
|
||||
private final HttpServletRequest request;
|
||||
|
||||
private final int MAX_RESULTS = 10000;
|
||||
private static final int MAX_RESULTS = 10000;
|
||||
|
||||
public PaginationValidator(HttpServletRequest request) {
|
||||
this.request = request;
|
||||
|
|
|
@ -1,22 +1,21 @@
|
|||
package eu.openaire.api.dto.response;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_NEXT_CURSOR_DESC;
|
||||
|
||||
@Data
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class SearchHeader {
|
||||
|
||||
private SearchHeaderDebug debug;
|
||||
private Long numFound;
|
||||
private Float maxScore;
|
||||
private Integer queryTime;
|
||||
private Integer page;
|
||||
private Integer pageSize;
|
||||
|
||||
private long numFound;
|
||||
|
||||
private float maxScore;
|
||||
|
||||
private int queryTime;
|
||||
|
||||
private int page;
|
||||
|
||||
private int pageSize;
|
||||
|
||||
@Schema(description = API_NEXT_CURSOR_DESC)
|
||||
private String nextCursor;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,8 @@ import org.springframework.web.context.request.WebRequest;
|
|||
import org.springframework.web.servlet.resource.NoResourceFoundException;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@RestControllerAdvice
|
||||
|
@ -21,6 +23,9 @@ public class ServiceExceptionHandler {
|
|||
|
||||
private final Logger log = LogManager.getLogger(this.getClass());
|
||||
|
||||
private static final String URL_REGEX = "https?://\\S*";
|
||||
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
|
||||
|
||||
@ExceptionHandler(NotFoundException.class)
|
||||
public ResponseEntity<ErrorResponse> handleNotFoundException(NotFoundException e, WebRequest request) {
|
||||
return this.handleException(e.getMessage(), request, HttpStatus.NOT_FOUND);
|
||||
|
@ -50,8 +55,9 @@ public class ServiceExceptionHandler {
|
|||
|
||||
@ExceptionHandler(Exception.class)
|
||||
public ResponseEntity<ErrorResponse> handleAllOtherExceptions(Exception e, WebRequest request) {
|
||||
//todo: log4j2.xml - add error appender
|
||||
e.printStackTrace();
|
||||
return this.handleException("An internal server error occurred", request, HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
return this.handleException(e.getMessage(), request, HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
private ResponseEntity<ErrorResponse> handleException(String message, WebRequest request, HttpStatus httpStatus) {
|
||||
|
@ -59,7 +65,7 @@ public class ServiceExceptionHandler {
|
|||
String path = String.format("%s?%s", req.getRequestURI(), req.getQueryString());
|
||||
|
||||
ErrorResponse response = ErrorResponse.builder()
|
||||
.message(message)
|
||||
.message(obfuscateUrlsInMessage(message))
|
||||
.error(httpStatus.getReasonPhrase())
|
||||
.code(httpStatus.value())
|
||||
.timestamp(new Date())
|
||||
|
@ -70,4 +76,9 @@ public class ServiceExceptionHandler {
|
|||
.status(httpStatus)
|
||||
.body(response);
|
||||
}
|
||||
|
||||
private static String obfuscateUrlsInMessage(String message) {
|
||||
Matcher matcher = URL_PATTERN.matcher(message);
|
||||
return matcher.replaceAll("[https://***].");
|
||||
}
|
||||
}
|
|
@ -13,6 +13,31 @@ import java.util.*;
|
|||
|
||||
public class Utils {
|
||||
|
||||
private Utils() {}
|
||||
|
||||
public static final String API_PAGE_DESC = """
|
||||
Page number of the results,\s
|
||||
used for basic start/rows pagination.\s
|
||||
Max dataset to retrieve - 10000 records.\s
|
||||
To get more than that, use cursor-based pagination.""";
|
||||
|
||||
public static final String API_PAGE_SIZE_DESC = "Number of results per page";
|
||||
|
||||
/* TODO: consider mentioning that, when a large dataset is required, the compressed data file should be downloaded directly;
|
||||
this avoids putting a high load on this microservice. */
|
||||
public static final String API_CURSOR_DESC = """
|
||||
Cursor-based pagination. Initial value: `cursor=*`.\s
|
||||
Cursor should be used when it is required to retrieve a big dataset (more than 10000 records).\s
|
||||
To get the next page of results, use nextCursor returned in the response.
|
||||
""";
|
||||
|
||||
public static final String API_NEXT_CURSOR_DESC = """
|
||||
nextCursor - to be used in the next request to get the next page of results.\s
|
||||
You can repeat this process until you’ve fetched as many results as you want,\s
|
||||
or until the nextCursor returned matches the current cursor you’ve already specified,\s
|
||||
indicating that there are no more results.
|
||||
""";
|
||||
|
||||
static public String escapeAndJoin(String[] tokens, String predicate, boolean addQuotes, String suffix) {
|
||||
|
||||
tokens = Arrays.stream(tokens)
|
||||
|
|
|
@ -14,6 +14,7 @@ public interface DataSourceRequestMapper {
|
|||
@Mapping(target = "start", expression = "java( calculateStart(src.getPage(), src.getPageSize()) )")
|
||||
@Mapping(target = "rows", source = "pageSize")
|
||||
@Mapping(target = "debugQuery", source = "debugQuery")
|
||||
@Mapping(target = "cursor", source = "cursor")
|
||||
@Mapping(target = "sort", expression = "java( eu.openaire.api.mappers.Utils.formatSortByParam(src.getSortBy(), SolrFieldsMapper.dataSourceSortMapping) )")
|
||||
SolrQueryParams toSolrQuery(DataSourceRequest src);
|
||||
|
||||
|
|
|
@ -4,23 +4,33 @@ import eu.openaire.api.dto.response.SearchHeader;
|
|||
import eu.openaire.api.dto.response.SearchHeaderDebug;
|
||||
import eu.openaire.api.solr.SolrQueryParams;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.mapstruct.AfterMapping;
|
||||
import org.mapstruct.Mapper;
|
||||
import org.mapstruct.Mapping;
|
||||
import org.mapstruct.MappingTarget;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
@Mapper(componentModel = "spring")
|
||||
public interface ResponseHeaderMapper {
|
||||
|
||||
@Mapping(target = "numFound", source = "queryResponse.results.numFound")
|
||||
@Mapping(target = "numFound", expression = "java( Long.valueOf(queryResponse.getResults().getNumFound()) )")
|
||||
@Mapping(target = "maxScore", source = "queryResponse.results.maxScore")
|
||||
@Mapping(target = "page", source = "page")
|
||||
@Mapping(target = "pageSize", source = "pageSize")
|
||||
@Mapping(target = "nextCursor", source = "queryResponse.nextCursorMark")
|
||||
@Mapping(target = "queryTime", expression = "java( (int) queryResponse.getHeader().get(\"QTime\") )")
|
||||
@Mapping(target = "debug", expression = "java( mapDebug(queryResponse, solrQueryParams, debugQuery) )")
|
||||
SearchHeader toSearchHeader(QueryResponse queryResponse, SolrQueryParams solrQueryParams,
|
||||
boolean debugQuery, int page, int pageSize);
|
||||
|
||||
@AfterMapping
|
||||
default void removePage(@MappingTarget SearchHeader searchHeader) {
|
||||
if (searchHeader.getNextCursor() != null) {
|
||||
searchHeader.setPage(null);
|
||||
}
|
||||
}
|
||||
|
||||
default SearchHeaderDebug mapDebug(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery) {
|
||||
if (!debugQuery) {
|
||||
return null;
|
||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.solr.client.solrj.SolrServerException;
|
|||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.params.CursorMarkParams;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -19,31 +20,33 @@ import java.io.IOException;
|
|||
public class SolrRepository {
|
||||
|
||||
private final SolrConnectionManager solrConnectionManager;
|
||||
|
||||
private final Logger log = LogManager.getLogger(this.getClass());
|
||||
private static final String UNIQUE_KEY = "__indexrecordidentifier";
|
||||
|
||||
public SolrDocument getById(String id) throws SolrServerException, IOException {
|
||||
return solrConnectionManager.getSolrClient().getById(id);
|
||||
}
|
||||
|
||||
public QueryResponse query(SolrQueryParams queryParams) throws SolrServerException, IOException {
|
||||
|
||||
SolrQuery query = new SolrQuery();
|
||||
query.setQuery(queryParams.getQueryString()); // add Q
|
||||
|
||||
// add Q
|
||||
query.setQuery(queryParams.getQueryString());
|
||||
|
||||
// add FQ
|
||||
for (String fq : queryParams.getFilterQueries()) {
|
||||
for (String fq : queryParams.getFilterQueries()) { // add FQ
|
||||
query.addFilterQuery(fq);
|
||||
}
|
||||
|
||||
// add FL
|
||||
query.addField(queryParams.getFieldList());
|
||||
query.addField(queryParams.getFieldList()); // add FL
|
||||
|
||||
// set pagination parameters
|
||||
query.setStart(queryParams.getStart());
|
||||
// set pagination
|
||||
query.setRows(queryParams.getRows());
|
||||
String cursor = queryParams.getCursor();
|
||||
|
||||
if (cursor != null && !cursor.isEmpty()) { // set cursor-based pagination
|
||||
query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursor);
|
||||
query.addSort(UNIQUE_KEY, SolrQuery.ORDER.asc);
|
||||
} else { // set basic page/page-size pagination
|
||||
query.setStart(queryParams.getStart());
|
||||
}
|
||||
|
||||
// set sorting
|
||||
for (var sortClause : queryParams.getSort()) {
|
||||
|
@ -55,10 +58,16 @@ public class SolrRepository {
|
|||
query.set("debugQuery", "on");
|
||||
}
|
||||
|
||||
try {
|
||||
log.info(query);
|
||||
|
||||
return solrConnectionManager.getSolrClient().query(query);
|
||||
|
||||
} catch (SolrServerException e) {
|
||||
log.error(e.getMessage());
|
||||
throw new SolrServerException(e);
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SolrPingResponse ping() throws SolrServerException, IOException {
|
||||
|
|
|
@ -2,24 +2,16 @@ package eu.openaire.api.solr;
|
|||
|
||||
import lombok.Data;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class SolrQueryParams {
|
||||
|
||||
String queryString = "*:*";
|
||||
|
||||
List<String> filterQueries;
|
||||
|
||||
String fieldList = "__json";
|
||||
|
||||
Boolean debugQuery = false;
|
||||
|
||||
int start;
|
||||
|
||||
int rows;
|
||||
|
||||
List<SolrQuery.SortClause> sort;
|
||||
|
||||
private String queryString = "*:*";
|
||||
private List<String> filterQueries;
|
||||
private String fieldList = "__json";
|
||||
private Boolean debugQuery = false;
|
||||
private int start;
|
||||
private int rows;
|
||||
private List<SolrQuery.SortClause> sort;
|
||||
private String cursor;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue