9719: cursor_based implementation

This commit is contained in:
Alexios Symeonidis 2024-07-29 14:42:35 +03:00
parent 8df8091a3d
commit 7fc64caa44
17 changed files with 138 additions and 72 deletions

View File

@ -10,6 +10,10 @@ import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -92,7 +96,7 @@ public class DataSourceRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer")
)
private int page = 1;
@ -100,11 +104,17 @@ public class DataSourceRequest implements PaginatedRequest {
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer")
)
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string")
)
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -10,6 +10,10 @@ import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -71,19 +75,22 @@ public class OrganizationRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
schema = @Schema(defaultValue = "1", type = "integer")
)
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer"))
private int page = 1;
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
schema = @Schema(defaultValue = "10", type = "integer")
)
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer"))
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string"))
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -1,8 +1,7 @@
package eu.openaire.api.dto.request;
/**
 * Common pagination contract for the API request DTOs
 * (implemented by the datasource, organization, project and research-product requests).
 *
 * <p>Exposes both page/page-size pagination and a cursor value for cursor-based pagination.
 */
public interface PaginatedRequest {
/** @return the requested page number of the results */
int getPage();
/** @return the requested number of results per page */
int getPageSize();
/** @return the cursor used for cursor-based pagination; the implementing DTOs leave it unset ({@code null}) by default */
String getCursor();
}

View File

@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDate;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -138,19 +142,22 @@ public class ProjectRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
schema = @Schema(defaultValue = "1", type = "integer")
)
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer"))
private int page = 1;
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
schema = @Schema(defaultValue = "10", type = "integer")
)
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer"))
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string"))
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'startDate', 'endDate'. Multiple sorting parameters should be comma-separated." ,
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDate;
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
@Getter
@Setter
@Data
@ -294,7 +298,7 @@ public class ResearchProductsRequest implements PaginatedRequest {
@Min(value = 1)
@Parameter(
description = "Page number of the results",
description = API_PAGE_DESC,
schema = @Schema(defaultValue = "1", type = "integer")
)
private int page = 1;
@ -302,11 +306,17 @@ public class ResearchProductsRequest implements PaginatedRequest {
@Min(value = 1, message = "Page size must be at least 1")
@Max(value = 100, message = "Page size must be at most 100")
@Parameter(
description = "Number of results per page",
description = API_PAGE_SIZE_DESC,
schema = @Schema(defaultValue = "10", type = "integer")
)
private int pageSize = 10;
@Parameter(
description = API_CURSOR_DESC,
schema = @Schema(type = "string")
)
private String cursor;
@Parameter(
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'publicationDate', 'dateOfCollection', 'influence', 'popularity', 'citationCount', 'impulse'. Multiple sorting parameters should be comma-separated.",
schema = @Schema(defaultValue = "relevance DESC")

View File

@ -11,7 +11,7 @@ public class PaginationValidator implements Validator {
private final HttpServletRequest request;
private final int MAX_RESULTS = 10000;
private static final int MAX_RESULTS = 10000;
public PaginationValidator(HttpServletRequest request) {
this.request = request;

View File

@ -6,17 +6,11 @@ import lombok.Data;
@Data
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SearchHeader {
private SearchHeaderDebug debug;
private long numFound;
private float maxScore;
private int queryTime;
private int page;
private int pageSize;
private Long numFound;
private Float maxScore;
private Integer queryTime;
private Integer page;
private Integer pageSize;
private String nextCursor;
}

View File

@ -14,6 +14,8 @@ import org.springframework.web.context.request.WebRequest;
import org.springframework.web.servlet.resource.NoResourceFoundException;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@RestControllerAdvice
@ -21,6 +23,9 @@ public class ServiceExceptionHandler {
private final Logger log = LogManager.getLogger(this.getClass());
private static final String URL_REGEX = "https?://\\S*";
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
@ExceptionHandler(NotFoundException.class)
public ResponseEntity<ErrorResponse> handleNotFoundException(NotFoundException e, WebRequest request) {
return this.handleException(e.getMessage(), request, HttpStatus.NOT_FOUND);
@ -50,8 +55,9 @@ public class ServiceExceptionHandler {
@ExceptionHandler(Exception.class)
public ResponseEntity<ErrorResponse> handleAllOtherExceptions(Exception e, WebRequest request) {
//todo: log4j2.xml - add error appender
e.printStackTrace();
return this.handleException("An internal server error occurred", request, HttpStatus.INTERNAL_SERVER_ERROR);
return this.handleException(e.getMessage(), request, HttpStatus.INTERNAL_SERVER_ERROR);
}
private ResponseEntity<ErrorResponse> handleException(String message, WebRequest request, HttpStatus httpStatus) {
@ -59,7 +65,7 @@ public class ServiceExceptionHandler {
String path = String.format("%s?%s", req.getRequestURI(), req.getQueryString());
ErrorResponse response = ErrorResponse.builder()
.message(message)
.message(obfuscateUrlsInMessage(message))
.error(httpStatus.getReasonPhrase())
.code(httpStatus.value())
.timestamp(new Date())
@ -70,4 +76,9 @@ public class ServiceExceptionHandler {
.status(httpStatus)
.body(response);
}
/**
 * Masks every URL contained in an error message before the message is sent to the client.
 *
 * <p>Each match of {@code URL_PATTERN} is replaced with a fixed placeholder so that
 * internal endpoints mentioned in exception messages are not exposed.
 *
 * @param message the raw error message; may be {@code null}, since callers pass
 *                {@code Exception#getMessage()} which is allowed to return {@code null}
 * @return the message with all URLs replaced by the placeholder, or {@code null}
 *         when {@code message} is {@code null}
 */
private static String obfuscateUrlsInMessage(String message) {
    if (message == null) {
        // Pattern#matcher(null) throws a NullPointerException, which would make the
        // exception handler itself fail; pass the absent message through unchanged.
        return null;
    }
    return URL_PATTERN.matcher(message).replaceAll("[https://***].");
}
}

View File

@ -13,6 +13,22 @@ import java.util.*;
public class Utils {
private Utils() {}
public static final String API_PAGE_DESC = """
Page number of the results,\s
used for basic start/rows pagination.\s
Max dataset to retrieve - 10000 records.\s
To get more than that, use cursor-based pagination""";
public static final String API_PAGE_SIZE_DESC = "Number of results per page";
/* TODO: consider mentioning that, when a big dataset is required, the compressed data file
should be downloaded directly; this way we avoid high load on this microservice. */
public static final String API_CURSOR_DESC = """
Cursor-based pagination.\s
Cursor should be used when it is required to retrieve a big dataset (more than 10000 records)""";
static public String escapeAndJoin(String[] tokens, String predicate, boolean addQuotes, String suffix) {
tokens = Arrays.stream(tokens)

View File

@ -14,6 +14,7 @@ public interface DataSourceRequestMapper {
@Mapping(target = "start", expression = "java( calculateStart(src.getPage(), src.getPageSize()) )")
@Mapping(target = "rows", source = "pageSize")
@Mapping(target = "debugQuery", source = "debugQuery")
@Mapping(target = "cursor", source = "cursor")
@Mapping(target = "sort", expression = "java( eu.openaire.api.mappers.Utils.formatSortByParam(src.getSortBy(), SolrFieldsMapper.dataSourceSortMapping) )")
SolrQueryParams toSolrQuery(DataSourceRequest src);

View File

@ -4,23 +4,33 @@ import eu.openaire.api.dto.response.SearchHeader;
import eu.openaire.api.dto.response.SearchHeaderDebug;
import eu.openaire.api.solr.SolrQueryParams;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.mapstruct.AfterMapping;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.MappingTarget;
import java.util.Optional;
@Mapper(componentModel = "spring")
public interface ResponseHeaderMapper {
@Mapping(target = "numFound", source = "queryResponse.results.numFound")
@Mapping(target = "numFound", expression = "java( Long.valueOf(queryResponse.getResults().getNumFound()) )")
@Mapping(target = "maxScore", source = "queryResponse.results.maxScore")
@Mapping(target = "page", source = "page")
@Mapping(target = "pageSize", source = "pageSize")
@Mapping(target = "nextCursor", source = "queryResponse.nextCursorMark")
@Mapping(target = "queryTime", expression = "java( (int) queryResponse.getHeader().get(\"QTime\") )")
@Mapping(target = "debug", expression = "java( mapDebug(queryResponse, solrQueryParams, debugQuery) )")
SearchHeader toSearchHeader(QueryResponse queryResponse, SolrQueryParams solrQueryParams,
boolean debugQuery, int page, int pageSize);
/**
 * Post-mapping hook: when the mapped header carries a next cursor, the page number
 * is cleared (set to {@code null}); otherwise the header is left untouched.
 *
 * @param searchHeader the freshly mapped header, modified in place
 */
@AfterMapping
default void removePage(@MappingTarget SearchHeader searchHeader) {
    final boolean hasNextCursor = searchHeader.getNextCursor() != null;
    if (!hasNextCursor) {
        return;
    }
    searchHeader.setPage(null);
}
default SearchHeaderDebug mapDebug(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery) {
if (!debugQuery) {
return null;

View File

@ -21,7 +21,7 @@ public interface DatasourceMapper {
@Mapping(target = "openaireCompatibility", source = "payload.datasource.openairecompatibility.label")
@Mapping(target = "originalId", source = "payload.header.originalId")
@Mapping(target = "pid", expression = "java( mapPids(payload.getPid()) )")
@Mapping(target = "datasourceType", expression = "java( mapDatasourceType(payload.getDatasource().getDatasourcetype()))")
//@Mapping(target = "datasourceType", expression = "java( mapDatasourceType(payload.getDatasource().getDatasourcetype()))")
@Mapping(target = "officialName", source = "payload.datasource.officialname")
@Mapping(target = "englishName", source = "payload.datasource.englishname")
@Mapping(target = "websiteUrl", source = "payload.datasource.websiteurl")

View File

@ -29,8 +29,8 @@ public interface ResearchProductMapper {
@Mapping(target = "instance", expression = "java(mapInstances(payload.getResult().getInstance()))")
@Mapping(target = "author", expression = "java(mapAuthors(payload.getResult().getAuthor()))")
@Mapping(target = "openAccessColor", expression = "java(mapOpenAccessColor(payload.getResult().getOpenAccessColor()))")
@Mapping(target = "green", source = "payload.result.green")
@Mapping(target = "inDiamondJournal", source = "payload.result.inDiamondJournal")
//@Mapping(target = "green", source = "payload.result.green")
//@Mapping(target = "inDiamondJournal", source = "payload.result.inDiamondJournal")
@Mapping(target = "publiclyFunded", source = "payload.result.publiclyFunded")
@Mapping(target = "type", source = "payload.result.resulttype")
@Mapping(target = "country", expression = "java(mapCountries(payload.getResult().getCountry()))")

View File

@ -10,6 +10,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CursorMarkParams;
import org.springframework.stereotype.Repository;
import java.io.IOException;
@ -19,31 +20,33 @@ import java.io.IOException;
public class SolrRepository {
private final SolrConnectionManager solrConnectionManager;
private final Logger log = LogManager.getLogger(this.getClass());
private static final String UNIQUE_KEY = "__indexrecordidentifier";
/**
 * Looks up a single document by its identifier through the client supplied by
 * the connection manager.
 *
 * @param id identifier of the document to fetch
 * @return whatever the underlying client's {@code getById} call returns for that identifier
 * @throws SolrServerException propagated from the client call
 * @throws IOException propagated from the client call
 */
public SolrDocument getById(String id) throws SolrServerException, IOException {
    var client = solrConnectionManager.getSolrClient();
    return client.getById(id);
}
public QueryResponse query(SolrQueryParams queryParams) throws SolrServerException, IOException {
SolrQuery query = new SolrQuery();
query.setQuery(queryParams.getQueryString()); // add Q
// add Q
query.setQuery(queryParams.getQueryString());
// add FQ
for (String fq : queryParams.getFilterQueries()) {
for (String fq : queryParams.getFilterQueries()) { // add FQ
query.addFilterQuery(fq);
}
// add FL
query.addField(queryParams.getFieldList());
query.addField(queryParams.getFieldList()); // add FL
// set pagination parameters
query.setStart(queryParams.getStart());
// set pagination
query.setRows(queryParams.getRows());
String cursor = queryParams.getCursor();
if (cursor != null && !cursor.isEmpty()) { // set cursor-based pagination
query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursor);
query.addSort(UNIQUE_KEY, SolrQuery.ORDER.asc);
} else { // set basic page/page-size pagination
query.setStart(queryParams.getStart());
}
// set sorting
for (var sortClause : queryParams.getSort()) {
@ -55,10 +58,16 @@ public class SolrRepository {
query.set("debugQuery", "on");
}
try {
log.info(query);
return solrConnectionManager.getSolrClient().query(query);
} catch (SolrServerException e) {
log.error(e.getMessage());
throw new SolrServerException(e);
} catch (IOException e) {
log.error(e.getMessage());
throw new IOException(e);
}
}
public SolrPingResponse ping() throws SolrServerException, IOException {

View File

@ -2,24 +2,16 @@ package eu.openaire.api.solr;
import lombok.Data;
import org.apache.solr.client.solrj.SolrQuery;
import java.util.List;
@Data
public class SolrQueryParams {
String queryString = "*:*";
List<String> filterQueries;
String fieldList = "__json";
Boolean debugQuery = false;
int start;
int rows;
List<SolrQuery.SortClause> sort;
private String queryString = "*:*";
private List<String> filterQueries;
private String fieldList = "__json";
private Boolean debugQuery = false;
private int start;
private int rows;
private List<SolrQuery.SortClause> sort;
private String cursor;
}

View File

@ -36,8 +36,8 @@ public class DatasourceMapperTest {
Assertions.assertEquals("doajarticles::614fdb5f82725ed3f8834ae90b9a0212", graphDatasource.getId());
Assertions.assertEquals(2, graphDatasource.getOriginalId().size());
Assertions.assertTrue(graphDatasource.getOriginalId().containsAll(Arrays.asList("doajarticles::2196-8403", "issn___print::2196-8403")));
Assertions.assertEquals("Journal", graphDatasource.getDatasourceType().getValue());
Assertions.assertEquals("pubsrepository::journal", graphDatasource.getDatasourceType().getScheme());
//Assertions.assertEquals("Journal", graphDatasource.getDatasourceType().getValue());
//Assertions.assertEquals("pubsrepository::journal", graphDatasource.getDatasourceType().getScheme());
Assertions.assertEquals("collected from a compatible aggregator", graphDatasource.getOpenaireCompatibility());
Assertions.assertEquals("Convivium", graphDatasource.getOfficialName());
Assertions.assertEquals("Convivium", graphDatasource.getEnglishName());

View File

@ -75,8 +75,8 @@ public class ResultProductMapperTest {
Assertions.assertEquals(Constants.COAR_ACCESS_RIGHT_SCHEMA, graphResult.getBestAccessRight().getScheme());
Assertions.assertFalse(graphResult.getPubliclyFunded());
Assertions.assertFalse(graphResult.getGreen());
Assertions.assertFalse(graphResult.getInDiamondJournal());
//Assertions.assertFalse(graphResult.getGreen());
//Assertions.assertFalse(graphResult.getInDiamondJournal());
Assertions.assertEquals(1, graphResult.getInstance().size());
Instance instance = graphResult.getInstance().get(0);