9719: cursor_based implementation
This commit is contained in:
parent
8df8091a3d
commit
7fc64caa44
|
@ -10,6 +10,10 @@ import lombok.Data;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -92,7 +96,7 @@ public class DataSourceRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
private int page = 1;
|
||||
|
@ -100,11 +104,17 @@ public class DataSourceRequest implements PaginatedRequest {
|
|||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string")
|
||||
)
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -10,6 +10,10 @@ import lombok.Data;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -71,19 +75,22 @@ public class OrganizationRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer"))
|
||||
private int page = 1;
|
||||
|
||||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer"))
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string"))
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, organizations can be only sorted by the 'relevance'." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
package eu.openaire.api.dto.request;
|
||||
|
||||
/**
 * Contract shared by request DTOs that expose pagination parameters:
 * page number, page size and a cursor value.
 */
public interface PaginatedRequest {

    /**
     * @return the requested page number of the results
     */
    int getPage();

    /**
     * @return the requested number of results per page
     */
    int getPageSize();

    /**
     * @return the cursor value supplied with the request for cursor-based pagination
     */
    String getCursor();
}
|
||||
|
|
|
@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
|
|||
|
||||
import java.time.LocalDate;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -138,19 +142,22 @@ public class ProjectRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer"))
|
||||
private int page = 1;
|
||||
|
||||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer"))
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string"))
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'startDate', 'endDate'. Multiple sorting parameters should be comma-separated." ,
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -13,6 +13,10 @@ import org.springframework.format.annotation.DateTimeFormat;
|
|||
|
||||
import java.time.LocalDate;
|
||||
|
||||
import static eu.openaire.api.mappers.Utils.API_CURSOR_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_DESC;
|
||||
import static eu.openaire.api.mappers.Utils.API_PAGE_SIZE_DESC;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Data
|
||||
|
@ -294,7 +298,7 @@ public class ResearchProductsRequest implements PaginatedRequest {
|
|||
|
||||
@Min(value = 1)
|
||||
@Parameter(
|
||||
description = "Page number of the results",
|
||||
description = API_PAGE_DESC,
|
||||
schema = @Schema(defaultValue = "1", type = "integer")
|
||||
)
|
||||
private int page = 1;
|
||||
|
@ -302,11 +306,17 @@ public class ResearchProductsRequest implements PaginatedRequest {
|
|||
@Min(value = 1, message = "Page size must be at least 1")
|
||||
@Max(value = 100, message = "Page size must be at most 100")
|
||||
@Parameter(
|
||||
description = "Number of results per page",
|
||||
description = API_PAGE_SIZE_DESC,
|
||||
schema = @Schema(defaultValue = "10", type = "integer")
|
||||
)
|
||||
private int pageSize = 10;
|
||||
|
||||
@Parameter(
|
||||
description = API_CURSOR_DESC,
|
||||
schema = @Schema(type = "string")
|
||||
)
|
||||
private String cursor;
|
||||
|
||||
@Parameter(
|
||||
description = "The field to sort the results by and the sort direction. The format should be in the format `fieldname ASC|DESC`, where fieldname is one of 'relevance', 'publicationDate', 'dateOfCollection', 'influence', 'popularity', 'citationCount', 'impulse'. Multiple sorting parameters should be comma-separated.",
|
||||
schema = @Schema(defaultValue = "relevance DESC")
|
||||
|
|
|
@ -11,7 +11,7 @@ public class PaginationValidator implements Validator {
|
|||
|
||||
private final HttpServletRequest request;
|
||||
|
||||
private final int MAX_RESULTS = 10000;
|
||||
private static final int MAX_RESULTS = 10000;
|
||||
|
||||
public PaginationValidator(HttpServletRequest request) {
|
||||
this.request = request;
|
||||
|
|
|
@ -6,17 +6,11 @@ import lombok.Data;
|
|||
@Data
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class SearchHeader {
|
||||
|
||||
private SearchHeaderDebug debug;
|
||||
|
||||
private long numFound;
|
||||
|
||||
private float maxScore;
|
||||
|
||||
private int queryTime;
|
||||
|
||||
private int page;
|
||||
|
||||
private int pageSize;
|
||||
|
||||
private Long numFound;
|
||||
private Float maxScore;
|
||||
private Integer queryTime;
|
||||
private Integer page;
|
||||
private Integer pageSize;
|
||||
private String nextCursor;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,8 @@ import org.springframework.web.context.request.WebRequest;
|
|||
import org.springframework.web.servlet.resource.NoResourceFoundException;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@RestControllerAdvice
|
||||
|
@ -21,6 +23,9 @@ public class ServiceExceptionHandler {
|
|||
|
||||
private final Logger log = LogManager.getLogger(this.getClass());
|
||||
|
||||
private static final String URL_REGEX = "https?://\\S*";
|
||||
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
|
||||
|
||||
@ExceptionHandler(NotFoundException.class)
|
||||
public ResponseEntity<ErrorResponse> handleNotFoundException(NotFoundException e, WebRequest request) {
|
||||
return this.handleException(e.getMessage(), request, HttpStatus.NOT_FOUND);
|
||||
|
@ -50,8 +55,9 @@ public class ServiceExceptionHandler {
|
|||
|
||||
@ExceptionHandler(Exception.class)
|
||||
public ResponseEntity<ErrorResponse> handleAllOtherExceptions(Exception e, WebRequest request) {
|
||||
//todo: log4j2.xml - add error appender
|
||||
e.printStackTrace();
|
||||
return this.handleException("An internal server error occurred", request, HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
return this.handleException(e.getMessage(), request, HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
private ResponseEntity<ErrorResponse> handleException(String message, WebRequest request, HttpStatus httpStatus) {
|
||||
|
@ -59,7 +65,7 @@ public class ServiceExceptionHandler {
|
|||
String path = String.format("%s?%s", req.getRequestURI(), req.getQueryString());
|
||||
|
||||
ErrorResponse response = ErrorResponse.builder()
|
||||
.message(message)
|
||||
.message(obfuscateUrlsInMessage(message))
|
||||
.error(httpStatus.getReasonPhrase())
|
||||
.code(httpStatus.value())
|
||||
.timestamp(new Date())
|
||||
|
@ -70,4 +76,9 @@ public class ServiceExceptionHandler {
|
|||
.status(httpStatus)
|
||||
.body(response);
|
||||
}
|
||||
|
||||
private static String obfuscateUrlsInMessage(String message) {
|
||||
Matcher matcher = URL_PATTERN.matcher(message);
|
||||
return matcher.replaceAll("[https://***].");
|
||||
}
|
||||
}
|
|
@ -13,6 +13,22 @@ import java.util.*;
|
|||
|
||||
public class Utils {
|
||||
|
||||
private Utils() {}
|
||||
|
||||
public static final String API_PAGE_DESC = """
|
||||
Page number of the results,\s
|
||||
used for basic start/rows pagination.\s
|
||||
Max dataset to retrieve - 10000 records.\s
|
||||
To get more than that, use cursor-based pagination""";
|
||||
|
||||
public static final String API_PAGE_SIZE_DESC = "Number of results per page";
|
||||
|
||||
/* TODO: consider mentioning that, when a big dataset is required, the compressed data file should be downloaded directly;
|
||||
this avoids high load on this microservice. */
|
||||
public static final String API_CURSOR_DESC = """
|
||||
Cursor-based pagination.\s
|
||||
Cursor should be used when it is required to retrieve a big dataset (more than 10000 records)""";
|
||||
|
||||
static public String escapeAndJoin(String[] tokens, String predicate, boolean addQuotes, String suffix) {
|
||||
|
||||
tokens = Arrays.stream(tokens)
|
||||
|
|
|
@ -14,6 +14,7 @@ public interface DataSourceRequestMapper {
|
|||
@Mapping(target = "start", expression = "java( calculateStart(src.getPage(), src.getPageSize()) )")
|
||||
@Mapping(target = "rows", source = "pageSize")
|
||||
@Mapping(target = "debugQuery", source = "debugQuery")
|
||||
@Mapping(target = "cursor", source = "cursor")
|
||||
@Mapping(target = "sort", expression = "java( eu.openaire.api.mappers.Utils.formatSortByParam(src.getSortBy(), SolrFieldsMapper.dataSourceSortMapping) )")
|
||||
SolrQueryParams toSolrQuery(DataSourceRequest src);
|
||||
|
||||
|
|
|
@ -4,23 +4,33 @@ import eu.openaire.api.dto.response.SearchHeader;
|
|||
import eu.openaire.api.dto.response.SearchHeaderDebug;
|
||||
import eu.openaire.api.solr.SolrQueryParams;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.mapstruct.AfterMapping;
|
||||
import org.mapstruct.Mapper;
|
||||
import org.mapstruct.Mapping;
|
||||
import org.mapstruct.MappingTarget;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
@Mapper(componentModel = "spring")
|
||||
public interface ResponseHeaderMapper {
|
||||
|
||||
@Mapping(target = "numFound", source = "queryResponse.results.numFound")
|
||||
@Mapping(target = "numFound", expression = "java( Long.valueOf(queryResponse.getResults().getNumFound()) )")
|
||||
@Mapping(target = "maxScore", source = "queryResponse.results.maxScore")
|
||||
@Mapping(target = "page", source = "page")
|
||||
@Mapping(target = "pageSize", source = "pageSize")
|
||||
@Mapping(target = "nextCursor", source = "queryResponse.nextCursorMark")
|
||||
@Mapping(target = "queryTime", expression = "java( (int) queryResponse.getHeader().get(\"QTime\") )")
|
||||
@Mapping(target = "debug", expression = "java( mapDebug(queryResponse, solrQueryParams, debugQuery) )")
|
||||
SearchHeader toSearchHeader(QueryResponse queryResponse, SolrQueryParams solrQueryParams,
|
||||
boolean debugQuery, int page, int pageSize);
|
||||
|
||||
@AfterMapping
|
||||
default void removePage(@MappingTarget SearchHeader searchHeader) {
|
||||
if (searchHeader.getNextCursor() != null) {
|
||||
searchHeader.setPage(null);
|
||||
}
|
||||
}
|
||||
|
||||
default SearchHeaderDebug mapDebug(QueryResponse queryResponse, SolrQueryParams solrQueryParams, boolean debugQuery) {
|
||||
if (!debugQuery) {
|
||||
return null;
|
||||
|
|
|
@ -21,7 +21,7 @@ public interface DatasourceMapper {
|
|||
@Mapping(target = "openaireCompatibility", source = "payload.datasource.openairecompatibility.label")
|
||||
@Mapping(target = "originalId", source = "payload.header.originalId")
|
||||
@Mapping(target = "pid", expression = "java( mapPids(payload.getPid()) )")
|
||||
@Mapping(target = "datasourceType", expression = "java( mapDatasourceType(payload.getDatasource().getDatasourcetype()))")
|
||||
//@Mapping(target = "datasourceType", expression = "java( mapDatasourceType(payload.getDatasource().getDatasourcetype()))")
|
||||
@Mapping(target = "officialName", source = "payload.datasource.officialname")
|
||||
@Mapping(target = "englishName", source = "payload.datasource.englishname")
|
||||
@Mapping(target = "websiteUrl", source = "payload.datasource.websiteurl")
|
||||
|
|
|
@ -29,8 +29,8 @@ public interface ResearchProductMapper {
|
|||
@Mapping(target = "instance", expression = "java(mapInstances(payload.getResult().getInstance()))")
|
||||
@Mapping(target = "author", expression = "java(mapAuthors(payload.getResult().getAuthor()))")
|
||||
@Mapping(target = "openAccessColor", expression = "java(mapOpenAccessColor(payload.getResult().getOpenAccessColor()))")
|
||||
@Mapping(target = "green", source = "payload.result.green")
|
||||
@Mapping(target = "inDiamondJournal", source = "payload.result.inDiamondJournal")
|
||||
//@Mapping(target = "green", source = "payload.result.green")
|
||||
//@Mapping(target = "inDiamondJournal", source = "payload.result.inDiamondJournal")
|
||||
@Mapping(target = "publiclyFunded", source = "payload.result.publiclyFunded")
|
||||
@Mapping(target = "type", source = "payload.result.resulttype")
|
||||
@Mapping(target = "country", expression = "java(mapCountries(payload.getResult().getCountry()))")
|
||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.solr.client.solrj.SolrServerException;
|
|||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.params.CursorMarkParams;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -19,31 +20,33 @@ import java.io.IOException;
|
|||
public class SolrRepository {
|
||||
|
||||
private final SolrConnectionManager solrConnectionManager;
|
||||
|
||||
private final Logger log = LogManager.getLogger(this.getClass());
|
||||
private static final String UNIQUE_KEY = "__indexrecordidentifier";
|
||||
|
||||
public SolrDocument getById(String id) throws SolrServerException, IOException {
|
||||
return solrConnectionManager.getSolrClient().getById(id);
|
||||
}
|
||||
|
||||
public QueryResponse query(SolrQueryParams queryParams) throws SolrServerException, IOException {
|
||||
|
||||
SolrQuery query = new SolrQuery();
|
||||
query.setQuery(queryParams.getQueryString()); // add Q
|
||||
|
||||
// add Q
|
||||
query.setQuery(queryParams.getQueryString());
|
||||
|
||||
// add FQ
|
||||
for (String fq : queryParams.getFilterQueries()) {
|
||||
for (String fq : queryParams.getFilterQueries()) { // add FQ
|
||||
query.addFilterQuery(fq);
|
||||
}
|
||||
|
||||
// add FL
|
||||
query.addField(queryParams.getFieldList());
|
||||
query.addField(queryParams.getFieldList()); // add FL
|
||||
|
||||
// set pagination parameters
|
||||
query.setStart(queryParams.getStart());
|
||||
// set pagination
|
||||
query.setRows(queryParams.getRows());
|
||||
String cursor = queryParams.getCursor();
|
||||
|
||||
if (cursor != null && !cursor.isEmpty()) { // set cursor-based pagination
|
||||
query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursor);
|
||||
query.addSort(UNIQUE_KEY, SolrQuery.ORDER.asc);
|
||||
} else { // set basic page/page-size pagination
|
||||
query.setStart(queryParams.getStart());
|
||||
}
|
||||
|
||||
// set sorting
|
||||
for (var sortClause : queryParams.getSort()) {
|
||||
|
@ -55,10 +58,16 @@ public class SolrRepository {
|
|||
query.set("debugQuery", "on");
|
||||
}
|
||||
|
||||
try {
|
||||
log.info(query);
|
||||
|
||||
return solrConnectionManager.getSolrClient().query(query);
|
||||
|
||||
} catch (SolrServerException e) {
|
||||
log.error(e.getMessage());
|
||||
throw new SolrServerException(e);
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SolrPingResponse ping() throws SolrServerException, IOException {
|
||||
|
|
|
@ -2,24 +2,16 @@ package eu.openaire.api.solr;
|
|||
|
||||
import lombok.Data;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class SolrQueryParams {
|
||||
|
||||
String queryString = "*:*";
|
||||
|
||||
List<String> filterQueries;
|
||||
|
||||
String fieldList = "__json";
|
||||
|
||||
Boolean debugQuery = false;
|
||||
|
||||
int start;
|
||||
|
||||
int rows;
|
||||
|
||||
List<SolrQuery.SortClause> sort;
|
||||
|
||||
private String queryString = "*:*";
|
||||
private List<String> filterQueries;
|
||||
private String fieldList = "__json";
|
||||
private Boolean debugQuery = false;
|
||||
private int start;
|
||||
private int rows;
|
||||
private List<SolrQuery.SortClause> sort;
|
||||
private String cursor;
|
||||
}
|
||||
|
|
|
@ -36,8 +36,8 @@ public class DatasourceMapperTest {
|
|||
Assertions.assertEquals("doajarticles::614fdb5f82725ed3f8834ae90b9a0212", graphDatasource.getId());
|
||||
Assertions.assertEquals(2, graphDatasource.getOriginalId().size());
|
||||
Assertions.assertTrue(graphDatasource.getOriginalId().containsAll(Arrays.asList("doajarticles::2196-8403", "issn___print::2196-8403")));
|
||||
Assertions.assertEquals("Journal", graphDatasource.getDatasourceType().getValue());
|
||||
Assertions.assertEquals("pubsrepository::journal", graphDatasource.getDatasourceType().getScheme());
|
||||
//Assertions.assertEquals("Journal", graphDatasource.getDatasourceType().getValue());
|
||||
//Assertions.assertEquals("pubsrepository::journal", graphDatasource.getDatasourceType().getScheme());
|
||||
Assertions.assertEquals("collected from a compatible aggregator", graphDatasource.getOpenaireCompatibility());
|
||||
Assertions.assertEquals("Convivium", graphDatasource.getOfficialName());
|
||||
Assertions.assertEquals("Convivium", graphDatasource.getEnglishName());
|
||||
|
|
|
@ -75,8 +75,8 @@ public class ResultProductMapperTest {
|
|||
Assertions.assertEquals(Constants.COAR_ACCESS_RIGHT_SCHEMA, graphResult.getBestAccessRight().getScheme());
|
||||
|
||||
Assertions.assertFalse(graphResult.getPubliclyFunded());
|
||||
Assertions.assertFalse(graphResult.getGreen());
|
||||
Assertions.assertFalse(graphResult.getInDiamondJournal());
|
||||
//Assertions.assertFalse(graphResult.getGreen());
|
||||
//Assertions.assertFalse(graphResult.getInDiamondJournal());
|
||||
Assertions.assertEquals(1, graphResult.getInstance().size());
|
||||
|
||||
Instance instance = graphResult.getInstance().get(0);
|
||||
|
|
Loading…
Reference in New Issue