223 lines
8.2 KiB
Java
223 lines
8.2 KiB
Java
package eu.dnetlib.scholix.api.index;
|
|
|
|
|
|
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
|
import eu.dnetlib.scholix.api.ScholixAPIVersion;
|
|
import eu.dnetlib.scholix.api.ScholixException;
|
|
import eu.dnetlib.scholix.api.TaggedCounter;
|
|
|
|
import io.micrometer.core.annotation.Timed;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
import org.apache.lucene.search.join.ScoreMode;
|
|
import org.elasticsearch.index.query.BoolQueryBuilder;
|
|
import org.elasticsearch.index.query.NestedQueryBuilder;
|
|
import org.elasticsearch.index.query.QueryBuilder;
|
|
import org.elasticsearch.index.query.TermQueryBuilder;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.data.domain.PageRequest;
|
|
import org.springframework.data.elasticsearch.core.ElasticsearchOperations;
|
|
import org.springframework.data.elasticsearch.core.SearchHit;
|
|
import org.springframework.data.elasticsearch.core.SearchHits;
|
|
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
|
|
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
|
|
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.stream.Collectors;
|
|
|
|
/**
|
|
* The type Scholix index manager.
|
|
*/
|
|
@Component
|
|
public class ScholixIndexManager {
|
|
|
|
/**
|
|
* The Elastic search properties.
|
|
*/
|
|
@Autowired
|
|
ElasticSearchProperties elasticSearchProperties;
|
|
|
|
/**
|
|
* The Elasticsearch template.
|
|
*/
|
|
@Autowired
|
|
ElasticsearchOperations elasticsearchTemplate;
|
|
|
|
/**
|
|
* The My counter.
|
|
*/
|
|
@Autowired
|
|
TaggedCounter myCounter;
|
|
|
|
|
|
/**
|
|
* The enum Pid type prefix.
|
|
*/
|
|
enum pidTypePrefix {
|
|
/**
|
|
* Source pid type prefix.
|
|
*/
|
|
source,
|
|
/**
|
|
* Target pid type prefix.
|
|
*/
|
|
target
|
|
}
|
|
|
|
|
|
private QueryBuilder createObjectTypeQuery(final pidTypePrefix prefix, final String objectType ) throws ScholixException{
|
|
if (prefix == null){
|
|
throw new ScholixException("prefix cannot be null");
|
|
}
|
|
return new NestedQueryBuilder(String.format("%s", prefix), new TermQueryBuilder(String.format("%s.objectType",prefix), objectType), ScoreMode.None);
|
|
}
|
|
|
|
|
|
private QueryBuilder createPidTypeQuery(final pidTypePrefix prefix, final String pidTypeValue ) throws ScholixException{
|
|
if (prefix == null){
|
|
throw new ScholixException("prefix cannot be null");
|
|
}
|
|
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.schema",prefix), pidTypeValue), ScoreMode.None);
|
|
}
|
|
|
|
|
|
private QueryBuilder createPidValueQuery(final pidTypePrefix prefix, final String pidValue ) throws ScholixException{
|
|
if (prefix == null){
|
|
throw new ScholixException("prefix cannot be null");
|
|
}
|
|
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.identifier",prefix), pidValue), ScoreMode.None);
|
|
}
|
|
|
|
|
|
private QueryBuilder createFinalQuery(final List<QueryBuilder> queries) throws ScholixException{
|
|
|
|
if (queries == null || queries.isEmpty())
|
|
throw new ScholixException("the list of queries must be not empty");
|
|
|
|
|
|
if (queries.size() ==1) {
|
|
return queries.get(0);
|
|
}
|
|
|
|
else {
|
|
final BoolQueryBuilder b = new BoolQueryBuilder();
|
|
b.must().addAll(queries);
|
|
|
|
return b;
|
|
}
|
|
|
|
}
|
|
|
|
private void incrementPidCounter(pidTypePrefix prefix, String value) {
|
|
|
|
|
|
switch (value.toLowerCase()){
|
|
case "doi": {
|
|
myCounter.increment(String.format("%s_doi", prefix));
|
|
break;
|
|
}
|
|
case "pmc": {
|
|
myCounter.increment(String.format("%s_pmc", prefix));
|
|
break;
|
|
}
|
|
default:
|
|
myCounter.increment(String.format("%s_other", prefix));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Links from pid pair.
|
|
*
|
|
* @param outputVersion the output version
|
|
* @param linkProvider the link provider
|
|
* @param targetPid the target pid
|
|
* @param targetPidType the target pid type
|
|
* @param targetPublisher the target publisher
|
|
* @param targetType the target type
|
|
* @param sourcePid the source pid
|
|
* @param sourcePidType the source pid type
|
|
* @param sourcePublisher the source publisher
|
|
* @param sourceType the source type
|
|
* @param harvestedAfter the harvested after
|
|
* @param page the page
|
|
* @return the pair
|
|
* @throws ScholixException the scholix exception
|
|
*/
|
|
@Timed(value = "scholix.index.request.links", description = "Time taken to request index")
|
|
public Pair<Long,List<Scholix>> linksFromPid (final ScholixAPIVersion outputVersion, final String linkProvider,
|
|
final String targetPid, final String targetPidType, final String targetPublisher,
|
|
final String targetType, final String sourcePid, final String sourcePidType,
|
|
final String sourcePublisher, final String sourceType, final String harvestedAfter,
|
|
final Integer page) throws ScholixException {
|
|
|
|
if(outputVersion == null)
|
|
throw new ScholixException("Error outputVersion not be empty");
|
|
|
|
if (sourcePid==null && sourcePidType==null && targetPid==null && targetPidType==null && sourcePublisher==null && targetPublisher==null && linkProvider==null)
|
|
throw new ScholixException("One of sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider should be not null");
|
|
|
|
final List<QueryBuilder> queries = new ArrayList<>();
|
|
|
|
if (StringUtils.isNoneBlank(targetPid)) {
|
|
myCounter.increment("targetPid");
|
|
queries.add(createPidValueQuery(pidTypePrefix.target, targetPid));
|
|
}
|
|
if (StringUtils.isNoneBlank(sourcePid)) {
|
|
myCounter.increment("sourcePid");
|
|
queries.add(createPidValueQuery(pidTypePrefix.source, sourcePid));
|
|
}
|
|
|
|
if (StringUtils.isNoneBlank(targetPidType)) {
|
|
assert targetPidType != null;
|
|
incrementPidCounter(pidTypePrefix.target,targetPidType);
|
|
queries.add(createPidTypeQuery(pidTypePrefix.target, targetPidType));
|
|
}
|
|
if (StringUtils.isNoneBlank(sourcePidType)) {
|
|
assert sourcePidType != null;
|
|
incrementPidCounter(pidTypePrefix.source,sourcePidType);
|
|
queries.add(createPidTypeQuery(pidTypePrefix.source, sourcePidType));
|
|
}
|
|
|
|
if (StringUtils.isNoneBlank(targetType)) {
|
|
if ("dataset".equalsIgnoreCase(targetType) || "publication".equalsIgnoreCase(targetType))
|
|
myCounter.increment(String.format("targetType_%s", targetType));
|
|
queries.add(createObjectTypeQuery(pidTypePrefix.target, targetType));
|
|
}
|
|
|
|
if (StringUtils.isNoneBlank(sourceType)) {
|
|
if ("dataset".equalsIgnoreCase(sourceType) || "publication".equalsIgnoreCase(sourceType)) {
|
|
myCounter.increment(String.format("sourceType_%s", sourceType));
|
|
}
|
|
queries.add(createObjectTypeQuery(pidTypePrefix.source, sourceType));
|
|
}
|
|
|
|
QueryBuilder result = createFinalQuery(queries);
|
|
|
|
NativeSearchQuery finalQuery = new NativeSearchQueryBuilder()
|
|
.withQuery(result)
|
|
.withPageable(PageRequest.of(page,10))
|
|
.build();
|
|
|
|
long tt = elasticsearchTemplate.count(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
|
System.out.println(tt);
|
|
|
|
SearchHits<Scholix> scholixRes = elasticsearchTemplate.search(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
|
|
|
System.out.println("SIZE OF HITS ->"+scholixRes.getSearchHits().size());
|
|
|
|
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).collect(Collectors.toList()));
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|