adapted API to index Flat

This commit is contained in:
Sandro La Bruzzo 2023-04-13 20:36:21 +02:00
parent dfca500b31
commit b07be69f90
5 changed files with 63 additions and 39 deletions

View File

@ -57,10 +57,10 @@ public class ScholixControllerV2 extends AbstractDnetController {
// description = "Filter scholix Links having collected after this date") String harvestedAfter,
@Parameter(in = ParameterIn.QUERY, description = "select page of result") final Integer page) throws Exception {
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)&&StringUtils.isEmpty(sourceType)
&& StringUtils.isEmpty(linkProvider)) {
throw new ScholixException(
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider");
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider, sourceType");
}
try {

View File

@ -2,6 +2,7 @@ package eu.dnetlib.scholix.api.index;
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
import eu.dnetlib.scholix.api.ScholixException;
import eu.dnetlib.scholix.api.TaggedCounter;
import io.micrometer.core.annotation.Timed;
@ -17,6 +18,7 @@ import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.nested.ParsedNested;
import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
@ -147,14 +149,15 @@ public class ScholixIndexManager {
public List<Pair<String, Long>> totalLinksByProvider(final String filterName) throws ScholixException {
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?createLinkProviderQuery(filterName):QueryBuilders.matchAllQuery();
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?QueryBuilders.termQuery("linkProviders", filterName):QueryBuilders.matchAllQuery();
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
.withQuery(query)
.withSearchType(SearchType.DEFAULT)
.withPageable(PageRequest.of(0,10))
.addAggregation(AggregationBuilders.nested("nested", "linkprovider")
.subAggregation(AggregationBuilders.terms("by_map").field("linkprovider.name").size(100).minDocCount(1)))
.addAggregation(
AggregationBuilders.terms("genres").field("linkProviders").size(100)
.minDocCount(1))
.build();
@ -168,13 +171,10 @@ public class ScholixIndexManager {
if(aggregations == null)
return null;
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
return ((ParsedStringTerms) aggregations.get("genres")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
return ((ParsedStringTerms) aggByMap).getBuckets()
.stream()
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
.collect(Collectors.toList());
}
@ -187,8 +187,9 @@ public class ScholixIndexManager {
.withQuery(query)
.withSearchType(SearchType.DEFAULT)
.withPageable(PageRequest.of(0,10))
.addAggregation(AggregationBuilders.nested("nested", String.format("%s.publisher", prefix ))
.subAggregation(AggregationBuilders.terms("by_map").field(String.format("%s.publisher.name", prefix )).size(100).minDocCount(1)))
.addAggregation(
AggregationBuilders.terms("publishers").field(String.format("%sPublisher", prefix.toString())).size(100)
.minDocCount(1))
.build();
@ -202,13 +203,7 @@ public class ScholixIndexManager {
if(aggregations == null)
return null;
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
return ((ParsedStringTerms) aggByMap).getBuckets()
.stream()
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
.collect(Collectors.toList());
return ((ParsedStringTerms) aggregations.get("publishers")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
}
@ -249,66 +244,63 @@ public class ScholixIndexManager {
if (StringUtils.isNoneBlank(linkProvider)) {
myCounter.increment("linkProvider");
queries.add(createLinkProviderQuery(linkProvider));
queries.add(QueryBuilders.termQuery("linkProviders", linkProvider));
}
if (StringUtils.isNoneBlank(targetPid)) {
myCounter.increment("targetPid");
queries.add(createPidValueQuery(RelationPrefix.target, targetPid));
queries.add(QueryBuilders.termQuery("targetPid", targetPid));
}
if (StringUtils.isNoneBlank(sourcePid)) {
myCounter.increment("sourcePid");
queries.add(createPidValueQuery(RelationPrefix.source, sourcePid));
queries.add(QueryBuilders.termQuery("sourcePid", sourcePid));
}
if (StringUtils.isNoneBlank(targetPidType)) {
assert targetPidType != null;
incrementPidCounter(RelationPrefix.target,targetPidType);
queries.add(createPidTypeQuery(RelationPrefix.target, targetPidType));
queries.add(QueryBuilders.termQuery("targetPidType", targetPidType));
}
if (StringUtils.isNoneBlank(sourcePidType)) {
assert sourcePidType != null;
incrementPidCounter(RelationPrefix.source,sourcePidType);
queries.add(createPidTypeQuery(RelationPrefix.source, sourcePidType));
queries.add(QueryBuilders.termQuery("sourcePidType", sourcePidType));
}
if (StringUtils.isNoneBlank(targetType)) {
if ("dataset".equalsIgnoreCase(targetType) || "publication".equalsIgnoreCase(targetType))
myCounter.increment(String.format("targetType_%s", targetType));
queries.add(createObjectTypeQuery(RelationPrefix.target, targetType));
myCounter.increment(String.format("targetType_%s", targetType));
queries.add(QueryBuilders.termQuery("targetType", targetType));
}
if (StringUtils.isNoneBlank(sourceType)) {
if ("dataset".equalsIgnoreCase(sourceType) || "publication".equalsIgnoreCase(sourceType)) {
myCounter.increment(String.format("sourceType_%s", sourceType));
}
queries.add(createObjectTypeQuery(RelationPrefix.source, sourceType));
myCounter.increment(String.format("sourceType_%s", sourceType));
queries.add(QueryBuilders.termQuery("sourceType", sourceType));
}
if (StringUtils.isNoneBlank(targetPublisher)) {
myCounter.increment("targetPublisher");
queries.add(createLinkPublisherQuery(RelationPrefix.target,targetPublisher));
queries.add(QueryBuilders.termQuery("targetPublisher", targetPublisher));
}
QueryBuilder result = createFinalQuery(queries);
NativeSearchQuery finalQuery = new NativeSearchQueryBuilder()
.withQuery(result)
.withPageable(PageRequest.of(page,10))
.withPageable(PageRequest.of(page,100))
.build();
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
long tt = client.count(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
SearchHits<Scholix> scholixRes = client.search(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
long tt = client.count(finalQuery, ScholixFlat .class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
SearchHits<ScholixFlat> scholixRes = client.search(finalQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
connectionPool.returnResource(resource);
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).collect(Collectors.toList()));
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).map(ScholixUtils::getScholixFromBlob).collect(Collectors.toList()));
}
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.scholix.api.index;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.io.IOUtils;
import java.io.ByteArrayInputStream;
import java.util.zip.GZIPInputStream;
public class ScholixUtils {
private static ObjectMapper MAPPER = new ObjectMapper();
private static String uncompress(final String compressed) throws Exception {
Base64InputStream bis = new Base64InputStream(new ByteArrayInputStream(compressed.getBytes()));
GZIPInputStream gzip = new GZIPInputStream(bis);
return IOUtils.toString(gzip);
}
public static Scholix getScholixFromBlob(final ScholixFlat flat) {
try {
return MAPPER.readValue(uncompress(flat.getBlob()), Scholix.class);
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
}

View File

@ -27,7 +27,7 @@ management.metrics.distribution.percentiles.http.server.requests=0.5, 0.9, 0.95,
#scholix.elastic.clusterNodes = 10.19.65.51:9200,10.19.65.52:9200,10.19.65.53:9200,10.19.65.54:9200
scholix.elastic.clusterNodes = localhost:9200
scholix.elastic.indexName = dli_shadow_scholix
scholix.elastic.indexName = scholix
scholix.elastic.socketTimeout = 60000
scholix.elastic.connectionTimeout= 60000

View File

@ -453,7 +453,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<java.version>1.8</java.version>
<dhp-schemas-version>2.14.0</dhp-schemas-version>
<dhp-schemas-version>3.16.1-SNAPSHOT</dhp-schemas-version>
<apache.solr.version>7.1.0</apache.solr.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<prometheus.version>0.10.0</prometheus.version>