adapted API to index Flat
This commit is contained in:
parent
dfca500b31
commit
b07be69f90
|
@ -57,10 +57,10 @@ public class ScholixControllerV2 extends AbstractDnetController {
|
|||
// description = "Filter scholix Links having collected after this date") String harvestedAfter,
|
||||
@Parameter(in = ParameterIn.QUERY, description = "select page of result") final Integer page) throws Exception {
|
||||
|
||||
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)
|
||||
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)&&StringUtils.isEmpty(sourceType)
|
||||
&& StringUtils.isEmpty(linkProvider)) {
|
||||
throw new ScholixException(
|
||||
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider");
|
||||
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider, sourceType");
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.scholix.api.index;
|
|||
|
||||
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
|
||||
import eu.dnetlib.scholix.api.ScholixException;
|
||||
import eu.dnetlib.scholix.api.TaggedCounter;
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
|
@ -17,6 +18,7 @@ import org.elasticsearch.search.aggregations.AggregationBuilders;
|
|||
import org.elasticsearch.search.aggregations.Aggregations;
|
||||
import org.elasticsearch.search.aggregations.bucket.nested.ParsedNested;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
|
||||
|
@ -147,14 +149,15 @@ public class ScholixIndexManager {
|
|||
public List<Pair<String, Long>> totalLinksByProvider(final String filterName) throws ScholixException {
|
||||
|
||||
|
||||
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?createLinkProviderQuery(filterName):QueryBuilders.matchAllQuery();
|
||||
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?QueryBuilders.termQuery("linkProviders", filterName):QueryBuilders.matchAllQuery();
|
||||
|
||||
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
|
||||
.withQuery(query)
|
||||
.withSearchType(SearchType.DEFAULT)
|
||||
.withPageable(PageRequest.of(0,10))
|
||||
.addAggregation(AggregationBuilders.nested("nested", "linkprovider")
|
||||
.subAggregation(AggregationBuilders.terms("by_map").field("linkprovider.name").size(100).minDocCount(1)))
|
||||
.addAggregation(
|
||||
AggregationBuilders.terms("genres").field("linkProviders").size(100)
|
||||
.minDocCount(1))
|
||||
.build();
|
||||
|
||||
|
||||
|
@ -168,13 +171,10 @@ public class ScholixIndexManager {
|
|||
if(aggregations == null)
|
||||
return null;
|
||||
|
||||
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
|
||||
return ((ParsedStringTerms) aggregations.get("genres")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
|
||||
|
||||
|
||||
|
||||
return ((ParsedStringTerms) aggByMap).getBuckets()
|
||||
.stream()
|
||||
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
|
@ -187,8 +187,9 @@ public class ScholixIndexManager {
|
|||
.withQuery(query)
|
||||
.withSearchType(SearchType.DEFAULT)
|
||||
.withPageable(PageRequest.of(0,10))
|
||||
.addAggregation(AggregationBuilders.nested("nested", String.format("%s.publisher", prefix ))
|
||||
.subAggregation(AggregationBuilders.terms("by_map").field(String.format("%s.publisher.name", prefix )).size(100).minDocCount(1)))
|
||||
.addAggregation(
|
||||
AggregationBuilders.terms("publishers").field(String.format("%sPublisher", prefix.toString())).size(100)
|
||||
.minDocCount(1))
|
||||
.build();
|
||||
|
||||
|
||||
|
@ -202,13 +203,7 @@ public class ScholixIndexManager {
|
|||
if(aggregations == null)
|
||||
return null;
|
||||
|
||||
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
|
||||
|
||||
|
||||
return ((ParsedStringTerms) aggByMap).getBuckets()
|
||||
.stream()
|
||||
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
|
||||
.collect(Collectors.toList());
|
||||
return ((ParsedStringTerms) aggregations.get("publishers")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
|
@ -249,66 +244,63 @@ public class ScholixIndexManager {
|
|||
|
||||
if (StringUtils.isNoneBlank(linkProvider)) {
|
||||
myCounter.increment("linkProvider");
|
||||
queries.add(createLinkProviderQuery(linkProvider));
|
||||
queries.add(QueryBuilders.termQuery("linkProviders", linkProvider));
|
||||
}
|
||||
|
||||
if (StringUtils.isNoneBlank(targetPid)) {
|
||||
myCounter.increment("targetPid");
|
||||
queries.add(createPidValueQuery(RelationPrefix.target, targetPid));
|
||||
queries.add(QueryBuilders.termQuery("targetPid", targetPid));
|
||||
}
|
||||
if (StringUtils.isNoneBlank(sourcePid)) {
|
||||
myCounter.increment("sourcePid");
|
||||
queries.add(createPidValueQuery(RelationPrefix.source, sourcePid));
|
||||
queries.add(QueryBuilders.termQuery("sourcePid", sourcePid));
|
||||
}
|
||||
|
||||
if (StringUtils.isNoneBlank(targetPidType)) {
|
||||
assert targetPidType != null;
|
||||
incrementPidCounter(RelationPrefix.target,targetPidType);
|
||||
queries.add(createPidTypeQuery(RelationPrefix.target, targetPidType));
|
||||
queries.add(QueryBuilders.termQuery("targetPidType", targetPidType));
|
||||
}
|
||||
if (StringUtils.isNoneBlank(sourcePidType)) {
|
||||
assert sourcePidType != null;
|
||||
incrementPidCounter(RelationPrefix.source,sourcePidType);
|
||||
queries.add(createPidTypeQuery(RelationPrefix.source, sourcePidType));
|
||||
queries.add(QueryBuilders.termQuery("sourcePidType", sourcePidType));
|
||||
}
|
||||
|
||||
if (StringUtils.isNoneBlank(targetType)) {
|
||||
if ("dataset".equalsIgnoreCase(targetType) || "publication".equalsIgnoreCase(targetType))
|
||||
myCounter.increment(String.format("targetType_%s", targetType));
|
||||
queries.add(createObjectTypeQuery(RelationPrefix.target, targetType));
|
||||
myCounter.increment(String.format("targetType_%s", targetType));
|
||||
queries.add(QueryBuilders.termQuery("targetType", targetType));
|
||||
}
|
||||
|
||||
if (StringUtils.isNoneBlank(sourceType)) {
|
||||
if ("dataset".equalsIgnoreCase(sourceType) || "publication".equalsIgnoreCase(sourceType)) {
|
||||
myCounter.increment(String.format("sourceType_%s", sourceType));
|
||||
}
|
||||
queries.add(createObjectTypeQuery(RelationPrefix.source, sourceType));
|
||||
myCounter.increment(String.format("sourceType_%s", sourceType));
|
||||
queries.add(QueryBuilders.termQuery("sourceType", sourceType));
|
||||
}
|
||||
|
||||
if (StringUtils.isNoneBlank(targetPublisher)) {
|
||||
myCounter.increment("targetPublisher");
|
||||
|
||||
queries.add(createLinkPublisherQuery(RelationPrefix.target,targetPublisher));
|
||||
queries.add(QueryBuilders.termQuery("targetPublisher", targetPublisher));
|
||||
}
|
||||
|
||||
QueryBuilder result = createFinalQuery(queries);
|
||||
|
||||
NativeSearchQuery finalQuery = new NativeSearchQueryBuilder()
|
||||
.withQuery(result)
|
||||
.withPageable(PageRequest.of(page,10))
|
||||
.withPageable(PageRequest.of(page,100))
|
||||
.build();
|
||||
|
||||
|
||||
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = connectionPool.getResource();
|
||||
ElasticsearchRestTemplate client = resource.getValue();
|
||||
|
||||
long tt = client.count(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
||||
|
||||
SearchHits<Scholix> scholixRes = client.search(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
||||
|
||||
long tt = client.count(finalQuery, ScholixFlat .class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
||||
|
||||
SearchHits<ScholixFlat> scholixRes = client.search(finalQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
|
||||
connectionPool.returnResource(resource);
|
||||
|
||||
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).collect(Collectors.toList()));
|
||||
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).map(ScholixUtils::getScholixFromBlob).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package eu.dnetlib.scholix.api.index;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
|
||||
import org.apache.commons.codec.binary.Base64InputStream;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class ScholixUtils {
|
||||
|
||||
private static ObjectMapper MAPPER = new ObjectMapper();
|
||||
|
||||
private static String uncompress(final String compressed) throws Exception {
|
||||
Base64InputStream bis = new Base64InputStream(new ByteArrayInputStream(compressed.getBytes()));
|
||||
GZIPInputStream gzip = new GZIPInputStream(bis);
|
||||
return IOUtils.toString(gzip);
|
||||
}
|
||||
|
||||
public static Scholix getScholixFromBlob(final ScholixFlat flat) {
|
||||
try {
|
||||
return MAPPER.readValue(uncompress(flat.getBlob()), Scholix.class);
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -27,7 +27,7 @@ management.metrics.distribution.percentiles.http.server.requests=0.5, 0.9, 0.95,
|
|||
|
||||
#scholix.elastic.clusterNodes = 10.19.65.51:9200,10.19.65.52:9200,10.19.65.53:9200,10.19.65.54:9200
|
||||
scholix.elastic.clusterNodes = localhost:9200
|
||||
scholix.elastic.indexName = dli_shadow_scholix
|
||||
scholix.elastic.indexName = scholix
|
||||
scholix.elastic.socketTimeout = 60000
|
||||
scholix.elastic.connectionTimeout= 60000
|
||||
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -453,7 +453,7 @@
|
|||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
|
||||
<java.version>1.8</java.version>
|
||||
<dhp-schemas-version>2.14.0</dhp-schemas-version>
|
||||
<dhp-schemas-version>3.16.1-SNAPSHOT</dhp-schemas-version>
|
||||
<apache.solr.version>7.1.0</apache.solr.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<prometheus.version>0.10.0</prometheus.version>
|
||||
|
|
Loading…
Reference in New Issue