Compare commits

...

3 Commits

6 changed files with 133 additions and 98 deletions

View File

@@ -11,6 +11,7 @@
<packaging>jar</packaging>
<artifactId>scholexplorer-api</artifactId>
<dependencies>
<dependency>
<groupId>org.springframework.data</groupId>

View File

@@ -57,10 +57,10 @@ public class ScholixControllerV2 extends AbstractDnetController {
// description = "Filter scholix Links having collected after this date") String harvestedAfter,
@Parameter(in = ParameterIn.QUERY, description = "select page of result") final Integer page) throws Exception {
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)
if (StringUtils.isEmpty(sourcePid) && StringUtils.isEmpty(targetPid) && StringUtils.isEmpty(sourcePublisher) && StringUtils.isEmpty(targetPublisher)&&StringUtils.isEmpty(sourceType)
&& StringUtils.isEmpty(linkProvider)) {
throw new ScholixException(
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider");
"The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider, sourceType");
}
try {

View File

@@ -2,6 +2,7 @@ package eu.dnetlib.scholix.api.index;
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
import eu.dnetlib.scholix.api.ScholixException;
import eu.dnetlib.scholix.api.TaggedCounter;
import io.micrometer.core.annotation.Timed;
@@ -12,10 +13,8 @@ import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.nested.ParsedNested;
import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
@@ -71,55 +70,53 @@ public class ScholixIndexManager {
}
private QueryBuilder createObjectTypeQuery(final RelationPrefix prefix, final String objectType ) throws ScholixException{
if (prefix == null){
private QueryBuilder createObjectTypeQuery(final RelationPrefix prefix, final String objectType) throws ScholixException {
if (prefix == null) {
throw new ScholixException("prefix cannot be null");
}
return new NestedQueryBuilder(String.format("%s", prefix), new TermQueryBuilder(String.format("%s.objectType",prefix), objectType), ScoreMode.None);
return new NestedQueryBuilder(String.format("%s", prefix), new TermQueryBuilder(String.format("%s.objectType", prefix), objectType), ScoreMode.None);
}
private QueryBuilder createPidTypeQuery(final RelationPrefix prefix, final String pidTypeValue ) throws ScholixException{
if (prefix == null){
private QueryBuilder createPidTypeQuery(final RelationPrefix prefix, final String pidTypeValue) throws ScholixException {
if (prefix == null) {
throw new ScholixException("prefix cannot be null");
}
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.schema",prefix), pidTypeValue), ScoreMode.None);
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.schema", prefix), pidTypeValue), ScoreMode.None);
}
private QueryBuilder createLinkProviderQuery(final String providerName ) throws ScholixException{
if (providerName == null){
private QueryBuilder createLinkProviderQuery(final String providerName) throws ScholixException {
if (providerName == null) {
throw new ScholixException("prefix cannot be null");
}
return new NestedQueryBuilder("linkprovider", new TermQueryBuilder("linkprovider.name",providerName), ScoreMode.None);
return new NestedQueryBuilder("linkprovider", new TermQueryBuilder("linkprovider.name", providerName), ScoreMode.None);
}
private QueryBuilder createLinkPublisherQuery(final RelationPrefix prefix, final String publisher ) throws ScholixException{
if (prefix == null){
private QueryBuilder createLinkPublisherQuery(final RelationPrefix prefix, final String publisher) throws ScholixException {
if (prefix == null) {
throw new ScholixException("prefix cannot be null");
}
return new NestedQueryBuilder(String.format("%s.publisher", prefix), new TermQueryBuilder(String.format("%s.publisher.name",prefix), publisher), ScoreMode.None);
return new NestedQueryBuilder(String.format("%s.publisher", prefix), new TermQueryBuilder(String.format("%s.publisher.name", prefix), publisher), ScoreMode.None);
}
private QueryBuilder createPidValueQuery(final RelationPrefix prefix, final String pidValue ) throws ScholixException{
if (prefix == null){
private QueryBuilder createPidValueQuery(final RelationPrefix prefix, final String pidValue) throws ScholixException {
if (prefix == null) {
throw new ScholixException("prefix cannot be null");
}
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.identifier",prefix), pidValue), ScoreMode.None);
return new NestedQueryBuilder(String.format("%s.identifier", prefix), new TermQueryBuilder(String.format("%s.identifier.identifier", prefix), pidValue), ScoreMode.None);
}
private QueryBuilder createFinalQuery(final List<QueryBuilder> queries) throws ScholixException{
private QueryBuilder createFinalQuery(final List<QueryBuilder> queries) throws ScholixException {
if (queries == null || queries.isEmpty())
throw new ScholixException("the list of queries must be not empty");
if (queries.size() ==1) {
if (queries.size() == 1) {
return queries.get(0);
}
else {
} else {
final BoolQueryBuilder b = new BoolQueryBuilder();
b.must().addAll(queries);
@@ -129,7 +126,7 @@ public class ScholixIndexManager {
}
private void incrementPidCounter(RelationPrefix prefix, String value) {
switch (value.toLowerCase()){
switch (value.toLowerCase()) {
case "doi": {
myCounter.increment(String.format("%s_doi", prefix));
break;
@@ -147,76 +144,77 @@ public class ScholixIndexManager {
public List<Pair<String, Long>> totalLinksByProvider(final String filterName) throws ScholixException {
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?createLinkProviderQuery(filterName):QueryBuilders.matchAllQuery();
final QueryBuilder query = StringUtils.isNoneBlank(filterName) ? QueryBuilders.termQuery("linkProviders", filterName) : QueryBuilders.matchAllQuery();
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
.withQuery(query)
.withSearchType(SearchType.DEFAULT)
.withPageable(PageRequest.of(0,10))
.addAggregation(AggregationBuilders.nested("nested", "linkprovider")
.subAggregation(AggregationBuilders.terms("by_map").field("linkprovider.name").size(100).minDocCount(1)))
.withPageable(PageRequest.of(0, 10))
.addAggregation(
AggregationBuilders.terms("genres").field("linkProviders").size(100)
.minDocCount(1))
.build();
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
final SearchHits<Scholix> hits = client.search(searchQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = null;
try {
resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
final SearchHits<ScholixFlat> hits = client.search(searchQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
final Aggregations aggregations = hits.getAggregations();
connectionPool.returnResource(resource);
final Aggregations aggregations = hits.getAggregations();
if (aggregations == null)
return null;
if(aggregations == null)
return null;
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
return ((ParsedStringTerms) aggregations.get("genres")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
} catch (ScholixException e) {
throw e;
} finally {
if (connectionPool != null) {
connectionPool.returnResource(resource);
}
}
return ((ParsedStringTerms) aggByMap).getBuckets()
.stream()
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
.collect(Collectors.toList());
}
public List<Pair<String, Long>> totalLinksPublisher(final RelationPrefix prefix, final String filterName) throws ScholixException {
final QueryBuilder query = StringUtils.isNoneBlank(filterName)?createLinkPublisherQuery(prefix,filterName):QueryBuilders.matchAllQuery();
final QueryBuilder query = StringUtils.isNoneBlank(filterName) ? createLinkPublisherQuery(prefix, filterName) : QueryBuilders.matchAllQuery();
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
.withQuery(query)
.withSearchType(SearchType.DEFAULT)
.withPageable(PageRequest.of(0,10))
.addAggregation(AggregationBuilders.nested("nested", String.format("%s.publisher", prefix ))
.subAggregation(AggregationBuilders.terms("by_map").field(String.format("%s.publisher.name", prefix )).size(100).minDocCount(1)))
.withPageable(PageRequest.of(0, 10))
.addAggregation(
AggregationBuilders.terms("publishers").field(String.format("%sPublisher", prefix.toString())).size(100)
.minDocCount(1))
.build();
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
final SearchHits<Scholix> hits = client.search(searchQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = null;
try {
resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
final SearchHits<ScholixFlat> hits = client.search(searchQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
final Aggregations aggregations = hits.getAggregations();
connectionPool.returnResource(resource);
final Aggregations aggregations = hits.getAggregations();
if (aggregations == null)
return null;
if(aggregations == null)
return null;
final Aggregation aggByMap = ((ParsedNested) aggregations.asMap().get("nested")).getAggregations().asMap().get("by_map");
return ((ParsedStringTerms) aggByMap).getBuckets()
.stream()
.map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount()))
.collect(Collectors.toList());
return ((ParsedStringTerms) aggregations.get("publishers")).getBuckets().stream().map(b -> new ImmutablePair<>(b.getKeyAsString(), b.getDocCount())).collect(Collectors.toList());
} catch (ScholixException e) {
throw e;
} finally {
if (connectionPool != null) {
connectionPool.returnResource(resource);
}
}
}
/**
* Links from pid pair.
*
@@ -234,81 +232,84 @@
* @throws ScholixException the scholix exception
*/
@Timed(value = "scholix.index.request.links", description = "Time taken to request index")
public Pair<Long,List<Scholix>> linksFromPid ( final String linkProvider,
final String targetPid, final String targetPidType, final String targetPublisher,
final String targetType, final String sourcePid, final String sourcePidType,
final String sourcePublisher, final String sourceType,
final Integer page) throws ScholixException {
public Pair<Long, List<Scholix>> linksFromPid(final String linkProvider,
final String targetPid, final String targetPidType, final String targetPublisher,
final String targetType, final String sourcePid, final String sourcePidType,
final String sourcePublisher, final String sourceType,
final Integer page) throws ScholixException {
if (sourcePid==null && sourcePidType==null && targetPid==null && targetPidType==null && sourcePublisher==null && targetPublisher==null && linkProvider==null)
if (sourcePid == null && sourcePidType == null && targetPid == null && targetPidType == null && sourcePublisher == null && targetPublisher == null && linkProvider == null)
throw new ScholixException("One of sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider should be not null");
final List<QueryBuilder> queries = new ArrayList<>();
if (StringUtils.isNoneBlank(linkProvider)) {
myCounter.increment("linkProvider");
queries.add(createLinkProviderQuery(linkProvider));
queries.add(QueryBuilders.termQuery("linkProviders", linkProvider));
}
if (StringUtils.isNoneBlank(targetPid)) {
myCounter.increment("targetPid");
queries.add(createPidValueQuery(RelationPrefix.target, targetPid));
queries.add(QueryBuilders.termQuery("targetPid", targetPid));
}
if (StringUtils.isNoneBlank(sourcePid)) {
myCounter.increment("sourcePid");
queries.add(createPidValueQuery(RelationPrefix.source, sourcePid));
queries.add(QueryBuilders.termQuery("sourcePid", sourcePid));
}
if (StringUtils.isNoneBlank(targetPidType)) {
assert targetPidType != null;
incrementPidCounter(RelationPrefix.target,targetPidType);
queries.add(createPidTypeQuery(RelationPrefix.target, targetPidType));
incrementPidCounter(RelationPrefix.target, targetPidType);
queries.add(QueryBuilders.termQuery("targetPidType", targetPidType));
}
if (StringUtils.isNoneBlank(sourcePidType)) {
assert sourcePidType != null;
incrementPidCounter(RelationPrefix.source,sourcePidType);
queries.add(createPidTypeQuery(RelationPrefix.source, sourcePidType));
incrementPidCounter(RelationPrefix.source, sourcePidType);
queries.add(QueryBuilders.termQuery("sourcePidType", sourcePidType));
}
if (StringUtils.isNoneBlank(targetType)) {
if ("dataset".equalsIgnoreCase(targetType) || "publication".equalsIgnoreCase(targetType))
myCounter.increment(String.format("targetType_%s", targetType));
queries.add(createObjectTypeQuery(RelationPrefix.target, targetType));
myCounter.increment(String.format("targetType_%s", targetType));
queries.add(QueryBuilders.termQuery("targetType", targetType));
}
if (StringUtils.isNoneBlank(sourceType)) {
if ("dataset".equalsIgnoreCase(sourceType) || "publication".equalsIgnoreCase(sourceType)) {
myCounter.increment(String.format("sourceType_%s", sourceType));
}
queries.add(createObjectTypeQuery(RelationPrefix.source, sourceType));
myCounter.increment(String.format("sourceType_%s", sourceType));
queries.add(QueryBuilders.termQuery("sourceType", sourceType));
}
if (StringUtils.isNoneBlank(targetPublisher)) {
myCounter.increment("targetPublisher");
queries.add(createLinkPublisherQuery(RelationPrefix.target,targetPublisher));
queries.add(QueryBuilders.termQuery("targetPublisher", targetPublisher));
}
QueryBuilder result = createFinalQuery(queries);
NativeSearchQuery finalQuery = new NativeSearchQueryBuilder()
.withQuery(result)
.withPageable(PageRequest.of(page,10))
.withPageable(PageRequest.of(page, 100))
.build();
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
Pair<RestHighLevelClient, ElasticsearchRestTemplate> resource = null;
try {
resource = connectionPool.getResource();
ElasticsearchRestTemplate client = resource.getValue();
long tt = client.count(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
SearchHits<Scholix> scholixRes = client.search(finalQuery, Scholix.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
long tt = client.count(finalQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
connectionPool.returnResource(resource);
SearchHits<ScholixFlat> scholixRes = client.search(finalQuery, ScholixFlat.class, IndexCoordinates.of(elasticSearchProperties.getIndexName()));
return new ImmutablePair<>(tt, scholixRes.stream().map(SearchHit::getContent).map(ScholixUtils::getScholixFromBlob).collect(Collectors.toList()));
} catch (ScholixException e) {
throw e;
} finally {
if (connectionPool != null) {
connectionPool.returnResource(resource);
}
return new ImmutablePair<>(tt,scholixRes.stream().map(SearchHit::getContent).collect(Collectors.toList()));
}
}
}

View File

@@ -0,0 +1,33 @@
package eu.dnetlib.scholix.api.index;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
import eu.dnetlib.dhp.schema.sx.scholix.ScholixFlat;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.io.IOUtils;

import java.io.ByteArrayInputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
/**
 * Helper for rebuilding full {@link Scholix} objects from the compressed JSON
 * blob carried by a {@link ScholixFlat} index document.
 *
 * <p>Thread-safe: the shared {@link ObjectMapper} is immutable after creation
 * and all state is static and final.
 */
public class ScholixUtils {

    // Cached because ObjectMapper construction is expensive; made final so the
    // shared instance cannot be swapped at runtime.
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Utility class — not meant to be instantiated. */
    private ScholixUtils() {
    }

    /**
     * Base64-decodes and gunzips a compressed payload back into text.
     *
     * @param compressed Base64-encoded, GZIP-compressed payload
     * @return the decompressed text, decoded as UTF-8
     * @throws Exception if the payload is not valid Base64/GZIP
     */
    private static String uncompress(final String compressed) throws Exception {
        // try-with-resources closes both streams even when decoding fails
        // (the original leaked them on error). Explicit UTF-8 replaces the
        // platform-default charset: Base64 input is ASCII-safe either way, and
        // the decompressed blob is JSON, which is UTF-8 by convention.
        try (Base64InputStream bis = new Base64InputStream(new ByteArrayInputStream(compressed.getBytes(StandardCharsets.UTF_8)));
             GZIPInputStream gzip = new GZIPInputStream(bis)) {
            return IOUtils.toString(gzip, StandardCharsets.UTF_8);
        }
    }

    /**
     * Rebuilds the full {@link Scholix} object from the compressed blob stored
     * in a flat index record.
     *
     * @param flat the flat index record whose {@code getBlob()} returns the
     *             Base64+GZIP compressed Scholix JSON
     * @return the deserialized Scholix object
     * @throws RuntimeException if the blob cannot be decoded or parsed; the
     *         original cause is preserved
     */
    public static Scholix getScholixFromBlob(final ScholixFlat flat) {
        try {
            return MAPPER.readValue(uncompress(flat.getBlob()), Scholix.class);
        } catch (Exception e) {
            // Catch Exception, not Throwable: JVM Errors (OOM, etc.) should
            // propagate untouched. Keep the cause and add context.
            throw new RuntimeException("Unable to decode Scholix blob", e);
        }
    }
}

View File

@@ -27,7 +27,7 @@ management.metrics.distribution.percentiles.http.server.requests=0.5, 0.9, 0.95,
#scholix.elastic.clusterNodes = 10.19.65.51:9200,10.19.65.52:9200,10.19.65.53:9200,10.19.65.54:9200
scholix.elastic.clusterNodes = localhost:9200
scholix.elastic.indexName = dli_shadow_scholix
scholix.elastic.indexName = dli_scholix
scholix.elastic.socketTimeout = 60000
scholix.elastic.connectionTimeout= 60000

View File

@@ -453,7 +453,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<java.version>1.8</java.version>
<dhp-schemas-version>2.14.0</dhp-schemas-version>
<dhp-schemas-version>3.17.2-FLAT-SNAPSHOT</dhp-schemas-version>
<apache.solr.version>7.1.0</apache.solr.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<prometheus.version>0.10.0</prometheus.version>