dnet-applications/apps/dhp-broker-public-application/src/main/java/eu/dnetlib/broker/oa/controllers/OpenairePublicController.java

194 lines
7.5 KiB
Java
Raw Normal View History

2020-09-04 14:33:19 +02:00
package eu.dnetlib.broker.oa.controllers;
2020-08-28 09:40:04 +02:00
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.zip.GZIPOutputStream;

import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.index.query.QueryBuilders;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Profile;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.elasticsearch.core.ElasticsearchOperations;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchScrollHits;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import com.google.gson.Gson;

import eu.dnetlib.broker.BrokerConfiguration;
import eu.dnetlib.broker.common.controllers.AbstractLbsController;
import eu.dnetlib.broker.common.elasticsearch.Notification;
import eu.dnetlib.broker.common.properties.ElasticSearchProperties;
import eu.dnetlib.broker.common.subscriptions.Subscription;
import eu.dnetlib.broker.common.subscriptions.SubscriptionRepository;

import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
@Profile("openaire")
@RestController
2020-09-08 16:38:14 +02:00
@RequestMapping("/")
2020-08-28 09:40:04 +02:00
@Api(tags = BrokerConfiguration.TAG_OPENAIRE)
2020-09-04 14:33:19 +02:00
public class OpenairePublicController extends AbstractLbsController {
2020-08-28 09:40:04 +02:00
@Autowired
private ElasticsearchOperations esOperations;
@Autowired
private SubscriptionRepository subscriptionRepo;
@Autowired
private ElasticSearchProperties props;
private static final long SCROLL_TIMEOUT_IN_MILLIS = 5 * 60 * 1000;
2020-09-04 14:33:19 +02:00
private static final Log log = LogFactory.getLog(OpenairePublicController.class);
2020-08-28 09:40:04 +02:00
2020-09-04 14:33:19 +02:00
@ApiOperation("Returns notifications by subscription using scrolls (first page)")
@GetMapping("/scroll/notifications/bySubscriptionId/{subscrId}")
public ScrollPage<NotificationMessage> prepareScrollNotificationsBySubscrId(@PathVariable final String subscrId) {
2020-08-28 09:40:04 +02:00
final Optional<Subscription> optSub = subscriptionRepo.findById(subscrId);
if (optSub.isPresent()) {
final ElasticsearchRestTemplate esTemplate = (ElasticsearchRestTemplate) esOperations;
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
2020-09-08 16:38:14 +02:00
.withQuery(QueryBuilders.termQuery("subscriptionId.keyword", subscrId))
2020-08-28 09:40:04 +02:00
.withSearchType(SearchType.DEFAULT)
2020-09-07 16:01:29 +02:00
.withFields("topic", "payload")
2020-09-08 16:38:14 +02:00
.withPageable(PageRequest.of(0, 100))
2020-08-28 09:40:04 +02:00
.build();
final SearchScrollHits<Notification> scroll =
esTemplate.searchScrollStart(SCROLL_TIMEOUT_IN_MILLIS, searchQuery, Notification.class, IndexCoordinates.of(props.getNotificationsIndexName()));
if (scroll.hasSearchHits()) {
2020-09-04 14:33:19 +02:00
final List<NotificationMessage> values = calculateNotificationMessages(scroll);
return new ScrollPage<>(scroll.getScrollId(), values.isEmpty() || scroll.getScrollId() == null, values);
2020-08-28 09:40:04 +02:00
} else {
esTemplate.searchScrollClear(Arrays.asList(scroll.getScrollId()));
2020-09-04 14:33:19 +02:00
return new ScrollPage<>(null, true, new ArrayList<>());
2020-08-28 09:40:04 +02:00
}
} else {
log.warn("Invalid subscription: " + subscrId);
2020-09-07 16:01:29 +02:00
return new ScrollPage<>(null, true, new ArrayList<>());
2020-09-04 14:33:19 +02:00
}
}
@ApiOperation("Returns notifications by opendorar Id (for example: 301) using scrolls (first page)")
@GetMapping("/scroll/notifications/byOpenDoarId/{opendoarId}")
public ScrollPage<NotificationMessage> prepareScrollNotificationsByOpendoarId(@PathVariable final String opendoarId) {
final ElasticsearchRestTemplate esTemplate = (ElasticsearchRestTemplate) esOperations;
final String dsId = calculateDsIdFromOpenDoarId(opendoarId);
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
2020-09-08 16:38:14 +02:00
.withQuery(QueryBuilders.nestedQuery("map", QueryBuilders.termQuery("map.targetDatasourceId", dsId), ScoreMode.None))
2020-09-04 14:33:19 +02:00
.withSearchType(SearchType.DEFAULT)
2020-09-07 16:01:29 +02:00
.withFields("topic", "payload")
2020-09-04 14:33:19 +02:00
.build();
final SearchScrollHits<Notification> scroll =
esTemplate.searchScrollStart(SCROLL_TIMEOUT_IN_MILLIS, searchQuery, Notification.class, IndexCoordinates.of(props.getNotificationsIndexName()));
if (scroll.hasSearchHits()) {
final List<NotificationMessage> values = calculateNotificationMessages(scroll);
return new ScrollPage<>(scroll.getScrollId(), values.isEmpty() || scroll.getScrollId() == null, values);
} else {
esTemplate.searchScrollClear(Arrays.asList(scroll.getScrollId()));
return new ScrollPage<>(null, true, new ArrayList<>());
2020-08-28 09:40:04 +02:00
}
}
2020-09-04 14:33:19 +02:00
/**
 * Builds the datasource id that corresponds to an OpenDOAR id.
 *
 * @param opendoarId the OpenDOAR identifier
 * @return the prefix {@code 10|opendoar____::} followed by the MD5 hex digest of {@code opendoarId}
 */
private String calculateDsIdFromOpenDoarId(final String opendoarId) {
    final String digest = DigestUtils.md5Hex(opendoarId);
    return "10|opendoar____::" + digest;
}
2020-08-28 09:40:04 +02:00
@ApiOperation("Returns notifications using scrolls (other pages)")
@GetMapping("/scroll/notifications/{scrollId}")
2020-09-04 14:33:19 +02:00
public ScrollPage<NotificationMessage> scrollNotifications(@PathVariable final String scrollId) {
2020-08-28 09:40:04 +02:00
final ElasticsearchRestTemplate esTemplate = (ElasticsearchRestTemplate) esOperations;
final SearchScrollHits<Notification> scroll =
esTemplate.searchScrollContinue(scrollId, SCROLL_TIMEOUT_IN_MILLIS, Notification.class, IndexCoordinates.of(props.getNotificationsIndexName()));
if (scroll.hasSearchHits()) {
2020-09-04 14:33:19 +02:00
final List<NotificationMessage> values = calculateNotificationMessages(scroll);
return new ScrollPage<>(scroll.getScrollId(), values.isEmpty() || scroll.getScrollId() == null, values);
2020-08-28 09:40:04 +02:00
} else {
esTemplate.searchScrollClear(Arrays.asList(scroll.getScrollId()));
2020-09-04 14:33:19 +02:00
return new ScrollPage<>(null, true, new ArrayList<>());
2020-08-28 09:40:04 +02:00
}
}
2020-09-07 16:01:29 +02:00
@ApiOperation("Returns notifications as file")
2020-09-08 16:38:14 +02:00
@GetMapping(value = "/file/notifications/bySubscriptionId/{subscrId}", produces = "application/gzip")
2020-09-07 16:01:29 +02:00
public void notificationsAsFile(final HttpServletResponse res, @PathVariable final String subscrId) throws Exception {
final Gson gson = new Gson();
2020-09-08 16:38:14 +02:00
res.setHeader("Content-Disposition", "attachment; filename=dump.json.gz");
2020-09-07 16:01:29 +02:00
2020-09-08 16:38:14 +02:00
try (final ServletOutputStream out = res.getOutputStream(); final GZIPOutputStream gzOut = new GZIPOutputStream(out)) {
2020-09-07 16:01:29 +02:00
2020-09-08 16:38:14 +02:00
boolean first = true;
2020-09-07 16:01:29 +02:00
2020-09-11 12:05:11 +02:00
IOUtils.write("[\n", gzOut);
2020-09-07 16:01:29 +02:00
2020-09-08 16:38:14 +02:00
ScrollPage<NotificationMessage> page = null;
do {
page = page == null ? prepareScrollNotificationsBySubscrId(subscrId) : scrollNotifications(page.getId());
for (final NotificationMessage msg : page.getValues()) {
if (first) {
first = false;
} else {
2020-09-11 12:05:11 +02:00
IOUtils.write(",\n", gzOut);
2020-09-08 16:38:14 +02:00
}
IOUtils.write(gson.toJson(msg), gzOut);
2020-09-07 16:01:29 +02:00
}
2020-09-08 16:38:14 +02:00
} while (!page.isCompleted());
2020-09-07 16:01:29 +02:00
2020-09-11 12:05:11 +02:00
IOUtils.write("\n]\n", gzOut);
2020-09-08 16:38:14 +02:00
gzOut.flush();
}
2020-09-07 16:01:29 +02:00
}
2020-09-04 14:33:19 +02:00
private List<NotificationMessage> calculateNotificationMessages(final SearchScrollHits<Notification> scroll) {
2020-08-28 09:40:04 +02:00
if (scroll.getSearchHits().size() > 0) {
return scroll.stream()
.map(SearchHit::getContent)
2020-09-04 14:33:19 +02:00
.map(NotificationMessage::fromNotification)
2020-08-28 09:40:04 +02:00
.collect(Collectors.toList());
} else {
return new ArrayList<>();
}
}
}