opendoarID api using hdfs
This commit is contained in:
parent
502f28dfd3
commit
61e50ddea5
|
@ -1,5 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
|
@ -22,7 +24,40 @@
|
|||
<artifactId>dnet-broker-apps-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<version>2.6.0-cdh5.9.2</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<repositories>
|
||||
<repository>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<id>cloudera</id>
|
||||
<name>Cloudera Repository</name>
|
||||
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
|
|
@ -42,12 +42,7 @@ public class BrokerConfiguration extends AbstractElasticsearchConfiguration {
|
|||
@Autowired
|
||||
private ElasticSearchProperties elasticSearchProperties;
|
||||
|
||||
public static final String TAG_EVENTS = "Events";
|
||||
public static final String TAG_SUBSCRIPTIONS = "Subscriptions";
|
||||
public static final String TAG_NOTIFICATIONS = "Notifications";
|
||||
public static final String TAG_TOPIC_TYPES = "Topic Types";
|
||||
public static final String TAG_OPENAIRE = "OpenAIRE";
|
||||
public static final String TAG_MATCHING = "Subscription-Event Matching";
|
||||
public static final String OA_PUBLIC_APIS = "Openaire Broker Public API";
|
||||
|
||||
@Override
|
||||
@Bean
|
||||
|
@ -67,10 +62,9 @@ public class BrokerConfiguration extends AbstractElasticsearchConfiguration {
|
|||
return new Docket(DocumentationType.SWAGGER_2)
|
||||
.select()
|
||||
.apis(RequestHandlerSelectors.any())
|
||||
.paths(p -> p.startsWith("/api/"))
|
||||
.paths(p -> p.startsWith("/"))
|
||||
.build()
|
||||
.tags(new Tag(TAG_EVENTS, "Events management"), new Tag(TAG_SUBSCRIPTIONS, "Subscriptions management"), new Tag(TAG_NOTIFICATIONS,
|
||||
"Notifications management"), new Tag(TAG_TOPIC_TYPES, "Topic types management"), new Tag(TAG_OPENAIRE, "OpenAIRE use case"))
|
||||
.tags(new Tag(OA_PUBLIC_APIS, OA_PUBLIC_APIS))
|
||||
.apiInfo(new ApiInfo(
|
||||
"Literature Broker Service",
|
||||
"APIs documentation",
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
package eu.dnetlib.broker.oa.controllers;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -15,10 +18,15 @@ import org.apache.commons.io.IOUtils;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.elasticsearch.action.search.SearchType;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.data.elasticsearch.core.ElasticsearchOperations;
|
||||
|
@ -49,7 +57,7 @@ import io.swagger.annotations.ApiOperation;
|
|||
@Profile("openaire")
|
||||
@RestController
|
||||
@RequestMapping("/")
|
||||
@Api(tags = BrokerConfiguration.TAG_OPENAIRE)
|
||||
@Api(tags = BrokerConfiguration.OA_PUBLIC_APIS)
|
||||
public class OpenairePublicController extends AbstractLbsController {
|
||||
|
||||
@Autowired
|
||||
|
@ -61,6 +69,12 @@ public class OpenairePublicController extends AbstractLbsController {
|
|||
@Autowired
|
||||
private ElasticSearchProperties props;
|
||||
|
||||
@Value("${lbs.hadoop.namenode.url}")
|
||||
private String namenode;
|
||||
|
||||
@Value("${lbs.hadoop.opendoar.events.path}")
|
||||
private String opendoarEventsPath;
|
||||
|
||||
private static final long SCROLL_TIMEOUT_IN_MILLIS = 5 * 60 * 1000;
|
||||
|
||||
private static final Log log = LogFactory.getLog(OpenairePublicController.class);
|
||||
|
@ -98,36 +112,6 @@ public class OpenairePublicController extends AbstractLbsController {
|
|||
}
|
||||
}
|
||||
|
||||
@ApiOperation("Returns notifications by opendorar Id (for example: 301) using scrolls (first page)")
|
||||
@GetMapping("/scroll/notifications/byOpenDoarId/{opendoarId}")
|
||||
public ScrollPage<ShortEventMessage> prepareScrollNotificationsByOpendoarId(@PathVariable final String opendoarId) {
|
||||
|
||||
final ElasticsearchRestTemplate esTemplate = (ElasticsearchRestTemplate) esOperations;
|
||||
|
||||
final String dsId = calculateDsIdFromOpenDoarId(opendoarId);
|
||||
|
||||
final NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
|
||||
.withQuery(QueryBuilders.nestedQuery("map", QueryBuilders.termQuery("map.targetDatasourceId", dsId), ScoreMode.None))
|
||||
.withSearchType(SearchType.DEFAULT)
|
||||
.withFields("topic", "payload")
|
||||
.build();
|
||||
|
||||
final SearchScrollHits<Notification> scroll =
|
||||
esTemplate.searchScrollStart(SCROLL_TIMEOUT_IN_MILLIS, searchQuery, Notification.class, IndexCoordinates.of(props.getNotificationsIndexName()));
|
||||
if (scroll.hasSearchHits()) {
|
||||
final List<ShortEventMessage> values = calculateNotificationMessages(scroll);
|
||||
return new ScrollPage<>(scroll.getScrollId(), values.isEmpty() || scroll.getScrollId() == null, values);
|
||||
} else {
|
||||
esTemplate.searchScrollClear(Arrays.asList(scroll.getScrollId()));
|
||||
return new ScrollPage<>(null, true, new ArrayList<>());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String calculateDsIdFromOpenDoarId(final String opendoarId) {
|
||||
return "10|opendoar____::" + DigestUtils.md5Hex(opendoarId);
|
||||
}
|
||||
|
||||
@ApiOperation("Returns notifications using scrolls (other pages)")
|
||||
@GetMapping("/scroll/notifications/{scrollId}")
|
||||
public ScrollPage<ShortEventMessage> scrollNotifications(@PathVariable final String scrollId) {
|
||||
|
@ -182,6 +166,61 @@ public class OpenairePublicController extends AbstractLbsController {
|
|||
|
||||
}
|
||||
|
||||
@ApiOperation("Returns events as file by opendoarId")
|
||||
@GetMapping(value = "/file/events/opendoar/{id}", produces = "application/gzip")
|
||||
public void opendoarEventsAsFile(final HttpServletResponse res, @PathVariable final String id) {
|
||||
|
||||
res.setHeader("Content-Disposition", "attachment; filename=dump.json.gz");
|
||||
|
||||
final Configuration conf = new Configuration();
|
||||
conf.addResource(getClass().getResourceAsStream("/core-site.xml"));
|
||||
conf.addResource(getClass().getResourceAsStream("/ocean-hadoop-conf.xml"));
|
||||
|
||||
final Path pathDir = new Path(opendoarEventsPath + "/" + DigestUtils.md5Hex(id));
|
||||
|
||||
try (final FileSystem fs = FileSystem.get(conf);
|
||||
final ServletOutputStream out = res.getOutputStream();
|
||||
final GZIPOutputStream gzOut = new GZIPOutputStream(out)) {
|
||||
boolean first = true;
|
||||
|
||||
IOUtils.write("[\n", gzOut);
|
||||
|
||||
try {
|
||||
for (final FileStatus fileStatus : fs.listStatus(pathDir)) {
|
||||
if (fileStatus.isFile()) {
|
||||
final Path path = fileStatus.getPath();
|
||||
if (path.getName().endsWith(".json")) {
|
||||
try (final FSDataInputStream fis = fs.open(path);
|
||||
final InputStreamReader isr = new InputStreamReader(fis);
|
||||
final BufferedReader br = new BufferedReader(isr)) {
|
||||
|
||||
String line = br.readLine();
|
||||
while (line != null) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
IOUtils.write(",\n", gzOut);
|
||||
}
|
||||
|
||||
IOUtils.write(line, gzOut);
|
||||
|
||||
line = br.readLine();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (final FileNotFoundException e) {
|
||||
log.warn("File not found - " + e.getMessage());
|
||||
}
|
||||
IOUtils.write("\n]\n", gzOut);
|
||||
gzOut.flush();
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error accessing hdfs file", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private List<ShortEventMessage> calculateNotificationMessages(final SearchScrollHits<Notification> scroll) {
|
||||
if (scroll.getSearchHits().size() > 0) {
|
||||
return scroll.stream()
|
||||
|
|
|
@ -19,6 +19,8 @@ spring.jpa.properties.hibernate.format_sql = false
|
|||
|
||||
lbs.database.url = ${spring.datasource.url}
|
||||
|
||||
lbs.hadoop.opendoar.events.path = /user/michele.artini/broker_events/eventsByOpendoarId
|
||||
|
||||
# for development server
|
||||
#lbs.elastic.clusterNodes = broker1-dev-dnet.d4science.org:9300
|
||||
#lbs.elastic.homepage = http://broker1-dev-dnet.d4science.org:9200/_plugin/hq
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--Autogenerated by Cloudera Manager-->
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>fs.trash.interval</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>io.compression.codecs</name>
|
||||
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.security.authentication</name>
|
||||
<value>simple</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.security.authorization</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.rpc.protection</name>
|
||||
<value>authentication</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.security.auth_to_local</name>
|
||||
<value>DEFAULT</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.oozie.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.oozie.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.mapred.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.mapred.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.flume.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.flume.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.HTTP.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.HTTP.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hive.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hive.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hue.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hue.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.httpfs.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.httpfs.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hdfs.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.hdfs.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.yarn.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.yarn.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.security.group.mapping</name>
|
||||
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.security.instrumentation.requires.admin</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>net.topology.script.file.name</name>
|
||||
<value>/etc/hadoop/conf.cloudera.yarn2/topology.py</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>io.file.buffer.size</name>
|
||||
<value>65536</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.ssl.enabled</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.ssl.require.client.cert</name>
|
||||
<value>false</value>
|
||||
<final>true</final>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.ssl.keystores.factory.class</name>
|
||||
<value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
|
||||
<final>true</final>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.ssl.server.conf</name>
|
||||
<value>ssl-server.xml</value>
|
||||
<final>true</final>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.ssl.client.conf</name>
|
||||
<value>ssl-client.xml</value>
|
||||
<final>true</final>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,101 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--Autogenerated by Cloudera Manager-->
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.nameservices</name>
|
||||
<value>nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.nameservice1</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.ha.automatic-failover.enabled.nameservice1</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>ha.zookeeper.quorum</name>
|
||||
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:2181,iis-cdh5-test-m2.ocean.icm.edu.pl:2181,iis-cdh5-test-m3.ocean.icm.edu.pl:2181</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.ha.namenodes.nameservice1</name>
|
||||
<value>namenode528,namenode434</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.nameservice1.namenode528</name>
|
||||
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:8020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.servicerpc-address.nameservice1.namenode528</name>
|
||||
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:8022</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.nameservice1.namenode528</name>
|
||||
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:50070</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.https-address.nameservice1.namenode528</name>
|
||||
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:50470</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.nameservice1.namenode434</name>
|
||||
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:8020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.servicerpc-address.nameservice1.namenode434</name>
|
||||
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:8022</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.nameservice1.namenode434</name>
|
||||
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:50070</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.https-address.nameservice1.namenode434</name>
|
||||
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:50470</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.blocksize</name>
|
||||
<value>134217728</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>fs.permissions.umask-mode</name>
|
||||
<value>022</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.acls.enabled</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.use.legacy.blockreader</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.read.shortcircuit</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.domain.socket.path</name>
|
||||
<value>/var/run/hdfs-sockets/dn</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.read.shortcircuit.skip.checksum</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.domain.socket.data.traffic</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,7 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>OpenAIRE Broker Public API</title>
|
||||
<meta http-equiv="refresh" content="2; url = /swagger" />
|
||||
</head>
|
||||
</html>
|
Loading…
Reference in New Issue