Add HdfsClient to delete mdstores and versions on Hadoop (HDFS)

This commit is contained in:
Michele Artini 2021-01-28 15:55:22 +01:00
parent 05f5f35fa0
commit 81bef1295d
13 changed files with 379 additions and 42 deletions

View File

@ -43,19 +43,6 @@
</dependency>
</dependencies>
<repositories>
<repository>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
<id>cloudera</id>
<name>Cloudera Repository</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
</repository>
</repositories>
<build>
<resources>

View File

@ -37,7 +37,28 @@
<artifactId>commons-io</artifactId>
</dependency>
<!-- Common -->
<!-- Hadoop -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-cdh5.9.2</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- DHP Common -->
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>

View File

@ -10,7 +10,6 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
@ -20,6 +19,7 @@ import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreWithInfo;
import eu.dnetlib.data.mdstore.manager.exceptions.MDStoreManagerException;
import eu.dnetlib.data.mdstore.manager.utils.DatabaseUtils;
import eu.dnetlib.data.mdstore.manager.utils.HdfsClient;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import io.swagger.annotations.ApiParam;
@ -34,6 +34,9 @@ public class MDStoreController extends AbstractDnetController {
@Autowired
private DatabaseUtils databaseUtils;
@Autowired
private HdfsClient hdfsClient;
private static final Logger log = LoggerFactory.getLogger(DatabaseUtils.class);
@ApiOperation("Return all the mdstores")
@ -76,7 +79,8 @@ public class MDStoreController extends AbstractDnetController {
@ApiOperation("Delete a mdstore by id")
@DeleteMapping("/mdstore/{mdId}")
public StatusResponse delete(@ApiParam("the id of the mdstore that will be deleted") @PathVariable final String mdId) throws MDStoreManagerException {
databaseUtils.deleteMdStore(mdId);
final String hdfsPath = databaseUtils.deleteMdStore(mdId);
hdfsClient.deletePath(hdfsPath);
return StatusResponse.DELETED;
}
@ -99,12 +103,21 @@ public class MDStoreController extends AbstractDnetController {
return databaseUtils.commitMdStoreVersion(versionId, size);
}
@ApiOperation("Abort a preliminary version")
@GetMapping("/version/{versionId}/abort")
public StatusResponse commitVersion(@ApiParam("the id of the version to abort") @PathVariable final String versionId) throws MDStoreManagerException {
final String hdfsPath = databaseUtils.deleteMdStoreVersion(versionId, true);
hdfsClient.deletePath(hdfsPath);
return StatusResponse.ABORTED;
}
@ApiOperation("Delete a mdstore version")
@DeleteMapping("/version/{versionId}")
public StatusResponse deleteVersion(@ApiParam("the id of the version that has to be deleted") @PathVariable final String versionId,
@ApiParam("if true, the controls on writing and readcount values will be skipped") @RequestParam(required = false, defaultValue = "false") final boolean force)
throws MDStoreManagerException {
databaseUtils.deleteMdStoreVersion(versionId, force);
final String hdfsPath = databaseUtils.deleteMdStoreVersion(versionId, force);
hdfsClient.deletePath(hdfsPath);
return StatusResponse.DELETED;
}
@ -123,14 +136,15 @@ public class MDStoreController extends AbstractDnetController {
}
@ApiOperation("Delete expired versions")
@PostMapping("/versions/expired")
@DeleteMapping("/versions/expired")
public StatusResponse deleteExpiredVersions() {
new Thread(() -> {
for (final String v : databaseUtils.listExpiredVersions()) {
for (final String versionId : databaseUtils.listExpiredVersions()) {
try {
databaseUtils.deleteMdStoreVersion(v, true);
final String hdfsPath = databaseUtils.deleteMdStoreVersion(versionId, true);
hdfsClient.deletePath(hdfsPath);
} catch (final MDStoreManagerException e) {
log.warn("Error deleteting version " + v);
log.warn("Error deleteting version " + versionId, e);
}
}
}).start();
@ -143,7 +157,7 @@ public class MDStoreController extends AbstractDnetController {
public Map<String, Object> info() {
final Map<String, Object> info = new LinkedHashMap<>();
info.put("number_of_mdstores", databaseUtils.countMdStores());
info.put("hadoop_cluster", databaseUtils.getHadoopCluster());
info.put("hadoop_cluster", hdfsClient.getHadoopCluster());
info.put("hdfs_base_path", databaseUtils.getHdfsBasePath());
info.put("expired_versions", databaseUtils.listExpiredVersions());
return info;

View File

@ -4,6 +4,7 @@ public class StatusResponse {
public static final StatusResponse DELETED = new StatusResponse("DELETED");
public static final StatusResponse DELETING = new StatusResponse("DELETING...");
public static final StatusResponse ABORTED = new StatusResponse("ABORTED");;
private String status;

View File

@ -2,6 +2,7 @@ package eu.dnetlib.data.mdstore.manager.utils;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.transaction.Transactional;
@ -40,9 +41,6 @@ public class DatabaseUtils {
@Value("${dhp.mdstore-manager.hdfs.base-path}")
private String hdfsBasePath;
@Value("${dhp.mdstore-manager.hadoop.cluster}")
private String hadoopCluster;
private static final Logger log = LoggerFactory.getLogger(DatabaseUtils.class);
public Iterable<MDStoreWithInfo> listMdStores() {
@ -90,8 +88,11 @@ public class DatabaseUtils {
}
@Transactional
public void deleteMdStore(final String mdId) throws MDStoreManagerException {
if (!mdstoreRepository.existsById(mdId)) {
public String deleteMdStore(final String mdId) throws MDStoreManagerException {
final Optional<MDStore> md = mdstoreRepository.findById(mdId);
if (!md.isPresent()) {
log.error("MDStore not found: " + mdId);
throw new MDStoreManagerException("MDStore not found: " + mdId);
}
@ -109,6 +110,8 @@ public class DatabaseUtils {
mdstoreCurrentVersionRepository.deleteById(mdId);
mdstoreVersionRepository.deleteByMdstore(mdId);
mdstoreRepository.deleteById(mdId);
return md.get().getHdfsPath();
}
@Transactional
@ -155,7 +158,7 @@ public class DatabaseUtils {
}
@Transactional
public void deleteMdStoreVersion(final String versionId, final boolean force) throws MDStoreManagerException {
public String deleteMdStoreVersion(final String versionId, final boolean force) throws MDStoreManagerException {
final MDStoreVersion v = mdstoreVersionRepository.findById(versionId).orElseThrow(() -> new MDStoreManagerException("Version not found"));
@ -170,6 +173,8 @@ public class DatabaseUtils {
}
mdstoreVersionRepository.delete(v);
return v.getHdfsPath();
}
public String getHdfsBasePath() {
@ -180,12 +185,4 @@ public class DatabaseUtils {
this.hdfsBasePath = hdfsBasePath;
}
public String getHadoopCluster() {
return hadoopCluster;
}
public void setHadoopCluster(final String hadoopCluster) {
this.hadoopCluster = hadoopCluster;
}
}

View File

@ -0,0 +1,52 @@
package eu.dnetlib.data.mdstore.manager.utils;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import eu.dnetlib.data.mdstore.manager.exceptions.MDStoreManagerException;

/**
 * Thin client around the Hadoop {@link FileSystem} API, used by the mdstore
 * manager to delete mdstore/version directories on HDFS.
 *
 * The target cluster is selected with the
 * {@code dhp.mdstore-manager.hadoop.cluster} property (OCEAN or GARR) and
 * determines which bundled *-site.xml resources are loaded into the client
 * {@link Configuration}.
 */
@Component
public class HdfsClient {

	// Cluster name (OCEAN or GARR); selects the classpath config resources below.
	@Value("${dhp.mdstore-manager.hadoop.cluster}")
	private String hadoopCluster;

	private static final Log log = LogFactory.getLog(HdfsClient.class);

	/**
	 * Recursively deletes a path on HDFS.
	 *
	 * @param path the absolute HDFS path to delete
	 * @throws MDStoreManagerException if the configured cluster name is not
	 *             recognized, or if the HDFS deletion fails
	 */
	public void deletePath(final String path) throws MDStoreManagerException {

		final Configuration conf = new Configuration();

		// Constant-first equalsIgnoreCase avoids a NPE if the property is missing/blank.
		if ("OCEAN".equalsIgnoreCase(hadoopCluster)) {
			conf.addResource(getClass().getResourceAsStream("/hadoop/OCEAN/core-site.xml"));
			conf.addResource(getClass().getResourceAsStream("/hadoop/OCEAN/ocean-hadoop-conf.xml"));
		} else if ("GARR".equalsIgnoreCase(hadoopCluster)) {
			conf.addResource(getClass().getResourceAsStream("/hadoop/GARR/core-site.xml"));
			conf.addResource(getClass().getResourceAsStream("/hadoop/GARR/garr-hadoop-conf.xml"));
		} else {
			log.error("Invalid Hadoop Cluster: " + hadoopCluster);
			throw new MDStoreManagerException("Invalid Hadoop Cluster: " + hadoopCluster);
		}

		// try-with-resources guarantees the FileSystem handle is closed on all paths.
		try (final FileSystem fs = FileSystem.get(conf)) {
			fs.delete(new Path(path), true); // true = recursive delete
		} catch (IllegalArgumentException | IOException e) {
			log.error("Error deleting path: " + path, e);
			throw new MDStoreManagerException("Error deleting path: " + path, e);
		}
	}

	public String getHadoopCluster() {
		return hadoopCluster;
	}

	public void setHadoopCluster(final String hadoopCluster) {
		this.hadoopCluster = hadoopCluster;
	}
}

View File

@ -0,0 +1,145 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>hadoop.rpc.protection</name>
<value>authentication</value>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>DEFAULT</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.flume.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.flume.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.yarn.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.yarn.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.security.group.mapping</name>
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
</property>
<property>
<name>hadoop.security.instrumentation.requires.admin</name>
<value>false</value>
</property>
<property>
<name>net.topology.script.file.name</name>
<value>/etc/hadoop/conf.cloudera.yarn2/topology.py</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>65536</value>
</property>
<property>
<name>hadoop.ssl.enabled</name>
<value>false</value>
</property>
<property>
<name>hadoop.ssl.require.client.cert</name>
<value>false</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.keystores.factory.class</name>
<value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.server.conf</name>
<value>ssl-server.xml</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.client.conf</name>
<value>ssl-client.xml</value>
<final>true</final>
</property>
</configuration>

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
<property>
<name>dfs.nameservices</name>
<value>nameservice1</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.nameservice1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.nameservice1</name>
<value>true</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:2181,iis-cdh5-test-m2.ocean.icm.edu.pl:2181,iis-cdh5-test-m3.ocean.icm.edu.pl:2181</value>
</property>
<property>
<name>dfs.ha.namenodes.nameservice1</name>
<value>namenode528,namenode434</value>
</property>
<property>
<name>dfs.namenode.rpc-address.nameservice1.namenode528</name>
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.nameservice1.namenode528</name>
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:8022</value>
</property>
<property>
<name>dfs.namenode.http-address.nameservice1.namenode528</name>
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:50070</value>
</property>
<property>
<name>dfs.namenode.https-address.nameservice1.namenode528</name>
<value>iis-cdh5-test-m1.ocean.icm.edu.pl:50470</value>
</property>
<property>
<name>dfs.namenode.rpc-address.nameservice1.namenode434</name>
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.nameservice1.namenode434</name>
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:8022</value>
</property>
<property>
<name>dfs.namenode.http-address.nameservice1.namenode434</name>
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:50070</value>
</property>
<property>
<name>dfs.namenode.https-address.nameservice1.namenode434</name>
<value>iis-cdh5-test-m2.ocean.icm.edu.pl:50470</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>false</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.legacy.blockreader</name>
<value>false</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>false</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/run/hdfs-sockets/dn</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.skip.checksum</name>
<value>false</value>
</property>
<property>
<name>dfs.client.domain.socket.data.traffic</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
<value>true</value>
</property>
</configuration>

View File

@ -153,7 +153,10 @@
<tr ng-repeat="v in versions">
<td>
<span class="glyphicon glyphicon-pencil" ng-if="v.writing" title="writing..."></span> <span ng-class="{'text-success': v.current}">{{v.id}}</span><br />
<span class="small"><b>Path:</b> {{v.hdfsPath}}</span>
<span class="small"><b>Path:</b> {{v.hdfsPath}}</span><br/>
<button class="btn btn-xs btn-primary" ng-show="v.writing" ng-click="commitVersion(v.id)">commit</button>
<button class="btn btn-xs btn-warning" ng-show="v.writing" ng-click="abortVersion(v.id)">abort</button>
<button class="btn btn-xs btn-danger" ng-disabled="v.current" ng-click="deleteVersion(v.id, forceVersionDelete)">delete</button>
</td>
<td class="text-center">
{{v.readCount}}
@ -161,10 +164,6 @@
</td>
<td class="text-center" title="{{v.lastUpdate}}">{{v.lastUpdate | date:"MMM dd, yyyy 'at' HH:mm"}}</td>
<td class="text-right">{{v.size}}</td>
<td class="text-right">
<button class="btn btn-sm btn-primary" ng-show="v.writing" ng-click="commitVersion(v.id)">commit</button>
<button class="btn btn-sm btn-danger" ng-disabled="v.current" ng-click="deleteVersion(v.id, forceVersionDelete)">delete</button>
</td>
</tr>
</tbody>
</table>

View File

@ -60,7 +60,16 @@ app.controller('mdstoreManagerController', function($scope, $http) {
});
}
};
$scope.abortVersion = function(versionId) {
$http.get("/mdstores/version/" + versionId + "/abort?" + $.now()).success(function(data) {
$scope.reload();
$scope.refreshVersions();
}).error(function(err) {
alert('ERROR: ' + err.message);
});
};
$scope.resetReading = function(versionId) {
$http.get("/mdstores/version/" + versionId + "/resetReading" + '?' + $.now()).success(function(data) {
$scope.reload();

11
pom.xml
View File

@ -66,6 +66,17 @@
<enabled>true</enabled>
</releases>
</repository>
<repository>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
<id>cloudera</id>
<name>Cloudera Repository</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
</repository>
</repositories>
<dependencies>