added creationDate and hdfsPath fields

This commit is contained in:
Michele Artini 2021-01-28 11:02:04 +01:00
parent 7b5d26ada1
commit 40c3ee37ef
8 changed files with 475 additions and 64 deletions

View File

@ -0,0 +1,12 @@
1) Usare un client hdfs per cancellare le stores
2) sostituire i metodi expiredVersion e deleteVersions con un metodo solo che cancelli anche su hdfs
3) Non consentire operazioni lettura e scrittura sulle store in prepareDelete
4) Aggiungere il path hdfs delle store nelle versions e nella store (parent come property)
5) metodo info con alcune props
-- Schema changes matching the new creation date / HDFS path fields.
-- mdstores gains two columns:
ALTER TABLE mdstores
    ADD COLUMN creation_date timestamp;
ALTER TABLE mdstores
    ADD COLUMN hdfs_path text;
-- mdstore_versions gains an hdfs_path column as well:
ALTER TABLE mdstore_versions
    ADD COLUMN hdfs_path text;
-- NOTE(review): the view is only dropped here; presumably it must be
-- recreated afterwards so that it exposes the new columns -- confirm.
DROP VIEW mdstores_with_info;

View File

@ -3,8 +3,6 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>apps</artifactId> <artifactId>apps</artifactId>
@ -16,7 +14,6 @@
<artifactId>dhp-mdstore-manager</artifactId> <artifactId>dhp-mdstore-manager</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<!-- Add typical dependencies for a web application --> <!-- Add typical dependencies for a web application -->
<dependencies> <dependencies>
<dependency> <dependency>
@ -44,6 +41,17 @@
<dependency> <dependency>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId> <artifactId>dhp-common</artifactId>
<version>1.2.4-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-pace-core</artifactId>
</exclusion>
<exclusion>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
</exclusion>
</exclusions>
</dependency> </dependency>
<!-- JUnit --> <!-- JUnit -->

View File

@ -9,6 +9,7 @@ import javax.transaction.Transactional;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -36,6 +37,9 @@ public class DatabaseUtils {
@Autowired @Autowired
private JdbcTemplate jdbcTemplate; private JdbcTemplate jdbcTemplate;
@Value("${dhp.mdstore-manager.hdfs.base-path}")
private String hdfsBasePath;
private static final Logger log = LoggerFactory.getLogger(DatabaseUtils.class); private static final Logger log = LoggerFactory.getLogger(DatabaseUtils.class);
public Iterable<MDStoreWithInfo> listMdStores() { public Iterable<MDStoreWithInfo> listMdStores() {
@ -71,10 +75,10 @@ public class DatabaseUtils {
final String dsId, final String dsId,
final String apiId) { final String apiId) {
final MDStore md = MDStore.newInstance(format, layout, interpretation, dsName, dsId, apiId); final MDStore md = MDStore.newInstance(format, layout, interpretation, dsName, dsId, apiId, hdfsBasePath);
mdstoreRepository.save(md); mdstoreRepository.save(md);
final MDStoreVersion v = MDStoreVersion.newInstance(md.getId(), false); final MDStoreVersion v = MDStoreVersion.newInstance(md.getId(), false, hdfsBasePath);
v.setLastUpdate(new Date()); v.setLastUpdate(new Date());
mdstoreVersionRepository.save(v); mdstoreVersionRepository.save(v);
mdstoreCurrentVersionRepository.save(MDStoreCurrentVersion.newInstance(v)); mdstoreCurrentVersionRepository.save(MDStoreCurrentVersion.newInstance(v));
@ -131,7 +135,7 @@ public class DatabaseUtils {
@Transactional @Transactional
public MDStoreVersion prepareMdStoreVersion(final String mdId) { public MDStoreVersion prepareMdStoreVersion(final String mdId) {
final MDStoreVersion v = MDStoreVersion.newInstance(mdId, true); final MDStoreVersion v = MDStoreVersion.newInstance(mdId, true, hdfsBasePath);
mdstoreVersionRepository.save(v); mdstoreVersionRepository.save(v);
return v; return v;
} }

View File

@ -0,0 +1,149 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>60</value>
</property>
<property>
<name>net.topology.script.file.name</name>
<value>{{CMF_CONF_DIR}}/topology.py</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>hadoop.rpc.protection</name>
<value>authentication</value>
</property>
<property>
<name>hadoop.ssl.enabled</name>
<value>false</value>
</property>
<property>
<name>hadoop.ssl.require.client.cert</name>
<value>false</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.keystores.factory.class</name>
<value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.server.conf</name>
<value>ssl-server.xml</value>
<final>true</final>
</property>
<property>
<name>hadoop.ssl.client.conf</name>
<value>ssl-client.xml</value>
<final>true</final>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>DEFAULT</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.flume.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.flume.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>*</value>
</property>
<property>
<name>hue.kerberos.principal.shortname</name>
<value>hue</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.yarn.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.yarn.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.security.group.mapping</name>
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
</property>
<property>
<name>hadoop.security.instrumentation.requires.admin</name>
<value>false</value>
</property>
<property>
<name>hadoop.http.logs.enabled</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,217 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
<property>
<name>dfs.hosts</name>
<value>{{CMF_CONF_DIR}}/dfs_all_hosts.txt</value>
</property>
<property>
<name>dfs.namenode.hosts.provider.classname</name>
<value>org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///dfs/nn</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address</name>
<value>hadoop-rm1.garr-pa1.d4science.org:8022</value>
</property>
<property>
<name>dfs.namenode.rpc-address</name>
<value>hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>dfs.https.address</name>
<value>hadoop-rm1.garr-pa1.d4science.org:50470</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop-rm1.garr-pa1.d4science.org:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-rm2.garr-pa1.d4science.org:50090</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>supergroup</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.replication.min</name>
<value>1</value>
</property>
<property>
<name>dfs.replication.max</name>
<value>512</value>
</property>
<property>
<name>dfs.namenode.maintenance.replication.min</name>
<value>1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.image.transfer.timeout</name>
<value>60000</value>
</property>
<property>
<name>dfs.image.transfer.bandwidthPerSec</name>
<value>0</value>
</property>
<property>
<name>dfs.namenode.plugins</name>
<value></value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>59</value>
</property>
<property>
<name>dfs.namenode.service.handler.count</name>
<value>59</value>
</property>
<property>
<name>dfs.namenode.name.dir.restore</name>
<value>false</value>
</property>
<property>
<name>dfs.thrift.threads.max</name>
<value>20</value>
</property>
<property>
<name>dfs.thrift.threads.min</name>
<value>10</value>
</property>
<property>
<name>dfs.thrift.timeout</name>
<value>60</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.safemode.threshold-pct</name>
<value>0.999</value>
</property>
<property>
<name>dfs.namenode.invalidate.work.pct.per.iteration</name>
<value>0.32</value>
</property>
<property>
<name>dfs.namenode.replication.work.multiplier.per.iteration</name>
<value>10</value>
</property>
<property>
<name>dfs.namenode.replication.max-streams</name>
<value>20</value>
</property>
<property>
<name>dfs.namenode.replication.max-streams-hard-limit</name>
<value>40</value>
</property>
<property>
<name>dfs.namenode.avoid.read.stale.datanode</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.snapshot.capture.openfiles</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.avoid.write.stale.datanode</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.stale.datanode.interval</name>
<value>30000</value>
</property>
<property>
<name>dfs.namenode.write.stale.datanode.ratio</name>
<value>0.5</value>
</property>
<property>
<name>dfs.namenode.safemode.min.datanodes</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.safemode.extension</name>
<value>30000</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>false</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
<property>
<name>dfs.encrypt.data.transfer</name>
<value>false</value>
</property>
<property>
<name>dfs.encrypt.data.transfer.algorithm</name>
<value>rc4</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.access.time.precision</name>
<value>3600000</value>
</property>
<property>
<name>dfs.qjournal.write-txns.timeout.ms</name>
<value>20000</value>
</property>
<property>
<name>dfs.qjournal.start-segment.timeout.ms</name>
<value>20000</value>
</property>
<property>
<name>dfs.qjournal.prepare-recovery.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.accept-recovery.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.finalize-segment.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.select-input-streams.timeout.ms</name>
<value>20000</value>
</property>
<property>
<name>dfs.qjournal.get-journal-state.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.new-epoch.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
<value>true</value>
</property>
</configuration>

View File

@ -20,3 +20,6 @@ spring.jpa.open-in-view=true
# logs # logs
logging.level.io.swagger.models.parameters.AbstractSerializableParameter = error logging.level.io.swagger.models.parameters.AbstractSerializableParameter = error
dhp.mdstore-manager.hdfs.base-path = /tmp/mdstoremanager_dev

View File

@ -10,7 +10,9 @@ CREATE TABLE mdstores (
interpretation text, interpretation text,
datasource_name text, datasource_name text,
datasource_id text, datasource_id text,
api_id text api_id text,
creation_date timestamp,
hdfs_path text
); );
CREATE TABLE mdstore_versions ( CREATE TABLE mdstore_versions (
@ -19,7 +21,8 @@ CREATE TABLE mdstore_versions (
writing boolean, writing boolean,
readcount int, readcount int,
lastupdate timestamp, lastupdate timestamp,
size bigint size bigint,
hdfs_path text
); );
CREATE TABLE mdstore_current_versions ( CREATE TABLE mdstore_current_versions (
@ -35,6 +38,8 @@ CREATE VIEW mdstores_with_info AS SELECT
md.datasource_name AS datasource_name, md.datasource_name AS datasource_name,
md.datasource_id AS datasource_id, md.datasource_id AS datasource_id,
md.api_id AS api_id, md.api_id AS api_id,
md.hdfs_path as hdfs_path,
md.creation_date as creation_date,
cv.current_version AS current_version, cv.current_version AS current_version,
v1.lastupdate AS lastupdate, v1.lastupdate AS lastupdate,
v1.size AS size, v1.size AS size,
@ -42,15 +47,17 @@ CREATE VIEW mdstores_with_info AS SELECT
FROM FROM
mdstores md mdstores md
LEFT OUTER JOIN mdstore_current_versions cv ON (md.id = cv.mdstore) LEFT OUTER JOIN mdstore_current_versions cv ON (md.id = cv.mdstore)
LEFT OUTER JOIN mdstore_versions v1 ON (cv.current_version = v1.id) LEFT OUTER JOIN mdstore_versions v1 ON (cv.current_version = v1.id)
LEFT OUTER JOIN mdstore_versions v2 ON (md.id = v2.mdstore) LEFT OUTER JOIN mdstore_versions v2 ON (md.id = v2.mdstore)
GROUP BY md.id, GROUP BY md.id,
md.format, md.format,
md.layout, md.layout,
md.interpretation, md.interpretation,
md.datasource_name, md.datasource_name,
md.datasource_id, md.datasource_id,
md.hdfs_path,
md.creation_date,
md.api_id, md.api_id,
cv.current_version, cv.current_version,
v1.lastupdate, v1.lastupdate,
v1.size; v1.size;

View File

@ -18,63 +18,71 @@
</style> </style>
<body ng-app="mdstoreManagerApp" ng-controller="mdstoreManagerController"> <body ng-app="mdstoreManagerApp" ng-controller="mdstoreManagerController">
<div class="container-fluid">
<div class="row"> <div class="row">
<div class="col-xs-12 col-md-offset-1 col-md-10"> <div class="col-xs-12">
<h1>Metadata Store Manager</h1> <h1>Metadata Store Manager</h1>
<hr /> <hr />
<div>
<a href="/doc" target="_blank">API documentation</a> <a href="/doc" target="_blank">API documentation</a>
</div> <hr />
<a href="javascript:void(0)" data-toggle="modal" data-target="#newMdstoreModal">create a new mdstore</a>
<hr /> <hr />
<table class="table table-striped small"> <p ng-show="mdstores.length > 0">
<thead> <input type="text" class="form-control form-control-sm" ng-model="mdstoreFilter" placeholder="Filter..."/>
<tr> </p>
<th class="col-xs-4">ID</th>
<th class="col-xs-2">Format / Layout / Interpretation</th> <div class="panel panel-primary" ng-repeat="md in mdstores | filter:mdstoreFilter">
<th class="col-xs-3">Datasource</th> <div class="panel-heading">{{md.id}}</div>
<th class="col-xs-1 text-center">Last Update</th>
<th class="col-xs-1 text-right">Size</th> <table class="table table-striped small">
<th class="col-xs-1 text-right">Versions</th> <tr>
</tr> <th class="col-xs-4">Format / Layout / Interpretation</th>
</thead> <td class="col-xs-8">{{md.format}} / {{md.layout}} / {{md.interpretation}}</td>
<tbody> </tr><tr>
<tr> <th class="col-xs-4">Datasource</th>
<td colspan="6"> <td class="col-xs-8">
<a href="javascript:void(0)" data-toggle="modal" data-target="#newMdstoreModal">create a new mdstore</a> <span ng-if="md.datasourceName">
</td> {{md.datasourceName}}<br />
</tr> <small>
<tr ng-repeat="md in mdstores"> <b>id: </b>{{md.datasourceId}}
<td><button class="btn btn-xs btn-danger" ng-click="deleteMdstore(md.id)">delete</button> {{md.id}}</td> <b>api: </b>{{md.apiId}}
<td>{{md.format}} / {{md.layout}} / {{md.interpretation}}</td> </small>
<td> </span>
<span ng-if="md.datasourceName"> </td>
{{md.datasourceName}}<br /> </tr><tr>
<small> <th class="col-xs-4">Creation Date</th>
<b>id: </b>{{md.datasourceId}} <td class="col-xs-8" title="{{md.creationDate}}">{{md.creationDate | date:"fullDate"}}</td>
<b>api: </b>{{md.apiId}} </tr><tr>
</small> <th class="col-xs-4">Last Update</th>
</span> <td class="col-xs-8" title="{{md.lastUpdate}}">{{md.lastUpdate | date:"fullDate"}}</td>
</td> </tr><tr>
<td class="text-center" title="{{md.lastUpdate}}">{{md.lastUpdate | date:"MMM dd, yyyy 'at' HH:mm"}}</td> <th class="col-xs-4">Size</th>
<td class="text-right">{{md.size}}</td> <td class="col-xs-8">{{md.size}}</td>
<td class="text-right"> </tr><tr>
<a href="javascript:void(0)" ng-click="listVersions(md.id, md.currentVersion)" data-toggle="modal" data-target="#versionsModal" title="Current: {{md.currentVersion}}">{{md.numberOfVersions}} version(s)</a> / <th class="col-xs-4">HDFS Path</th>
<a href="javascript:void(0)" ng-click="prepareVersion(md.id, md.currentVersion)" data-toggle="modal" data-target="#versionsModal">new</a> <td class="col-xs-8">{{md.hdfsPath}}</td>
</td> </tr><tr>
<th class="col-xs-4">Versions</th>
</tr> <td class="col-xs-8">
</tbody> <a href="javascript:void(0)" ng-click="listVersions(md.id, md.currentVersion)" data-toggle="modal" data-target="#versionsModal" title="Current: {{md.currentVersion}}">{{md.numberOfVersions}} version(s)</a> /
</table> <a href="javascript:void(0)" ng-click="prepareVersion(md.id, md.currentVersion)" data-toggle="modal" data-target="#versionsModal">prepare new</a>
</td>
</tr>
</table>
<div class="panel-footer">
<button class="btn btn-sm btn-danger" ng-click="deleteMdstore(md.id)">delete</button>
</div>
</div>
</div>
</div> </div>
</div> </div>
<!-- Modals -->
<div class="modal fade" tabindex="-1" id="newMdstoreModal"> <div class="modal fade" tabindex="-1" id="newMdstoreModal">
<div class="modal-dialog modal-lg"> <div class="modal-dialog modal-lg">
<div class="modal-content"> <div class="modal-content">
@ -143,7 +151,10 @@
</thead> </thead>
<tbody> <tbody>
<tr ng-repeat="v in versions"> <tr ng-repeat="v in versions">
<td ng-class="{'text-success': v.current}"><span class="glyphicon glyphicon-pencil" ng-if="v.writing" title="writing..."></span> {{v.id}}</td> <td>
<span class="glyphicon glyphicon-pencil" ng-if="v.writing" title="writing..."></span> <span ng-class="{'text-success': v.current}">{{v.id}}</span><br />
<span class="small"><b>Path:</b> {{v.hdfsPath}}</span>
</td>
<td class="text-center"> <td class="text-center">
{{v.readCount}} {{v.readCount}}
<button class="btn btn-xs btn-warning" ng-click="resetReading(v.id)" ng-disabled="v.readCount == 0">reset</button> <button class="btn btn-xs btn-warning" ng-click="resetReading(v.id)" ng-disabled="v.readCount == 0">reset</button>