2021-01-25 14:16:37 +01:00
package eu.dnetlib.data.mdstore.manager.controller ;
2021-01-28 11:18:50 +01:00
import java.util.LinkedHashMap ;
2021-01-25 14:16:37 +01:00
import java.util.List ;
2021-01-28 11:18:50 +01:00
import java.util.Map ;
2021-02-01 16:40:54 +01:00
import java.util.Set ;
2021-01-25 14:16:37 +01:00
2021-01-28 12:59:06 +01:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2021-01-25 14:16:37 +01:00
import org.springframework.beans.factory.annotation.Autowired ;
2021-01-25 16:40:35 +01:00
import org.springframework.web.bind.annotation.DeleteMapping ;
import org.springframework.web.bind.annotation.GetMapping ;
2021-01-25 14:16:37 +01:00
import org.springframework.web.bind.annotation.PathVariable ;
import org.springframework.web.bind.annotation.RequestMapping ;
import org.springframework.web.bind.annotation.RequestParam ;
import org.springframework.web.bind.annotation.RestController ;
2021-02-01 16:40:54 +01:00
import com.google.common.collect.Sets ;
2021-01-27 12:17:38 +01:00
import eu.dnetlib.common.controller.AbstractDnetController ;
2021-01-25 14:16:37 +01:00
import eu.dnetlib.data.mdstore.manager.exceptions.MDStoreManagerException ;
2021-01-25 16:40:35 +01:00
import eu.dnetlib.data.mdstore.manager.utils.DatabaseUtils ;
2021-01-28 15:55:22 +01:00
import eu.dnetlib.data.mdstore.manager.utils.HdfsClient ;
2021-10-29 10:43:42 +02:00
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion ;
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo ;
2022-08-19 13:27:36 +02:00
import io.swagger.v3.oas.annotations.Operation ;
import io.swagger.v3.oas.annotations.Parameter ;
import io.swagger.v3.oas.annotations.tags.Tag ;
2021-01-25 14:16:37 +01:00
@RestController
@RequestMapping ( " /mdstores " )
2022-08-19 13:27:36 +02:00
@Tag ( name = " Metadata Stores " )
2021-01-26 13:34:39 +01:00
public class MDStoreController extends AbstractDnetController {
2021-01-25 14:16:37 +01:00
@Autowired
2021-01-25 16:40:35 +01:00
private DatabaseUtils databaseUtils ;
2021-01-25 14:16:37 +01:00
2021-01-28 15:55:22 +01:00
@Autowired
private HdfsClient hdfsClient ;
2021-01-28 12:59:06 +01:00
private static final Logger log = LoggerFactory . getLogger ( DatabaseUtils . class ) ;
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Return all the mdstores " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " / " )
public Iterable < MDStoreWithInfo > find ( ) {
return databaseUtils . listMdStores ( ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Return all the mdstore identifiers " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /ids " )
2021-01-25 14:16:37 +01:00
public List < String > findIdentifiers ( ) {
2021-01-25 16:40:35 +01:00
return databaseUtils . listMdStoreIDs ( ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Return a mdstores by id " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /mdstore/{mdId} " )
2022-08-19 13:27:36 +02:00
public MDStoreWithInfo getMdStore ( @Parameter ( name = " the mdstore identifier " ) @PathVariable final String mdId ) throws MDStoreManagerException {
2021-01-25 16:40:35 +01:00
return databaseUtils . findMdStore ( mdId ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Increase the read count of the current mdstore " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /mdstore/{mdId}/startReading " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion startReading ( @Parameter ( name = " the mdstore identifier " ) @PathVariable final String mdId ) throws MDStoreManagerException {
2021-01-25 16:40:35 +01:00
return databaseUtils . startReading ( mdId ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Create a new mdstore " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /new/{format}/{layout}/{interpretation} " )
2021-01-25 14:16:37 +01:00
public MDStoreWithInfo createMDStore (
2022-08-19 13:27:36 +02:00
@Parameter ( name = " mdstore format " ) @PathVariable final String format ,
@Parameter ( name = " mdstore layout " ) @PathVariable final String layout ,
@Parameter ( name = " mdstore interpretation " ) @PathVariable final String interpretation ,
@Parameter ( name = " datasource name " ) @RequestParam ( required = true ) final String dsName ,
@Parameter ( name = " datasource id " ) @RequestParam ( required = true ) final String dsId ,
@Parameter ( name = " api id " ) @RequestParam ( required = true ) final String apiId ) throws MDStoreManagerException {
2021-01-25 16:40:35 +01:00
final String id = databaseUtils . createMDStore ( format , layout , interpretation , dsName , dsId , apiId ) ;
return databaseUtils . findMdStore ( id ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Delete a mdstore by id " )
2021-01-25 16:40:35 +01:00
@DeleteMapping ( " /mdstore/{mdId} " )
2022-08-19 13:27:36 +02:00
public StatusResponse delete ( @Parameter ( name = " the id of the mdstore that will be deleted " ) @PathVariable final String mdId ) throws MDStoreManagerException {
2021-01-28 15:55:22 +01:00
final String hdfsPath = databaseUtils . deleteMdStore ( mdId ) ;
hdfsClient . deletePath ( hdfsPath ) ;
2021-01-25 16:40:35 +01:00
return StatusResponse . DELETED ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Return all the versions of a mdstore " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /mdstore/{mdId}/versions " )
public Iterable < MDStoreVersion > listVersions ( @PathVariable final String mdId ) throws MDStoreManagerException {
return databaseUtils . listVersions ( mdId ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Create a new preliminary version of a mdstore " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /mdstore/{mdId}/newVersion " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion prepareNewVersion ( @Parameter ( name = " the id of the mdstore for which will be created a new version " ) @PathVariable final String mdId ) {
2021-01-25 16:40:35 +01:00
return databaseUtils . prepareMdStoreVersion ( mdId ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Promote a preliminary version to current " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /version/{versionId}/commit/{size} " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion commitVersion ( @Parameter ( name = " the id of the version that will be promoted to the current version " ) @PathVariable final String versionId ,
@Parameter ( name = " the size of the new current mdstore " ) @PathVariable final long size ) throws MDStoreManagerException {
2021-02-01 16:40:54 +01:00
try {
return databaseUtils . commitMdStoreVersion ( versionId , size ) ;
} finally {
deleteExpiredVersions ( ) ;
}
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Abort a preliminary version " )
2021-01-28 15:55:22 +01:00
@GetMapping ( " /version/{versionId}/abort " )
2022-08-19 13:27:36 +02:00
public StatusResponse commitVersion ( @Parameter ( name = " the id of the version to abort " ) @PathVariable final String versionId ) throws MDStoreManagerException {
2021-01-28 15:55:22 +01:00
final String hdfsPath = databaseUtils . deleteMdStoreVersion ( versionId , true ) ;
hdfsClient . deletePath ( hdfsPath ) ;
return StatusResponse . ABORTED ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Return an existing mdstore version " )
2021-01-29 13:15:21 +01:00
@GetMapping ( " /version/{versionId} " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion getVersion ( @Parameter ( name = " the id of the version that has to be deleted " ) @PathVariable final String versionId )
2021-01-29 13:15:21 +01:00
throws MDStoreManagerException {
return databaseUtils . findVersion ( versionId ) ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Delete a mdstore version " )
2021-01-25 16:40:35 +01:00
@DeleteMapping ( " /version/{versionId} " )
2022-08-19 13:27:36 +02:00
public StatusResponse deleteVersion ( @Parameter ( name = " the id of the version that has to be deleted " ) @PathVariable final String versionId ,
@Parameter ( name = " if true, the controls on writing and readcount values will be skipped " ) @RequestParam ( required = false , defaultValue = " false " ) final boolean force )
2021-01-25 16:40:35 +01:00
throws MDStoreManagerException {
2021-01-28 15:55:22 +01:00
final String hdfsPath = databaseUtils . deleteMdStoreVersion ( versionId , force ) ;
hdfsClient . deletePath ( hdfsPath ) ;
2021-01-25 16:40:35 +01:00
return StatusResponse . DELETED ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Decrease the read count of a mdstore version " )
2021-01-25 16:40:35 +01:00
@GetMapping ( " /version/{versionId}/endReading " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion endReading ( @Parameter ( name = " the id of the version that has been completely read " ) @PathVariable final String versionId )
2021-01-25 16:40:35 +01:00
throws MDStoreManagerException {
return databaseUtils . endReading ( versionId ) ;
2021-01-25 14:16:37 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Reset the read count of a mdstore version " )
2021-01-26 13:34:39 +01:00
@GetMapping ( " /version/{versionId}/resetReading " )
2022-08-19 13:27:36 +02:00
public MDStoreVersion resetReading ( @Parameter ( name = " the id of the version " ) @PathVariable final String versionId )
2021-01-26 13:34:39 +01:00
throws MDStoreManagerException {
return databaseUtils . resetReading ( versionId ) ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Delete expired versions " )
2021-01-28 15:55:22 +01:00
@DeleteMapping ( " /versions/expired " )
2021-01-28 12:59:06 +01:00
public StatusResponse deleteExpiredVersions ( ) {
2021-02-02 08:10:28 +01:00
new Thread ( this : : performDeleteOfExpiredVersions ) . start ( ) ;
2021-01-28 12:59:06 +01:00
return StatusResponse . DELETING ;
2021-01-25 14:16:37 +01:00
}
2021-01-28 11:18:50 +01:00
2021-02-02 08:10:28 +01:00
private synchronized void performDeleteOfExpiredVersions ( ) {
2021-02-02 08:15:06 +01:00
log . info ( " Deleting expired version... " ) ;
2021-02-02 08:10:28 +01:00
for ( final String versionId : databaseUtils . listExpiredVersions ( ) ) {
try {
final String hdfsPath = databaseUtils . deleteMdStoreVersion ( versionId , true ) ;
hdfsClient . deletePath ( hdfsPath ) ;
} catch ( final MDStoreManagerException e ) {
log . warn ( " Error deleteting version " + versionId , e ) ;
}
}
2021-02-02 08:15:06 +01:00
log . info ( " Done. " ) ;
2021-02-02 08:10:28 +01:00
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Fix the inconsistencies on HDFS " )
2021-02-01 16:40:54 +01:00
@GetMapping ( " /hdfs/inconsistencies " )
public Set < String > fixHdfsInconsistencies (
2022-08-19 13:27:36 +02:00
@Parameter ( name = " force the deletion of hdfs paths " ) @RequestParam ( required = false , defaultValue = " false " ) final boolean delete )
2021-02-01 16:40:54 +01:00
throws MDStoreManagerException {
final Set < String > hdfsDirs = hdfsClient . listHadoopDirs ( ) ;
final Set < String > validDirs = databaseUtils . listValidHdfsPaths ( ) ;
final Set < String > toDelete = Sets . difference ( hdfsDirs , validDirs ) ;
2021-02-02 08:15:06 +01:00
log . info ( " Found " + toDelete . size ( ) + " hdfs paths to remove " ) ;
2021-02-01 16:40:54 +01:00
if ( delete ) {
for ( final String p : toDelete ) {
hdfsClient . deletePath ( p ) ;
}
}
return toDelete ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " Show informations " )
2021-01-28 11:18:50 +01:00
@GetMapping ( " /info " )
public Map < String , Object > info ( ) {
final Map < String , Object > info = new LinkedHashMap < > ( ) ;
info . put ( " number_of_mdstores " , databaseUtils . countMdStores ( ) ) ;
2021-01-29 15:45:20 +01:00
info . put ( " hadoop_user " , hdfsClient . getHadoopUser ( ) ) ;
2021-01-28 15:55:22 +01:00
info . put ( " hadoop_cluster " , hdfsClient . getHadoopCluster ( ) ) ;
2021-01-28 11:18:50 +01:00
info . put ( " hdfs_base_path " , databaseUtils . getHdfsBasePath ( ) ) ;
2021-01-28 12:59:06 +01:00
info . put ( " expired_versions " , databaseUtils . listExpiredVersions ( ) ) ;
2021-01-28 11:18:50 +01:00
return info ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " list the file inside the path of a mdstore version " )
2021-02-18 14:28:04 +01:00
@GetMapping ( " /version/{versionId}/parquet/files " )
public Set < String > listVersionFiles ( @PathVariable final String versionId ) throws MDStoreManagerException {
final String path = databaseUtils . findVersion ( versionId ) . getHdfsPath ( ) ;
return hdfsClient . listContent ( path + " /store " , HdfsClient : : isParquetFile ) ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " read the parquet file of a mdstore version " )
2021-02-18 14:28:04 +01:00
@GetMapping ( " /version/{versionId}/parquet/content/{limit} " )
2021-02-22 16:54:23 +01:00
public List < Map < String , String > > listVersionParquet ( @PathVariable final String versionId , @PathVariable final long limit ) throws MDStoreManagerException {
2021-02-18 14:28:04 +01:00
final String path = databaseUtils . findVersion ( versionId ) . getHdfsPath ( ) ;
return hdfsClient . readParquetFiles ( path + " /store " , limit ) ;
}
2022-08-19 11:20:13 +02:00
@Operation ( summary = " read the parquet file of a mdstore (current version) " )
2021-02-19 15:52:14 +01:00
@GetMapping ( " /mdstore/{mdId}/parquet/content/{limit} " )
2021-02-22 16:54:23 +01:00
public List < Map < String , String > > listMdstoreParquet ( @PathVariable final String mdId , @PathVariable final long limit ) throws MDStoreManagerException {
2021-02-19 15:52:14 +01:00
final String versionId = databaseUtils . findMdStore ( mdId ) . getCurrentVersion ( ) ;
final String path = databaseUtils . findVersion ( versionId ) . getHdfsPath ( ) ;
return hdfsClient . readParquetFiles ( path + " /store " , limit ) ;
}
2021-02-10 15:31:34 +01:00
/**
 * Replaces the {@code DatabaseUtils} instance used by this controller.
 * NOTE(review): protected visibility, presumably so tests or subclasses can inject a substitute.
 *
 * @param databaseUtils the instance to use from now on
 */
protected void setDatabaseUtils(final DatabaseUtils databaseUtils) {
    this.databaseUtils = databaseUtils;
}
/**
 * Replaces the {@code HdfsClient} instance used by this controller.
 * NOTE(review): protected visibility, presumably so tests or subclasses can inject a substitute.
 *
 * @param hdfsClient the instance to use from now on
 */
protected void setHdfsClient(final HdfsClient hdfsClient) {
    this.hdfsClient = hdfsClient;
}
2021-01-25 14:16:37 +01:00
}