mdstore streams

This commit is contained in:
Michele Artini 2023-04-18 16:13:42 +02:00
parent a855a6afff
commit c7eef9a4e9
10 changed files with 234 additions and 60 deletions

View File

@ -1,72 +1,74 @@
<div *ngIf="conf" style="padding: 16px; margin-top: 0.4em; border: 1px solid lightgray; border-radius: 8px;">
<mat-card *ngIf="conf" style="margin-top: 0.4em;">
<mat-card-header>
<mat-card-title>{{conf.name}}</mat-card-title>
<mat-card-subtitle *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}</mat-card-subtitle>
<mat-card-subtitle *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}</mat-card-subtitle>
<mat-card-subtitle *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}</mat-card-subtitle>
</mat-card-header>
<mat-card-content style="padding-top: 1em;">
<button mat-stroked-button color="primary" (click)="launchWfConf()">
<mat-icon fontIcon="play_circle"></mat-icon>
launch
</button>
<button mat-stroked-button color="primary" (click)="editConf()">
<mat-icon fontIcon="edit"></mat-icon>
configure
</button>
<a href="./api/resources/{{conf.workflow}}/content" mat-stroked-button color="link" target="_blank">
<mat-icon fontIcon="code"></mat-icon>
raw workflow
</a>
<button mat-stroked-button color="warn" (click)="deleteConf()">
<mat-icon fontIcon="delete"></mat-icon>
delete
</button>
<h2>{{conf.name}}</h2>
<span *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}<br /></span>
<span *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}<br /></span>
<span *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}<br /></span>
<mat-divider style="margin-top: 1em; margin-bottom: 1em;"></mat-divider>
<button mat-stroked-button color="primary" (click)="launchWfConf()">
<mat-icon fontIcon="play_circle"></mat-icon>
launch
</button>
<button mat-stroked-button color="primary" (click)="editConf()">
<mat-icon fontIcon="edit"></mat-icon>
configure
</button>
<a href="./api/resources/{{conf.workflow}}/content" mat-stroked-button color="link" target="_blank">
<mat-icon fontIcon="code"></mat-icon>
raw workflow
</a>
<button mat-stroked-button color="warn" (click)="deleteConf()">
<mat-icon fontIcon="delete"></mat-icon>
delete
</button>
<table mat-table [dataSource]="historyDatasource" matSort>
<mat-divider style="margin-top: 1em; margin-bottom: 1em;"></mat-divider>
<ng-container matColumnDef="processId">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by Process ID"> Process Id </th>
<td mat-cell *matCellDef="let element">
<a (click)="openWfHistoryDialog(element)">{{element.processId}}</a>
</td>
</ng-container>
<table mat-table [dataSource]="historyDatasource" matSort>
<ng-container matColumnDef="status">
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header
sortActionDescription="Sort by Status">
Status </th>
<td mat-cell *matCellDef="let element"><span class="badge-label"
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
</td>
</ng-container>
<ng-container matColumnDef="processId">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by Process ID"> Process Id </th>
<td mat-cell *matCellDef="let element">
<a (click)="openWfHistoryDialog(element)">{{element.processId}}</a>
</td>
</ng-container>
<ng-container matColumnDef="startDate">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by Start Date"> Start Date </th>
<td mat-cell *matCellDef="let element"> {{element.startDate}} </td>
</ng-container>
<ng-container matColumnDef="status">
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header sortActionDescription="Sort by Status">
Status </th>
<td mat-cell *matCellDef="let element"><span class="badge-label"
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
</td>
</ng-container>
<ng-container matColumnDef="endDate">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by End Date">
End Date </th>
<td mat-cell *matCellDef="let element"> {{element.endDate}} </td>
</ng-container>
<ng-container matColumnDef="startDate">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by Start Date"> Start Date </th>
<td mat-cell *matCellDef="let element"> {{element.startDate}} </td>
</ng-container>
<ng-container matColumnDef="endDate">
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
sortActionDescription="Sort by End Date">
End Date </th>
<td mat-cell *matCellDef="let element"> {{element.endDate}} </td>
</ng-container>
<tr mat-header-row *matHeaderRowDef="colums"></tr>
<tr mat-row *matRowDef="let row; columns: colums;"></tr>
<!-- Row shown when there is no matching data. -->
<tr class="mat-row" *matNoDataRow>
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history</td>
</tr>
</table>
<tr mat-header-row *matHeaderRowDef="colums"></tr>
<tr mat-row *matRowDef="let row; columns: colums;"></tr>
<!-- Row shown when there is no matching data. -->
<tr class="mat-row" *matNoDataRow>
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history</td>
</tr>
</table>
</mat-card-content>
<!-- <pre>{{conf | json}}</pre> -->
</div>
</mat-card>
<div *ngIf="!conf" style="margin-top: 2em;">
Workflow Configuration does not exist

View File

@ -63,6 +63,7 @@
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -10,6 +10,7 @@ import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.transaction.Transactional;
@ -241,6 +242,12 @@ public class MDStoreService {
return selectBackend(md.getType()).listEntries(v, limit);
}
/**
 * Streams the records of an mdstore version.
 * <p>
 * The version is looked up first, then the mdstore it refers to; the actual reading is delegated to the backend
 * selected from the mdstore type.
 *
 * @param versionId
 *            identifier of the version to read
 * @return the stream returned by the selected backend's {@code streamEntries}
 * @throws MDStoreManagerException
 *             if the version or its mdstore cannot be found, or if the backend call fails
 */
public Stream<MetadataRecord> streamVersionRecords(final String versionId) throws MDStoreManagerException {
	final MDStoreVersion version = mdstoreVersionRepository
		.findById(versionId)
		.orElseThrow(() -> new MDStoreManagerException("Version not found"));

	final MDStore owner = mdstoreRepository
		.findById(version.getMdstore())
		.orElseThrow(() -> new MDStoreManagerException("MDStore not found"));

	return selectBackend(owner.getType()).streamEntries(version);
}
public MDStore newMDStore(
final String format,
final String layout,

View File

@ -0,0 +1,90 @@
package eu.dnetlib.data.mdstore;
import java.util.Iterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import eu.dnetlib.data.mdstore.model.MDStoreWithInfo;
import eu.dnetlib.data.mdstore.model.MetadataRecord;
import eu.dnetlib.errors.MDStoreManagerException;
/**
 * Exposes the records of the current version of an mdstore as a {@link Stream}, invoking
 * {@code startReading} / {@code endReading} on {@link MDStoreService} around the consumption of the stream.
 */
@Service
public class MDStoreStreamReader {

	@Autowired
	private MDStoreService mdStoreService;

	/** Lifecycle of one stream returned by {@link #prepareMDStoreStream(String)}. */
	private enum Status {
		/** Stream created, no record requested yet. */
		PREPARED,
		/** The first record has been requested and {@code startReading} has been called. */
		READING,
		/** The source was exhausted, or the stream was closed while reading. */
		COMPLETED,
		/** The source raised an error while being consumed. */
		FAILED
	}

	/**
	 * Prepares a sequential stream over the records of the current version of the given mdstore.
	 * <p>
	 * {@code startReading} is called lazily, on the first call to {@code next()}; {@code endReading} is called when the
	 * source is exhausted, when the source fails, or when the returned stream is closed while reading is in progress.
	 * Callers that may stop before the end of the stream (e.g. {@code limit}, {@code findFirst}) should close the
	 * returned stream, for instance with try-with-resources, so that the reading state is released.
	 * <p>
	 * TODO failures are raised while the stream is consumed and therefore surface as unchecked exceptions; a deeper
	 * refactoring is still needed to report them as {@link MDStoreManagerException}.
	 *
	 * @param mdstoreId
	 *            identifier of the mdstore to read
	 * @return a sequential stream of the records of the current version
	 * @throws MDStoreManagerException
	 *             if the mdstore lookup or the creation of the underlying stream fails
	 */
	public Stream<MetadataRecord> prepareMDStoreStream(final String mdstoreId) throws MDStoreManagerException {

		final MDStoreWithInfo mdstore = mdStoreService.findMdStore(mdstoreId);

		// Keep a handle on the source stream so that it can be closed together with the returned one.
		final Stream<MetadataRecord> innerStream = mdStoreService.streamVersionRecords(mdstore.getCurrentVersion());
		final Iterator<MetadataRecord> innerIterator = innerStream.iterator();

		/** Iterator that tracks the reading status and notifies the service on start, completion and failure. */
		class TrackingIterator implements Iterator<MetadataRecord> {

			private Status status = Status.PREPARED;

			@Override
			public boolean hasNext() {
				final boolean more;
				try {
					more = innerIterator.hasNext();
				} catch (final RuntimeException e) {
					// A lazy source can fail here as well: release the reading state, then rethrow unchanged.
					failQuietly(e);
					throw e;
				}

				if (more) { return true; }

				try {
					complete();
					return false;
				} catch (final MDStoreManagerException e) {
					throw new RuntimeException("Error reading mdstore", e);
				}
			}

			@Override
			public MetadataRecord next() {
				try {
					verifyStart();
					return innerIterator.next();
				} catch (final Throwable e) {
					failQuietly(e);
					throw new RuntimeException("Error reading mdstore", e);
				}
			}

			/** Marks the reading as failed; an error raised by the cleanup is attached to {@code cause} as suppressed. */
			private void failQuietly(final Throwable cause) {
				try {
					fail();
				} catch (final MDStoreManagerException e1) {
					cause.addSuppressed(e1);
				}
			}

			/** Calls {@code startReading} the first time a record is requested. */
			private synchronized void verifyStart() throws MDStoreManagerException {
				if (status == Status.PREPARED) {
					status = Status.READING;
					mdStoreService.startReading(mdstoreId);
				}
			}

			/**
			 * Calls {@code endReading} once the source is exhausted.
			 * NOTE(review): in the PREPARED state (empty source) endReading is called without a previous startReading
			 * - confirm this is intended.
			 */
			private synchronized void complete() throws MDStoreManagerException {
				if (status == Status.PREPARED || status == Status.READING) {
					status = Status.COMPLETED;
					mdStoreService.endReading(mdstoreId);
				}
			}

			/** Calls {@code endReading} after a failure, unless the reading has already been terminated. */
			private synchronized void fail() throws MDStoreManagerException {
				if (status == Status.PREPARED || status == Status.READING) {
					status = Status.FAILED;
					mdStoreService.endReading(mdstoreId);
				}
			}

			/** Calls {@code endReading} when the stream is closed in the middle of a reading; no-op otherwise. */
			private synchronized void closeWhileReading() throws MDStoreManagerException {
				if (status == Status.READING) {
					status = Status.COMPLETED;
					mdStoreService.endReading(mdstoreId);
				}
			}
		}

		final TrackingIterator reader = new TrackingIterator();

		return StreamSupport.stream(Spliterators.spliteratorUnknownSize(reader, 0), false).onClose(() -> {
			try {
				reader.closeWhileReading();
			} catch (final MDStoreManagerException e) {
				throw new RuntimeException("Error reading mdstore", e);
			} finally {
				innerStream.close();
			}
		});
	}
}

View File

@ -5,6 +5,7 @@ import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
@ -33,6 +34,11 @@ public class DefaultBackend implements MDStoreBackend {
return new ArrayList<>();
}
/**
 * Default implementation: always returns an empty stream, whatever the version.
 *
 * @param version
 *            the version to read (not used)
 * @return an empty stream
 */
@Override
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
	return Stream.<MetadataRecord> empty();
}
@Override
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
return new LinkedHashSet<>();

View File

@ -3,6 +3,7 @@ package eu.dnetlib.data.mdstore.backends;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@ -99,4 +100,14 @@ public class HdfsBackend implements MDStoreBackend {
}
}
/**
 * Streams the records stored in the parquet files under the {@code /store} sub-directory of the path held by the
 * {@code hdfs_path} parameter of the version.
 *
 * @param version
 *            the version to read
 * @return the stream produced by {@code hdfsClient.streamParquetFiles}
 * @throws MDStoreManagerException
 *             if the {@code hdfs_path} parameter is absent or blank, or if the client fails
 */
@Override
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
	final String hdfsPath = version.getParams().getOrDefault("hdfs_path", "").toString();

	// Guard clause: nothing can be read without a path.
	if (StringUtils.isBlank(hdfsPath)) {
		throw new MDStoreManagerException("hdfs path is missing");
	}

	return hdfsClient.streamParquetFiles(hdfsPath + "/store", MetadataRecord.class);
}
}

View File

@ -2,6 +2,7 @@ package eu.dnetlib.data.mdstore.backends;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import eu.dnetlib.data.mdstore.model.MDStore;
import eu.dnetlib.data.mdstore.model.MDStoreVersion;
@ -20,6 +21,8 @@ public interface MDStoreBackend {
List<MetadataRecord> listEntries(MDStoreVersion version, long limit) throws MDStoreManagerException;
/**
 * Returns the records of the given version as a stream.
 * NOTE(review): whether the stream is lazy and whether it must be closed by the caller depends on the
 * implementation - confirm per backend.
 *
 * @param version
 *            the version to read
 * @return a stream of the records of the version
 * @throws MDStoreManagerException
 *             if the records cannot be read
 */
Stream<MetadataRecord> streamEntries(MDStoreVersion version) throws MDStoreManagerException;
Set<String> listInternalFiles(MDStoreVersion version) throws MDStoreManagerException;
Set<String> fixInconsistencies(boolean delete) throws MDStoreManagerException;

View File

@ -6,6 +6,7 @@ import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
@ -59,6 +60,11 @@ public class MockBackend implements MDStoreBackend {
return list;
}
/**
 * Streams the mock records: the result of {@code listEntries} with a limit of 1000, exposed as a stream.
 *
 * @param version
 *            the version passed on to {@code listEntries}
 * @return a stream over the list returned by {@code listEntries}
 */
@Override
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
	final List<MetadataRecord> mockRecords = listEntries(version, 1000);
	return mockRecords.stream();
}
@Override
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
return new LinkedHashSet<>(Arrays.asList("file1", "file2", "file3", "file4"));

View File

@ -9,6 +9,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Stream;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.logging.Log;
@ -144,6 +145,52 @@ public class HdfsClient {
return list;
}
/**
 * Reads all the parquet files found under {@code path} and returns their records, converted to {@code clazz}, as a
 * stream.
 * <p>
 * The records are currently loaded into an in-memory list before the stream is returned, so every read error is
 * reported by this call and not while the stream is consumed. The field names are taken from the schema of the first
 * record read and reused for all the following records and files; each value is converted with {@code toString()}
 * (a null value becomes an empty string) before being mapped to {@code clazz}.
 *
 * @param path
 *            the directory whose parquet files must be read
 * @param clazz
 *            the target type of each record
 * @return a stream over the converted records
 * @throws MDStoreManagerException
 *             if a parquet file cannot be read (a file that is not found is only logged and skipped)
 */
public <T> Stream<T> streamParquetFiles(final String path, final Class<T> clazz) throws MDStoreManagerException {

	// TODO Re-implement the method without list
	final List<T> list = new ArrayList<>();

	final Configuration conf = conf();
	final Set<String> fields = new LinkedHashSet<>();

	// One mapper is enough for all the files: no need to build a new one per file.
	final ObjectMapper mapper = new ObjectMapper();

	for (final String f : listContent(path, HdfsClient::isParquetFile)) {
		log.info("Opening parquet file: " + f);

		try (final ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord> builder(new Path(f)).withConf(conf).build()) {
			log.debug("File parquet OPENED");

			GenericRecord rec = null;
			while ((rec = reader.read()) != null) {
				if (fields.isEmpty()) {
					rec.getSchema().getFields().forEach(field -> fields.add(field.name()));
					log.debug("Found schema: " + fields);
				}
				final Map<String, String> map = new LinkedHashMap<>();
				for (final String field : fields) {
					final Object v = rec.get(field);
					map.put(field, v != null ? v.toString() : "");
				}
				list.add(mapper.convertValue(map, clazz));
				log.debug("added record");
			}
		} catch (final FileNotFoundException e) {
			// Best effort: a file that disappeared is skipped; report the file that is actually missing.
			log.warn("Missing path: " + f);
		} catch (final Throwable e) {
			log.error("Error reading parquet file: " + f, e);
			throw new MDStoreManagerException("Error reading parquet file: " + f, e);
		}
	}
	return list.stream();
}
/*
*
* private String printGroup(final Group g) { final StringWriter sw = new StringWriter();

View File

@ -2,5 +2,6 @@ package eu.dnetlib.data.mdstore.model;
/**
 * Types of mdstore.
 * NOTE(review): presumably this is the value used to select the MDStoreBackend implementation that serves an
 * mdstore - confirm against the backend selection code.
 */
public enum MDStoreType {
HDFS,
MOCK
MOCK,
SQL_DB
}