mdstore streams
This commit is contained in:
parent
a855a6afff
commit
c7eef9a4e9
|
@ -1,72 +1,74 @@
|
|||
<div *ngIf="conf" style="padding: 16px; margin-top: 0.4em; border: 1px solid lightgray; border-radius: 8px;">
|
||||
<mat-card *ngIf="conf" style="margin-top: 0.4em;">
|
||||
<mat-card-header>
|
||||
<mat-card-title>{{conf.name}}</mat-card-title>
|
||||
<mat-card-subtitle *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}</mat-card-subtitle>
|
||||
<mat-card-subtitle *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}</mat-card-subtitle>
|
||||
<mat-card-subtitle *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}</mat-card-subtitle>
|
||||
</mat-card-header>
|
||||
<mat-card-content style="padding-top: 1em;">
|
||||
<button mat-stroked-button color="primary" (click)="launchWfConf()">
|
||||
<mat-icon fontIcon="play_circle"></mat-icon>
|
||||
launch
|
||||
</button>
|
||||
<button mat-stroked-button color="primary" (click)="editConf()">
|
||||
<mat-icon fontIcon="edit"></mat-icon>
|
||||
configure
|
||||
</button>
|
||||
<a href="./api/resources/{{conf.workflow}}/content" mat-stroked-button color="link" target="_blank">
|
||||
<mat-icon fontIcon="code"></mat-icon>
|
||||
raw workflow
|
||||
</a>
|
||||
<button mat-stroked-button color="warn" (click)="deleteConf()">
|
||||
<mat-icon fontIcon="delete"></mat-icon>
|
||||
delete
|
||||
</button>
|
||||
|
||||
<h2>{{conf.name}}</h2>
|
||||
<span *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}<br /></span>
|
||||
<span *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}<br /></span>
|
||||
<span *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}<br /></span>
|
||||
<mat-divider style="margin-top: 1em; margin-bottom: 1em;"></mat-divider>
|
||||
|
||||
<button mat-stroked-button color="primary" (click)="launchWfConf()">
|
||||
<mat-icon fontIcon="play_circle"></mat-icon>
|
||||
launch
|
||||
</button>
|
||||
<button mat-stroked-button color="primary" (click)="editConf()">
|
||||
<mat-icon fontIcon="edit"></mat-icon>
|
||||
configure
|
||||
</button>
|
||||
<a href="./api/resources/{{conf.workflow}}/content" mat-stroked-button color="link" target="_blank">
|
||||
<mat-icon fontIcon="code"></mat-icon>
|
||||
raw workflow
|
||||
</a>
|
||||
<button mat-stroked-button color="warn" (click)="deleteConf()">
|
||||
<mat-icon fontIcon="delete"></mat-icon>
|
||||
delete
|
||||
</button>
|
||||
<table mat-table [dataSource]="historyDatasource" matSort>
|
||||
|
||||
<mat-divider style="margin-top: 1em; margin-bottom: 1em;"></mat-divider>
|
||||
<ng-container matColumnDef="processId">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by Process ID"> Process Id </th>
|
||||
<td mat-cell *matCellDef="let element">
|
||||
<a (click)="openWfHistoryDialog(element)">{{element.processId}}</a>
|
||||
</td>
|
||||
</ng-container>
|
||||
|
||||
<table mat-table [dataSource]="historyDatasource" matSort>
|
||||
<ng-container matColumnDef="status">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header
|
||||
sortActionDescription="Sort by Status">
|
||||
Status </th>
|
||||
<td mat-cell *matCellDef="let element"><span class="badge-label"
|
||||
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
|
||||
</td>
|
||||
</ng-container>
|
||||
|
||||
<ng-container matColumnDef="processId">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by Process ID"> Process Id </th>
|
||||
<td mat-cell *matCellDef="let element">
|
||||
<a (click)="openWfHistoryDialog(element)">{{element.processId}}</a>
|
||||
</td>
|
||||
</ng-container>
|
||||
<ng-container matColumnDef="startDate">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by Start Date"> Start Date </th>
|
||||
<td mat-cell *matCellDef="let element"> {{element.startDate}} </td>
|
||||
</ng-container>
|
||||
|
||||
<ng-container matColumnDef="status">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header sortActionDescription="Sort by Status">
|
||||
Status </th>
|
||||
<td mat-cell *matCellDef="let element"><span class="badge-label"
|
||||
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
|
||||
</td>
|
||||
</ng-container>
|
||||
<ng-container matColumnDef="endDate">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by End Date">
|
||||
End Date </th>
|
||||
<td mat-cell *matCellDef="let element"> {{element.endDate}} </td>
|
||||
</ng-container>
|
||||
|
||||
<ng-container matColumnDef="startDate">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by Start Date"> Start Date </th>
|
||||
<td mat-cell *matCellDef="let element"> {{element.startDate}} </td>
|
||||
</ng-container>
|
||||
|
||||
<ng-container matColumnDef="endDate">
|
||||
<th mat-header-cell *matHeaderCellDef style="width: 15%;" mat-sort-header
|
||||
sortActionDescription="Sort by End Date">
|
||||
End Date </th>
|
||||
<td mat-cell *matCellDef="let element"> {{element.endDate}} </td>
|
||||
</ng-container>
|
||||
|
||||
<tr mat-header-row *matHeaderRowDef="colums"></tr>
|
||||
<tr mat-row *matRowDef="let row; columns: colums;"></tr>
|
||||
|
||||
<!-- Row shown when there is no matching data. -->
|
||||
<tr class="mat-row" *matNoDataRow>
|
||||
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history"</td>
|
||||
</tr>
|
||||
</table>
|
||||
<tr mat-header-row *matHeaderRowDef="colums"></tr>
|
||||
<tr mat-row *matRowDef="let row; columns: colums;"></tr>
|
||||
|
||||
<!-- Row shown when there is no matching data. -->
|
||||
<tr class="mat-row" *matNoDataRow>
|
||||
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history"</td>
|
||||
</tr>
|
||||
</table>
|
||||
</mat-card-content>
|
||||
<!-- <pre>{{conf | json}}</pre> -->
|
||||
|
||||
</div>
|
||||
</mat-card>
|
||||
|
||||
<div *ngIf="!conf" style="margin-top: 2em;">
|
||||
Workflow Configuration does not exist
|
||||
|
|
|
@ -63,6 +63,7 @@
|
|||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.util.Set;
|
|||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import javax.transaction.Transactional;
|
||||
|
@ -241,6 +242,12 @@ public class MDStoreService {
|
|||
return selectBackend(md.getType()).listEntries(v, limit);
|
||||
}
|
||||
|
||||
public Stream<MetadataRecord> streamVersionRecords(final String versionId) throws MDStoreManagerException {
|
||||
final MDStoreVersion v = mdstoreVersionRepository.findById(versionId).orElseThrow(() -> new MDStoreManagerException("Version not found"));
|
||||
final MDStore md = mdstoreRepository.findById(v.getMdstore()).orElseThrow(() -> new MDStoreManagerException("MDStore not found"));
|
||||
return selectBackend(md.getType()).streamEntries(v);
|
||||
}
|
||||
|
||||
public MDStore newMDStore(
|
||||
final String format,
|
||||
final String layout,
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
package eu.dnetlib.data.mdstore;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import eu.dnetlib.data.mdstore.model.MDStoreWithInfo;
|
||||
import eu.dnetlib.data.mdstore.model.MetadataRecord;
|
||||
import eu.dnetlib.errors.MDStoreManagerException;
|
||||
|
||||
@Service
|
||||
public class MDStoreStreamReader {
|
||||
|
||||
@Autowired
|
||||
private MDStoreService mdStoreService;
|
||||
|
||||
private enum Status {
|
||||
PREPARED,
|
||||
READING,
|
||||
COMPLETED,
|
||||
FAILED
|
||||
}
|
||||
|
||||
// TODO the failure could be throw consuming the stream, so it is necessary to perform a refactoring of this method
|
||||
public Stream<MetadataRecord> prepareMDStoreStream(final String mdstoreId) throws MDStoreManagerException {
|
||||
|
||||
final MDStoreWithInfo mdstore = mdStoreService.findMdStore(mdstoreId);
|
||||
final Iterator<MetadataRecord> innerIterator = mdStoreService.streamVersionRecords(mdstore.getCurrentVersion()).iterator();
|
||||
|
||||
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new Iterator<>() {
|
||||
|
||||
private Status status = Status.PREPARED;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (innerIterator.hasNext()) {
|
||||
return true;
|
||||
} else {
|
||||
try {
|
||||
complete();
|
||||
return false;
|
||||
} catch (final MDStoreManagerException e) {
|
||||
throw new RuntimeException("Error reading mdstore", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public MetadataRecord next() {
|
||||
try {
|
||||
verifyStart();
|
||||
return innerIterator.next();
|
||||
} catch (final Throwable e) {
|
||||
try {
|
||||
fail();
|
||||
throw new RuntimeException("Error reading mdstore", e);
|
||||
} catch (final MDStoreManagerException e1) {
|
||||
throw new RuntimeException("Error reading mdstore", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void verifyStart() throws MDStoreManagerException {
|
||||
if (status == Status.PREPARED) {
|
||||
status = Status.READING;
|
||||
mdStoreService.startReading(mdstoreId);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void complete() throws MDStoreManagerException {
|
||||
if (status == Status.PREPARED || status == Status.READING) {
|
||||
status = Status.COMPLETED;
|
||||
mdStoreService.endReading(mdstoreId);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void fail() throws MDStoreManagerException {
|
||||
if (status == Status.PREPARED || status == Status.READING) {
|
||||
status = Status.FAILED;
|
||||
mdStoreService.endReading(mdstoreId);
|
||||
}
|
||||
}
|
||||
}, 0), false);
|
||||
}
|
||||
|
||||
}
|
|
@ -5,6 +5,7 @@ import java.util.HashSet;
|
|||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
|
@ -33,6 +34,11 @@ public class DefaultBackend implements MDStoreBackend {
|
|||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
||||
return new LinkedHashSet<>();
|
||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.data.mdstore.backends;
|
|||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -99,4 +100,14 @@ public class HdfsBackend implements MDStoreBackend {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||
final String path = version.getParams().getOrDefault("hdfs_path", "").toString();
|
||||
if (StringUtils.isNotBlank(path)) {
|
||||
return hdfsClient.streamParquetFiles(path + "/store", MetadataRecord.class);
|
||||
} else {
|
||||
throw new MDStoreManagerException("hdfs path is missing");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.data.mdstore.backends;
|
|||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import eu.dnetlib.data.mdstore.model.MDStore;
|
||||
import eu.dnetlib.data.mdstore.model.MDStoreVersion;
|
||||
|
@ -20,6 +21,8 @@ public interface MDStoreBackend {
|
|||
|
||||
List<MetadataRecord> listEntries(MDStoreVersion version, long limit) throws MDStoreManagerException;
|
||||
|
||||
/**
 * Provides the metadata records of the given version as a stream.
 *
 * @param version
 *            the mdstore version to read
 * @return a stream of the records of the version
 * @throws MDStoreManagerException
 *             if the backend cannot provide the records
 */
Stream<MetadataRecord> streamEntries(MDStoreVersion version) throws MDStoreManagerException;
|
||||
|
||||
Set<String> listInternalFiles(MDStoreVersion version) throws MDStoreManagerException;
|
||||
|
||||
Set<String> fixInconsistencies(boolean delete) throws MDStoreManagerException;
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.util.Arrays;
|
|||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
|
@ -59,6 +60,11 @@ public class MockBackend implements MDStoreBackend {
|
|||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||
return listEntries(version, 1000).stream();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
||||
return new LinkedHashSet<>(Arrays.asList("file1", "file2", "file3", "file4"));
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -144,6 +145,52 @@ public class HdfsClient {
|
|||
return list;
|
||||
}
|
||||
|
||||
public <T> Stream<T> streamParquetFiles(final String path, final Class<T> clazz) throws MDStoreManagerException {
|
||||
|
||||
// TODO Re-implement the method without list
|
||||
final List<T> list = new ArrayList<>();
|
||||
|
||||
final Configuration conf = conf();
|
||||
|
||||
final Set<String> fields = new LinkedHashSet<>();
|
||||
|
||||
for (final String f : listContent(path, HdfsClient::isParquetFile)) {
|
||||
|
||||
log.info("Opening parquet file: " + f);
|
||||
|
||||
try (final ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord> builder(new Path(f)).withConf(conf).build()) {
|
||||
log.debug("File parquet OPENED");
|
||||
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
GenericRecord rec = null;
|
||||
while ((rec = reader.read()) != null) {
|
||||
if (fields.isEmpty()) {
|
||||
rec.getSchema().getFields().forEach(field -> fields.add(field.name()));
|
||||
log.debug("Found schema: " + fields);
|
||||
}
|
||||
final Map<String, String> map = new LinkedHashMap<>();
|
||||
for (final String field : fields) {
|
||||
final Object v = rec.get(field);
|
||||
map.put(field, v != null ? v.toString() : "");
|
||||
}
|
||||
|
||||
list.add(mapper.convertValue(map, clazz));
|
||||
|
||||
log.debug("added record");
|
||||
}
|
||||
} catch (final FileNotFoundException e) {
|
||||
log.warn("Missing path: " + hdfsBasePath);
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error reading parquet file: " + f, e);
|
||||
throw new MDStoreManagerException("Error reading parquet file: " + f, e);
|
||||
}
|
||||
}
|
||||
|
||||
return list.stream();
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* private String printGroup(final Group g) { final StringWriter sw = new StringWriter();
|
||||
|
|
|
@ -2,5 +2,6 @@ package eu.dnetlib.data.mdstore.model;
|
|||
|
||||
/**
 * The kinds of mdstore known to the manager.
 */
public enum MDStoreType {
    HDFS,
    MOCK,
    SQL_DB
}
|
||||
|
|
Loading…
Reference in New Issue