mdstore streams
This commit is contained in:
parent
a855a6afff
commit
c7eef9a4e9
|
@ -1,10 +1,11 @@
|
||||||
<div *ngIf="conf" style="padding: 16px; margin-top: 0.4em; border: 1px solid lightgray; border-radius: 8px;">
|
<mat-card *ngIf="conf" style="margin-top: 0.4em;">
|
||||||
|
<mat-card-header>
|
||||||
<h2>{{conf.name}}</h2>
|
<mat-card-title>{{conf.name}}</mat-card-title>
|
||||||
<span *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}<br /></span>
|
<mat-card-subtitle *ngIf="conf.dsName"><b>Datasource Name:</b> {{conf.dsName}}</mat-card-subtitle>
|
||||||
<span *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}<br /></span>
|
<mat-card-subtitle *ngIf="conf.dsId"><b>Datasource ID:</b> {{conf.dsId}}</mat-card-subtitle>
|
||||||
<span *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}<br /></span>
|
<mat-card-subtitle *ngIf="conf.apiId"><b>Datasource API:</b> {{conf.apiId}}</mat-card-subtitle>
|
||||||
|
</mat-card-header>
|
||||||
|
<mat-card-content style="padding-top: 1em;">
|
||||||
<button mat-stroked-button color="primary" (click)="launchWfConf()">
|
<button mat-stroked-button color="primary" (click)="launchWfConf()">
|
||||||
<mat-icon fontIcon="play_circle"></mat-icon>
|
<mat-icon fontIcon="play_circle"></mat-icon>
|
||||||
launch
|
launch
|
||||||
|
@ -35,7 +36,8 @@
|
||||||
</ng-container>
|
</ng-container>
|
||||||
|
|
||||||
<ng-container matColumnDef="status">
|
<ng-container matColumnDef="status">
|
||||||
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header sortActionDescription="Sort by Status">
|
<th mat-header-cell *matHeaderCellDef style="width: 10%;" mat-sort-header
|
||||||
|
sortActionDescription="Sort by Status">
|
||||||
Status </th>
|
Status </th>
|
||||||
<td mat-cell *matCellDef="let element"><span class="badge-label"
|
<td mat-cell *matCellDef="let element"><span class="badge-label"
|
||||||
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
|
[ngClass]="{'badge-success' : element.status === 'success', 'badge-failure' : element.status === 'failure'}">{{element.status}}</span>
|
||||||
|
@ -63,10 +65,10 @@
|
||||||
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history"</td>
|
<td class="mat-cell" colspan="4" style="padding: 0 16px;">No execution in history"</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
</mat-card-content>
|
||||||
<!-- <pre>{{conf | json}}</pre> -->
|
<!-- <pre>{{conf | json}}</pre> -->
|
||||||
|
|
||||||
</div>
|
</mat-card>
|
||||||
|
|
||||||
<div *ngIf="!conf" style="margin-top: 2em;">
|
<div *ngIf="!conf" style="margin-top: 2em;">
|
||||||
Workflow Configuration does not exist
|
Workflow Configuration does not exist
|
||||||
|
|
|
@ -63,6 +63,7 @@
|
||||||
<artifactId>spring-boot-starter-test</artifactId>
|
<artifactId>spring-boot-starter-test</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.util.Set;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import javax.transaction.Transactional;
|
import javax.transaction.Transactional;
|
||||||
|
@ -241,6 +242,12 @@ public class MDStoreService {
|
||||||
return selectBackend(md.getType()).listEntries(v, limit);
|
return selectBackend(md.getType()).listEntries(v, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Stream<MetadataRecord> streamVersionRecords(final String versionId) throws MDStoreManagerException {
|
||||||
|
final MDStoreVersion v = mdstoreVersionRepository.findById(versionId).orElseThrow(() -> new MDStoreManagerException("Version not found"));
|
||||||
|
final MDStore md = mdstoreRepository.findById(v.getMdstore()).orElseThrow(() -> new MDStoreManagerException("MDStore not found"));
|
||||||
|
return selectBackend(md.getType()).streamEntries(v);
|
||||||
|
}
|
||||||
|
|
||||||
public MDStore newMDStore(
|
public MDStore newMDStore(
|
||||||
final String format,
|
final String format,
|
||||||
final String layout,
|
final String layout,
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
package eu.dnetlib.data.mdstore;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Spliterators;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import eu.dnetlib.data.mdstore.model.MDStoreWithInfo;
|
||||||
|
import eu.dnetlib.data.mdstore.model.MetadataRecord;
|
||||||
|
import eu.dnetlib.errors.MDStoreManagerException;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class MDStoreStreamReader {
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private MDStoreService mdStoreService;
|
||||||
|
|
||||||
|
private enum Status {
|
||||||
|
PREPARED,
|
||||||
|
READING,
|
||||||
|
COMPLETED,
|
||||||
|
FAILED
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO the failure could be throw consuming the stream, so it is necessary to perform a refactoring of this method
|
||||||
|
public Stream<MetadataRecord> prepareMDStoreStream(final String mdstoreId) throws MDStoreManagerException {
|
||||||
|
|
||||||
|
final MDStoreWithInfo mdstore = mdStoreService.findMdStore(mdstoreId);
|
||||||
|
final Iterator<MetadataRecord> innerIterator = mdStoreService.streamVersionRecords(mdstore.getCurrentVersion()).iterator();
|
||||||
|
|
||||||
|
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new Iterator<>() {
|
||||||
|
|
||||||
|
private Status status = Status.PREPARED;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
if (innerIterator.hasNext()) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
complete();
|
||||||
|
return false;
|
||||||
|
} catch (final MDStoreManagerException e) {
|
||||||
|
throw new RuntimeException("Error reading mdstore", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MetadataRecord next() {
|
||||||
|
try {
|
||||||
|
verifyStart();
|
||||||
|
return innerIterator.next();
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
try {
|
||||||
|
fail();
|
||||||
|
throw new RuntimeException("Error reading mdstore", e);
|
||||||
|
} catch (final MDStoreManagerException e1) {
|
||||||
|
throw new RuntimeException("Error reading mdstore", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void verifyStart() throws MDStoreManagerException {
|
||||||
|
if (status == Status.PREPARED) {
|
||||||
|
status = Status.READING;
|
||||||
|
mdStoreService.startReading(mdstoreId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void complete() throws MDStoreManagerException {
|
||||||
|
if (status == Status.PREPARED || status == Status.READING) {
|
||||||
|
status = Status.COMPLETED;
|
||||||
|
mdStoreService.endReading(mdstoreId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void fail() throws MDStoreManagerException {
|
||||||
|
if (status == Status.PREPARED || status == Status.READING) {
|
||||||
|
status = Status.FAILED;
|
||||||
|
mdStoreService.endReading(mdstoreId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 0), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -5,6 +5,7 @@ import java.util.HashSet;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@ -33,6 +34,11 @@ public class DefaultBackend implements MDStoreBackend {
|
||||||
return new ArrayList<>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||||
|
return Stream.empty();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
||||||
return new LinkedHashSet<>();
|
return new LinkedHashSet<>();
|
||||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.data.mdstore.backends;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -99,4 +100,14 @@ public class HdfsBackend implements MDStoreBackend {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||||
|
final String path = version.getParams().getOrDefault("hdfs_path", "").toString();
|
||||||
|
if (StringUtils.isNotBlank(path)) {
|
||||||
|
return hdfsClient.streamParquetFiles(path + "/store", MetadataRecord.class);
|
||||||
|
} else {
|
||||||
|
throw new MDStoreManagerException("hdfs path is missing");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.data.mdstore.backends;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import eu.dnetlib.data.mdstore.model.MDStore;
|
import eu.dnetlib.data.mdstore.model.MDStore;
|
||||||
import eu.dnetlib.data.mdstore.model.MDStoreVersion;
|
import eu.dnetlib.data.mdstore.model.MDStoreVersion;
|
||||||
|
@ -20,6 +21,8 @@ public interface MDStoreBackend {
|
||||||
|
|
||||||
List<MetadataRecord> listEntries(MDStoreVersion version, long limit) throws MDStoreManagerException;
|
List<MetadataRecord> listEntries(MDStoreVersion version, long limit) throws MDStoreManagerException;
|
||||||
|
|
||||||
|
Stream<MetadataRecord> streamEntries(MDStoreVersion version) throws MDStoreManagerException;
|
||||||
|
|
||||||
Set<String> listInternalFiles(MDStoreVersion version) throws MDStoreManagerException;
|
Set<String> listInternalFiles(MDStoreVersion version) throws MDStoreManagerException;
|
||||||
|
|
||||||
Set<String> fixInconsistencies(boolean delete) throws MDStoreManagerException;
|
Set<String> fixInconsistencies(boolean delete) throws MDStoreManagerException;
|
||||||
|
|
|
@ -6,6 +6,7 @@ import java.util.Arrays;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@ -59,6 +60,11 @@ public class MockBackend implements MDStoreBackend {
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<MetadataRecord> streamEntries(final MDStoreVersion version) throws MDStoreManagerException {
|
||||||
|
return listEntries(version, 1000).stream();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
public Set<String> listInternalFiles(final MDStoreVersion version) throws MDStoreManagerException {
|
||||||
return new LinkedHashSet<>(Arrays.asList("file1", "file2", "file3", "file4"));
|
return new LinkedHashSet<>(Arrays.asList("file1", "file2", "file3", "file4"));
|
||||||
|
|
|
@ -9,6 +9,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -144,6 +145,52 @@ public class HdfsClient {
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public <T> Stream<T> streamParquetFiles(final String path, final Class<T> clazz) throws MDStoreManagerException {
|
||||||
|
|
||||||
|
// TODO Re-implement the method without list
|
||||||
|
final List<T> list = new ArrayList<>();
|
||||||
|
|
||||||
|
final Configuration conf = conf();
|
||||||
|
|
||||||
|
final Set<String> fields = new LinkedHashSet<>();
|
||||||
|
|
||||||
|
for (final String f : listContent(path, HdfsClient::isParquetFile)) {
|
||||||
|
|
||||||
|
log.info("Opening parquet file: " + f);
|
||||||
|
|
||||||
|
try (final ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord> builder(new Path(f)).withConf(conf).build()) {
|
||||||
|
log.debug("File parquet OPENED");
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
GenericRecord rec = null;
|
||||||
|
while ((rec = reader.read()) != null) {
|
||||||
|
if (fields.isEmpty()) {
|
||||||
|
rec.getSchema().getFields().forEach(field -> fields.add(field.name()));
|
||||||
|
log.debug("Found schema: " + fields);
|
||||||
|
}
|
||||||
|
final Map<String, String> map = new LinkedHashMap<>();
|
||||||
|
for (final String field : fields) {
|
||||||
|
final Object v = rec.get(field);
|
||||||
|
map.put(field, v != null ? v.toString() : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
list.add(mapper.convertValue(map, clazz));
|
||||||
|
|
||||||
|
log.debug("added record");
|
||||||
|
}
|
||||||
|
} catch (final FileNotFoundException e) {
|
||||||
|
log.warn("Missing path: " + hdfsBasePath);
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
log.error("Error reading parquet file: " + f, e);
|
||||||
|
throw new MDStoreManagerException("Error reading parquet file: " + f, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return list.stream();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
* private String printGroup(final Group g) { final StringWriter sw = new StringWriter();
|
* private String printGroup(final Group g) { final StringWriter sw = new StringWriter();
|
||||||
|
|
|
@ -2,5 +2,6 @@ package eu.dnetlib.data.mdstore.model;
|
||||||
|
|
||||||
public enum MDStoreType {
|
public enum MDStoreType {
|
||||||
HDFS,
|
HDFS,
|
||||||
MOCK
|
MOCK,
|
||||||
|
SQL_DB
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue