some fields in stats
This commit is contained in:
parent
5ddbef3a5b
commit
71204a8056
|
@ -9,7 +9,6 @@ import java.sql.SQLException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
@ -52,8 +51,7 @@ public class BaseAnalyzerJob {
|
|||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
BaseAnalyzerJob.class
|
||||
.toString(BaseAnalyzerJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
@ -96,32 +94,26 @@ public class BaseAnalyzerJob {
|
|||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
if (fromStep <= 0) {
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 0: LoadRecords\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 0: LoadRecords\n**************************************");
|
||||
loadRecords(inputPath, dataPath);
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 0: DONE\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 0: DONE\n**************************************");
|
||||
}
|
||||
|
||||
if (fromStep <= 1) {
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 1: Base Report\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 1: Base Report\n**************************************");
|
||||
generateReport(spark, dataPath, outputPath);
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 1: DONE\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 1: DONE\n**************************************");
|
||||
}
|
||||
|
||||
if (fromStep <= 2) {
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 2: OpenDOAR Report\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 2: OpenDOAR Report\n**************************************");
|
||||
generateOpenDoarReport(spark, outputPath, opendoarPath, loadOpenDoarStats(dbUrl, dbUser, dbPassword));
|
||||
log
|
||||
.info(
|
||||
"\n**************************************\n* EXECUTING STEP 2: DONE\n**************************************");
|
||||
.info("\n**************************************\n* EXECUTING STEP 2: DONE\n**************************************");
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -146,8 +138,8 @@ public class BaseAnalyzerJob {
|
|||
final OpenDoarRepoStatus repo = new OpenDoarRepoStatus();
|
||||
repo.setId(row.getString(0));
|
||||
repo.getAggregations().put(BASE_DUMP, row.getLong(1));
|
||||
repo.setFromBase(true);
|
||||
repo.setBaseMAX(true);
|
||||
repo.setBaseCount(row.getLong(1));
|
||||
repo.setOpenaireCount(0);
|
||||
repo.setHighCompliance(false);
|
||||
return repo;
|
||||
}, Encoders.bean(OpenDoarRepoStatus.class));
|
||||
|
@ -162,35 +154,17 @@ public class BaseAnalyzerJob {
|
|||
}
|
||||
|
||||
private static OpenDoarRepoStatus merge(final OpenDoarRepoStatus r1, final OpenDoarRepoStatus r2) {
|
||||
if (r1 == null) {
|
||||
return r2;
|
||||
}
|
||||
if (r2 == null) {
|
||||
return r1;
|
||||
}
|
||||
if (r1 == null) { return r2; }
|
||||
if (r2 == null) { return r1; }
|
||||
|
||||
final OpenDoarRepoStatus r = new OpenDoarRepoStatus();
|
||||
r.setId(ObjectUtils.firstNonNull(r1.getId(), r2.getId()));
|
||||
r.setJurisdiction(ObjectUtils.firstNonNull(r1.getJurisdiction(), r2.getJurisdiction()));
|
||||
r.getAggregations().putAll(r1.getAggregations());
|
||||
r.getAggregations().putAll(r2.getAggregations());
|
||||
r.setFromBase(r1.isFromBase() || r2.isFromBase());
|
||||
r.setHighCompliance(r1.isHighCompliance() || r2.isHighCompliance());
|
||||
|
||||
if (r.getAggregations().containsKey(BASE_DUMP)) {
|
||||
final long baseSize = r.getAggregations().get(BASE_DUMP);
|
||||
final long otherSize = r
|
||||
.getAggregations()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.filter(e -> !BASE_DUMP.equals(e.getKey()))
|
||||
.mapToLong(Entry::getValue)
|
||||
.max()
|
||||
.orElse(0);
|
||||
r.setBaseMAX(baseSize > otherSize);
|
||||
} else {
|
||||
r.setBaseMAX(false);
|
||||
}
|
||||
r.setBaseCount(Math.max(r1.getBaseCount(), r2.getBaseCount()));
|
||||
r.setOpenaireCount(Math.max(r1.getOpenaireCount(), r2.getOpenaireCount()));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -203,25 +177,30 @@ public class BaseAnalyzerJob {
|
|||
try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
||||
|
||||
final String sql = IOUtils
|
||||
.toString(
|
||||
BaseAnalyzerJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-aggregation-status.sql"));
|
||||
.toString(BaseAnalyzerJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-aggregation-status.sql"));
|
||||
|
||||
dbClient.processResults(sql, row -> {
|
||||
try {
|
||||
final OpenDoarRepoStatus repo = new OpenDoarRepoStatus();
|
||||
repo.setId(row.getString("id"));
|
||||
repo.setJurisdiction(row.getString("jurisdiction"));
|
||||
repo.setBaseCount(0);
|
||||
repo.setHighCompliance(false);
|
||||
|
||||
long sum = 0;
|
||||
for (final String s : (String[]) row.getArray("aggregations").getArray()) {
|
||||
final String api = StringUtils.substringBefore(s, "@@@");
|
||||
final long count = NumberUtils.toLong(StringUtils.substringAfter(s, "@@@"), 0);
|
||||
sum += count;
|
||||
repo.getAggregations().put(api, count);
|
||||
repo.setFromBase(false);
|
||||
repo.setBaseMAX(false);
|
||||
// This should recognize the HIGH Compliances: openaire*X.Y*
|
||||
repo.setHighCompliance(s.contains("compliance: openaire"));
|
||||
if (s.contains("compliance: openaire")) {
|
||||
repo.setHighCompliance(true);
|
||||
}
|
||||
}
|
||||
repo.setOpenaireCount(sum);
|
||||
|
||||
repos.add(repo);
|
||||
log.info("# FOUND OPENDOAR (DB): " + repo.getId());
|
||||
} catch (final SQLException e) {
|
||||
|
@ -243,12 +222,9 @@ public class BaseAnalyzerJob {
|
|||
final Text value = new Text();
|
||||
|
||||
try (final SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
fs.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||
.keyClass(LongWritable.class),
|
||||
SequenceFile.Writer
|
||||
.valueClass(Text.class),
|
||||
SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||
.createWriter(fs.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||
.keyClass(LongWritable.class), SequenceFile.Writer
|
||||
.valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||
|
||||
final BaseCollectorIterator iteraror = new BaseCollectorIterator(fs, new Path(inputPath), report);
|
||||
|
||||
|
|
|
@ -13,11 +13,11 @@ public class OpenDoarRepoStatus implements Serializable {
|
|||
|
||||
private String jurisdiction;
|
||||
|
||||
private boolean fromBase = false;
|
||||
|
||||
private boolean highCompliance = false;
|
||||
|
||||
private boolean baseMAX = false;
|
||||
private long baseCount = 0;
|
||||
|
||||
private long openaireCount = 0;
|
||||
|
||||
private Map<String, Long> aggregations = new HashMap<>();
|
||||
|
||||
|
@ -53,19 +53,19 @@ public class OpenDoarRepoStatus implements Serializable {
|
|||
this.highCompliance = highCompliance;
|
||||
}
|
||||
|
||||
public boolean isFromBase() {
|
||||
return this.fromBase;
|
||||
public long getOpenaireCount() {
|
||||
return this.openaireCount;
|
||||
}
|
||||
|
||||
public void setFromBase(final boolean fromBase) {
|
||||
this.fromBase = fromBase;
|
||||
public void setOpenaireCount(final long openaireCount) {
|
||||
this.openaireCount = openaireCount;
|
||||
}
|
||||
|
||||
public boolean isBaseMAX() {
|
||||
return this.baseMAX;
|
||||
public long getBaseCount() {
|
||||
return this.baseCount;
|
||||
}
|
||||
|
||||
public void setBaseMAX(final boolean baseMAX) {
|
||||
this.baseMAX = baseMAX;
|
||||
public void setBaseCount(final long baseCount) {
|
||||
this.baseCount = baseCount;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue