Merge remote-tracking branch 'origin/beta_to_master_may2024' into beta_to_master_may2024

This commit is contained in:
Miriam Baglioni 2024-06-11 17:04:07 +02:00
commit 86088ef26e
4 changed files with 23 additions and 16 deletions

View File

@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.getContext() .getContext()
.stream() .stream()
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId)) .filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
.collect(Collectors.toList())); .collect(Collectors.toCollection(ArrayList::new)));
} }
return (T) res; return (T) res;
} else { } else {

View File

@ -1,13 +1,9 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
import java.util.*; import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
@ -42,17 +38,23 @@ public class MergeEntitiesComparator implements Comparator<Oaf> {
int res = 0; int res = 0;
// pid authority // pid authority
int cfp1 = left int cfp1 = Optional
.getCollectedfrom() .ofNullable(left.getCollectedfrom())
.stream() .map(
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey())) cf -> cf
.max(Integer::compare) .stream()
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
.max(Integer::compare)
.orElse(-1))
.orElse(-1); .orElse(-1);
int cfp2 = right int cfp2 = Optional
.getCollectedfrom() .ofNullable(right.getCollectedfrom())
.stream() .map(
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey())) cf -> cf
.max(Integer::compare) .stream()
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
.max(Integer::compare)
.orElse(-1))
.orElse(-1); .orElse(-1);
if (cfp1 >= 0 && cfp1 > cfp2) { if (cfp1 >= 0 && cfp1 > cfp2) {

View File

@ -432,7 +432,10 @@ public class MergeUtils {
// merge datainfo for same context id // merge datainfo for same context id
merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> { merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
r.getDataInfo().addAll(l.getDataInfo()); ArrayList<DataInfo> di = new ArrayList<>();
di.addAll(r.getDataInfo());
di.addAll(l.getDataInfo());
r.setDataInfo(di);
return r; return r;
})); }));

View File

@ -45,6 +45,7 @@
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=15000 --conf spark.sql.shuffle.partitions=15000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -79,6 +80,7 @@
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=10000 --conf spark.sql.shuffle.partitions=10000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}