forked from D-Net/dnet-hadoop

commit 86088ef26e
Merge remote-tracking branch 'origin/beta_to_master_may2024' into beta_to_master_may2024
@@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 						.getContext()
 						.stream()
 						.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-						.collect(Collectors.toList()));
+						.collect(Collectors.toCollection(ArrayList::new)));
 			}
 			return (T) res;
 		} else {
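Note: the only change here is collecting the filtered context list with Collectors.toCollection(ArrayList::new) instead of Collectors.toList(). toList() leaves the type, mutability, and serializability of the returned List unspecified, whereas pinning ArrayList guarantees that later code can mutate the result in place, consistent with the MergeUtils fix further down. A minimal sketch of the distinction (class and variable names are illustrative, not from dnet-hadoop):

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ToCollectionSketch {
	public static void main(String[] args) {
		// Collectors.toList() makes no guarantee about the implementation,
		// mutability, or serializability of the returned List.
		List<String> unspecified = Stream.of("a", "b").collect(Collectors.toList());

		// Collectors.toCollection(ArrayList::new) pins the concrete type,
		// so downstream code may freely mutate (and serialize) the result.
		ArrayList<String> mutable = Stream
			.of("a", "b")
			.collect(Collectors.toCollection(ArrayList::new));
		mutable.add("c"); // safe: ArrayList is always mutable
		System.out.println(mutable); // [a, b, c]
	}
}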
@@ -1,13 +1,9 @@
 package eu.dnetlib.dhp.schema.oaf.utils;
 
-import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
-
 import java.util.*;
 import java.util.stream.Collectors;
 
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Result;
 
@@ -42,17 +38,23 @@ public class MergeEntitiesComparator implements Comparator<Oaf> {
 		int res = 0;
 
 		// pid authority
-		int cfp1 = left
-			.getCollectedfrom()
-			.stream()
-			.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
-			.max(Integer::compare)
+		int cfp1 = Optional
+			.ofNullable(left.getCollectedfrom())
+			.map(
+				cf -> cf
+					.stream()
+					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
+					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);
-		int cfp2 = right
-			.getCollectedfrom()
-			.stream()
-			.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
-			.max(Integer::compare)
+		int cfp2 = Optional
+			.ofNullable(right.getCollectedfrom())
+			.map(
+				cf -> cf
+					.stream()
+					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
+					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);
 
 		if (cfp1 >= 0 && cfp1 > cfp2) {
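Note: wrapping getCollectedfrom() in Optional.ofNullable makes the comparator null-safe: an entity with no collectedfrom no longer throws a NullPointerException and simply ranks as -1, the same value used when no key matches PID_AUTHORITIES. A minimal standalone sketch of the pattern, with plain String keys standing in for the KeyValue entries and a hypothetical authority list:

import java.util.Arrays;
import java.util.List;
import java.util.Optional;

public class NullSafeRankSketch {
	// Hypothetical stand-in for the real PID_AUTHORITIES list.
	static final List<String> PID_AUTHORITIES = Arrays.asList("arxiv", "pubmed", "crossref");

	// Highest authority rank found among the collectedfrom keys,
	// or -1 when the list is null or contains no known authority.
	static int rank(List<String> collectedfromKeys) {
		return Optional
			.ofNullable(collectedfromKeys)
			.map(
				cf -> cf
					.stream()
					.map(PID_AUTHORITIES::indexOf)
					.max(Integer::compare)
					.orElse(-1))
			.orElse(-1);
	}

	public static void main(String[] args) {
		System.out.println(rank(Arrays.asList("crossref", "other"))); // 2
		System.out.println(rank(null)); // -1 instead of a NullPointerException
	}
}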
@@ -432,7 +432,10 @@ public class MergeUtils {
 
 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			r.getDataInfo().addAll(l.getDataInfo());
+			ArrayList<DataInfo> di = new ArrayList<>();
+			di.addAll(r.getDataInfo());
+			di.addAll(l.getDataInfo());
+			r.setDataInfo(di);
 			return r;
 		}));
 
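Note: the previous lambda mutated r.getDataInfo() in place via addAll, which throws UnsupportedOperationException whenever that list is unmodifiable (e.g. a Collections.emptyList(), List.of(...), or Arrays.asList(...) result). Collecting both sides into a fresh ArrayList and setting it back avoids the in-place mutation entirely. A minimal sketch with String elements standing in for DataInfo:

import java.util.ArrayList;
import java.util.List;

public class MergeListSketch {
	// Merges two lists into a fresh mutable ArrayList instead of
	// mutating the (possibly unmodifiable) left-hand list in place.
	static <T> ArrayList<T> mergeDataInfo(List<T> r, List<T> l) {
		ArrayList<T> di = new ArrayList<>();
		di.addAll(r);
		di.addAll(l);
		return di;
	}

	public static void main(String[] args) {
		List<String> immutable = List.of("trusted");
		// immutable.addAll(...) would throw UnsupportedOperationException;
		// the copy-based merge works regardless of the input implementations.
		System.out.println(mergeDataInfo(immutable, List.of("inferred"))); // [trusted, inferred]
	}
}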
@@ -45,6 +45,7 @@
 			--executor-memory=${sparkExecutorMemory}
 			--executor-cores=${sparkExecutorCores}
 			--driver-memory=${sparkDriverMemory}
+			--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
 			--conf spark.extraListeners=${spark2ExtraListeners}
 			--conf spark.sql.shuffle.partitions=15000
 			--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@@ -79,6 +80,7 @@
 			--executor-memory=${sparkExecutorMemory}
 			--executor-cores=${sparkExecutorCores}
 			--driver-memory=${sparkDriverMemory}
+			--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
 			--conf spark.extraListeners=${spark2ExtraListeners}
 			--conf spark.sql.shuffle.partitions=10000
 			--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
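Note: the single addition in both workflow actions is the explicit spark.executor.memoryOverhead, sized here to the executor memory itself. Without it, Spark on YARN reserves only max(384 MiB, 10% of executor memory) for off-heap use; shuffle-heavy stages running at 10000-15000 partitions can exceed that default and have their containers killed by YARN, which is presumably what this change works around.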