forked from D-Net/dnet-hadoop

Merge remote-tracking branch 'origin/beta_to_master_may2024' into beta_to_master_may2024

commit 86088ef26e
@@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 					.getContext()
 					.stream()
 					.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-					.collect(Collectors.toList()));
+					.collect(Collectors.toCollection(ArrayList::new)));
 			}
 			return (T) res;
 		} else {
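This hunk's only functional change replaces Collectors.toList() with Collectors.toCollection(ArrayList::new). A minimal sketch of the difference, with illustrative context ids rather than repository data: toList() makes no guarantee that the returned list is mutable, while toCollection(ArrayList::new) always produces a mutable ArrayList, so later in-place additions cannot fail.

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class MutableCollectDemo {
    public static void main(String[] args) {
        // toCollection(ArrayList::new) guarantees a mutable result list;
        // Collectors.toList() documents no such guarantee.
        List<String> kept = Stream.of("dh-ch::a", "enermaps::b", "beopen::c")
            .filter(c -> !c.toLowerCase().startsWith("dh-ch"))
            .collect(Collectors.toCollection(ArrayList::new));
        kept.add("eosc::d"); // always safe on an ArrayList
        System.out.println(kept); // [enermaps::b, beopen::c, eosc::d]
    }
}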
@@ -1,13 +1,9 @@

 package eu.dnetlib.dhp.schema.oaf.utils;

-import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
-
 import java.util.*;
-import java.util.stream.Collectors;

 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Result;
@@ -42,17 +38,23 @@ public class MergeEntitiesComparator implements Comparator<Oaf> {
 		int res = 0;

 		// pid authority
-		int cfp1 = left
-			.getCollectedfrom()
-			.stream()
-			.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
-			.max(Integer::compare)
+		int cfp1 = Optional
+			.ofNullable(left.getCollectedfrom())
+			.map(
+				cf -> cf
+					.stream()
+					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
+					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);
-		int cfp2 = right
-			.getCollectedfrom()
-			.stream()
-			.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
-			.max(Integer::compare)
+		int cfp2 = Optional
+			.ofNullable(right.getCollectedfrom())
+			.map(
+				cf -> cf
+					.stream()
+					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
+					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);

 		if (cfp1 >= 0 && cfp1 > cfp2) {
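The rewrite above wraps each getCollectedfrom() call in Optional.ofNullable: the old chain called .stream() directly on the collectedfrom list and would throw a NullPointerException whenever that list was null, while the new form falls back to -1. A self-contained sketch of the same null-safe pattern, with plain strings standing in for the KeyValue entries:

import java.util.List;
import java.util.Optional;

public class NullSafeMaxDemo {
    // Illustrative stand-in for the PID_AUTHORITIES list in the comparator.
    static final List<String> PID_AUTHORITIES = List.of("authority-a", "authority-b");

    static int bestAuthorityIndex(List<String> collectedFromKeys) {
        // Optional.ofNullable turns a null list into an empty Optional,
        // so the stream pipeline is skipped and -1 is returned instead
        // of throwing a NullPointerException.
        return Optional
            .ofNullable(collectedFromKeys)
            .map(cf -> cf
                .stream()
                .map(PID_AUTHORITIES::indexOf)
                .max(Integer::compare)
                .orElse(-1))
            .orElse(-1);
    }

    public static void main(String[] args) {
        System.out.println(bestAuthorityIndex(List.of("authority-b"))); // 1
        System.out.println(bestAuthorityIndex(null)); // -1, no exception
    }
}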
@@ -432,7 +432,10 @@ public class MergeUtils {

 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			r.getDataInfo().addAll(l.getDataInfo());
+			ArrayList<DataInfo> di = new ArrayList<>();
+			di.addAll(r.getDataInfo());
+			di.addAll(l.getDataInfo());
+			r.setDataInfo(di);
 			return r;
 		}));
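This hunk stops calling addAll directly on r.getDataInfo() and instead merges both DataInfo lists into a fresh ArrayList. A plausible reason, sketched below with String standing in for DataInfo: if a getter hands back an unmodifiable list (for example one built with List.of), mutating it in place throws UnsupportedOperationException, whereas copying into a new ArrayList always works.

import java.util.ArrayList;
import java.util.List;

public class DefensiveCopyDemo {
    public static void main(String[] args) {
        // Immutable lists model a getter that does not return an ArrayList.
        List<String> rDataInfo = List.of("di-from-merge");
        List<String> lDataInfo = List.of("di-from-enrich");

        // rDataInfo.addAll(lDataInfo) would throw UnsupportedOperationException.
        // Copying into a fresh ArrayList, as the diff does, is always safe.
        ArrayList<String> di = new ArrayList<>();
        di.addAll(rDataInfo);
        di.addAll(lDataInfo);
        System.out.println(di); // [di-from-merge, di-from-enrich]
    }
}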
@@ -45,6 +45,7 @@
 					--executor-memory=${sparkExecutorMemory}
 					--executor-cores=${sparkExecutorCores}
 					--driver-memory=${sparkDriverMemory}
+					--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
 					--conf spark.extraListeners=${spark2ExtraListeners}
 					--conf spark.sql.shuffle.partitions=15000
 					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
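The added line sets spark.executor.memoryOverhead to the same value as the executor heap, roughly doubling the memory each executor container requests from the cluster manager. A hedged sketch of the equivalent programmatic configuration; the "4G" values are placeholders for ${sparkExecutorMemory}, not values taken from this workflow:

import org.apache.spark.SparkConf;

public class OverheadConfDemo {
    public static void main(String[] args) {
        // Same settings as the workflow's spark-submit flags, set in code.
        SparkConf conf = new SparkConf()
            .setAppName("overhead-demo")
            .set("spark.executor.memory", "4G")
            .set("spark.executor.memoryOverhead", "4G");
        System.out.println(conf.get("spark.executor.memoryOverhead")); // 4G
    }
}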
@@ -79,6 +80,7 @@
 					--executor-memory=${sparkExecutorMemory}
 					--executor-cores=${sparkExecutorCores}
 					--driver-memory=${sparkDriverMemory}
+					--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
 					--conf spark.extraListeners=${spark2ExtraListeners}
 					--conf spark.sql.shuffle.partitions=10000
 					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}