Merge remote-tracking branch 'origin/beta_to_master_may2024' into beta_to_master_may2024

2024-06-11 17:04:07 +02:00 · 2024-06-11 17:04:07 +02:00 · 86088ef26e
parent 143c525343 c371513d43
commit 86088ef26e
4 changed files with 23 additions and 16 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 							.getContext()
 							.stream()
 							.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-							.collect(Collectors.toList()));
+							.collect(Collectors.toCollection(ArrayList::new)));
 			}
 			return (T) res;
 		} else {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java
@@ -1,13 +1,9 @@

 package eu.dnetlib.dhp.schema.oaf.utils;

-import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
-
 import java.util.*;
-import java.util.stream.Collectors;

 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Result;
@@ -42,17 +38,23 @@ public class MergeEntitiesComparator implements Comparator<Oaf> {
 		int res = 0;

 		// pid authority
-		int cfp1 = left
-			.getCollectedfrom()
+		int cfp1 = Optional
+			.ofNullable(left.getCollectedfrom())
+			.map(
+				cf -> cf
 					.stream()
 					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
 					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);
-		int cfp2 = right
-			.getCollectedfrom()
+		int cfp2 = Optional
+			.ofNullable(right.getCollectedfrom())
+			.map(
+				cf -> cf
 					.stream()
 					.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
 					.max(Integer::compare)
+					.orElse(-1))
 			.orElse(-1);

 		if (cfp1 >= 0 && cfp1 > cfp2) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@@ -432,7 +432,10 @@ public class MergeUtils {

 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			r.getDataInfo().addAll(l.getDataInfo());
+			ArrayList<DataInfo> di = new ArrayList<>();
+			di.addAll(r.getDataInfo());
+			di.addAll(l.getDataInfo());
+			r.setDataInfo(di);
 			return r;
 		}));

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml
@@ -45,6 +45,7 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.shuffle.partitions=15000
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@@ -79,6 +80,7 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.shuffle.partitions=10000
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}