Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
5 changed files with 45 additions and 52 deletions
Showing only changes of commit de6c4c8968 - Show all commits

View File

@ -1,8 +1,11 @@
package eu.dnetlib.dhp.actionmanager.bipmodel; package eu.dnetlib.dhp.actionmanager;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -46,4 +49,36 @@ public class Constants {
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); .map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
} }
public static StructuredProperty getSubject(String sbj, String classid, String classname) {
if (sbj.equals(NULL))
return null;
StructuredProperty sp = new StructuredProperty();
sp.setValue(sbj);
sp
.setQualifier(
OafMapperUtils
.qualifier(classid
,
classname,
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
sp
.setDataInfo(
OafMapperUtils
.dataInfo(
false,
UPDATE_DATA_INFO_TYPE,
true,
false,
OafMapperUtils
.qualifier(
UPDATE_SUBJECT_FOS_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));
return sp;
}
} }

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.actionmanager.bipfinder; package eu.dnetlib.dhp.actionmanager.bipfinder;
import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities; package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities; package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
@ -10,7 +10,6 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -21,10 +20,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareFOSSparkJob implements Serializable { public class PrepareFOSSparkJob implements Serializable {
@ -67,17 +64,7 @@ public class PrepareFOSSparkJob implements Serializable {
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class); Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
fosDataset.flatMap((FlatMapFunction<FOSDataModel, FOSDataModel>) v -> { fosDataset.map((MapFunction<FOSDataModel, Result>) value -> {
List<FOSDataModel> fosList = new ArrayList<>();
final String level1 = v.getLevel1();
final String level2 = v.getLevel2();
final String level3 = v.getLevel3();
Arrays
.stream(v.getDoi().split("\u0002"))
.forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3)));
return fosList.iterator();
}, Encoders.bean(FOSDataModel.class))
.map((MapFunction<FOSDataModel, Result>) value -> {
Result r = new Result(); Result r = new Result();
r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI));
r.setSubject(getSubjects(value)); r.setSubject(getSubjects(value));
@ -91,43 +78,14 @@ public class PrepareFOSSparkJob implements Serializable {
private static List<StructuredProperty> getSubjects(FOSDataModel fos) { private static List<StructuredProperty> getSubjects(FOSDataModel fos) {
return Arrays return Arrays
.asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) .asList(getSubject(fos.getLevel1(), FOS_CLASS_ID, FOS_CLASS_NAME),
getSubject(fos.getLevel2(), FOS_CLASS_ID, FOS_CLASS_NAME),
getSubject(fos.getLevel3(), FOS_CLASS_ID, FOS_CLASS_NAME))
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
private static StructuredProperty getSubject(String sbj) {
if (sbj.equals(NULL))
return null;
StructuredProperty sp = new StructuredProperty();
sp.setValue(sbj);
sp
.setQualifier(
OafMapperUtils
.qualifier(
FOS_CLASS_ID,
FOS_CLASS_NAME,
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
sp
.setDataInfo(
OafMapperUtils
.dataInfo(
false,
UPDATE_DATA_INFO_TYPE,
true,
false,
OafMapperUtils
.qualifier(
UPDATE_SUBJECT_FOS_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));
return sp;
}
} }

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.actionmanager.createunresolvedentities; package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;