[enrichment] changed to make it compile against the new model

Miriam Baglioni 2023-02-15 16:20:24 +01:00
parent 8ddcf10075
commit 624c62f62d
33 changed files with 333 additions and 338 deletions
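Most of the edits below follow one API migration: in the new model, the OafMapperUtils.dataInfo(...) factory takes a float trust value first and drops the deletedbyinference/invisible flags and the trailing String trust of the old signature. A minimal before/after sketch, assembled from the hunks in this diff (the qualifier argument stands in for the provenance-action qualifiers built below):

// old model, as removed below: boolean flags plus a trailing String trust
// DataInfo di = OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, qualifier, TAGGING_TRUST);

// new model, as added below: float trust first, then inference provenance, inferred flag, qualifier
DataInfo di = OafMapperUtils.dataInfo(TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, qualifier);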

View File

@@ -241,27 +241,27 @@ public class ModelConstants {
 }
 public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
-return OafMapperUtils.accessRight(
+return OafMapperUtils
+.accessRight(
 ModelConstants.UNKNOWN,
 ModelConstants.NOT_AVAILABLE,
-ModelConstants.DNET_ACCESS_MODES
-);
+ModelConstants.DNET_ACCESS_MODES);
 }
 public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
-return OafMapperUtils.accessRight(
+return OafMapperUtils
+.accessRight(
 ACCESS_RIGHT_EMBARGO,
 ACCESS_RIGHT_EMBARGO,
-DNET_ACCESS_MODES
-);
+DNET_ACCESS_MODES);
 }
 public static final AccessRight CLOSED_ACCESS_RIGHT() {
-return OafMapperUtils.accessRight(
+return OafMapperUtils
+.accessRight(
 ACCESS_RIGHT_CLOSED,
 "Closed Access",
-ModelConstants.DNET_ACCESS_MODES
-);
+ModelConstants.DNET_ACCESS_MODES);
 }
 private static Qualifier qualifier(

View File

@@ -6,9 +6,9 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.lang3.StringUtils;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import lombok.val;

View File

@@ -55,13 +55,11 @@ public class OafMapperUtils {
 return qualifier(UNKNOWN, "Unknown", schemeid);
 }
 public static AccessRight accessRight(
 final String classid,
 final String classname,
 final String schemeid) {
 return accessRight(classid, classname, schemeid, null);
 }
 public static AccessRight accessRight(
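As the hunk above shows, the three-argument accessRight overload delegates to the four-argument variant with a null open-access route, so the access-right constants from the first file reduce to calls of this shape (a sketch; the classid/classname/schemeid values are the ModelConstants fields shown in the first hunk):

AccessRight closed = OafMapperUtils.accessRight(
    ACCESS_RIGHT_CLOSED,               // classid
    "Closed Access",                   // classname
    ModelConstants.DNET_ACCESS_MODES); // schemeid; openAccessRoute stays null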

View File

@@ -67,7 +67,7 @@ public class VocabularyTest {
 if (t1 == null) {
 System.err.println(s1 + " Missing");
 } else {
-System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
+System.out.println("syn=" + s1 + " term = " + t1.getClassid() + " " + t1.getClassname());
 System.out
 .println(

View File

@@ -7,7 +7,6 @@ import java.util.function.BiFunction;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 /** OAF model merging support. */
@@ -56,19 +55,19 @@ public class MergeAndGet {
 Entity yE = (Entity) y;
 if (xE.getClass().equals(yE.getClass())
 && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
 return x;
 } else if (xE.getClass().equals(yE.getClass())
 && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
 return (G) y;
 } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
 return x;
 } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
 throw new RuntimeException(
 String
 .format(
 "SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
 x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
 }
 }

View File

@@ -8,7 +8,6 @@ import java.util.Optional;
 import java.util.function.BiFunction;
 import java.util.function.Function;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -26,8 +25,8 @@ import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 /** Applies a given action payload file to graph table of compatible type. */
 public class PromoteActionPayloadForGraphTableJob {

View File

@@ -8,12 +8,12 @@ import static org.mockito.Mockito.*;
 import java.util.function.BiFunction;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 public class MergeAndGetTest {
@@ -97,7 +97,7 @@ public class MergeAndGetTest {
 // then
 Oaf x = fn.get().apply(a, b);
 assertTrue(Relation.class.isAssignableFrom(x.getClass()));
-//verify(a).mergeFrom(b);
+// verify(a).mergeFrom(b);
 a = MergeUtils.merge(verify(a), b);
 assertEquals(a, x);
 }

View File

@@ -14,7 +14,6 @@ import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -28,8 +27,8 @@ import org.junit.jupiter.params.provider.MethodSource;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 public class PromoteActionPayloadForGraphTableJobTest {
 private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();

View File

@@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -14,6 +12,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -43,28 +43,28 @@ public class Constants {
 public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 public static final EntityDataInfo SciNoBo_DATA_INFO = OafMapperUtils
 .dataInfo(
 false,
 false,
-0.8f, //TODO check
+0.8f, // TODO check
 "SciNoBo",
 true,
 OafMapperUtils
 .qualifier(
 ModelConstants.PROVENANCE_ENRICH,
 null,
 ModelConstants.DNET_PROVENANCE_ACTIONS));
 public static final DataInfo Bip_DATA_INFO3 = OafMapperUtils
 .dataInfo(
 0.8f,
 UPDATE_DATA_INFO_TYPE,
 false,
 OafMapperUtils
 .qualifier(
 UPDATE_MEASURE_BIP_CLASS_ID,
 UPDATE_CLASS_NAME,
 ModelConstants.DNET_PROVENANCE_ACTIONS));
 private Constants() {
 }
@@ -101,7 +101,7 @@ public class Constants {
 .setDataInfo(
 OafMapperUtils
 .dataInfo(
-0.0f, //TODO check
+0.0f, // TODO check
 UPDATE_DATA_INFO_TYPE,
 true,
 OafMapperUtils

View File

@@ -11,8 +11,6 @@ import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -31,8 +29,10 @@ import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareBipFinder implements Serializable {
@@ -100,38 +100,16 @@ public class PrepareBipFinder implements Serializable {
 Instance inst = new Instance();
 /*
-inst
-.setPid(
-Arrays
-.asList(
-OafMapperUtils
-.structuredProperty(
-cleanedPid,
-OafMapperUtils
-.qualifier(
-PidType.doi.toString(), DOI_CLASSNAME,
-ModelConstants.DNET_PID_TYPES,
-ModelConstants.DNET_PID_TYPES),
-null)));
+ * inst .setPid( Arrays .asList( OafMapperUtils .structuredProperty( cleanedPid, OafMapperUtils
+ * .qualifier( PidType.doi.toString(), DOI_CLASSNAME, ModelConstants.DNET_PID_TYPES,
+ * ModelConstants.DNET_PID_TYPES), null)));
 */
 r.setInstance(Arrays.asList(inst));
 /*
-r
-.setDataInfo(
-OafMapperUtils
-.dataInfo(
-false, null, true,
-false,
-OafMapperUtils
-.qualifier(
-ModelConstants.PROVENANCE_ENRICH,
-null,
-ModelConstants.DNET_PROVENANCE_ACTIONS,
-ModelConstants.DNET_PROVENANCE_ACTIONS),
-null));
+ * r .setDataInfo( OafMapperUtils .dataInfo( false, null, true, false, OafMapperUtils .qualifier(
+ * ModelConstants.PROVENANCE_ENRICH, null, ModelConstants.DNET_PROVENANCE_ACTIONS,
+ * ModelConstants.DNET_PROVENANCE_ACTIONS), null));
 */
 return r;
 }, Encoders.bean(Result.class))
@@ -158,22 +136,10 @@ public class PrepareBipFinder implements Serializable {
 u.setValue(u.getValue());
 u.setKey(u.getKey());
 /*
-kv
-.setDataInfo(
-OafMapperUtils
-.dataInfo(
-false,
-UPDATE_DATA_INFO_TYPE,
-true,
-false,
-OafMapperUtils
-.qualifier(
-UPDATE_MEASURE_BIP_CLASS_ID,
-UPDATE_CLASS_NAME,
-ModelConstants.DNET_PROVENANCE_ACTIONS,
-ModelConstants.DNET_PROVENANCE_ACTIONS),
-""));
+ * kv .setDataInfo( OafMapperUtils .dataInfo( false, UPDATE_DATA_INFO_TYPE, true, false,
+ * OafMapperUtils .qualifier( UPDATE_MEASURE_BIP_CLASS_ID, UPDATE_CLASS_NAME,
+ * ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+ * ""));
 */
 return u;
 })

View File

@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareFOSSparkJob implements Serializable {

View File

@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareSDGSparkJob implements Serializable {

View File

@@ -7,8 +7,6 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.util.*;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
@@ -31,6 +29,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import scala.Tuple2;
 public class CreateActionSetSparkJob implements Serializable {
@@ -47,17 +47,20 @@ public class CreateActionSetSparkJob implements Serializable {
 COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
 COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);
-DATA_INFO = OafMapperUtils.dataInfo(
+DATA_INFO = OafMapperUtils
+.dataInfo(
 TRUST,
 null,
 false,
-OafMapperUtils.qualifier(
+OafMapperUtils
+.qualifier(
 OPENCITATIONS_CLASSID,
 OPENCITATIONS_CLASSNAME,
 ModelConstants.DNET_PROVENANCE_ACTIONS));
 }
-private static final List<Provenance> PROVENANCE = Arrays.asList(
+private static final List<Provenance> PROVENANCE = Arrays
+.asList(
 OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));
 private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
@@ -144,7 +147,8 @@ public class CreateActionSetSparkJob implements Serializable {
 }
 private static String asOpenAireId(String value) {
-return IdentifierFactory.idFromPid(
+return IdentifierFactory
+.idFromPid(
 "50", PidType.doi.toString(),
 CleaningFunctions.normalizePidValue(PidType.doi.toString(), value),
 true);

View File

@@ -7,8 +7,6 @@ import java.util.Arrays;
 import java.util.Objects;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -29,10 +27,12 @@ import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.H2020Classification;
 import eu.dnetlib.dhp.schema.oaf.H2020Programme;
-import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

View File

@@ -20,7 +20,6 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -43,6 +42,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

View File

@@ -121,7 +121,7 @@ public class SparkAtomicActionUsageJob implements Serializable {
 private static List<Measure> getMeasure(Long downloads, Long views) {
 DataInfo dataInfo = OafMapperUtils
 .dataInfo(
-0.0f, //TODO check
+0.0f, // TODO check
 UPDATE_DATA_INFO_TYPE,
 false,
 OafMapperUtils

View File

@@ -11,7 +11,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Objects;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.IntWritable;
@@ -33,6 +32,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
 import eu.dnetlib.dhp.schema.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import scala.Tuple2;
 public class GenerateNativeStoreSparkJob {

View File

@@ -21,7 +21,7 @@ import scala.util.matching.Regex
 case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
-case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
+case class CrossrefAuthor(givenName: String, familyName: String, ORCID: String, sequence: String, rank: Int) {}
 case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
@@ -30,7 +30,6 @@ object CrossrefUtility {
 val logger: Logger = LoggerFactory.getLogger(getClass)
 def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
 implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 lazy val json: json4s.JValue = parse(input)
@@ -56,7 +55,7 @@
 (json \ "funder").extractOrElse[List[mappingFunder]](List())
 if (funderList.nonEmpty) {
-resultList = resultList ::: mappingFunderToRelations(funderList, result )
+resultList = resultList ::: mappingFunderToRelations(funderList, result)
 }
 resultList = resultList ::: List(result)
 resultList
@@ -73,19 +72,18 @@
 r
 }
 private def generateSimpleRelationFromAward(
 funder: mappingFunder,
 nsPrefix: String,
 extractField: String => String,
-source:Result
+source: Result
 ): List[Relation] = {
 if (funder.award.isDefined && funder.award.get.nonEmpty)
 funder.award.get
 .map(extractField)
 .filter(a => a != null && a.nonEmpty)
 .map(award => {
-val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
+val targetId = IdentifierFactory.createOpenaireId("project", s"$nsPrefix::$award", true)
 createRelation(targetId, source.getId, ModelConstants.PRODUCES)
 })
 else List()
@@ -106,56 +104,74 @@
 }
 private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
-var relList:List[Relation] = List()
+var relList: List[Relation] = List()
 if (funders != null)
 funders.foreach(funder => {
 if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
 funder.DOI.get match {
 case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
 "10.13039/100010665" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
 case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
 case "10.13039/501100000781" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
-case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
-case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
-case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+case "10.13039/100000001" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
+case "10.13039/501100001665" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+case "10.13039/501100002341" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
 case "10.13039/501100001602" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
-case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
+relList =
+relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
+case "10.13039/501100000923" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
 case "10.13039/501100000038" =>
-val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+val targetId =
+IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
 case "10.13039/501100000155" =>
-val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+val targetId =
+IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
 case "10.13039/501100000024" =>
-val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
-case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
-case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
-case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
-case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
+val targetId =
+IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+case "10.13039/501100002848" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
+case "10.13039/501100003448" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
+case "10.13039/501100010198" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
+case "10.13039/501100004564" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
 case "10.13039/501100003407" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
-val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+relList = relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
+val targetId =
+IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
 case "10.13039/501100006588" | "10.13039/501100004488" =>
-relList =relList ::: generateSimpleRelationFromAward(
+relList = relList ::: generateSimpleRelationFromAward(
 funder,
 "irb_hr______",
-a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
+a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""),
+result
 )
-case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
-case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
-case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
+case "10.13039/501100006769" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
+case "10.13039/501100001711" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
+case "10.13039/501100004410" =>
+relList = relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
 case "10.13039/100004440" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
-val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+val targetId =
+IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
 case _ => logger.debug("no match for " + funder.DOI.get)
 }
@@ -163,18 +179,19 @@
 } else {
 funder.name match {
 case "European Unions Horizon 2020 research and innovation program" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
 case "European Union's" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
-relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
 case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
 case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
+relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
 case "Wellcome Trust Masters Fellowship" =>
-relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
-val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
-relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+val targetId =
+IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
 case _ => logger.debug("no match for " + funder.name)
 }
@@ -185,11 +202,7 @@
 }
-private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
+private def mappingResult(result: Result, json: JValue, cobjCategory: String, className: String): Result = {
 implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 //MAPPING Crossref DOI into PID
@@ -236,7 +249,9 @@
 } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
 val subtitles =
 for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
-title, ModelConstants.SUBTITLE_QUALIFIER)
+title,
+ModelConstants.SUBTITLE_QUALIFIER
+)
 result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
 // DESCRIPTION
@@ -302,28 +317,52 @@
 if (subjectList.nonEmpty) {
 result.setSubject(
-subjectList.map(s =>
-OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
-).asJava)
+subjectList
+.map(s =>
+OafMapperUtils.subject(
+s,
+OafMapperUtils.qualifier(
+ModelConstants.DNET_SUBJECT_KEYWORD,
+ModelConstants.DNET_SUBJECT_KEYWORD,
+ModelConstants.DNET_SUBJECT_TYPOLOGIES
+),
+null
+)
+)
+.asJava
+)
 }
 //Mapping Author
-val authorList:List[CrossrefAuthor] =
+val authorList: List[CrossrefAuthor] =
 for {
 JObject(author) <- json \ "author"
 JField("ORCID", JString(orcid)) <- author
 JField("given", JString(givenName)) <- author
 JField("family", JString(familyName)) <- author
 JField("sequence", JString(sequence)) <- author
-} yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
+} yield CrossrefAuthor(
+givenName = givenName,
+familyName = familyName,
+ORCID = orcid,
+sequence = sequence,
+rank = 0
+)
-result.setAuthor(authorList.sortWith((a,b) =>{
-if (a.sequence.equalsIgnoreCase("first"))
-true
-else if (b.sequence.equalsIgnoreCase("first"))
-false
-else a.familyName< b.familyName
-}).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
+result.setAuthor(
+authorList
+.sortWith((a, b) => {
+if (a.sequence.equalsIgnoreCase("first"))
+true
+else if (b.sequence.equalsIgnoreCase("first"))
+false
+else a.familyName < b.familyName
+})
+.zipWithIndex
+.map(k => k._1.copy(rank = k._2))
+.map(k => generateAuthor(k))
+.asJava
+)
 // Mapping instance
 val instance = new Instance()
@@ -360,7 +399,7 @@
 )
 }
-if (instance.getLicense!= null)
+if (instance.getLicense != null)
 instance.setAccessright(
 decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
 )
@@ -392,7 +431,7 @@
 val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
 //Mapping book
 if (className.toLowerCase.contains("book")) {
-val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
+val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
 if (ISBN.nonEmpty && containerTitles.nonEmpty) {
 val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
 if (result.getSource != null) {
@@ -404,8 +443,8 @@
 } else {
 // Mapping Journal
 val issnInfos = for {
 JObject(issn_type) <- json \ "issn-type"
 JField("type", JString(tp)) <- issn_type
 JField("value", JString(vl)) <- issn_type
 } yield Tuple2(tp, vl)
@@ -418,7 +457,7 @@
 issnInfos.foreach(tp => {
 tp._1 match {
 case "electronic" => journal.setIssnOnline(tp._2)
 case "print" => journal.setIssnPrinted(tp._2)
 }
 })
 }
@@ -435,7 +474,6 @@
 }
 }
 result.setInstance(List(instance).asJava)
 result.setId("ID")
 result.setId(IdentifierFactory.createIdentifier(result, true))
@@ -453,16 +491,16 @@
 //CC licenses
 if (
 license.startsWith("cc") ||
 license.startsWith("http://creativecommons.org/licenses") ||
 license.startsWith("https://creativecommons.org/licenses") ||
 //ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
 license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
 license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
 license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
 //APA (considered OPEN also by Unpaywall)
 license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
 ) {
 val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
@@ -481,11 +519,11 @@
 try {
 val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
 if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
 val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
 oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
 return oaq
 } else {
 return ModelConstants.EMBARGOED_ACCESS_RIGHT()
 }
 } catch {
 case _: Exception => {
@@ -511,7 +549,6 @@
 ModelConstants.CLOSED_ACCESS_RIGHT()
 }
 private def extractDate(dt: String, datePart: List[List[Int]]): String = {
 if (StringUtils.isNotBlank(dt))
 return GraphCleaningFunctions.cleanDate(dt)
@@ -533,11 +570,11 @@
 }
 private def generateDate(
 dt: String,
 datePart: List[List[Int]],
 classId: String,
 schemeId: String
 ): StructuredProperty = {
 val dp = extractDate(dt, datePart)
 if (StringUtils.isNotBlank(dp))
 structuredProperty(dp, classId, classId, schemeId)
@@ -552,9 +589,9 @@
 vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
 resourceType match {
 case "publication" => (new Publication, resourceType, term.getClassname)
 case "dataset" => (new Dataset, resourceType, term.getClassname)
 case "software" => (new Software, resourceType, term.getClassname)
 case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
 }
 } else
@@ -570,7 +607,15 @@
 if (StringUtils.isNotBlank(ca.ORCID))
 a.setPid(
 List(
-OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
+OafMapperUtils.authorPid(
+ca.ORCID,
+OafMapperUtils.qualifier(
+ModelConstants.ORCID_PENDING,
+ModelConstants.ORCID_PENDING,
+ModelConstants.DNET_PID_TYPES
+),
+null
+)
 ).asJava
 )
 a

View File

@@ -7,8 +7,6 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkConf;
@@ -29,8 +27,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

View File

@@ -2,6 +2,7 @@
 package eu.dnetlib.dhp;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
@@ -16,10 +17,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Country;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 public class PropagationConstant {
@@ -44,6 +43,7 @@ public class PropagationConstant {
 public final static String NULL = "NULL";
+public final static float PROPAGATION_TRUST = 0.85f;
 public static final String INSTITUTIONAL_REPO_TYPE = "institutional";
 public static final String PROPAGATION_DATA_INFO_TYPE = "propagation";
@@ -90,54 +90,22 @@ public class PropagationConstant {
 Country nc = new Country();
 nc.setClassid(classid);
 nc.setClassname(classname);
-nc.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
 nc.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
 nc
 .setDataInfo(
-getDataInfo(
-PROPAGATION_DATA_INFO_TYPE,
-PROPAGATION_COUNTRY_INSTREPO_CLASS_ID,
-PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME,
-ModelConstants.DNET_PROVENANCE_ACTIONS));
+OafMapperUtils
+.dataInfo(
+PROPAGATION_TRUST,
+PROPAGATION_DATA_INFO_TYPE,
+true,
+OafMapperUtils
+.qualifier(
+PROPAGATION_COUNTRY_INSTREPO_CLASS_ID,
+PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME,
+ModelConstants.DNET_PROVENANCE_ACTIONS)));
 return nc;
 }
-public static DataInfo getDataInfo(
-String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) {
-return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, "0.85");
-}
-public static DataInfo getDataInfo(
-String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema,
-String trust) {
-return getDataInfo(
-inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true);
-}
-public static DataInfo getDataInfo(
-String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema,
-String trust, boolean inferred) {
-DataInfo di = new DataInfo();
-di.setInferred(inferred);
-di.setDeletedbyinference(false);
-di.setTrust(trust);
-di.setInferenceprovenance(inference_provenance);
-di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema));
-return di;
-}
-public static Qualifier getQualifier(String inference_class_id, String inference_class_name,
-String qualifierSchema) {
-Qualifier pa = new Qualifier();
-pa.setClassid(inference_class_id);
-pa.setClassname(inference_class_name);
-pa.setSchemeid(qualifierSchema);
-pa.setSchemename(qualifierSchema);
-return pa;
-}
 public static ArrayList<Relation> getOrganizationRelationPair(String orgId,
 String resultId,
 String classID,
@@ -186,11 +154,18 @@ public class PropagationConstant {
 r.setRelClass(rel_class);
 r.setRelType(rel_type);
 r.setSubRelType(subrel_type);
-r
+Provenance p = new Provenance();
+p
 .setDataInfo(
-getDataInfo(
-inference_provenance, inference_class_id, inference_class_name,
-ModelConstants.DNET_PROVENANCE_ACTIONS));
+OafMapperUtils
+.dataInfo(
+PROPAGATION_TRUST, inference_provenance, true,
+OafMapperUtils
+.qualifier(
+inference_class_id, inference_class_name,
+ModelConstants.DNET_PROVENANCE_ACTIONS)));
+r.setProvenance(Arrays.asList(p));
 return r;
 }
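Taken together, the added lines above replace the deleted getDataInfo/getQualifier helpers with direct OafMapperUtils calls wrapped in a Provenance. A sketch of the resulting relation wiring (all names come from the hunk above):

Provenance p = new Provenance();
p.setDataInfo(
    OafMapperUtils.dataInfo(
        PROPAGATION_TRUST,                  // the new 0.85f constant introduced above
        inference_provenance, true,
        OafMapperUtils.qualifier(
            inference_class_id, inference_class_name,
            ModelConstants.DNET_PROVENANCE_ACTIONS)));
r.setProvenance(Arrays.asList(p));          // replaces the old r.setDataInfo(getDataInfo(...))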

View File

@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -15,8 +14,6 @@ import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ -173,45 +170,39 @@ public class ResultTagger implements Serializable {
.add( .add(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS)));
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (datasources.contains(cId)) if (datasources.contains(cId))
dataInfoList dataInfoList
.add( .add(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
DNET_PROVENANCE_ACTIONS), DNET_PROVENANCE_ACTIONS)));
TAGGING_TRUST));
if (czenodo.contains(cId)) if (czenodo.contains(cId))
dataInfoList dataInfoList
.add( .add(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS)));
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (aconstraints.contains(cId)) if (aconstraints.contains(cId))
dataInfoList dataInfoList
.add( .add(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), DNET_PROVENANCE_ACTIONS)));
TAGGING_TRUST));
} }
}); });
@@ -235,45 +226,40 @@ public class ResultTagger implements Serializable {
 					.add(
 						OafMapperUtils
 							.dataInfo(
-								false, BULKTAG_DATA_INFO_TYPE, true,
+								false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 								OafMapperUtils
 									.qualifier(
-										CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
-										DNET_PROVENANCE_ACTIONS),
-								TAGGING_TRUST));
+										CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT,
+										DNET_PROVENANCE_ACTIONS)));
 			if (datasources.contains(c))
 				dataInfoList
 					.add(
 						OafMapperUtils
 							.dataInfo(
-								false, BULKTAG_DATA_INFO_TYPE, true,
+								false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 								OafMapperUtils
 									.qualifier(
 										CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
-										DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-								TAGGING_TRUST));
+										DNET_PROVENANCE_ACTIONS)));
 			if (czenodo.contains(c))
 				dataInfoList
 					.add(
 						OafMapperUtils
 							.dataInfo(
-								false, BULKTAG_DATA_INFO_TYPE, true,
+								false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 								OafMapperUtils
 									.qualifier(
-										CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
-										DNET_PROVENANCE_ACTIONS),
-								TAGGING_TRUST));
+										CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS)));
 			if (aconstraints.contains(c))
 				dataInfoList
 					.add(
 						OafMapperUtils
 							.dataInfo(
-								false, BULKTAG_DATA_INFO_TYPE, true,
+								false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 								OafMapperUtils
 									.qualifier(
 										CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
-										DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-								TAGGING_TRUST));
+										DNET_PROVENANCE_ACTIONS)));
 		context.setDataInfo(dataInfoList);
 		return context;
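
Note on the recurring rewrite above: in the new model, OafMapperUtils.dataInfo takes the trust value right after the deletedbyinference flag instead of as a trailing String argument, and OafMapperUtils.qualifier loses the duplicated scheme argument now that Qualifier no longer carries a separate schemename. The same reshaping repeats for the subject, datasource, Zenodo and advanced-constraint branches, and again in SparkEoscBulkTag below; TAGGING_TRUST itself is retyped to float in the next hunk. A minimal sketch of the new call shape, assuming the static imports of TaggingConstants and ModelConstants that ResultTagger already uses, with dataInfoList standing in for the surrounding collection:

    // new-model shape: trust (float) is the second dataInfo argument,
    // qualifier takes classid, classname and a single scheme id
    dataInfoList
        .add(
            OafMapperUtils
                .dataInfo(
                    false, TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
                    OafMapperUtils
                        .qualifier(
                            CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS)));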

View File

@@ -20,5 +20,5 @@ public class TaggingConstants {
 	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
 	public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";
-	public static final String TAGGING_TRUST = "0.8";
+	public static final float TAGGING_TRUST = 0.8f;
 }

View File

@@ -29,9 +29,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
 import eu.dnetlib.dhp.bulktag.community.*;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -132,12 +130,13 @@ public class SparkEoscBulkTag implements Serializable {
 			.asList(
 				OafMapperUtils
 					.dataInfo(
-						false, BULKTAG_DATA_INFO_TYPE, true,
+						false, TAGGING_TRUST,
+						BULKTAG_DATA_INFO_TYPE,
+						true,
 						OafMapperUtils
 							.qualifier(
 								CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
-								DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-						TAGGING_TRUST)));
+								DNET_PROVENANCE_ACTIONS))));
 	value.getContext().add(context);
 }

View File

@@ -221,7 +221,7 @@ public class SparkEoscTag {
 		return words;
 	}
-	private static Set<String> getWordsF(List<Field<String>> elem) {
+	private static Set<String> getWordsF(List<String> elem) {
 		Set<String> words = new HashSet<>();
 		Optional
 			.ofNullable(elem)
@@ -230,7 +230,7 @@ public class SparkEoscTag {
 				.forEach(
 					t -> words
 						.addAll(
-							Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
+							Arrays.asList(t.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
 		return words;
 	}
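
The signature change here reflects the new model apparently dropping the Field<String> wrapper from list-valued fields, so the tokenizer works on the string directly instead of unwrapping it with getValue(). A sketch of the whole helper under that assumption; the diff skips the lines between the two hunks, so the ifPresent glue below is assumed:

    // tokenizes each element into lowercase alphabetic words
    private static Set<String> getWordsF(List<String> elem) {
        Set<String> words = new HashSet<>();
        Optional
            .ofNullable(elem)
            .ifPresent(
                e -> e
                    .forEach(
                        t -> words
                            .addAll(
                                Arrays.asList(t.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
        return words;
    }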

View File

@@ -96,8 +96,7 @@ public class PrepareDatasourceCountryAssociation {
 		// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
 		Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
 			.filter(
-				(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) &&
-					!rel.getDataInfo().getDeletedbyinference());
+				(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
 		// filtering of the organization taking only the non deleted by inference and those with information about the
 		// country
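
The deletedbyinference guard disappears here (and again in PrepareInfo further down), presumably because a Relation's provenance no longer exposes that flag directly in the new model — compare the StepActions hunk below, where provenance becomes a list. The context comment above the filter still describes the old guard. Only the relclass test survives; a sketch of the resulting filter, assuming readPath returns a typed Dataset<Relation> as elsewhere in this module:

    // keep only isProvidedBy relations; no deletedbyinference check anymore
    Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
        .filter(
            (FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));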

View File

@@ -23,6 +23,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Country;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 public class SparkCountryPropagationJob {
@@ -126,6 +127,7 @@ public class SparkCountryPropagationJob {
 			.filter(c -> !finalCountries.contains(c.getClassid()))
 			.map(c -> getCountry(c.getClassid(), c.getClassname()))
 			.collect(Collectors.toList());
 	}
 }

View File

@@ -24,8 +24,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.PacePerson;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.AuthorPid;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 public class SparkOrcidToResultFromSemRelJob {
@@ -171,21 +173,26 @@ public class SparkOrcidToResultFromSemRelJob {
 			}
 		}
 		if (toaddpid) {
-			StructuredProperty p = new StructuredProperty();
+			AuthorPid p = new AuthorPid();
 			p.setValue(autoritative_author.getOrcid());
 			p
 				.setQualifier(
-					getQualifier(
-						ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES));
+					OafMapperUtils
+						.qualifier(
+							ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME,
+							ModelConstants.DNET_PID_TYPES));
 			p
 				.setDataInfo(
-					getDataInfo(
-						PROPAGATION_DATA_INFO_TYPE,
-						PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
-						PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME,
-						ModelConstants.DNET_PROVENANCE_ACTIONS));
+					OafMapperUtils
+						.dataInfo(
+							PROPAGATION_TRUST,
+							PROPAGATION_DATA_INFO_TYPE, true, OafMapperUtils
+								.qualifier(
+									PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
+									PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME,
+									ModelConstants.DNET_PROVENANCE_ACTIONS)));
-			Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
+			Optional<List<AuthorPid>> authorPid = Optional.ofNullable(author.getPid());
 			if (authorPid.isPresent()) {
 				authorPid.get().add(p);
 			} else {
@@ -197,7 +204,7 @@ public class SparkOrcidToResultFromSemRelJob {
 	}
 	private static boolean containsAllowedPid(Author a) {
-		Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());
+		Optional<List<AuthorPid>> pids = Optional.ofNullable(a.getPid());
 		if (!pids.isPresent()) {
 			return false;
 		}
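
Author pids switch from the generic StructuredProperty to the dedicated AuthorPid type, and the class-local getQualifier/getDataInfo helpers give way to OafMapperUtils. Note that the dataInfo overload used here differs from the five-argument bulktag form: read off the hunk (not a documented signature), it takes the trust first and has no deletedbyinference flag. A condensed sketch of building the propagated ORCID pid, using only names that appear in the hunk:

    // new-model pid construction for an author
    AuthorPid p = new AuthorPid();
    p.setValue(autoritative_author.getOrcid());
    p
        .setQualifier(
            OafMapperUtils
                .qualifier(
                    ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME,
                    ModelConstants.DNET_PID_TYPES));
    p
        .setDataInfo(
            OafMapperUtils
                .dataInfo(
                    PROPAGATION_TRUST, PROPAGATION_DATA_INFO_TYPE, true,
                    OafMapperUtils
                        .qualifier(
                            PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
                            PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME,
                            ModelConstants.DNET_PROVENANCE_ACTIONS)));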

View File

@@ -24,6 +24,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Context;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 public class SparkResultToCommunityFromOrganizationJob {
@@ -126,16 +128,20 @@ public class SparkResultToCommunityFromOrganizationJob {
 					.setDataInfo(
 						Arrays
 							.asList(
-								getDataInfo(
-									PROPAGATION_DATA_INFO_TYPE,
-									PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID,
-									PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME,
-									ModelConstants.DNET_PROVENANCE_ACTIONS)));
+								OafMapperUtils
+									.dataInfo(
+										PROPAGATION_TRUST,
+										PROPAGATION_DATA_INFO_TYPE, true,
+										OafMapperUtils
+											.qualifier(
+												PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID,
+												PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME,
+												ModelConstants.DNET_PROVENANCE_ACTIONS))));
 				propagatedContexts.add(newContext);
 			}
 		}
 		res.setContext(propagatedContexts);
-		ret.mergeFrom(res);
+		ret = MergeUtils.merge(ret, res);
 	}
 	return ret;
 };
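
Merging flips from mutation to a functional style: the new-model entities evidently no longer expose mergeFrom, so the result of MergeUtils.merge must be reassigned. The same substitution appears twice in SparkResultToCommunityThroughSemRelJob below. A sketch of the pattern, where ret is the accumulated Result and res a Result carrying only the propagated contexts:

    // merge returns the combined entity instead of mutating ret in place
    res.setContext(propagatedContexts);
    ret = MergeUtils.merge(ret, res);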

View File

@@ -21,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 public class SparkResultToCommunityThroughSemRelJob {
@@ -122,11 +124,14 @@ public class SparkResultToCommunityThroughSemRelJob {
 			.setDataInfo(
 				Arrays
 					.asList(
-						getDataInfo(
-							PROPAGATION_DATA_INFO_TYPE,
-							PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
-							PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME,
-							ModelConstants.DNET_PROVENANCE_ACTIONS)));
+						OafMapperUtils
+							.dataInfo(
+								PROPAGATION_TRUST, PROPAGATION_DATA_INFO_TYPE, true,
+								OafMapperUtils
+									.qualifier(
+										PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
+										PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME,
+										ModelConstants.DNET_PROVENANCE_ACTIONS))));
 		return newContext;
 	}
 	return null;
@@ -139,7 +144,7 @@ public class SparkResultToCommunityThroughSemRelJob {
 		r.setId(ret.getId());
 		r.setContext(contextList);
-		ret.mergeFrom(r);
+		ret = MergeUtils.merge(ret, r);
 	}
 	return ret;

View File

@@ -115,8 +115,7 @@ public class PrepareInfo implements Serializable {
 		relation
 			.filter(
-				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
-					r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION))
+				(FilterFunction<Relation>) r -> r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")

View File

@@ -126,10 +126,14 @@ public class StepActions implements Serializable {
 				.stream()
 				.filter(
 					rel -> !rel
-						.getDataInfo()
-						.getProvenanceaction()
-						.getClassid()
-						.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID))
+						.getProvenance()
+						.stream()
+						.anyMatch(
+							p -> p
+								.getDataInfo()
+								.getProvenanceaction()
+								.getClassid()
+								.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)))
 				.count() > 0) {
 			return null;
 		}
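
In the new model a Relation carries a list of Provenance entries rather than a single DataInfo, each entry wrapping its own DataInfo, so the "already propagated" test becomes an anyMatch over that list. A sketch of the predicate in isolation, with rel and alreadyPropagated as hypothetical names:

    // true when any provenance entry was produced by this propagation step
    boolean alreadyPropagated = rel
        .getProvenance()
        .stream()
        .anyMatch(
            p -> p
                .getDataInfo()
                .getProvenanceaction()
                .getClassid()
                .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID));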

View File

@@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable {
   def generateUpdates(spark: SparkSession): Unit = {
     val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString
-    val pids: List[String] = template.linesWithSeparators.map(l =>l.stripLineEnd)
+    val pids: List[String] = template.linesWithSeparators
+      .map(l => l.stripLineEnd)
       .map { id =>
         val r = new Result
         r.setId(id.toLowerCase.trim)
@@ -126,7 +127,7 @@ class ResolveEntitiesTest extends Serializable {
     entities.foreach { e =>
       val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString
       spark
-        .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l =>l.stripLineEnd).toList))
+        .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l => l.stripLineEnd).toList))
         .as[String]
         .write
         .option("compression", "gzip")
@@ -263,7 +264,8 @@ class ResolveEntitiesTest extends Serializable {
         Source
           .fromInputStream(this.getClass.getResourceAsStream(s"publication"))
           .mkString
-          .linesWithSeparators.map(l =>l.stripLineEnd)
+          .linesWithSeparators
+          .map(l => l.stripLineEnd)
          .next(),
        classOf[Publication]
      )

View File

@@ -47,7 +47,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
     val inputRelations = Source
       .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary"))
       .mkString
-    val items = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd).toList
+    val items = inputRelations.linesWithSeparators.map(l => l.stripLineEnd).toList
     assertNotNull(items)
     items.foreach(i => assertTrue(i.nonEmpty))
     val result =
@@ -69,7 +69,8 @@ class ScholixGraphTest extends AbstractVocabularyTest {
       getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")
     )
       .mkString
-    val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd)
+    val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators
+      .map(l => l.stripLineEnd)
       .sliding(2)
       .map(s => (s.head, s(1)))
       .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))