Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369
commit 74c40fd4e2
@@ -241,27 +241,27 @@ public class ModelConstants {
 	}

 	public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
-		return OafMapperUtils.accessRight(
+		return OafMapperUtils
+			.accessRight(
 				ModelConstants.UNKNOWN,
 				ModelConstants.NOT_AVAILABLE,
-				ModelConstants.DNET_ACCESS_MODES
-			);
+				ModelConstants.DNET_ACCESS_MODES);
 	}

 	public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
-		return OafMapperUtils.accessRight(
+		return OafMapperUtils
+			.accessRight(
 				ACCESS_RIGHT_EMBARGO,
 				ACCESS_RIGHT_EMBARGO,
-				DNET_ACCESS_MODES
-			);
+				DNET_ACCESS_MODES);
 	}

 	public static final AccessRight CLOSED_ACCESS_RIGHT() {
-		return OafMapperUtils.accessRight(
+		return OafMapperUtils
+			.accessRight(
 				ACCESS_RIGHT_CLOSED,
 				"Closed Access",
-				ModelConstants.DNET_ACCESS_MODES
-			);
+				ModelConstants.DNET_ACCESS_MODES);
 	}

 	private static Qualifier qualifier(
@@ -6,9 +6,9 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;

-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.lang3.StringUtils;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import lombok.val;

@@ -55,8 +55,6 @@ public class OafMapperUtils {
 		return qualifier(UNKNOWN, "Unknown", schemeid);
 	}

-
-
 	public static AccessRight accessRight(
 		final String classid,
 		final String classname,
@@ -67,7 +67,7 @@ public class VocabularyTest {
 			if (t1 == null) {
 				System.err.println(s1 + " Missing");
 			} else {
-				System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
+				System.out.println("syn=" + s1 + " term = " + t1.getClassid() + " " + t1.getClassname());

 				System.out
 					.println(
@@ -7,7 +7,6 @@ import java.util.function.BiFunction;

 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
-
 import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;

 /** OAF model merging support. */
@@ -8,7 +8,6 @@ import java.util.Optional;
 import java.util.function.BiFunction;
 import java.util.function.Function;

-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -26,8 +25,8 @@ import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.common.HdfsSupport;
-
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;

 /** Applies a given action payload file to graph table of compatible type. */
 public class PromoteActionPayloadForGraphTableJob {
@@ -8,12 +8,12 @@ import static org.mockito.Mockito.*;

 import java.util.function.BiFunction;

-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;

 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;

 public class MergeAndGetTest {

@@ -97,7 +97,7 @@ public class MergeAndGetTest {
 			// then
 			Oaf x = fn.get().apply(a, b);
 			assertTrue(Relation.class.isAssignableFrom(x.getClass()));
-			//verify(a).mergeFrom(b);
+			// verify(a).mergeFrom(b);
 			a = MergeUtils.merge(verify(a), b);
 			assertEquals(a, x);
 		}
@@ -14,7 +14,6 @@ import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -28,8 +27,8 @@ import org.junit.jupiter.params.provider.MethodSource;

 import com.fasterxml.jackson.databind.ObjectMapper;

-
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;

 public class PromoteActionPayloadForGraphTableJobTest {
 	private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
@@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager;

 import java.util.Optional;

-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -14,6 +12,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -46,7 +46,7 @@ public class Constants {
 			.dataInfo(
 				false,
 				false,
-				0.8f, //TODO check
+				0.8f, // TODO check
 				"SciNoBo",
 				true,
 				OafMapperUtils
@@ -101,7 +101,7 @@ public class Constants {
 			.setDataInfo(
 				OafMapperUtils
 					.dataInfo(
-						0.0f, //TODO check
+						0.0f, // TODO check
 						UPDATE_DATA_INFO_TYPE,
 						true,
 						OafMapperUtils
@@ -11,8 +11,6 @@ import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -31,8 +29,10 @@ import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;

 public class PrepareBipFinder implements Serializable {
@@ -100,38 +100,16 @@ public class PrepareBipFinder implements Serializable {
 					Instance inst = new Instance();

 					/*
-					inst
-						.setPid(
-							Arrays
-								.asList(
-									OafMapperUtils
-										.structuredProperty(
-											cleanedPid,
-											OafMapperUtils
-												.qualifier(
-													PidType.doi.toString(), DOI_CLASSNAME,
-													ModelConstants.DNET_PID_TYPES,
-													ModelConstants.DNET_PID_TYPES),
-											null)));
-
+					 * inst .setPid( Arrays .asList( OafMapperUtils .structuredProperty( cleanedPid, OafMapperUtils
+					 * .qualifier( PidType.doi.toString(), DOI_CLASSNAME, ModelConstants.DNET_PID_TYPES,
+					 * ModelConstants.DNET_PID_TYPES), null)));
 					 */
 					r.setInstance(Arrays.asList(inst));

 					/*
-					r
-						.setDataInfo(
-							OafMapperUtils
-								.dataInfo(
-									false, null, true,
-									false,
-									OafMapperUtils
-										.qualifier(
-											ModelConstants.PROVENANCE_ENRICH,
-											null,
-											ModelConstants.DNET_PROVENANCE_ACTIONS,
-											ModelConstants.DNET_PROVENANCE_ACTIONS),
-									null));
-
+					 * r .setDataInfo( OafMapperUtils .dataInfo( false, null, true, false, OafMapperUtils .qualifier(
+					 * ModelConstants.PROVENANCE_ENRICH, null, ModelConstants.DNET_PROVENANCE_ACTIONS,
+					 * ModelConstants.DNET_PROVENANCE_ACTIONS), null));
 					 */
 					return r;
 				}, Encoders.bean(Result.class))
@@ -158,22 +136,10 @@ public class PrepareBipFinder implements Serializable {
 					u.setValue(u.getValue());
 					u.setKey(u.getKey());
 					/*
-					kv
-						.setDataInfo(
-							OafMapperUtils
-								.dataInfo(
-									false,
-									UPDATE_DATA_INFO_TYPE,
-									true,
-									false,
-									OafMapperUtils
-										.qualifier(
-											UPDATE_MEASURE_BIP_CLASS_ID,
-											UPDATE_CLASS_NAME,
-											ModelConstants.DNET_PROVENANCE_ACTIONS,
-											ModelConstants.DNET_PROVENANCE_ACTIONS),
-									""));
-
+					 * kv .setDataInfo( OafMapperUtils .dataInfo( false, UPDATE_DATA_INFO_TYPE, true, false,
+					 * OafMapperUtils .qualifier( UPDATE_MEASURE_BIP_CLASS_ID, UPDATE_CLASS_NAME,
+					 * ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+					 * ""));
 					 */
 					return u;
 				})
@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;

 public class PrepareFOSSparkJob implements Serializable {
@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;

-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;

 public class PrepareSDGSparkJob implements Serializable {
@@ -7,8 +7,6 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.util.*;

-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
@@ -31,6 +29,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import scala.Tuple2;

 public class CreateActionSetSparkJob implements Serializable {
@@ -47,17 +47,20 @@ public class CreateActionSetSparkJob implements Serializable {
 		COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
 		COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);

-		DATA_INFO = OafMapperUtils.dataInfo(
+		DATA_INFO = OafMapperUtils
+			.dataInfo(
 				TRUST,
 				null,
 				false,
-				OafMapperUtils.qualifier(
+				OafMapperUtils
+					.qualifier(
 						OPENCITATIONS_CLASSID,
 						OPENCITATIONS_CLASSNAME,
 						ModelConstants.DNET_PROVENANCE_ACTIONS));
 	}

-	private static final List<Provenance> PROVENANCE = Arrays.asList(
+	private static final List<Provenance> PROVENANCE = Arrays
+		.asList(
 			OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));

 	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
@@ -144,7 +147,8 @@ public class CreateActionSetSparkJob implements Serializable {
 	}

 	private static String asOpenAireId(String value) {
-		return IdentifierFactory.idFromPid(
+		return IdentifierFactory
+			.idFromPid(
 				"50", PidType.doi.toString(),
 				CleaningFunctions.normalizePidValue(PidType.doi.toString(), value),
 				true);
@@ -7,8 +7,6 @@ import java.util.Arrays;
 import java.util.Objects;
 import java.util.Optional;

-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -29,10 +27,12 @@ import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.H2020Classification;
 import eu.dnetlib.dhp.schema.oaf.H2020Programme;
-import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

@@ -20,7 +20,6 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.oaf.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -43,6 +42,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

@@ -121,7 +121,7 @@ public class SparkAtomicActionUsageJob implements Serializable {
 	private static List<Measure> getMeasure(Long downloads, Long views) {
 		DataInfo dataInfo = OafMapperUtils
 			.dataInfo(
-				0.0f, //TODO check
+				0.0f, // TODO check
 				UPDATE_DATA_INFO_TYPE,
 				false,
 				OafMapperUtils
@@ -11,7 +11,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Objects;
 import java.util.Optional;

-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.IntWritable;
@@ -33,6 +32,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
 import eu.dnetlib.dhp.schema.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import scala.Tuple2;

 public class GenerateNativeStoreSparkJob {
@@ -21,7 +21,7 @@ import scala.util.matching.Regex

 case class CrossrefDT(doi: String, json: String, timestamp: Long) {}

-case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
+case class CrossrefAuthor(givenName: String, familyName: String, ORCID: String, sequence: String, rank: Int) {}

 case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}

@@ -30,7 +30,6 @@ object CrossrefUtility {

  val logger: Logger = LoggerFactory.getLogger(getClass)

-
  def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)
@@ -56,7 +55,7 @@ object CrossrefUtility {
      (json \ "funder").extractOrElse[List[mappingFunder]](List())

    if (funderList.nonEmpty) {
-      resultList = resultList ::: mappingFunderToRelations(funderList, result )
+      resultList = resultList ::: mappingFunderToRelations(funderList, result)
    }
    resultList = resultList ::: List(result)
    resultList
@@ -73,19 +72,18 @@ object CrossrefUtility {
    r
  }

-
  private def generateSimpleRelationFromAward(
    funder: mappingFunder,
    nsPrefix: String,
    extractField: String => String,
-    source:Result
+    source: Result
  ): List[Relation] = {
    if (funder.award.isDefined && funder.award.get.nonEmpty)
      funder.award.get
        .map(extractField)
        .filter(a => a != null && a.nonEmpty)
        .map(award => {
-          val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
+          val targetId = IdentifierFactory.createOpenaireId("project", s"$nsPrefix::$award", true)
          createRelation(targetId, source.getId, ModelConstants.PRODUCES)
        })
    else List()
@@ -106,7 +104,7 @@ object CrossrefUtility {
  }

  private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
-    var relList:List[Relation] = List()
+    var relList: List[Relation] = List()

    if (funders != null)
      funders.foreach(funder => {
@@ -114,48 +112,66 @@ object CrossrefUtility {
        funder.DOI.get match {
          case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
              "10.13039/100010665" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
          case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
          case "10.13039/501100000781" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
-          case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
-          case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
-          case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+          case "10.13039/100000001" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
+          case "10.13039/501100001665" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+          case "10.13039/501100002341" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
          case "10.13039/501100001602" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
-          case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
+            relList =
+              relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
+          case "10.13039/501100000923" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
          case "10.13039/501100000038" =>
-            val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
          case "10.13039/501100000155" =>
-            val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
          case "10.13039/501100000024" =>
-            val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
-          case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
-          case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
-          case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
-          case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+          case "10.13039/501100002848" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
+          case "10.13039/501100003448" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
+          case "10.13039/501100010198" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
+          case "10.13039/501100004564" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
          case "10.13039/501100003407" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
-            val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+            relList = relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
          case "10.13039/501100006588" | "10.13039/501100004488" =>
-            relList =relList ::: generateSimpleRelationFromAward(
+            relList = relList ::: generateSimpleRelationFromAward(
              funder,
              "irb_hr______",
-              a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
+              a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""),
+              result
            )
-          case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
-          case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
-          case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
+          case "10.13039/501100006769" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
+          case "10.13039/501100001711" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
+          case "10.13039/501100004410" =>
+            relList = relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
          case "10.13039/100004440" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
-            val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+            relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
          case _ => logger.debug("no match for " + funder.DOI.get)

        }
@@ -163,18 +179,19 @@ object CrossrefUtility {
      } else {
        funder.name match {
          case "European Union’s Horizon 2020 research and innovation program" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
          case "European Union's" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
-            relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
          case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
          case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
+            relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
          case "Wellcome Trust Masters Fellowship" =>
-            relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
-            val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
-            relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+            relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+            val targetId =
+              IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+            relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
          case _ => logger.debug("no match for " + funder.name)

        }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className: String): Result = {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
//MAPPING Crossref DOI into PID
|
//MAPPING Crossref DOI into PID
|
||||||
|
@ -236,7 +249,9 @@ object CrossrefUtility {
|
||||||
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
|
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
|
||||||
val subtitles =
|
val subtitles =
|
||||||
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
|
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
|
||||||
title, ModelConstants.SUBTITLE_QUALIFIER)
|
title,
|
||||||
|
ModelConstants.SUBTITLE_QUALIFIER
|
||||||
|
)
|
||||||
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
|
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
|
||||||
|
|
||||||
// DESCRIPTION
|
// DESCRIPTION
|
||||||
|
@@ -302,28 +317,52 @@ object CrossrefUtility {

    if (subjectList.nonEmpty) {
      result.setSubject(
-        subjectList.map(s =>
-          OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
-        ).asJava)
+        subjectList
+          .map(s =>
+            OafMapperUtils.subject(
+              s,
+              OafMapperUtils.qualifier(
+                ModelConstants.DNET_SUBJECT_KEYWORD,
+                ModelConstants.DNET_SUBJECT_KEYWORD,
+                ModelConstants.DNET_SUBJECT_TYPOLOGIES
+              ),
+              null
+            )
+          )
+          .asJava
+      )
    }

    //Mapping Author
-    val authorList:List[CrossrefAuthor] =
+    val authorList: List[CrossrefAuthor] =
      for {
        JObject(author) <- json \ "author"
        JField("ORCID", JString(orcid)) <- author
        JField("given", JString(givenName)) <- author
        JField("family", JString(familyName)) <- author
        JField("sequence", JString(sequence)) <- author
-      } yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
+      } yield CrossrefAuthor(
+        givenName = givenName,
+        familyName = familyName,
+        ORCID = orcid,
+        sequence = sequence,
+        rank = 0
+      )

-    result.setAuthor(authorList.sortWith((a,b) =>{
+    result.setAuthor(
+      authorList
+        .sortWith((a, b) => {
          if (a.sequence.equalsIgnoreCase("first"))
            true
          else if (b.sequence.equalsIgnoreCase("first"))
            false
-      else a.familyName< b.familyName
-    }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
+          else a.familyName < b.familyName
+        })
+        .zipWithIndex
+        .map(k => k._1.copy(rank = k._2))
+        .map(k => generateAuthor(k))
+        .asJava
+    )

    // Mapping instance
    val instance = new Instance()
@@ -360,7 +399,7 @@ object CrossrefUtility {
      )
    }

-    if (instance.getLicense!= null)
+    if (instance.getLicense != null)
      instance.setAccessright(
        decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
      )
@@ -392,7 +431,7 @@ object CrossrefUtility {
    val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
    //Mapping book
    if (className.toLowerCase.contains("book")) {
-      val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
+      val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
      if (ISBN.nonEmpty && containerTitles.nonEmpty) {
        val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
        if (result.getSource != null) {
@@ -435,7 +474,6 @@ object CrossrefUtility {
      }
    }

-
    result.setInstance(List(instance).asJava)
    result.setId("ID")
    result.setId(IdentifierFactory.createIdentifier(result, true))
@@ -511,7 +549,6 @@ object CrossrefUtility {
      ModelConstants.CLOSED_ACCESS_RIGHT()
  }

-
  private def extractDate(dt: String, datePart: List[List[Int]]): String = {
    if (StringUtils.isNotBlank(dt))
      return GraphCleaningFunctions.cleanDate(dt)
@@ -570,7 +607,15 @@ object CrossrefUtility {
    if (StringUtils.isNotBlank(ca.ORCID))
      a.setPid(
        List(
-          OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
+          OafMapperUtils.authorPid(
+            ca.ORCID,
+            OafMapperUtils.qualifier(
+              ModelConstants.ORCID_PENDING,
+              ModelConstants.ORCID_PENDING,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          )
        ).asJava
      )
    a
@@ -7,8 +7,6 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;

-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkConf;
@@ -29,8 +27,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

@@ -0,0 +1,18 @@
+[INFO] Scanning for projects...
+[INFO]
+[INFO] -------------------< eu.dnetlib.dhp:dhp-enrichment >--------------------
+[INFO] Building dhp-enrichment 2.0.0-SNAPSHOT
+[INFO] --------------------------------[ jar ]---------------------------------
+[INFO] ------------------------------------------------------------------------
+[INFO] BUILD FAILURE
+[INFO] ------------------------------------------------------------------------
+[INFO] Total time: 1.737 s
+[INFO] Finished at: 2023-02-10T17:53:31+01:00
+[INFO] ------------------------------------------------------------------------
+[ERROR] Failed to execute goal on project dhp-enrichment: Could not resolve dependencies for project eu.dnetlib.dhp:dhp-enrichment:jar:2.0.0-SNAPSHOT: Failed to collect dependencies at eu.dnetlib.dhp:dhp-common:jar:2.0.0-SNAPSHOT: Failed to read artifact descriptor for eu.dnetlib.dhp:dhp-common:jar:2.0.0-SNAPSHOT: Failure to find eu.dnetlib.dhp:dhp:pom:2.0.0-SNAPSHOT in https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-snapshot/ was cached in the local repository, resolution will not be reattempted until the update interval of dnet45-bootstrap-snapshot has elapsed or updates are forced -> [Help 1]
+[ERROR]
+[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
+[ERROR] Re-run Maven using the -X switch to enable full debug logging.
+[ERROR]
+[ERROR] For more information about the errors and possible solutions, please read the following articles:
+[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/DependencyResolutionException
@@ -2,6 +2,7 @@
 package eu.dnetlib.dhp;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;

@@ -16,10 +17,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Country;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

 public class PropagationConstant {

@@ -44,6 +43,7 @@ public class PropagationConstant {

 	public final static String NULL = "NULL";

+	public final static float PROPAGATION_TRUST = 0.85f;
 	public static final String INSTITUTIONAL_REPO_TYPE = "institutional";

 	public static final String PROPAGATION_DATA_INFO_TYPE = "propagation";
@@ -90,54 +90,22 @@ public class PropagationConstant {
 		Country nc = new Country();
 		nc.setClassid(classid);
 		nc.setClassname(classname);
-		nc.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
 		nc.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
 		nc
 			.setDataInfo(
-				getDataInfo(
+				OafMapperUtils
+					.dataInfo(
+						PROPAGATION_TRUST,
 						PROPAGATION_DATA_INFO_TYPE,
+						true,
+						OafMapperUtils
+							.qualifier(
 								PROPAGATION_COUNTRY_INSTREPO_CLASS_ID,
 								PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME,
-								ModelConstants.DNET_PROVENANCE_ACTIONS));
+								ModelConstants.DNET_PROVENANCE_ACTIONS)));
 		return nc;
 	}

-	public static DataInfo getDataInfo(
-		String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) {
-
-		return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, "0.85");
-	}
-
-	public static DataInfo getDataInfo(
-		String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema,
-		String trust) {
-		return getDataInfo(
-			inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true);
-
-	}
-
-	public static DataInfo getDataInfo(
-		String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema,
-		String trust, boolean inferred) {
-		DataInfo di = new DataInfo();
-		di.setInferred(inferred);
-		di.setDeletedbyinference(false);
-		di.setTrust(trust);
-		di.setInferenceprovenance(inference_provenance);
-		di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema));
-		return di;
-	}
-
-	public static Qualifier getQualifier(String inference_class_id, String inference_class_name,
-		String qualifierSchema) {
-		Qualifier pa = new Qualifier();
-		pa.setClassid(inference_class_id);
-		pa.setClassname(inference_class_name);
-		pa.setSchemeid(qualifierSchema);
-		pa.setSchemename(qualifierSchema);
-		return pa;
-	}
-
 	public static ArrayList<Relation> getOrganizationRelationPair(String orgId,
 		String resultId,
 		String classID,
@@ -186,11 +154,18 @@ public class PropagationConstant {
 		r.setRelClass(rel_class);
 		r.setRelType(rel_type);
 		r.setSubRelType(subrel_type);
-		r
+		Provenance p = new Provenance();
+		p
 			.setDataInfo(
-				getDataInfo(
-					inference_provenance, inference_class_id, inference_class_name,
-					ModelConstants.DNET_PROVENANCE_ACTIONS));
+				OafMapperUtils
+					.dataInfo(
+						PROPAGATION_TRUST, inference_provenance, true,
+						OafMapperUtils
+							.qualifier(
+								inference_class_id, inference_class_name,
+								ModelConstants.DNET_PROVENANCE_ACTIONS)));
+		r.setProvenance(Arrays.asList(p));

 		return r;
 	}

@@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;

 import org.apache.commons.lang3.StringUtils;

@@ -15,8 +14,6 @@ import com.google.gson.Gson;
 import com.jayway.jsonpath.DocumentContext;
 import com.jayway.jsonpath.JsonPath;

-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

@@ -173,45 +170,39 @@ public class ResultTagger implements Serializable {
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
-											CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
-											DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS)));
 				if (datasources.contains(cId))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
-											CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS,
-											DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
+											DNET_PROVENANCE_ACTIONS)));
 				if (czenodo.contains(cId))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
-											CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
-											DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS)));
 				if (aconstraints.contains(cId))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
 											CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
-											DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											DNET_PROVENANCE_ACTIONS)));
 
 			}
 		});
@@ -235,45 +226,40 @@ public class ResultTagger implements Serializable {
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
-											CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
-											DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT,
+											DNET_PROVENANCE_ACTIONS)));
 				if (datasources.contains(c))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
 											CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
-											DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											DNET_PROVENANCE_ACTIONS)));
 				if (czenodo.contains(c))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
-											CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
-											DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS)));
 				if (aconstraints.contains(c))
 					dataInfoList
 						.add(
 							OafMapperUtils
 								.dataInfo(
-									false, BULKTAG_DATA_INFO_TYPE, true, false,
+									TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true,
 									OafMapperUtils
 										.qualifier(
 											CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
-											DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-									TAGGING_TRUST));
+											DNET_PROVENANCE_ACTIONS)));
 
 			context.setDataInfo(dataInfoList);
 			return context;
@@ -20,5 +20,5 @@ public class TaggingConstants {
 	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
 	public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";
 
-	public static final String TAGGING_TRUST = "0.8";
+	public static final float TAGGING_TRUST = 0.8f;
 }
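TAGGING_TRUST thus changes from the String "0.8" to the numeric 0.8f, matching the new dataInfo signature above, which takes trust as its first argument. A tiny illustration of what the numeric type buys; the threshold helper is hypothetical, only the constant comes from this diff:

public class TrustSketch {

	public static final float TAGGING_TRUST = 0.8f;

	// With the old String constant this comparison needed Float.parseFloat(TAGGING_TRUST) first.
	public static boolean meets(float minTrust) {
		return TAGGING_TRUST >= minTrust;
	}
}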
@@ -29,9 +29,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
 import eu.dnetlib.dhp.bulktag.community.*;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 
@@ -132,12 +130,13 @@ public class SparkEoscBulkTag implements Serializable {
 				.asList(
 					OafMapperUtils
 						.dataInfo(
-							false, BULKTAG_DATA_INFO_TYPE, true, false,
+							TAGGING_TRUST,
+							BULKTAG_DATA_INFO_TYPE,
+							true,
 							OafMapperUtils
 								.qualifier(
 									CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
-									DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
-							TAGGING_TRUST)));
+									DNET_PROVENANCE_ACTIONS))));
 
 		value.getContext().add(context);
 
 	}
@@ -221,7 +221,7 @@ public class SparkEoscTag {
 		return words;
 	}
 
-	private static Set<String> getWordsF(List<Field<String>> elem) {
+	private static Set<String> getWordsF(List<String> elem) {
 		Set<String> words = new HashSet<>();
 		Optional
 			.ofNullable(elem)
@@ -230,7 +230,7 @@
 			.forEach(
 				t -> words
 					.addAll(
-						Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
+						Arrays.asList(t.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
 
 		return words;
 	}
@@ -96,8 +96,7 @@ public class PrepareDatasourceCountryAssociation {
 		// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
 		Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
 			.filter(
-				(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) &&
-					!rel.getDataInfo().getDeletedbyinference());
+				(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
 
 		// filtering of the organization taking only the non deleted by inference and those with information about the
 		// country
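Both here and in PrepareInfo further down, the !getDeletedbyinference() guard disappears from the relation filters, leaving only the relclass check; presumably the deleted-by-inference flag is no longer populated on relations in the new model, though the commit itself does not say so. A sketch of the simplified filter, using the standard Spark Dataset API; the wrapper class is hypothetical, the predicate is the one in the hunk above:

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;

public class RelationFilterSketch {

	// Keeps only isProvidedBy relations; no deletedbyinference check anymore.
	public static Dataset<Relation> providedBy(Dataset<Relation> relations) {
		return relations
			.filter(
				(FilterFunction<Relation>) rel -> rel
					.getRelClass()
					.equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
	}
}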
@@ -23,6 +23,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Country;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 
 public class SparkCountryPropagationJob {
@@ -126,6 +127,7 @@ public class SparkCountryPropagationJob {
 			.filter(c -> !finalCountries.contains(c.getClassid()))
 			.map(c -> getCountry(c.getClassid(), c.getClassname()))
 			.collect(Collectors.toList());
 
 	}
 
 }
@@ -24,8 +24,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.PacePerson;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.AuthorPid;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 
 public class SparkOrcidToResultFromSemRelJob {
@@ -171,21 +173,26 @@
 				}
 			}
 			if (toaddpid) {
-				StructuredProperty p = new StructuredProperty();
+				AuthorPid p = new AuthorPid();
 				p.setValue(autoritative_author.getOrcid());
 				p
 					.setQualifier(
-						getQualifier(
-							ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES));
+						OafMapperUtils
+							.qualifier(
+								ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME,
+								ModelConstants.DNET_PID_TYPES));
 				p
 					.setDataInfo(
-						getDataInfo(
-							PROPAGATION_DATA_INFO_TYPE,
+						OafMapperUtils
+							.dataInfo(
+								PROPAGATION_TRUST,
+								PROPAGATION_DATA_INFO_TYPE, true, OafMapperUtils
+									.qualifier(
 										PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
 										PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME,
-							ModelConstants.DNET_PROVENANCE_ACTIONS));
+										ModelConstants.DNET_PROVENANCE_ACTIONS)));
 
-				Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
+				Optional<List<AuthorPid>> authorPid = Optional.ofNullable(author.getPid());
 				if (authorPid.isPresent()) {
 					authorPid.get().add(p);
 				} else {
@@ -197,7 +204,7 @@
 	}
 
 	private static boolean containsAllowedPid(Author a) {
-		Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());
+		Optional<List<AuthorPid>> pids = Optional.ofNullable(a.getPid());
 		if (!pids.isPresent()) {
 			return false;
 		}
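Author pids move from the generic StructuredProperty to a dedicated AuthorPid type here. A minimal sketch of attaching an ORCID to an author under that model; it assumes AuthorPid exposes setValue/setQualifier/setDataInfo as used in the hunk above, and that setPid is the standard bean setter (the helper itself is hypothetical):

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.AuthorPid;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class OrcidPidSketch {

	// Attaches an ORCID (still pending validation) to an author's pid list.
	public static void addOrcid(Author author, String orcid) {
		AuthorPid p = new AuthorPid();
		p.setValue(orcid);
		p
			.setQualifier(
				OafMapperUtils
					.qualifier(
						ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME,
						ModelConstants.DNET_PID_TYPES));
		// Create the pid list lazily, mirroring the Optional handling above.
		List<AuthorPid> pids = Optional.ofNullable(author.getPid()).orElseGet(ArrayList::new);
		pids.add(p);
		author.setPid(pids); // assumed standard bean setter
	}
}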
@@ -107,19 +107,7 @@ public class SparkResultToProjectThroughSemRelJob {
 				potentialUpdate
 					.getProjectSet()
 					.forEach(
-						projectId -> {
-							newRelations
-								.add(
-									getRelation(
-										resId,
-										projectId,
-										ModelConstants.IS_PRODUCED_BY,
-										ModelConstants.RESULT_PROJECT,
-										ModelConstants.OUTCOME,
-										PROPAGATION_DATA_INFO_TYPE,
-										PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
-										PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
-							newRelations
+						projectId -> newRelations
 							.add(
 								getRelation(
 									projectId,
@@ -129,8 +117,7 @@
 									ModelConstants.OUTCOME,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
-									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
-						});
+									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)));
 
 				return newRelations.iterator();
 			};
 		}
@@ -24,6 +24,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Context;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 
 public class SparkResultToCommunityFromOrganizationJob {
@@ -126,16 +128,20 @@
 							.setDataInfo(
 								Arrays
 									.asList(
-										getDataInfo(
-											PROPAGATION_DATA_INFO_TYPE,
-											PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID,
-											PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME,
-											ModelConstants.DNET_PROVENANCE_ACTIONS)));
+										OafMapperUtils
+											.dataInfo(
+												PROPAGATION_TRUST,
+												PROPAGATION_DATA_INFO_TYPE, true,
+												OafMapperUtils
+													.qualifier(
+														PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID,
+														PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME,
+														ModelConstants.DNET_PROVENANCE_ACTIONS))));
 						propagatedContexts.add(newContext);
 					}
 				}
 				res.setContext(propagatedContexts);
-				ret.mergeFrom(res);
+				ret = MergeUtils.merge(ret, res);
 			}
 			return ret;
 		};
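The in-place ret.mergeFrom(res) style gives way to the static MergeUtils.merge(a, b) call here and in the semantic-relation job below. A sketch of the new style, assuming merge returns the combined entity rather than mutating its receiver (the fold helper is hypothetical):

import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;

public class MergeSketch {

	// Folds enriched copies into an accumulator; the reassignment replaces the
	// old in-place mergeFrom call.
	public static Result mergeAll(Result base, Iterable<Result> enriched) {
		Result ret = base;
		for (Result res : enriched) {
			ret = MergeUtils.merge(ret, res);
		}
		return ret;
	}
}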
@@ -21,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 
 public class SparkResultToCommunityThroughSemRelJob {
@@ -122,11 +124,14 @@
 					.setDataInfo(
 						Arrays
 							.asList(
-								getDataInfo(
-									PROPAGATION_DATA_INFO_TYPE,
-									PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
-									PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME,
-									ModelConstants.DNET_PROVENANCE_ACTIONS)));
+								OafMapperUtils
+									.dataInfo(
+										PROPAGATION_TRUST, PROPAGATION_DATA_INFO_TYPE, true,
+										OafMapperUtils
+											.qualifier(
+												PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
+												PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME,
+												ModelConstants.DNET_PROVENANCE_ACTIONS))));
 			return newContext;
 		}
 		return null;
@@ -139,7 +144,7 @@
 
 			r.setId(ret.getId());
 			r.setContext(contextList);
-			ret.mergeFrom(r);
+			ret = MergeUtils.merge(ret, r);
 		}
 
 		return ret;
@@ -139,10 +139,12 @@ public class SparkResultToOrganizationFromIstRepoJob {
 					organizations
 						.forEach(
 							orgId -> newRelations
-								.addAll(
-									getOrganizationRelationPair(
-										orgId,
-										resultId,
+								.add(
+									getRelation(
+										resultId, orgId,
+										ModelConstants.HAS_AUTHOR_INSTITUTION,
+										ModelConstants.RESULT_ORGANIZATION,
+										ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE,
 										PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
 										PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME))
 
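The relation factory is now called with the full argument list at each site; from the call sites in this diff one can read off the apparent order getRelation(source, target, relClass, relType, subRelType, dataInfoType, classId, className). An illustrative call under that assumption; the wrapper class and the PropagationConstant package path are hypothetical:

import static eu.dnetlib.dhp.PropagationConstant.*;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;

public class AffiliationSketch {

	// Result -> organization affiliation relation, as built in the hunk above.
	public static Relation affiliation(String resultId, String orgId) {
		return getRelation(
			resultId, orgId,
			ModelConstants.HAS_AUTHOR_INSTITUTION,
			ModelConstants.RESULT_ORGANIZATION,
			ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE,
			PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
			PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME);
	}
}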
@@ -115,8 +115,7 @@ public class PrepareInfo implements Serializable {
 
 		relation
 			.filter(
-				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
-					r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION))
+				(FilterFunction<Relation>) r -> r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
@@ -211,19 +211,6 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
 			.groupByKey((MapFunction<Relation, String>) r -> r.getSource() + r.getTarget(), Encoders.STRING())
 			.mapGroups(
 				(MapGroupsFunction<String, Relation, Relation>) (k, it) -> it.next(), Encoders.bean(Relation.class))
-			.flatMap(
-				(FlatMapFunction<Relation, Relation>) r -> Arrays
-					.asList(
-						r, getRelation(
-							r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF,
-							ModelConstants.RESULT_ORGANIZATION,
-							ModelConstants.AFFILIATION,
-							PROPAGATION_DATA_INFO_TYPE,
-							PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
-							PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME))
-					.iterator()
-
-				, Encoders.bean(Relation.class))
 			.write()
 
 			.mode(SaveMode.Append)
@@ -126,10 +126,14 @@ public class StepActions implements Serializable {
 				.stream()
 				.filter(
 					rel -> !rel
+						.getProvenance()
+						.stream()
+						.anyMatch(
+							p -> p
 								.getDataInfo()
 								.getProvenanceaction()
 								.getClassid()
-						.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID))
+								.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)))
 				.count() > 0) {
 				return null;
 			}
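Because a relation can now carry several Provenance entries, the duplicate check has to walk the whole list instead of reading a single DataInfo. A standalone sketch of that predicate, assuming getProvenance() returns a List of Provenance shaped as in the hunk above (the wrapper class is hypothetical):

import java.util.List;

import eu.dnetlib.dhp.schema.oaf.Provenance;
import eu.dnetlib.dhp.schema.oaf.Relation;

public class ProvenanceCheckSketch {

	// True when any provenance entry was produced by the given provenance action.
	public static boolean producedBy(Relation rel, String actionClassId) {
		List<Provenance> provenance = rel.getProvenance();
		return provenance != null && provenance
			.stream()
			.anyMatch(
				p -> p
					.getDataInfo()
					.getProvenanceaction()
					.getClassid()
					.equals(actionClassId));
	}
}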
@@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable {
   def generateUpdates(spark: SparkSession): Unit = {
     val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString
 
-    val pids: List[String] = template.linesWithSeparators.map(l =>l.stripLineEnd)
+    val pids: List[String] = template.linesWithSeparators
+      .map(l => l.stripLineEnd)
       .map { id =>
         val r = new Result
         r.setId(id.toLowerCase.trim)
@@ -126,7 +127,7 @@
     entities.foreach { e =>
       val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString
       spark
-        .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l =>l.stripLineEnd).toList))
+        .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l => l.stripLineEnd).toList))
         .as[String]
         .write
         .option("compression", "gzip")
@@ -263,7 +264,8 @@
         Source
           .fromInputStream(this.getClass.getResourceAsStream(s"publication"))
           .mkString
-          .linesWithSeparators.map(l =>l.stripLineEnd)
+          .linesWithSeparators
+          .map(l => l.stripLineEnd)
           .next(),
         classOf[Publication]
       )
@@ -47,7 +47,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
     val inputRelations = Source
       .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary"))
       .mkString
-    val items = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd).toList
+    val items = inputRelations.linesWithSeparators.map(l => l.stripLineEnd).toList
     assertNotNull(items)
     items.foreach(i => assertTrue(i.nonEmpty))
     val result =
@@ -69,7 +69,8 @@
       getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")
     )
       .mkString
-    val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd)
+    val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators
+      .map(l => l.stripLineEnd)
       .sliding(2)
       .map(s => (s.head, s(1)))
       .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))