forked from D-Net/dnet-hadoop
applied some suggestions from SonarLint
This commit is contained in:
parent 9814c3e700
commit e653756e3d
@@ -5,37 +5,40 @@ import java.util.Map;

 import com.google.common.collect.Maps;

+import eu.dnetlib.dhp.schema.common.ModelConstants;

 public class Constants {

-public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
+protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
-public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
+protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();

 public static final String INFERRED = "Inferred by OpenAIRE";
+public static final String CABF2 = "c_abf2";

 public static final String HARVESTED = "Harvested";
 public static final String DEFAULT_TRUST = "0.9";
 public static final String USER_CLAIM = "Linked by user";

-public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
+public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";

-public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
+public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";

-public static String RESEARCH_COMMUNITY = "Research Community";
+public static final String RESEARCH_COMMUNITY = "Research Community";

-public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
+public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";

 static {
-accessRightsCoarMap.put("OPEN", "c_abf2");
+accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
 accessRightsCoarMap.put("RESTRICTED", "c_16ec");
-accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
+accessRightsCoarMap.put("OPEN SOURCE", CABF2);
-accessRightsCoarMap.put("CLOSED", "c_14cb");
+accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
-accessRightsCoarMap.put("EMBARGO", "c_f1cf");
+accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
 }

 static {
-coarCodeLabelMap.put("c_abf2", "OPEN");
+coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
 coarCodeLabelMap.put("c_16ec", "RESTRICTED");
-coarCodeLabelMap.put("c_14cb", "CLOSED");
+coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
 coarCodeLabelMap.put("c_f1cf", "EMBARGO");
 }
@@ -17,6 +17,7 @@ import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;

 import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
 import eu.dnetlib.dhp.schema.oaf.*;

 /**
@@ -66,7 +67,7 @@ public class DumpProducts implements Serializable {

 private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
 CommunityMap communityMap,
-String dumpType) {
+String dumpType) throws NoAvailableEntityTypeException {

 Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
 if (odInfo.isPresent()) {
@@ -94,7 +95,7 @@ public class DumpProducts implements Serializable {
 }
 return null;
 }).filter(Objects::nonNull).collect(Collectors.toList());
-if (toDumpFor.size() == 0) {
+if (toDumpFor.isEmpty()) {
 return null;
 }
 }
@@ -57,16 +57,16 @@ public class MakeTar implements Serializable {
 public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
 throws IOException {

-RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
+RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));

-while (dir_iterator.hasNext()) {
+while (dirIterator.hasNext()) {
-LocatedFileStatus fileStatus = dir_iterator.next();
+LocatedFileStatus fileStatus = dirIterator.next();

 Path p = fileStatus.getPath();
-String p_string = p.toString();
+String pathString = p.toString();
-String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
+String entity = pathString.substring(pathString.lastIndexOf("/") + 1);

-MakeTarArchive.tarMaxSize(fileSystem, p_string, outputPath + "/" + entity, entity, gBperSplit);
+MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
 }

 }
@@ -40,10 +40,10 @@ public class QueryInformationSystem {
 "{$x//CONFIGURATION/context/@label}" +
 "</community>";

-public CommunityMap getCommunityMap(boolean singleCommunity, String community_id)
+public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
 throws ISLookUpException, DocumentException, SAXException {
 if (singleCommunity)
-return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'")));
+return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
 return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));

 }
@@ -6,9 +6,8 @@ import java.util.*;
 import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoders;

+import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.dump.oaf.*;
 import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;
@@ -29,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
 public class ResultMapper implements Serializable {

 public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
-E in, Map<String, String> communityMap, String dumpType) {
+E in, Map<String, String> communityMap, String dumpType) throws NoAvailableEntityTypeException {

 Result out;
 if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
@@ -136,6 +135,8 @@ public class ResultMapper implements Serializable {
 out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());

 break;
+default:
+throw new NoAvailableEntityTypeException();
 }

 Optional<List<Measure>> mes = Optional.ofNullable(input.getMeasures());
@@ -156,17 +157,15 @@ public class ResultMapper implements Serializable {
 // I do not map Access Right UNKNOWN or OTHER

 Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
-if (oar.isPresent()) {
-if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
+if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
 String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
 out
 .setBestaccessright(
 AccessRight
 .newInstance(
 code,
 Constants.coarCodeLabelMap.get(code),
 Constants.COAR_ACCESS_RIGHT_SCHEMA));
-}
 }

 final List<String> contributorList = new ArrayList<>();
@@ -263,7 +262,7 @@ public class ResultMapper implements Serializable {
 .stream()
 .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
 .collect(Collectors.toList());
-if (iTitle.size() > 0) {
+if (!iTitle.isEmpty()) {
 out.setMaintitle(iTitle.get(0).getValue());
 }
@@ -272,7 +271,7 @@ public class ResultMapper implements Serializable {
 .stream()
 .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
 .collect(Collectors.toList());
-if (iTitle.size() > 0) {
+if (!iTitle.isEmpty()) {
 out.setSubtitle(iTitle.get(0).getValue());
 }
@@ -301,9 +300,8 @@ public class ResultMapper implements Serializable {

 Optional
 .ofNullable(input.getSource())
-.ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList())));
+.ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
-// value.stream().forEach(s -> sourceList.add(s.getValue())));
-// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
 List<Subject> subjectList = new ArrayList<>();
 Optional
 .ofNullable(input.getSubject())
@@ -334,14 +332,14 @@ public class ResultMapper implements Serializable {
 value -> value
 .stream()
 .map(c -> {
-String community_id = c.getId();
+String communityId = c.getId();
-if (community_id.indexOf("::") > 0) {
+if (communityId.contains("::")) {
-community_id = community_id.substring(0, community_id.indexOf("::"));
+communityId = communityId.substring(0, communityId.indexOf("::"));
 }
-if (communities.contains(community_id)) {
+if (communities.contains(communityId)) {
 Context context = new Context();
-context.setCode(community_id);
+context.setCode(communityId);
-context.setLabel(communityMap.get(community_id));
+context.setLabel(communityMap.get(communityId));
 Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
 if (dataInfo.isPresent()) {
 List<Provenance> provenance = new ArrayList<>();
@@ -361,7 +359,11 @@ public class ResultMapper implements Serializable {
 .filter(Objects::nonNull)
 .collect(Collectors.toSet()));

-context.setProvenance(getUniqueProvenance(provenance));
+try {
+context.setProvenance(getUniqueProvenance(provenance));
+} catch (NoAvailableEntityTypeException e) {
+e.printStackTrace();
+}
 }
 return context;
 }
@@ -371,7 +373,7 @@ public class ResultMapper implements Serializable {
 .collect(Collectors.toList()))
 .orElse(new ArrayList<>());

-if (contextList.size() > 0) {
+if (!contextList.isEmpty()) {
 Set<Integer> hashValue = new HashSet<>();
 List<Context> remainigContext = new ArrayList<>();
 contextList.forEach(c -> {
@@ -417,35 +419,34 @@ public class ResultMapper implements Serializable {
 private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
 Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());

-if (opAr.isPresent()) {
-if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
+if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
 String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());

 instance
 .setAccessright(
 AccessRight
 .newInstance(
 code,
 Constants.coarCodeLabelMap.get(code),
 Constants.COAR_ACCESS_RIGHT_SCHEMA));
 if (opAr.get().getOpenAccessRoute() != null) {
 switch (opAr.get().getOpenAccessRoute()) {
 case hybrid:
 instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid);
 break;
 case gold:
 instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold);
 break;
 case green:
 instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green);
 break;
 case bronze:
 instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze);
 break;

 }
-}
 }
 }

 }

 Optional
@@ -498,7 +499,8 @@ public class ResultMapper implements Serializable {

 }

-private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
+private static List<Provenance> getUniqueProvenance(List<Provenance> provenance)
+throws NoAvailableEntityTypeException {
 Provenance iProv = new Provenance();

 Provenance hProv = new Provenance();
@@ -520,6 +522,8 @@ public class ResultMapper implements Serializable {
 case Constants.USER_CLAIM:
 lProv = getHighestTrust(lProv, p);
 break;
+default:
+throw new NoAvailableEntityTypeException();
 }

 }
@@ -599,19 +603,19 @@ public class ResultMapper implements Serializable {
 }

 private static Pid getOrcid(List<StructuredProperty> p) {
-List<StructuredProperty> pid_list = p.stream().map(pid -> {
+List<StructuredProperty> pidList = p.stream().map(pid -> {
 if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) ||
 (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) {
 return pid;
 }
 return null;
-}).filter(pid -> pid != null).collect(Collectors.toList());
+}).filter(Objects::nonNull).collect(Collectors.toList());

-if (pid_list.size() == 1) {
+if (pidList.size() == 1) {
-return getAuthorPid(pid_list.get(0));
+return getAuthorPid(pidList.get(0));
 }

-List<StructuredProperty> orcid = pid_list
+List<StructuredProperty> orcid = pidList
 .stream()
 .filter(
 ap -> ap
@@ -622,7 +626,7 @@ public class ResultMapper implements Serializable {
 if (orcid.size() == 1) {
 return getAuthorPid(orcid.get(0));
 }
-orcid = pid_list
+orcid = pidList
 .stream()
 .filter(
 ap -> ap
@@ -31,9 +31,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 public class SaveCommunityMap implements Serializable {

 private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
-private final QueryInformationSystem queryInformationSystem;
+private final transient QueryInformationSystem queryInformationSystem;

-private final BufferedWriter writer;
+private final transient BufferedWriter writer;

 public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
 final Configuration conf = new Configuration();
@@ -84,12 +84,12 @@ public class SaveCommunityMap implements Serializable {

 }

-private void saveCommunityMap(boolean singleCommunity, String community_id)
+private void saveCommunityMap(boolean singleCommunity, String communityId)
 throws ISLookUpException, IOException, DocumentException, SAXException {
 writer
 .write(
 Utils.OBJECT_MAPPER
-.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id)));
+.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)));
 }

 }
@@ -5,15 +5,13 @@ import java.io.Serializable;
 import java.util.Optional;

 import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
 import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
-import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;

 public class SendToZenodoHDFS implements Serializable {

@@ -21,8 +19,6 @@ public class SendToZenodoHDFS implements Serializable {
 private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
 private static final String UPDATE = "update"; // to upload content to an open deposition not published

-private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);

 public static void main(final String[] args) throws Exception, MissingConceptDoiException {
 final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 IOUtils
@@ -48,15 +44,12 @@ public class SendToZenodoHDFS implements Serializable {
 .orElse(false);

 final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
-// final String communityMapPath = parser.get("communityMapPath");

 Configuration conf = new Configuration();
 conf.set("fs.defaultFS", hdfsNameNode);

 FileSystem fileSystem = FileSystem.get(conf);

-// CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);

 RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
 .listFiles(
 new Path(hdfsPath), true);
@@ -77,19 +70,17 @@ public class SendToZenodoHDFS implements Serializable {
 }
 zenodoApiClient.uploadOpenDeposition(depositionId);
 break;
+default:
+throw new NoAvailableEntityTypeException();
 }

 while (fileStatusListIterator.hasNext()) {
 LocatedFileStatus fileStatus = fileStatusListIterator.next();

 Path p = fileStatus.getPath();
-String p_string = p.toString();
+String pString = p.toString();
-if (!p_string.endsWith("_SUCCESS")) {
+if (!pString.endsWith("_SUCCESS")) {
-String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+String name = pString.substring(pString.lastIndexOf("/") + 1);
-// log.info("Sending information for community: " + name);
-// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
-// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
-// }

 FSDataInputStream inputStream = fileSystem.open(p);
 zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
@@ -101,7 +92,7 @@ public class SendToZenodoHDFS implements Serializable {
 zenodoApiClient.sendMretadata(metadata);
 }

-if (publish) {
+if (Boolean.TRUE.equals(publish)) {
 zenodoApiClient.publish();
 }
 }
@@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.Serializable;
 import java.util.Optional;
-import java.util.Set;
 import java.util.stream.Collectors;

 import org.apache.spark.SparkConf;
@@ -16,6 +15,7 @@ import org.apache.spark.sql.SparkSession;

 import eu.dnetlib.dhp.oa.graph.dump.Utils;
 import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Context;

 /**
 * This class splits the dumped results according to the research community - research initiative/infrastructure they
@@ -56,10 +56,10 @@ public class CommunitySplit implements Serializable {
 }

 private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
-Dataset<CommunityResult> community_products = result
+Dataset<CommunityResult> communityProducts = result
 .filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));

-community_products
+communityProducts
 .write()
 .option("compression", "gzip")
 .mode(SaveMode.Overwrite)
@@ -72,7 +72,7 @@ public class CommunitySplit implements Serializable {
 return r
 .getContext()
 .stream()
-.map(con -> con.getCode())
+.map(Context::getCode)
 .collect(Collectors.toList())
 .contains(c);
 }
@@ -161,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable {
 provenance.setTrust(di.get().getTrust());
 p.setProvenance(provenance);
 }
-if (relation.getValidated()) {
+if (Boolean.TRUE.equals(relation.getValidated())) {
 p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate()));
 }
 return p;
@@ -179,8 +179,8 @@ public class SparkPrepareResultProject implements Serializable {
 f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
 f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
 for (Object o : doc.selectNodes("//funding_level_0")) {
-List node = ((Node) o).selectNodes("./name");
+List<Node> node = ((Node) o).selectNodes("./name");
-f.setFundingStream(((Node) node.get(0)).getText());
+f.setFundingStream((node.get(0)).getText());
 }

 return f;
@@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
 import eu.dnetlib.dhp.schema.oaf.Datasource;
@@ -120,7 +121,7 @@ public class CreateContextRelation implements Serializable {
 writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
 writer.newLine();
 } catch (final Exception e) {
-throw new RuntimeException(e);
+throw new MyRuntimeException(e);
 }
 }
@@ -62,22 +62,7 @@ public class Extractor implements Serializable {
 .readPath(spark, inputPath, inputClazz)
 .flatMap((FlatMapFunction<R, Relation>) value -> {
 List<Relation> relationList = new ArrayList<>();
-Optional
-.ofNullable(value.getInstance())
-.ifPresent(inst -> inst.forEach(instance -> {
-Optional
-.ofNullable(instance.getCollectedfrom())
-.ifPresent(
-cf -> getRelatioPair(
-value, relationList, cf,
-ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
-Optional
-.ofNullable(instance.getHostedby())
-.ifPresent(
-hb -> getRelatioPair(
-value, relationList, hb,
-Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
-}));
+extractRelationsFromInstance(hashCodes, value, relationList);
 Set<String> communities = communityMap.keySet();
 Optional
 .ofNullable(value.getContext())
@@ -136,8 +121,28 @@ public class Extractor implements Serializable {

 }

+private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
+List<Relation> relationList) {
+Optional
+.ofNullable(value.getInstance())
+.ifPresent(inst -> inst.forEach(instance -> {
+Optional
+.ofNullable(instance.getCollectedfrom())
+.ifPresent(
+cf -> getRelatioPair(
+value, relationList, cf,
+ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
+Optional
+.ofNullable(instance.getHostedby())
+.ifPresent(
+hb -> getRelatioPair(
+value, relationList, hb,
+Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
+}));
+}

 private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
-String result_dtasource, String datasource_result,
+String resultDatasource, String datasourceResult,
 Set<Integer> hashCodes) {
 Provenance provenance = Optional
 .ofNullable(cf.getDataInfo())
@@ -162,7 +167,7 @@ public class Extractor implements Serializable {
 Relation r = getRelation(
 value.getId(),
 cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
-result_dtasource, ModelConstants.PROVISION,
+resultDatasource, ModelConstants.PROVISION,
 provenance);
 if (!hashCodes.contains(r.hashCode())) {
 relationList
@@ -173,7 +178,7 @@ public class Extractor implements Serializable {
 r = getRelation(
 cf.getKey(), value.getId(),
 Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
-datasource_result, ModelConstants.PROVISION,
+datasourceResult, ModelConstants.PROVISION,
 provenance);

 if (!hashCodes.contains(r.hashCode())) {
@@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;

 import eu.dnetlib.dhp.oa.graph.dump.Constants;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
@@ -43,7 +44,7 @@ public class Process implements Serializable {
 return (R) ri;

 } catch (final Exception e) {
-throw new RuntimeException(e);
+throw new MyRuntimeException(e);
 }
 }
@@ -91,7 +92,7 @@ public class Process implements Serializable {
 return relationList;

 } catch (final Exception e) {
-throw new RuntimeException(e);
+throw new MyRuntimeException(e);
 }
 }
@@ -13,7 +13,6 @@ import org.dom4j.io.SAXReader;
 import org.jetbrains.annotations.NotNull;
 import org.xml.sax.SAXException;

-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@@ -94,9 +93,9 @@ public class QueryInformationSystem {
 Element root = doc.getRootElement();
 cinfo.setId(root.attributeValue("id"));

-Iterator it = root.elementIterator();
+Iterator<Element> it = root.elementIterator();
 while (it.hasNext()) {
-Element el = (Element) it.next();
+Element el = it.next();
 if (el.getName().equals("category")) {
 String categoryId = el.attributeValue("id");
 categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
@@ -143,7 +142,7 @@ public class QueryInformationSystem {
 if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
 return null;
 }
-String funder = null;
+String funder = "";
 String grantId = null;
 String funding = null;
 for (Object node : el.selectNodes(".//param")) {
@@ -158,9 +157,12 @@ public class QueryInformationSystem {
 case "CD_PROJECT_NUMBER":
 grantId = n.getText();
 break;
+default:
+break;
 }
 }
 String nsp = null;

 switch (funder.toLowerCase()) {
 case "ec":
 if (funding == null) {
@@ -179,10 +181,12 @@ public class QueryInformationSystem {
 nsp = "dfgf________::";
 break;
 default:
-nsp = funder.toLowerCase();
+StringBuilder bld = new StringBuilder();
+bld.append(funder.toLowerCase());
 for (int i = funder.length(); i < 12; i++)
-nsp += "_";
+bld.append("_");
-nsp += "::";
+bld.append("::");
+nsp = bld.toString();
 }

 return prefix + "|" + nsp + DHPUtils.md5(grantId);
@@ -71,22 +71,22 @@ public class SparkDumpRelationJob implements Serializable {
 Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
 relations
 .map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
-eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel_new = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
+eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
-rel_new
+relNew
 .setSource(
 Node
 .newInstance(
 relation.getSource(),
 ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));

-rel_new
+relNew
 .setTarget(
 Node
 .newInstance(
 relation.getTarget(),
 ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));

-rel_new
+relNew
 .setReltype(
 RelType
 .newInstance(
@@ -96,23 +96,22 @@ public class SparkDumpRelationJob implements Serializable {
 Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
 if (odInfo.isPresent()) {
 DataInfo dInfo = odInfo.get();
-if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent()) {
-if (Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
+if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
+Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
-rel_new
+relNew
 .setProvenance(
 Provenance
 .newInstance(
 dInfo.getProvenanceaction().getClassname(),
 dInfo.getTrust()));
-}
 }
 }
-if (relation.getValidated()) {
+if (Boolean.TRUE.equals(relation.getValidated())) {
-rel_new.setValidated(relation.getValidated());
+relNew.setValidated(relation.getValidated());
-rel_new.setValidationDate(relation.getValidationDate());
+relNew.setValidationDate(relation.getValidationDate());
 }

-return rel_new;
+return relNew;

 }, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
 .write()
@@ -58,7 +58,8 @@ public class SparkOrganizationRelation implements Serializable {

 final OrganizationMap organizationMap = new Gson()
 .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
-log.info("organization map : {}", new Gson().toJson(organizationMap));
+final String serializedOrganizationMap = new Gson().toJson(organizationMap);
+log.info("organization map : {}", serializedOrganizationMap);

 final String communityMapPath = parser.get("communityMapPath");
 log.info("communityMapPath: {}", communityMapPath);
@@ -131,7 +131,6 @@ public class SparkSelectValidRelationsJob implements Serializable {
 .option("compression", "gzip")
 .mode(SaveMode.Overwrite)
 .json(outputPath);
-;

 }
 }
@@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.exceptions;
+
+public class MyRuntimeException extends RuntimeException {
+
+public MyRuntimeException() {
+super();
+}
+
+public MyRuntimeException(
+final String message,
+final Throwable cause,
+final boolean enableSuppression,
+final boolean writableStackTrace) {
+super(message, cause, enableSuppression, writableStackTrace);
+}
+
+public MyRuntimeException(final String message, final Throwable cause) {
+super(message, cause);
+}
+
+public MyRuntimeException(final String message) {
+super(message);
+}
+
+public MyRuntimeException(final Throwable cause) {
+super(cause);
+}
+
+}
@@ -0,0 +1,29 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.exceptions;
+
+public class NoAvailableEntityTypeException extends Exception {
+public NoAvailableEntityTypeException() {
+super();
+}
+
+public NoAvailableEntityTypeException(
+final String message,
+final Throwable cause,
+final boolean enableSuppression,
+final boolean writableStackTrace) {
+super(message, cause, enableSuppression, writableStackTrace);
+}
+
+public NoAvailableEntityTypeException(final String message, final Throwable cause) {
+super(message, cause);
+}
+
+public NoAvailableEntityTypeException(final String message) {
+super(message);
+}
+
+public NoAvailableEntityTypeException(final Throwable cause) {
+super(cause);
+}
+
+}
@@ -10,7 +10,6 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
@@ -18,10 +17,8 @@ import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
-import eu.dnetlib.dhp.schema.oaf.Relation;

 /**
 * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
@@ -10,7 +10,6 @@ import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.MapGroupsFunction;
-import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@@ -19,13 +18,11 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.oa.graph.dump.Constants;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Project;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
-import scala.Tuple2;

 /**
 * Selects the results linked to projects. Only for these results the dump will be performed.
@@ -8,17 +8,14 @@ import java.nio.file.Files;
 import java.nio.file.Path;

 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
@@ -248,14 +245,14 @@ public class PrepareResultProjectJobTest {
 org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
 .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));

-assertEquals(2, verificationDataset.count() );
+assertEquals(2, verificationDataset.count());

 assertEquals(
 1,
 verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
 assertEquals(
 1,
 verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

 verificationDataset.createOrReplaceTempView("dataset");

@@ -269,59 +266,59 @@ public class PrepareResultProjectJobTest {
 assertEquals(3, resultExplodedProvenance.count());
 assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count());
 assertEquals(
 2,
 resultExplodedProvenance
 .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
 .count());

 assertEquals(
 1,
 resultExplodedProvenance
 .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
 .count());

 assertEquals(
 2,
 resultExplodedProvenance
 .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
 .count());

 assertEquals(
 1,
 resultExplodedProvenance
 .filter(
 "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
 "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
 "and validatedByFunder = true " +
 "and validationDate = '2021-08-06'")
 .count());

 assertEquals(
 1,
 resultExplodedProvenance
 .filter(
 "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
 "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " +
 "and validatedByFunder = true and validationDate = '2021-08-04'")
 .count());

 assertEquals(
 1,
 resultExplodedProvenance
 .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
 .count());

 assertEquals(
 1,
 resultExplodedProvenance
 .filter(
 "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " +
 "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
 "and validatedByFunder = true and validationDate = '2021-08-05'")
 .count());

 assertEquals(
 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());

 }
 }
@@ -6,6 +6,7 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.HashMap;

+import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -98,7 +99,7 @@ public class DumpOrganizationProjectDatasourceTest {
 }

 @Test
-public void dumpProjectTest() {
+public void dumpProjectTest() throws NoAvailableEntityTypeException {

 final String sourcePath = getClass()
 .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
@@ -127,7 +128,7 @@ public class DumpOrganizationProjectDatasourceTest {
 }

 @Test
-public void dumpDatasourceTest() {
+public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
 final String sourcePath = getClass()
 .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
 .getPath();
@@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{
 assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy"))
 assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN"))
 assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access"))
-assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid))
+assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold))
 assertTrue(pa.getBestaccessright.getClassid.equals("OPEN"))
 assertTrue(pa.getBestaccessright.getClassname.equals("Open Access"))
File diff suppressed because one or more lines are too long