applied some suggestions from SonarLint

This commit is contained in:
Miriam Baglioni 2021-10-04 18:40:07 +02:00
parent 9814c3e700
commit e653756e3d
24 changed files with 274 additions and 214 deletions

View File

@ -5,37 +5,40 @@ import java.util.Map;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelConstants;
public class Constants { public class Constants {
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap(); protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap(); protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static final String INFERRED = "Inferred by OpenAIRE"; public static final String INFERRED = "Inferred by OpenAIRE";
public static final String CABF2 = "c_abf2";
public static final String HARVESTED = "Harvested"; public static final String HARVESTED = "Harvested";
public static final String DEFAULT_TRUST = "0.9"; public static final String DEFAULT_TRUST = "0.9";
public static final String USER_CLAIM = "Linked by user"; public static final String USER_CLAIM = "Linked by user";
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/"; public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
public static String RESEARCH_COMMUNITY = "Research Community"; public static final String RESEARCH_COMMUNITY = "Research Community";
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
static { static {
accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
accessRightsCoarMap.put("RESTRICTED", "c_16ec"); accessRightsCoarMap.put("RESTRICTED", "c_16ec");
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2"); accessRightsCoarMap.put("OPEN SOURCE", CABF2);
accessRightsCoarMap.put("CLOSED", "c_14cb"); accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
accessRightsCoarMap.put("EMBARGO", "c_f1cf"); accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
} }
static { static {
coarCodeLabelMap.put("c_abf2", "OPEN"); coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
coarCodeLabelMap.put("c_16ec", "RESTRICTED"); coarCodeLabelMap.put("c_16ec", "RESTRICTED");
coarCodeLabelMap.put("c_14cb", "CLOSED"); coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
coarCodeLabelMap.put("c_f1cf", "EMBARGO"); coarCodeLabelMap.put("c_f1cf", "EMBARGO");
} }

View File

@ -17,6 +17,7 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
/** /**
@ -66,7 +67,7 @@ public class DumpProducts implements Serializable {
private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value, private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
CommunityMap communityMap, CommunityMap communityMap,
String dumpType) { String dumpType) throws NoAvailableEntityTypeException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo()); Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (odInfo.isPresent()) { if (odInfo.isPresent()) {
@ -94,7 +95,7 @@ public class DumpProducts implements Serializable {
} }
return null; return null;
}).filter(Objects::nonNull).collect(Collectors.toList()); }).filter(Objects::nonNull).collect(Collectors.toList());
if (toDumpFor.size() == 0) { if (toDumpFor.isEmpty()) {
return null; return null;
} }
} }

View File

@ -57,16 +57,16 @@ public class MakeTar implements Serializable {
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
throws IOException { throws IOException {
RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath)); RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
while (dir_iterator.hasNext()) { while (dirIterator.hasNext()) {
LocatedFileStatus fileStatus = dir_iterator.next(); LocatedFileStatus fileStatus = dirIterator.next();
Path p = fileStatus.getPath(); Path p = fileStatus.getPath();
String p_string = p.toString(); String pathString = p.toString();
String entity = p_string.substring(p_string.lastIndexOf("/") + 1); String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
MakeTarArchive.tarMaxSize(fileSystem, p_string, outputPath + "/" + entity, entity, gBperSplit); MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
} }
} }

View File

@ -40,10 +40,10 @@ public class QueryInformationSystem {
"{$x//CONFIGURATION/context/@label}" + "{$x//CONFIGURATION/context/@label}" +
"</community>"; "</community>";
public CommunityMap getCommunityMap(boolean singleCommunity, String community_id) public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, DocumentException, SAXException { throws ISLookUpException, DocumentException, SAXException {
if (singleCommunity) if (singleCommunity)
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'"))); return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
return getMap(isLookUp.quickSearchProfile(XQUERY_ALL)); return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
} }

View File

@ -6,9 +6,8 @@ import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;
@ -29,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class ResultMapper implements Serializable { public class ResultMapper implements Serializable {
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map( public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
E in, Map<String, String> communityMap, String dumpType) { E in, Map<String, String> communityMap, String dumpType) throws NoAvailableEntityTypeException {
Result out; Result out;
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
@ -136,6 +135,8 @@ public class ResultMapper implements Serializable {
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
break; break;
default:
throw new NoAvailableEntityTypeException();
} }
Optional<List<Measure>> mes = Optional.ofNullable(input.getMeasures()); Optional<List<Measure>> mes = Optional.ofNullable(input.getMeasures());
@ -156,8 +157,7 @@ public class ResultMapper implements Serializable {
// I do not map Access Right UNKNOWN or OTHER // I do not map Access Right UNKNOWN or OTHER
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright()); Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
if (oar.isPresent()) { if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
out out
.setBestaccessright( .setBestaccessright(
@ -167,7 +167,6 @@ public class ResultMapper implements Serializable {
Constants.coarCodeLabelMap.get(code), Constants.coarCodeLabelMap.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA)); Constants.COAR_ACCESS_RIGHT_SCHEMA));
} }
}
final List<String> contributorList = new ArrayList<>(); final List<String> contributorList = new ArrayList<>();
Optional Optional
@ -263,7 +262,7 @@ public class ResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setMaintitle(iTitle.get(0).getValue()); out.setMaintitle(iTitle.get(0).getValue());
} }
@ -272,7 +271,7 @@ public class ResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setSubtitle(iTitle.get(0).getValue()); out.setSubtitle(iTitle.get(0).getValue());
} }
@ -301,9 +300,8 @@ public class ResultMapper implements Serializable {
Optional Optional
.ofNullable(input.getSource()) .ofNullable(input.getSource())
.ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList()))); .ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
// value.stream().forEach(s -> sourceList.add(s.getValue())));
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
List<Subject> subjectList = new ArrayList<>(); List<Subject> subjectList = new ArrayList<>();
Optional Optional
.ofNullable(input.getSubject()) .ofNullable(input.getSubject())
@ -334,14 +332,14 @@ public class ResultMapper implements Serializable {
value -> value value -> value
.stream() .stream()
.map(c -> { .map(c -> {
String community_id = c.getId(); String communityId = c.getId();
if (community_id.indexOf("::") > 0) { if (communityId.contains("::")) {
community_id = community_id.substring(0, community_id.indexOf("::")); communityId = communityId.substring(0, communityId.indexOf("::"));
} }
if (communities.contains(community_id)) { if (communities.contains(communityId)) {
Context context = new Context(); Context context = new Context();
context.setCode(community_id); context.setCode(communityId);
context.setLabel(communityMap.get(community_id)); context.setLabel(communityMap.get(communityId));
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo()); Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
if (dataInfo.isPresent()) { if (dataInfo.isPresent()) {
List<Provenance> provenance = new ArrayList<>(); List<Provenance> provenance = new ArrayList<>();
@ -361,7 +359,11 @@ public class ResultMapper implements Serializable {
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.toSet())); .collect(Collectors.toSet()));
try {
context.setProvenance(getUniqueProvenance(provenance)); context.setProvenance(getUniqueProvenance(provenance));
} catch (NoAvailableEntityTypeException e) {
e.printStackTrace();
}
} }
return context; return context;
} }
@ -371,7 +373,7 @@ public class ResultMapper implements Serializable {
.collect(Collectors.toList())) .collect(Collectors.toList()))
.orElse(new ArrayList<>()); .orElse(new ArrayList<>());
if (contextList.size() > 0) { if (!contextList.isEmpty()) {
Set<Integer> hashValue = new HashSet<>(); Set<Integer> hashValue = new HashSet<>();
List<Context> remainigContext = new ArrayList<>(); List<Context> remainigContext = new ArrayList<>();
contextList.forEach(c -> { contextList.forEach(c -> {
@ -417,8 +419,7 @@ public class ResultMapper implements Serializable {
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright()); Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());
if (opAr.isPresent()) { if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
instance instance
@ -445,7 +446,7 @@ public class ResultMapper implements Serializable {
} }
} }
}
} }
Optional Optional
@ -498,7 +499,8 @@ public class ResultMapper implements Serializable {
} }
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) { private static List<Provenance> getUniqueProvenance(List<Provenance> provenance)
throws NoAvailableEntityTypeException {
Provenance iProv = new Provenance(); Provenance iProv = new Provenance();
Provenance hProv = new Provenance(); Provenance hProv = new Provenance();
@ -520,6 +522,8 @@ public class ResultMapper implements Serializable {
case Constants.USER_CLAIM: case Constants.USER_CLAIM:
lProv = getHighestTrust(lProv, p); lProv = getHighestTrust(lProv, p);
break; break;
default:
throw new NoAvailableEntityTypeException();
} }
} }
@ -599,19 +603,19 @@ public class ResultMapper implements Serializable {
} }
private static Pid getOrcid(List<StructuredProperty> p) { private static Pid getOrcid(List<StructuredProperty> p) {
List<StructuredProperty> pid_list = p.stream().map(pid -> { List<StructuredProperty> pidList = p.stream().map(pid -> {
if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) ||
(pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) {
return pid; return pid;
} }
return null; return null;
}).filter(pid -> pid != null).collect(Collectors.toList()); }).filter(Objects::nonNull).collect(Collectors.toList());
if (pid_list.size() == 1) { if (pidList.size() == 1) {
return getAuthorPid(pid_list.get(0)); return getAuthorPid(pidList.get(0));
} }
List<StructuredProperty> orcid = pid_list List<StructuredProperty> orcid = pidList
.stream() .stream()
.filter( .filter(
ap -> ap ap -> ap
@ -622,7 +626,7 @@ public class ResultMapper implements Serializable {
if (orcid.size() == 1) { if (orcid.size() == 1) {
return getAuthorPid(orcid.get(0)); return getAuthorPid(orcid.get(0));
} }
orcid = pid_list orcid = pidList
.stream() .stream()
.filter( .filter(
ap -> ap ap -> ap

View File

@ -31,9 +31,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable { public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final QueryInformationSystem queryInformationSystem; private final transient QueryInformationSystem queryInformationSystem;
private final BufferedWriter writer; private final transient BufferedWriter writer;
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
final Configuration conf = new Configuration(); final Configuration conf = new Configuration();
@ -84,12 +84,12 @@ public class SaveCommunityMap implements Serializable {
} }
private void saveCommunityMap(boolean singleCommunity, String community_id) private void saveCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, IOException, DocumentException, SAXException { throws ISLookUpException, IOException, DocumentException, SAXException {
writer writer
.write( .write(
Utils.OBJECT_MAPPER Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)));
} }
} }

View File

@ -5,15 +5,13 @@ import java.io.Serializable;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient; import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
public class SendToZenodoHDFS implements Serializable { public class SendToZenodoHDFS implements Serializable {
@ -21,8 +19,6 @@ public class SendToZenodoHDFS implements Serializable {
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final String UPDATE = "update"; // to upload content to an open deposition not published
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
public static void main(final String[] args) throws Exception, MissingConceptDoiException { public static void main(final String[] args) throws Exception, MissingConceptDoiException {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -48,15 +44,12 @@ public class SendToZenodoHDFS implements Serializable {
.orElse(false); .orElse(false);
final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null); final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
// final String communityMapPath = parser.get("communityMapPath");
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf); FileSystem fileSystem = FileSystem.get(conf);
// CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles( .listFiles(
new Path(hdfsPath), true); new Path(hdfsPath), true);
@ -77,19 +70,17 @@ public class SendToZenodoHDFS implements Serializable {
} }
zenodoApiClient.uploadOpenDeposition(depositionId); zenodoApiClient.uploadOpenDeposition(depositionId);
break; break;
default:
throw new NoAvailableEntityTypeException();
} }
while (fileStatusListIterator.hasNext()) { while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next(); LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath(); Path p = fileStatus.getPath();
String p_string = p.toString(); String pString = p.toString();
if (!p_string.endsWith("_SUCCESS")) { if (!pString.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1); String name = pString.substring(pString.lastIndexOf("/") + 1);
// log.info("Sending information for community: " + name);
// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
// }
FSDataInputStream inputStream = fileSystem.open(p); FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen()); zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
@ -101,7 +92,7 @@ public class SendToZenodoHDFS implements Serializable {
zenodoApiClient.sendMretadata(metadata); zenodoApiClient.sendMretadata(metadata);
} }
if (publish) { if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish(); zenodoApiClient.publish();
} }
} }

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.Optional; import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -16,6 +15,7 @@ import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
/** /**
* This class splits the dumped results according to the research community - research initiative/infrastructure they * This class splits the dumped results according to the research community - research initiative/infrastructure they
@ -56,10 +56,10 @@ public class CommunitySplit implements Serializable {
} }
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) { private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
Dataset<CommunityResult> community_products = result Dataset<CommunityResult> communityProducts = result
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c)); .filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
community_products communityProducts
.write() .write()
.option("compression", "gzip") .option("compression", "gzip")
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
@ -72,7 +72,7 @@ public class CommunitySplit implements Serializable {
return r return r
.getContext() .getContext()
.stream() .stream()
.map(con -> con.getCode()) .map(Context::getCode)
.collect(Collectors.toList()) .collect(Collectors.toList())
.contains(c); .contains(c);
} }

View File

@ -161,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable {
provenance.setTrust(di.get().getTrust()); provenance.setTrust(di.get().getTrust());
p.setProvenance(provenance); p.setProvenance(provenance);
} }
if (relation.getValidated()) { if (Boolean.TRUE.equals(relation.getValidated())) {
p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate())); p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate()));
} }
return p; return p;
@ -179,8 +179,8 @@ public class SparkPrepareResultProject implements Serializable {
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
for (Object o : doc.selectNodes("//funding_level_0")) { for (Object o : doc.selectNodes("//funding_level_0")) {
List node = ((Node) o).selectNodes("./name"); List<Node> node = ((Node) o).selectNodes("./name");
f.setFundingStream(((Node) node.get(0)).getText()); f.setFundingStream((node.get(0)).getText());
} }
return f; return f;

View File

@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
@ -120,7 +121,7 @@ public class CreateContextRelation implements Serializable {
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
writer.newLine(); writer.newLine();
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new MyRuntimeException(e);
} }
} }

View File

@ -62,22 +62,7 @@ public class Extractor implements Serializable {
.readPath(spark, inputPath, inputClazz) .readPath(spark, inputPath, inputClazz)
.flatMap((FlatMapFunction<R, Relation>) value -> { .flatMap((FlatMapFunction<R, Relation>) value -> {
List<Relation> relationList = new ArrayList<>(); List<Relation> relationList = new ArrayList<>();
Optional extractRelationsFromInstance(hashCodes, value, relationList);
.ofNullable(value.getInstance())
.ifPresent(inst -> inst.forEach(instance -> {
Optional
.ofNullable(instance.getCollectedfrom())
.ifPresent(
cf -> getRelatioPair(
value, relationList, cf,
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
Optional
.ofNullable(instance.getHostedby())
.ifPresent(
hb -> getRelatioPair(
value, relationList, hb,
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
}));
Set<String> communities = communityMap.keySet(); Set<String> communities = communityMap.keySet();
Optional Optional
.ofNullable(value.getContext()) .ofNullable(value.getContext())
@ -136,8 +121,28 @@ public class Extractor implements Serializable {
} }
private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
List<Relation> relationList) {
Optional
.ofNullable(value.getInstance())
.ifPresent(inst -> inst.forEach(instance -> {
Optional
.ofNullable(instance.getCollectedfrom())
.ifPresent(
cf -> getRelatioPair(
value, relationList, cf,
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
Optional
.ofNullable(instance.getHostedby())
.ifPresent(
hb -> getRelatioPair(
value, relationList, hb,
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
}));
}
private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf, private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
String result_dtasource, String datasource_result, String resultDatasource, String datasourceResult,
Set<Integer> hashCodes) { Set<Integer> hashCodes) {
Provenance provenance = Optional Provenance provenance = Optional
.ofNullable(cf.getDataInfo()) .ofNullable(cf.getDataInfo())
@ -162,7 +167,7 @@ public class Extractor implements Serializable {
Relation r = getRelation( Relation r = getRelation(
value.getId(), value.getId(),
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
result_dtasource, ModelConstants.PROVISION, resultDatasource, ModelConstants.PROVISION,
provenance); provenance);
if (!hashCodes.contains(r.hashCode())) { if (!hashCodes.contains(r.hashCode())) {
relationList relationList
@ -173,7 +178,7 @@ public class Extractor implements Serializable {
r = getRelation( r = getRelation(
cf.getKey(), value.getId(), cf.getKey(), value.getId(),
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
datasource_result, ModelConstants.PROVISION, datasourceResult, ModelConstants.PROVISION,
provenance); provenance);
if (!hashCodes.contains(r.hashCode())) { if (!hashCodes.contains(r.hashCode())) {

View File

@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
@ -43,7 +44,7 @@ public class Process implements Serializable {
return (R) ri; return (R) ri;
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new MyRuntimeException(e);
} }
} }
@ -91,7 +92,7 @@ public class Process implements Serializable {
return relationList; return relationList;
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new MyRuntimeException(e);
} }
} }

View File

@ -13,7 +13,6 @@ import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@ -94,9 +93,9 @@ public class QueryInformationSystem {
Element root = doc.getRootElement(); Element root = doc.getRootElement();
cinfo.setId(root.attributeValue("id")); cinfo.setId(root.attributeValue("id"));
Iterator it = root.elementIterator(); Iterator<Element> it = root.elementIterator();
while (it.hasNext()) { while (it.hasNext()) {
Element el = (Element) it.next(); Element el = it.next();
if (el.getName().equals("category")) { if (el.getName().equals("category")) {
String categoryId = el.attributeValue("id"); String categoryId = el.attributeValue("id");
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2); categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
@ -143,7 +142,7 @@ public class QueryInformationSystem {
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
return null; return null;
} }
String funder = null; String funder = "";
String grantId = null; String grantId = null;
String funding = null; String funding = null;
for (Object node : el.selectNodes(".//param")) { for (Object node : el.selectNodes(".//param")) {
@ -158,9 +157,12 @@ public class QueryInformationSystem {
case "CD_PROJECT_NUMBER": case "CD_PROJECT_NUMBER":
grantId = n.getText(); grantId = n.getText();
break; break;
default:
break;
} }
} }
String nsp = null; String nsp = null;
switch (funder.toLowerCase()) { switch (funder.toLowerCase()) {
case "ec": case "ec":
if (funding == null) { if (funding == null) {
@ -179,10 +181,12 @@ public class QueryInformationSystem {
nsp = "dfgf________::"; nsp = "dfgf________::";
break; break;
default: default:
nsp = funder.toLowerCase(); StringBuilder bld = new StringBuilder();
bld.append(funder.toLowerCase());
for (int i = funder.length(); i < 12; i++) for (int i = funder.length(); i < 12; i++)
nsp += "_"; bld.append("_");
nsp += "::"; bld.append("::");
nsp = bld.toString();
} }
return prefix + "|" + nsp + DHPUtils.md5(grantId); return prefix + "|" + nsp + DHPUtils.md5(grantId);

View File

@ -71,22 +71,22 @@ public class SparkDumpRelationJob implements Serializable {
Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class); Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
relations relations
.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> { .map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel_new = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
rel_new relNew
.setSource( .setSource(
Node Node
.newInstance( .newInstance(
relation.getSource(), relation.getSource(),
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)))); ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
rel_new relNew
.setTarget( .setTarget(
Node Node
.newInstance( .newInstance(
relation.getTarget(), relation.getTarget(),
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)))); ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
rel_new relNew
.setReltype( .setReltype(
RelType RelType
.newInstance( .newInstance(
@ -96,9 +96,9 @@ public class SparkDumpRelationJob implements Serializable {
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo()); Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
if (odInfo.isPresent()) { if (odInfo.isPresent()) {
DataInfo dInfo = odInfo.get(); DataInfo dInfo = odInfo.get();
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent()) { if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
if (Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
rel_new relNew
.setProvenance( .setProvenance(
Provenance Provenance
.newInstance( .newInstance(
@ -106,13 +106,12 @@ public class SparkDumpRelationJob implements Serializable {
dInfo.getTrust())); dInfo.getTrust()));
} }
} }
} if (Boolean.TRUE.equals(relation.getValidated())) {
if (relation.getValidated()) { relNew.setValidated(relation.getValidated());
rel_new.setValidated(relation.getValidated()); relNew.setValidationDate(relation.getValidationDate());
rel_new.setValidationDate(relation.getValidationDate());
} }
return rel_new; return relNew;
}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) }, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
.write() .write()

View File

@ -58,7 +58,8 @@ public class SparkOrganizationRelation implements Serializable {
final OrganizationMap organizationMap = new Gson() final OrganizationMap organizationMap = new Gson()
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class); .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
log.info("organization map : {}", new Gson().toJson(organizationMap)); final String serializedOrganizationMap = new Gson().toJson(organizationMap);
log.info("organization map : {}", serializedOrganizationMap);
final String communityMapPath = parser.get("communityMapPath"); final String communityMapPath = parser.get("communityMapPath");
log.info("communityMapPath: {}", communityMapPath); log.info("communityMapPath: {}", communityMapPath);

View File

@ -131,7 +131,6 @@ public class SparkSelectValidRelationsJob implements Serializable {
.option("compression", "gzip") .option("compression", "gzip")
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.json(outputPath); .json(outputPath);
;
} }
} }

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
/**
 * Unchecked exception declared in the graph dump {@code exceptions} package.
 *
 * <p>Every constructor simply delegates to the matching {@link RuntimeException}
 * constructor; the class adds no state or behavior of its own.
 */
public class MyRuntimeException extends RuntimeException {

	/**
	 * Explicit serialization version. Exceptions are {@link java.io.Serializable} through
	 * {@link Throwable}; pinning the value keeps the stream identity independent of
	 * compiler-generated hashes.
	 */
	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception with no detail message and no cause.
	 */
	public MyRuntimeException() {
		super();
	}

	/**
	 * Creates an exception with full control over suppression and stack trace capture.
	 *
	 * @param message the detail message, may be {@code null}
	 * @param cause the underlying cause, may be {@code null}
	 * @param enableSuppression whether suppressed exceptions are recorded
	 * @param writableStackTrace whether the stack trace is captured and writable
	 */
	public MyRuntimeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/**
	 * Creates an exception with a detail message and a cause.
	 *
	 * @param message the detail message, may be {@code null}
	 * @param cause the underlying cause, may be {@code null}
	 */
	public MyRuntimeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with a detail message and no cause.
	 *
	 * @param message the detail message, may be {@code null}
	 */
	public MyRuntimeException(final String message) {
		super(message);
	}

	/**
	 * Creates an exception wrapping the given cause; the detail message is derived from
	 * the cause as defined by {@link RuntimeException#RuntimeException(Throwable)}.
	 *
	 * @param cause the underlying cause, may be {@code null}
	 */
	public MyRuntimeException(final Throwable cause) {
		super(cause);
	}
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
/**
 * Checked exception declared in the graph dump {@code exceptions} package.
 *
 * <p>NOTE(review): judging by the name, it presumably signals that no entity type could be
 * determined for an input being dumped; confirm against the throwing code.
 *
 * <p>Every constructor simply delegates to the matching {@link Exception} constructor; the
 * class adds no state or behavior of its own.
 */
public class NoAvailableEntityTypeException extends Exception {

	/**
	 * Explicit serialization version. Exceptions are {@link java.io.Serializable} through
	 * {@link Throwable}; pinning the value keeps the stream identity independent of
	 * compiler-generated hashes.
	 */
	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception with no detail message and no cause.
	 */
	public NoAvailableEntityTypeException() {
		super();
	}

	/**
	 * Creates an exception with full control over suppression and stack trace capture.
	 *
	 * @param message the detail message, may be {@code null}
	 * @param cause the underlying cause, may be {@code null}
	 * @param enableSuppression whether suppressed exceptions are recorded
	 * @param writableStackTrace whether the stack trace is captured and writable
	 */
	public NoAvailableEntityTypeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/**
	 * Creates an exception with a detail message and a cause.
	 *
	 * @param message the detail message, may be {@code null}
	 * @param cause the underlying cause, may be {@code null}
	 */
	public NoAvailableEntityTypeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with a detail message and no cause.
	 *
	 * @param message the detail message, may be {@code null}
	 */
	public NoAvailableEntityTypeException(final String message) {
		super(message);
	}

	/**
	 * Creates an exception wrapping the given cause; the detail message is derived from
	 * the cause as defined by {@link Exception#Exception(Throwable)}.
	 *
	 * @param cause the underlying cause, may be {@code null}
	 */
	public NoAvailableEntityTypeException(final Throwable cause) {
		super(cause);
	}
}

View File

@ -10,7 +10,6 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*; import org.apache.spark.sql.*;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -18,10 +17,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
/** /**
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC

View File

@ -10,7 +10,6 @@ import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
@ -19,13 +18,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/** /**
* Selects the results linked to projects. Only for these results the dump will be performed. * Selects the results linked to projects. Only for these results the dump will be performed.

View File

@ -8,17 +8,14 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -248,7 +245,7 @@ public class PrepareResultProjectJobTest {
org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));
assertEquals(2, verificationDataset.count() ); assertEquals(2, verificationDataset.count());
assertEquals( assertEquals(
1, 1,

View File

@ -6,6 +6,7 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.HashMap; import java.util.HashMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -98,7 +99,7 @@ public class DumpOrganizationProjectDatasourceTest {
} }
@Test @Test
public void dumpProjectTest() { public void dumpProjectTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project") .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
@ -127,7 +128,7 @@ public class DumpOrganizationProjectDatasourceTest {
} }
@Test @Test
public void dumpDatasourceTest() { public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource") .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
.getPath(); .getPath();

View File

@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{
assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy"))
assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN"))
assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access"))
assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold))
assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) assertTrue(pa.getBestaccessright.getClassid.equals("OPEN"))
assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) assertTrue(pa.getBestaccessright.getClassname.equals("Open Access"))