forked from D-Net/dnet-hadoop
applied some suggestions from Sonar Lint
This commit is contained in:
parent
9814c3e700
commit
e653756e3d
|
@ -5,37 +5,40 @@ import java.util.Map;
|
|||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
|
||||
public class Constants {
|
||||
|
||||
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
|
||||
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
|
||||
protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
|
||||
protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
|
||||
|
||||
public static final String INFERRED = "Inferred by OpenAIRE";
|
||||
public static final String CABF2 = "c_abf2";
|
||||
|
||||
public static final String HARVESTED = "Harvested";
|
||||
public static final String DEFAULT_TRUST = "0.9";
|
||||
public static final String USER_CLAIM = "Linked by user";
|
||||
|
||||
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||
public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||
|
||||
public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
|
||||
public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
|
||||
|
||||
public static String RESEARCH_COMMUNITY = "Research Community";
|
||||
public static final String RESEARCH_COMMUNITY = "Research Community";
|
||||
|
||||
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
||||
public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
||||
|
||||
static {
|
||||
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
|
||||
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
|
||||
accessRightsCoarMap.put("CLOSED", "c_14cb");
|
||||
accessRightsCoarMap.put("EMBARGO", "c_f1cf");
|
||||
accessRightsCoarMap.put("OPEN SOURCE", CABF2);
|
||||
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
|
||||
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
|
||||
}
|
||||
|
||||
static {
|
||||
coarCodeLabelMap.put("c_abf2", "OPEN");
|
||||
coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
|
||||
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
|
||||
coarCodeLabelMap.put("c_14cb", "CLOSED");
|
||||
coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
|
||||
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ import org.apache.spark.sql.SaveMode;
|
|||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
/**
|
||||
|
@ -66,7 +67,7 @@ public class DumpProducts implements Serializable {
|
|||
|
||||
private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
|
||||
CommunityMap communityMap,
|
||||
String dumpType) {
|
||||
String dumpType) throws NoAvailableEntityTypeException {
|
||||
|
||||
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
||||
if (odInfo.isPresent()) {
|
||||
|
@ -94,7 +95,7 @@ public class DumpProducts implements Serializable {
|
|||
}
|
||||
return null;
|
||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||
if (toDumpFor.size() == 0) {
|
||||
if (toDumpFor.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,16 +57,16 @@ public class MakeTar implements Serializable {
|
|||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
|
||||
throws IOException {
|
||||
|
||||
RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
|
||||
RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
|
||||
|
||||
while (dir_iterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = dir_iterator.next();
|
||||
while (dirIterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = dirIterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
String pathString = p.toString();
|
||||
String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
|
||||
|
||||
MakeTarArchive.tarMaxSize(fileSystem, p_string, outputPath + "/" + entity, entity, gBperSplit);
|
||||
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -40,10 +40,10 @@ public class QueryInformationSystem {
|
|||
"{$x//CONFIGURATION/context/@label}" +
|
||||
"</community>";
|
||||
|
||||
public CommunityMap getCommunityMap(boolean singleCommunity, String community_id)
|
||||
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
|
||||
throws ISLookUpException, DocumentException, SAXException {
|
||||
if (singleCommunity)
|
||||
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'")));
|
||||
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
|
||||
return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
|
||||
|
||||
}
|
||||
|
|
|
@ -6,9 +6,8 @@ import java.util.*;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;
|
||||
|
@ -29,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
|||
public class ResultMapper implements Serializable {
|
||||
|
||||
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
||||
E in, Map<String, String> communityMap, String dumpType) {
|
||||
E in, Map<String, String> communityMap, String dumpType) throws NoAvailableEntityTypeException {
|
||||
|
||||
Result out;
|
||||
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
||||
|
@ -136,6 +135,8 @@ public class ResultMapper implements Serializable {
|
|||
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
||||
|
||||
break;
|
||||
default:
|
||||
throw new NoAvailableEntityTypeException();
|
||||
}
|
||||
|
||||
Optional<List<Measure>> mes = Optional.ofNullable(input.getMeasures());
|
||||
|
@ -156,8 +157,7 @@ public class ResultMapper implements Serializable {
|
|||
// I do not map Access Right UNKNOWN or OTHER
|
||||
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
||||
if (oar.isPresent()) {
|
||||
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
|
||||
if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
|
||||
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
|
||||
out
|
||||
.setBestaccessright(
|
||||
|
@ -167,7 +167,6 @@ public class ResultMapper implements Serializable {
|
|||
Constants.coarCodeLabelMap.get(code),
|
||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||
}
|
||||
}
|
||||
|
||||
final List<String> contributorList = new ArrayList<>();
|
||||
Optional
|
||||
|
@ -263,7 +262,7 @@ public class ResultMapper implements Serializable {
|
|||
.stream()
|
||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||
.collect(Collectors.toList());
|
||||
if (iTitle.size() > 0) {
|
||||
if (!iTitle.isEmpty()) {
|
||||
out.setMaintitle(iTitle.get(0).getValue());
|
||||
}
|
||||
|
||||
|
@ -272,7 +271,7 @@ public class ResultMapper implements Serializable {
|
|||
.stream()
|
||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||
.collect(Collectors.toList());
|
||||
if (iTitle.size() > 0) {
|
||||
if (!iTitle.isEmpty()) {
|
||||
out.setSubtitle(iTitle.get(0).getValue());
|
||||
}
|
||||
|
||||
|
@ -301,9 +300,8 @@ public class ResultMapper implements Serializable {
|
|||
|
||||
Optional
|
||||
.ofNullable(input.getSource())
|
||||
.ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList())));
|
||||
// value.stream().forEach(s -> sourceList.add(s.getValue())));
|
||||
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
||||
.ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
|
||||
|
||||
List<Subject> subjectList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getSubject())
|
||||
|
@ -334,14 +332,14 @@ public class ResultMapper implements Serializable {
|
|||
value -> value
|
||||
.stream()
|
||||
.map(c -> {
|
||||
String community_id = c.getId();
|
||||
if (community_id.indexOf("::") > 0) {
|
||||
community_id = community_id.substring(0, community_id.indexOf("::"));
|
||||
String communityId = c.getId();
|
||||
if (communityId.contains("::")) {
|
||||
communityId = communityId.substring(0, communityId.indexOf("::"));
|
||||
}
|
||||
if (communities.contains(community_id)) {
|
||||
if (communities.contains(communityId)) {
|
||||
Context context = new Context();
|
||||
context.setCode(community_id);
|
||||
context.setLabel(communityMap.get(community_id));
|
||||
context.setCode(communityId);
|
||||
context.setLabel(communityMap.get(communityId));
|
||||
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
|
||||
if (dataInfo.isPresent()) {
|
||||
List<Provenance> provenance = new ArrayList<>();
|
||||
|
@ -361,7 +359,11 @@ public class ResultMapper implements Serializable {
|
|||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
try {
|
||||
context.setProvenance(getUniqueProvenance(provenance));
|
||||
} catch (NoAvailableEntityTypeException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
return context;
|
||||
}
|
||||
|
@ -371,7 +373,7 @@ public class ResultMapper implements Serializable {
|
|||
.collect(Collectors.toList()))
|
||||
.orElse(new ArrayList<>());
|
||||
|
||||
if (contextList.size() > 0) {
|
||||
if (!contextList.isEmpty()) {
|
||||
Set<Integer> hashValue = new HashSet<>();
|
||||
List<Context> remainigContext = new ArrayList<>();
|
||||
contextList.forEach(c -> {
|
||||
|
@ -417,8 +419,7 @@ public class ResultMapper implements Serializable {
|
|||
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());
|
||||
|
||||
if (opAr.isPresent()) {
|
||||
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
|
||||
if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
|
||||
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
|
||||
|
||||
instance
|
||||
|
@ -445,7 +446,7 @@ public class ResultMapper implements Serializable {
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Optional
|
||||
|
@ -498,7 +499,8 @@ public class ResultMapper implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
|
||||
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance)
|
||||
throws NoAvailableEntityTypeException {
|
||||
Provenance iProv = new Provenance();
|
||||
|
||||
Provenance hProv = new Provenance();
|
||||
|
@ -520,6 +522,8 @@ public class ResultMapper implements Serializable {
|
|||
case Constants.USER_CLAIM:
|
||||
lProv = getHighestTrust(lProv, p);
|
||||
break;
|
||||
default:
|
||||
throw new NoAvailableEntityTypeException();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -599,19 +603,19 @@ public class ResultMapper implements Serializable {
|
|||
}
|
||||
|
||||
private static Pid getOrcid(List<StructuredProperty> p) {
|
||||
List<StructuredProperty> pid_list = p.stream().map(pid -> {
|
||||
List<StructuredProperty> pidList = p.stream().map(pid -> {
|
||||
if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) ||
|
||||
(pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) {
|
||||
return pid;
|
||||
}
|
||||
return null;
|
||||
}).filter(pid -> pid != null).collect(Collectors.toList());
|
||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||
|
||||
if (pid_list.size() == 1) {
|
||||
return getAuthorPid(pid_list.get(0));
|
||||
if (pidList.size() == 1) {
|
||||
return getAuthorPid(pidList.get(0));
|
||||
}
|
||||
|
||||
List<StructuredProperty> orcid = pid_list
|
||||
List<StructuredProperty> orcid = pidList
|
||||
.stream()
|
||||
.filter(
|
||||
ap -> ap
|
||||
|
@ -622,7 +626,7 @@ public class ResultMapper implements Serializable {
|
|||
if (orcid.size() == 1) {
|
||||
return getAuthorPid(orcid.get(0));
|
||||
}
|
||||
orcid = pid_list
|
||||
orcid = pidList
|
||||
.stream()
|
||||
.filter(
|
||||
ap -> ap
|
||||
|
|
|
@ -31,9 +31,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|||
public class SaveCommunityMap implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
||||
private final QueryInformationSystem queryInformationSystem;
|
||||
private final transient QueryInformationSystem queryInformationSystem;
|
||||
|
||||
private final BufferedWriter writer;
|
||||
private final transient BufferedWriter writer;
|
||||
|
||||
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
|
||||
final Configuration conf = new Configuration();
|
||||
|
@ -84,12 +84,12 @@ public class SaveCommunityMap implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
private void saveCommunityMap(boolean singleCommunity, String community_id)
|
||||
private void saveCommunityMap(boolean singleCommunity, String communityId)
|
||||
throws ISLookUpException, IOException, DocumentException, SAXException {
|
||||
writer
|
||||
.write(
|
||||
Utils.OBJECT_MAPPER
|
||||
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id)));
|
||||
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,15 +5,13 @@ import java.io.Serializable;
|
|||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
||||
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||
|
||||
public class SendToZenodoHDFS implements Serializable {
|
||||
|
||||
|
@ -21,8 +19,6 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
|
||||
private static final String UPDATE = "update"; // to upload content to an open deposition not published
|
||||
|
||||
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
|
||||
|
||||
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
|
@ -48,15 +44,12 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
.orElse(false);
|
||||
|
||||
final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
|
||||
// final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
|
||||
// CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(hdfsPath), true);
|
||||
|
@ -77,19 +70,17 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
}
|
||||
zenodoApiClient.uploadOpenDeposition(depositionId);
|
||||
break;
|
||||
default:
|
||||
throw new NoAvailableEntityTypeException();
|
||||
}
|
||||
|
||||
while (fileStatusListIterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
if (!p_string.endsWith("_SUCCESS")) {
|
||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
// log.info("Sending information for community: " + name);
|
||||
// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
|
||||
// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
|
||||
// }
|
||||
String pString = p.toString();
|
||||
if (!pString.endsWith("_SUCCESS")) {
|
||||
String name = pString.substring(pString.lastIndexOf("/") + 1);
|
||||
|
||||
FSDataInputStream inputStream = fileSystem.open(p);
|
||||
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
||||
|
@ -101,7 +92,7 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
zenodoApiClient.sendMretadata(metadata);
|
||||
}
|
||||
|
||||
if (publish) {
|
||||
if (Boolean.TRUE.equals(publish)) {
|
||||
zenodoApiClient.publish();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -16,6 +15,7 @@ import org.apache.spark.sql.SparkSession;
|
|||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
|
||||
|
||||
/**
|
||||
* This class splits the dumped results according to the research community - research initiative/infrastructure they
|
||||
|
@ -56,10 +56,10 @@ public class CommunitySplit implements Serializable {
|
|||
}
|
||||
|
||||
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
|
||||
Dataset<CommunityResult> community_products = result
|
||||
Dataset<CommunityResult> communityProducts = result
|
||||
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
|
||||
|
||||
community_products
|
||||
communityProducts
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
@ -72,7 +72,7 @@ public class CommunitySplit implements Serializable {
|
|||
return r
|
||||
.getContext()
|
||||
.stream()
|
||||
.map(con -> con.getCode())
|
||||
.map(Context::getCode)
|
||||
.collect(Collectors.toList())
|
||||
.contains(c);
|
||||
}
|
||||
|
|
|
@ -161,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable {
|
|||
provenance.setTrust(di.get().getTrust());
|
||||
p.setProvenance(provenance);
|
||||
}
|
||||
if (relation.getValidated()) {
|
||||
if (Boolean.TRUE.equals(relation.getValidated())) {
|
||||
p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate()));
|
||||
}
|
||||
return p;
|
||||
|
@ -179,8 +179,8 @@ public class SparkPrepareResultProject implements Serializable {
|
|||
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||
for (Object o : doc.selectNodes("//funding_level_0")) {
|
||||
List node = ((Node) o).selectNodes("./name");
|
||||
f.setFundingStream(((Node) node.get(0)).getText());
|
||||
List<Node> node = ((Node) o).selectNodes("./name");
|
||||
f.setFundingStream((node.get(0)).getText());
|
||||
}
|
||||
|
||||
return f;
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
|
@ -120,7 +121,7 @@ public class CreateContextRelation implements Serializable {
|
|||
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
|
||||
writer.newLine();
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
throw new MyRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -62,22 +62,7 @@ public class Extractor implements Serializable {
|
|||
.readPath(spark, inputPath, inputClazz)
|
||||
.flatMap((FlatMapFunction<R, Relation>) value -> {
|
||||
List<Relation> relationList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(value.getInstance())
|
||||
.ifPresent(inst -> inst.forEach(instance -> {
|
||||
Optional
|
||||
.ofNullable(instance.getCollectedfrom())
|
||||
.ifPresent(
|
||||
cf -> getRelatioPair(
|
||||
value, relationList, cf,
|
||||
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
|
||||
Optional
|
||||
.ofNullable(instance.getHostedby())
|
||||
.ifPresent(
|
||||
hb -> getRelatioPair(
|
||||
value, relationList, hb,
|
||||
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
|
||||
}));
|
||||
extractRelationsFromInstance(hashCodes, value, relationList);
|
||||
Set<String> communities = communityMap.keySet();
|
||||
Optional
|
||||
.ofNullable(value.getContext())
|
||||
|
@ -136,8 +121,28 @@ public class Extractor implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Walks the instances of a result and delegates to {@code getRelatioPair} for every
 * datasource reference found on them.
 * <p>
 * For each instance, a non-null {@code collectedfrom} is passed with the
 * {@code IS_PROVIDED_BY} / {@code PROVIDES} relation names and a non-null {@code hostedby}
 * with the {@code IS_HOSTED_BY} / {@code HOSTS} relation names. A null instance list, or a
 * null {@code collectedfrom} / {@code hostedby}, is silently skipped.
 *
 * @param hashCodes    set handed through unchanged to {@code getRelatioPair}
 *                     (NOTE(review): presumably the hash codes of relations already emitted,
 *                     used for de-duplication -- confirm against getRelatioPair)
 * @param value        the result whose instances are inspected
 * @param relationList list handed through unchanged to {@code getRelatioPair}
 *                     (NOTE(review): presumably the accumulator the new relations are added to)
 * @param <R>          concrete result type
 */
private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
|
||||
List<Relation> relationList) {
|
||||
Optional
|
||||
.ofNullable(value.getInstance())
|
||||
.ifPresent(inst -> inst.forEach(instance -> {
|
||||
// datasource the instance was collected from
Optional
|
||||
.ofNullable(instance.getCollectedfrom())
|
||||
.ifPresent(
|
||||
cf -> getRelatioPair(
|
||||
value, relationList, cf,
|
||||
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
|
||||
// datasource hosting the instance
Optional
|
||||
.ofNullable(instance.getHostedby())
|
||||
.ifPresent(
|
||||
hb -> getRelatioPair(
|
||||
value, relationList, hb,
|
||||
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
|
||||
}));
|
||||
}
|
||||
|
||||
private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
|
||||
String result_dtasource, String datasource_result,
|
||||
String resultDatasource, String datasourceResult,
|
||||
Set<Integer> hashCodes) {
|
||||
Provenance provenance = Optional
|
||||
.ofNullable(cf.getDataInfo())
|
||||
|
@ -162,7 +167,7 @@ public class Extractor implements Serializable {
|
|||
Relation r = getRelation(
|
||||
value.getId(),
|
||||
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
|
||||
result_dtasource, ModelConstants.PROVISION,
|
||||
resultDatasource, ModelConstants.PROVISION,
|
||||
provenance);
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
relationList
|
||||
|
@ -173,7 +178,7 @@ public class Extractor implements Serializable {
|
|||
r = getRelation(
|
||||
cf.getKey(), value.getId(),
|
||||
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
|
||||
datasource_result, ModelConstants.PROVISION,
|
||||
datasourceResult, ModelConstants.PROVISION,
|
||||
provenance);
|
||||
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
|
|
|
@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
@ -43,7 +44,7 @@ public class Process implements Serializable {
|
|||
return (R) ri;
|
||||
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
throw new MyRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,7 +92,7 @@ public class Process implements Serializable {
|
|||
return relationList;
|
||||
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
throw new MyRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@ import org.dom4j.io.SAXReader;
|
|||
import org.jetbrains.annotations.NotNull;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
|
@ -94,9 +93,9 @@ public class QueryInformationSystem {
|
|||
Element root = doc.getRootElement();
|
||||
cinfo.setId(root.attributeValue("id"));
|
||||
|
||||
Iterator it = root.elementIterator();
|
||||
Iterator<Element> it = root.elementIterator();
|
||||
while (it.hasNext()) {
|
||||
Element el = (Element) it.next();
|
||||
Element el = it.next();
|
||||
if (el.getName().equals("category")) {
|
||||
String categoryId = el.attributeValue("id");
|
||||
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
|
||||
|
@ -143,7 +142,7 @@ public class QueryInformationSystem {
|
|||
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
|
||||
return null;
|
||||
}
|
||||
String funder = null;
|
||||
String funder = "";
|
||||
String grantId = null;
|
||||
String funding = null;
|
||||
for (Object node : el.selectNodes(".//param")) {
|
||||
|
@ -158,9 +157,12 @@ public class QueryInformationSystem {
|
|||
case "CD_PROJECT_NUMBER":
|
||||
grantId = n.getText();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
String nsp = null;
|
||||
|
||||
switch (funder.toLowerCase()) {
|
||||
case "ec":
|
||||
if (funding == null) {
|
||||
|
@ -179,10 +181,12 @@ public class QueryInformationSystem {
|
|||
nsp = "dfgf________::";
|
||||
break;
|
||||
default:
|
||||
nsp = funder.toLowerCase();
|
||||
StringBuilder bld = new StringBuilder();
|
||||
bld.append(funder.toLowerCase());
|
||||
for (int i = funder.length(); i < 12; i++)
|
||||
nsp += "_";
|
||||
nsp += "::";
|
||||
bld.append("_");
|
||||
bld.append("::");
|
||||
nsp = bld.toString();
|
||||
}
|
||||
|
||||
return prefix + "|" + nsp + DHPUtils.md5(grantId);
|
||||
|
|
|
@ -71,22 +71,22 @@ public class SparkDumpRelationJob implements Serializable {
|
|||
Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
|
||||
relations
|
||||
.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
|
||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel_new = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
||||
rel_new
|
||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
||||
relNew
|
||||
.setSource(
|
||||
Node
|
||||
.newInstance(
|
||||
relation.getSource(),
|
||||
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
|
||||
|
||||
rel_new
|
||||
relNew
|
||||
.setTarget(
|
||||
Node
|
||||
.newInstance(
|
||||
relation.getTarget(),
|
||||
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
|
||||
|
||||
rel_new
|
||||
relNew
|
||||
.setReltype(
|
||||
RelType
|
||||
.newInstance(
|
||||
|
@ -96,9 +96,9 @@ public class SparkDumpRelationJob implements Serializable {
|
|||
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
|
||||
if (odInfo.isPresent()) {
|
||||
DataInfo dInfo = odInfo.get();
|
||||
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent()) {
|
||||
if (Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
|
||||
rel_new
|
||||
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
|
||||
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
|
||||
relNew
|
||||
.setProvenance(
|
||||
Provenance
|
||||
.newInstance(
|
||||
|
@ -106,13 +106,12 @@ public class SparkDumpRelationJob implements Serializable {
|
|||
dInfo.getTrust()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (relation.getValidated()) {
|
||||
rel_new.setValidated(relation.getValidated());
|
||||
rel_new.setValidationDate(relation.getValidationDate());
|
||||
if (Boolean.TRUE.equals(relation.getValidated())) {
|
||||
relNew.setValidated(relation.getValidated());
|
||||
relNew.setValidationDate(relation.getValidationDate());
|
||||
}
|
||||
|
||||
return rel_new;
|
||||
return relNew;
|
||||
|
||||
}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
|
||||
.write()
|
||||
|
|
|
@ -58,7 +58,8 @@ public class SparkOrganizationRelation implements Serializable {
|
|||
|
||||
final OrganizationMap organizationMap = new Gson()
|
||||
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
|
||||
log.info("organization map : {}", new Gson().toJson(organizationMap));
|
||||
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
|
||||
log.info("organization map : {}", serializedOrganizationMap);
|
||||
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
log.info("communityMapPath: {}", communityMapPath);
|
||||
|
|
|
@ -131,7 +131,6 @@ public class SparkSelectValidRelationsJob implements Serializable {
|
|||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath);
|
||||
;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
|
||||
|
||||
/**
 * Project-specific unchecked exception.
 * <p>
 * It is used in place of a raw {@link RuntimeException} when a caught exception has to be
 * re-thrown from a context that cannot declare checked exceptions, so that callers can tell
 * failures raised by this code base apart from generic runtime failures.
 */
public class MyRuntimeException extends RuntimeException {

	/**
	 * Explicit serial version: {@link Throwable} is {@link java.io.Serializable}, and without
	 * this field the id would be derived from the compiled class shape and could change
	 * between builds.
	 */
	private static final long serialVersionUID = 1L;

	/** Creates an exception with no detail message and no cause. */
	public MyRuntimeException() {
		super();
	}

	/**
	 * Creates an exception with full control over suppression and stack-trace capture.
	 *
	 * @param message            the detail message, may be {@code null}
	 * @param cause              the underlying cause, may be {@code null}
	 * @param enableSuppression  whether suppressed exceptions are recorded
	 * @param writableStackTrace whether the stack trace is captured
	 */
	public MyRuntimeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/**
	 * Creates an exception with a detail message and the underlying cause.
	 *
	 * @param message the detail message
	 * @param cause   the exception being wrapped
	 */
	public MyRuntimeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with a detail message only.
	 *
	 * @param message the detail message
	 */
	public MyRuntimeException(final String message) {
		super(message);
	}

	/**
	 * Wraps another exception; the detail message is taken from the cause.
	 *
	 * @param cause the exception being wrapped
	 */
	public MyRuntimeException(final Throwable cause) {
		super(cause);
	}

}
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
|
||||
|
||||
/**
 * Checked exception signalling that a value did not match any of the cases the caller knows
 * how to handle (for instance, the fall-through {@code default} branch of a {@code switch}
 * over entity or operation types).
 */
public class NoAvailableEntityTypeException extends Exception {

	/**
	 * Explicit serial version: {@link Throwable} is {@link java.io.Serializable}, and without
	 * this field the id would be derived from the compiled class shape and could change
	 * between builds.
	 */
	private static final long serialVersionUID = 1L;

	/** Creates an exception with no detail message and no cause. */
	public NoAvailableEntityTypeException() {
		super();
	}

	/**
	 * Creates an exception with full control over suppression and stack-trace capture.
	 *
	 * @param message            the detail message, may be {@code null}
	 * @param cause              the underlying cause, may be {@code null}
	 * @param enableSuppression  whether suppressed exceptions are recorded
	 * @param writableStackTrace whether the stack trace is captured
	 */
	public NoAvailableEntityTypeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/**
	 * Creates an exception with a detail message and the underlying cause.
	 *
	 * @param message the detail message
	 * @param cause   the exception that triggered this one
	 */
	public NoAvailableEntityTypeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with a detail message only.
	 *
	 * @param message the detail message
	 */
	public NoAvailableEntityTypeException(final String message) {
		super(message);
	}

	/**
	 * Wraps another exception; the detail message is taken from the cause.
	 *
	 * @param cause the exception that triggered this one
	 */
	public NoAvailableEntityTypeException(final Throwable cause) {
		super(cause);
	}

}
|
|
@ -10,7 +10,6 @@ import java.util.Optional;
|
|||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -18,10 +17,8 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
/**
|
||||
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
|
||||
|
|
|
@ -10,7 +10,6 @@ import org.apache.commons.io.IOUtils;
|
|||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
|
@ -19,13 +18,11 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Selects the results linked to projects. Only for these results the dump will be performed.
|
||||
|
|
|
@ -8,17 +8,14 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -98,7 +99,7 @@ public class DumpOrganizationProjectDatasourceTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void dumpProjectTest() {
|
||||
public void dumpProjectTest() throws NoAvailableEntityTypeException {
|
||||
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
|
||||
|
@ -127,7 +128,7 @@ public class DumpOrganizationProjectDatasourceTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void dumpDatasourceTest() {
|
||||
public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
|
||||
.getPath();
|
||||
|
|
|
@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{
|
|||
assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy"))
|
||||
assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN"))
|
||||
assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access"))
|
||||
assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid))
|
||||
assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold))
|
||||
assertTrue(pa.getBestaccessright.getClassid.equals("OPEN"))
|
||||
assertTrue(pa.getBestaccessright.getClassname.equals("Open Access"))
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue