1
0
Fork 0

applied some suggestions from Sonar Lint

This commit is contained in:
Miriam Baglioni 2021-10-04 18:40:07 +02:00
parent 9814c3e700
commit e653756e3d
24 changed files with 274 additions and 214 deletions

View File

@ -5,37 +5,40 @@ import java.util.Map;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelConstants;
public class Constants {
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static final String INFERRED = "Inferred by OpenAIRE";
public static final String CABF2 = "c_abf2";
public static final String HARVESTED = "Harvested";
public static final String DEFAULT_TRUST = "0.9";
public static final String USER_CLAIM = "Linked by user";
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
public static String RESEARCH_COMMUNITY = "Research Community";
public static final String RESEARCH_COMMUNITY = "Research Community";
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
static {
accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
accessRightsCoarMap.put("CLOSED", "c_14cb");
accessRightsCoarMap.put("EMBARGO", "c_f1cf");
accessRightsCoarMap.put("OPEN SOURCE", CABF2);
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
}
static {
coarCodeLabelMap.put("c_abf2", "OPEN");
coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
coarCodeLabelMap.put("c_14cb", "CLOSED");
coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
}

View File

@ -17,6 +17,7 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.oaf.*;
/**
@ -66,7 +67,7 @@ public class DumpProducts implements Serializable {
private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
CommunityMap communityMap,
String dumpType) {
String dumpType) throws NoAvailableEntityTypeException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (odInfo.isPresent()) {
@ -94,7 +95,7 @@ public class DumpProducts implements Serializable {
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
if (toDumpFor.size() == 0) {
if (toDumpFor.isEmpty()) {
return null;
}
}

View File

@ -57,16 +57,16 @@ public class MakeTar implements Serializable {
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
throws IOException {
RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
while (dir_iterator.hasNext()) {
LocatedFileStatus fileStatus = dir_iterator.next();
while (dirIterator.hasNext()) {
LocatedFileStatus fileStatus = dirIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
String pathString = p.toString();
String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
MakeTarArchive.tarMaxSize(fileSystem, p_string, outputPath + "/" + entity, entity, gBperSplit);
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
}
}

View File

@ -40,10 +40,10 @@ public class QueryInformationSystem {
"{$x//CONFIGURATION/context/@label}" +
"</community>";
public CommunityMap getCommunityMap(boolean singleCommunity, String community_id)
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, DocumentException, SAXException {
if (singleCommunity)
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'")));
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
}

View File

@ -6,9 +6,8 @@ import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;
@ -29,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class ResultMapper implements Serializable {
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
E in, Map<String, String> communityMap, String dumpType) {
E in, Map<String, String> communityMap, String dumpType) throws NoAvailableEntityTypeException {
Result out;
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
@ -136,6 +135,8 @@ public class ResultMapper implements Serializable {
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
break;
default:
throw new NoAvailableEntityTypeException();
}
Optional<List<Measure>> mes = Optional.ofNullable(input.getMeasures());
@ -156,17 +157,15 @@ public class ResultMapper implements Serializable {
// I do not map Access Right UNKNOWN or OTHER
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
if (oar.isPresent()) {
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
out
.setBestaccessright(
AccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
}
if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
out
.setBestaccessright(
AccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
}
final List<String> contributorList = new ArrayList<>();
@ -263,7 +262,7 @@ public class ResultMapper implements Serializable {
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList());
if (iTitle.size() > 0) {
if (!iTitle.isEmpty()) {
out.setMaintitle(iTitle.get(0).getValue());
}
@ -272,7 +271,7 @@ public class ResultMapper implements Serializable {
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList());
if (iTitle.size() > 0) {
if (!iTitle.isEmpty()) {
out.setSubtitle(iTitle.get(0).getValue());
}
@ -301,9 +300,8 @@ public class ResultMapper implements Serializable {
Optional
.ofNullable(input.getSource())
.ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList())));
// value.stream().forEach(s -> sourceList.add(s.getValue())));
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
.ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
List<Subject> subjectList = new ArrayList<>();
Optional
.ofNullable(input.getSubject())
@ -334,14 +332,14 @@ public class ResultMapper implements Serializable {
value -> value
.stream()
.map(c -> {
String community_id = c.getId();
if (community_id.indexOf("::") > 0) {
community_id = community_id.substring(0, community_id.indexOf("::"));
String communityId = c.getId();
if (communityId.contains("::")) {
communityId = communityId.substring(0, communityId.indexOf("::"));
}
if (communities.contains(community_id)) {
if (communities.contains(communityId)) {
Context context = new Context();
context.setCode(community_id);
context.setLabel(communityMap.get(community_id));
context.setCode(communityId);
context.setLabel(communityMap.get(communityId));
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
if (dataInfo.isPresent()) {
List<Provenance> provenance = new ArrayList<>();
@ -361,7 +359,11 @@ public class ResultMapper implements Serializable {
.filter(Objects::nonNull)
.collect(Collectors.toSet()));
context.setProvenance(getUniqueProvenance(provenance));
try {
context.setProvenance(getUniqueProvenance(provenance));
} catch (NoAvailableEntityTypeException e) {
e.printStackTrace();
}
}
return context;
}
@ -371,7 +373,7 @@ public class ResultMapper implements Serializable {
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
if (contextList.size() > 0) {
if (!contextList.isEmpty()) {
Set<Integer> hashValue = new HashSet<>();
List<Context> remainigContext = new ArrayList<>();
contextList.forEach(c -> {
@ -417,35 +419,34 @@ public class ResultMapper implements Serializable {
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());
if (opAr.isPresent()) {
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
instance
.setAccessright(
AccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
if (opAr.get().getOpenAccessRoute() != null) {
switch (opAr.get().getOpenAccessRoute()) {
case hybrid:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid);
break;
case gold:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold);
break;
case green:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green);
break;
case bronze:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze);
break;
instance
.setAccessright(
AccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
if (opAr.get().getOpenAccessRoute() != null) {
switch (opAr.get().getOpenAccessRoute()) {
case hybrid:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid);
break;
case gold:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold);
break;
case green:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green);
break;
case bronze:
instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze);
break;
}
}
}
}
Optional
@ -498,7 +499,8 @@ public class ResultMapper implements Serializable {
}
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance)
throws NoAvailableEntityTypeException {
Provenance iProv = new Provenance();
Provenance hProv = new Provenance();
@ -520,6 +522,8 @@ public class ResultMapper implements Serializable {
case Constants.USER_CLAIM:
lProv = getHighestTrust(lProv, p);
break;
default:
throw new NoAvailableEntityTypeException();
}
}
@ -599,19 +603,19 @@ public class ResultMapper implements Serializable {
}
private static Pid getOrcid(List<StructuredProperty> p) {
List<StructuredProperty> pid_list = p.stream().map(pid -> {
List<StructuredProperty> pidList = p.stream().map(pid -> {
if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) ||
(pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) {
return pid;
}
return null;
}).filter(pid -> pid != null).collect(Collectors.toList());
}).filter(Objects::nonNull).collect(Collectors.toList());
if (pid_list.size() == 1) {
return getAuthorPid(pid_list.get(0));
if (pidList.size() == 1) {
return getAuthorPid(pidList.get(0));
}
List<StructuredProperty> orcid = pid_list
List<StructuredProperty> orcid = pidList
.stream()
.filter(
ap -> ap
@ -622,7 +626,7 @@ public class ResultMapper implements Serializable {
if (orcid.size() == 1) {
return getAuthorPid(orcid.get(0));
}
orcid = pid_list
orcid = pidList
.stream()
.filter(
ap -> ap

View File

@ -31,9 +31,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final QueryInformationSystem queryInformationSystem;
private final transient QueryInformationSystem queryInformationSystem;
private final BufferedWriter writer;
private final transient BufferedWriter writer;
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
final Configuration conf = new Configuration();
@ -84,12 +84,12 @@ public class SaveCommunityMap implements Serializable {
}
private void saveCommunityMap(boolean singleCommunity, String community_id)
private void saveCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, IOException, DocumentException, SAXException {
writer
.write(
Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id)));
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)));
}
}

View File

@ -5,15 +5,13 @@ import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
public class SendToZenodoHDFS implements Serializable {
@ -21,8 +19,6 @@ public class SendToZenodoHDFS implements Serializable {
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
private static final String UPDATE = "update"; // to upload content to an open deposition not published
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
@ -48,15 +44,12 @@ public class SendToZenodoHDFS implements Serializable {
.orElse(false);
final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
// final String communityMapPath = parser.get("communityMapPath");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
// CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(hdfsPath), true);
@ -77,19 +70,17 @@ public class SendToZenodoHDFS implements Serializable {
}
zenodoApiClient.uploadOpenDeposition(depositionId);
break;
default:
throw new NoAvailableEntityTypeException();
}
while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
// log.info("Sending information for community: " + name);
// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
// }
String pString = p.toString();
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
@ -101,7 +92,7 @@ public class SendToZenodoHDFS implements Serializable {
zenodoApiClient.sendMretadata(metadata);
}
if (publish) {
if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish();
}
}

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
@ -16,6 +15,7 @@ import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
/**
* This class splits the dumped results according to the research community - research initiative/infrastructure they
@ -56,10 +56,10 @@ public class CommunitySplit implements Serializable {
}
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
Dataset<CommunityResult> community_products = result
Dataset<CommunityResult> communityProducts = result
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
community_products
communityProducts
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
@ -72,7 +72,7 @@ public class CommunitySplit implements Serializable {
return r
.getContext()
.stream()
.map(con -> con.getCode())
.map(Context::getCode)
.collect(Collectors.toList())
.contains(c);
}

View File

@ -161,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable {
provenance.setTrust(di.get().getTrust());
p.setProvenance(provenance);
}
if (relation.getValidated()) {
if (Boolean.TRUE.equals(relation.getValidated())) {
p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate()));
}
return p;
@ -179,8 +179,8 @@ public class SparkPrepareResultProject implements Serializable {
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
for (Object o : doc.selectNodes("//funding_level_0")) {
List node = ((Node) o).selectNodes("./name");
f.setFundingStream(((Node) node.get(0)).getText());
List<Node> node = ((Node) o).selectNodes("./name");
f.setFundingStream((node.get(0)).getText());
}
return f;

View File

@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.Datasource;
@ -120,7 +121,7 @@ public class CreateContextRelation implements Serializable {
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
writer.newLine();
} catch (final Exception e) {
throw new RuntimeException(e);
throw new MyRuntimeException(e);
}
}

View File

@ -62,22 +62,7 @@ public class Extractor implements Serializable {
.readPath(spark, inputPath, inputClazz)
.flatMap((FlatMapFunction<R, Relation>) value -> {
List<Relation> relationList = new ArrayList<>();
Optional
.ofNullable(value.getInstance())
.ifPresent(inst -> inst.forEach(instance -> {
Optional
.ofNullable(instance.getCollectedfrom())
.ifPresent(
cf -> getRelatioPair(
value, relationList, cf,
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
Optional
.ofNullable(instance.getHostedby())
.ifPresent(
hb -> getRelatioPair(
value, relationList, hb,
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
}));
extractRelationsFromInstance(hashCodes, value, relationList);
Set<String> communities = communityMap.keySet();
Optional
.ofNullable(value.getContext())
@ -136,8 +121,28 @@ public class Extractor implements Serializable {
}
private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
List<Relation> relationList) {
Optional
.ofNullable(value.getInstance())
.ifPresent(inst -> inst.forEach(instance -> {
Optional
.ofNullable(instance.getCollectedfrom())
.ifPresent(
cf -> getRelatioPair(
value, relationList, cf,
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
Optional
.ofNullable(instance.getHostedby())
.ifPresent(
hb -> getRelatioPair(
value, relationList, hb,
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
}));
}
private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
String result_dtasource, String datasource_result,
String resultDatasource, String datasourceResult,
Set<Integer> hashCodes) {
Provenance provenance = Optional
.ofNullable(cf.getDataInfo())
@ -162,7 +167,7 @@ public class Extractor implements Serializable {
Relation r = getRelation(
value.getId(),
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
result_dtasource, ModelConstants.PROVISION,
resultDatasource, ModelConstants.PROVISION,
provenance);
if (!hashCodes.contains(r.hashCode())) {
relationList
@ -173,7 +178,7 @@ public class Extractor implements Serializable {
r = getRelation(
cf.getKey(), value.getId(),
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
datasource_result, ModelConstants.PROVISION,
datasourceResult, ModelConstants.PROVISION,
provenance);
if (!hashCodes.contains(r.hashCode())) {

View File

@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
@ -43,7 +44,7 @@ public class Process implements Serializable {
return (R) ri;
} catch (final Exception e) {
throw new RuntimeException(e);
throw new MyRuntimeException(e);
}
}
@ -91,7 +92,7 @@ public class Process implements Serializable {
return relationList;
} catch (final Exception e) {
throw new RuntimeException(e);
throw new MyRuntimeException(e);
}
}

View File

@ -13,7 +13,6 @@ import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@ -94,9 +93,9 @@ public class QueryInformationSystem {
Element root = doc.getRootElement();
cinfo.setId(root.attributeValue("id"));
Iterator it = root.elementIterator();
Iterator<Element> it = root.elementIterator();
while (it.hasNext()) {
Element el = (Element) it.next();
Element el = it.next();
if (el.getName().equals("category")) {
String categoryId = el.attributeValue("id");
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
@ -143,7 +142,7 @@ public class QueryInformationSystem {
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
return null;
}
String funder = null;
String funder = "";
String grantId = null;
String funding = null;
for (Object node : el.selectNodes(".//param")) {
@ -158,9 +157,12 @@ public class QueryInformationSystem {
case "CD_PROJECT_NUMBER":
grantId = n.getText();
break;
default:
break;
}
}
String nsp = null;
switch (funder.toLowerCase()) {
case "ec":
if (funding == null) {
@ -179,10 +181,12 @@ public class QueryInformationSystem {
nsp = "dfgf________::";
break;
default:
nsp = funder.toLowerCase();
StringBuilder bld = new StringBuilder();
bld.append(funder.toLowerCase());
for (int i = funder.length(); i < 12; i++)
nsp += "_";
nsp += "::";
bld.append("_");
bld.append("::");
nsp = bld.toString();
}
return prefix + "|" + nsp + DHPUtils.md5(grantId);

View File

@ -71,22 +71,22 @@ public class SparkDumpRelationJob implements Serializable {
Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
relations
.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel_new = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
rel_new
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
relNew
.setSource(
Node
.newInstance(
relation.getSource(),
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
rel_new
relNew
.setTarget(
Node
.newInstance(
relation.getTarget(),
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
rel_new
relNew
.setReltype(
RelType
.newInstance(
@ -96,23 +96,22 @@ public class SparkDumpRelationJob implements Serializable {
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
if (odInfo.isPresent()) {
DataInfo dInfo = odInfo.get();
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent()) {
if (Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
rel_new
.setProvenance(
Provenance
.newInstance(
dInfo.getProvenanceaction().getClassname(),
dInfo.getTrust()));
}
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
relNew
.setProvenance(
Provenance
.newInstance(
dInfo.getProvenanceaction().getClassname(),
dInfo.getTrust()));
}
}
if (relation.getValidated()) {
rel_new.setValidated(relation.getValidated());
rel_new.setValidationDate(relation.getValidationDate());
if (Boolean.TRUE.equals(relation.getValidated())) {
relNew.setValidated(relation.getValidated());
relNew.setValidationDate(relation.getValidationDate());
}
return rel_new;
return relNew;
}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
.write()

View File

@ -58,7 +58,8 @@ public class SparkOrganizationRelation implements Serializable {
final OrganizationMap organizationMap = new Gson()
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
log.info("organization map : {}", new Gson().toJson(organizationMap));
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
log.info("organization map : {}", serializedOrganizationMap);
final String communityMapPath = parser.get("communityMapPath");
log.info("communityMapPath: {}", communityMapPath);

View File

@ -131,7 +131,6 @@ public class SparkSelectValidRelationsJob implements Serializable {
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
/**
 * Unchecked exception type declared in the graph dump {@code exceptions} package.
 *
 * <p>The class adds no state or behaviour of its own: every constructor simply forwards
 * its arguments to the {@link RuntimeException} constructor with the same signature.
 */
public class MyRuntimeException extends RuntimeException {

	/**
	 * Creates an exception that wraps another throwable.
	 *
	 * @param cause the throwable being wrapped
	 */
	public MyRuntimeException(Throwable cause) {
		super(cause);
	}

	/**
	 * Creates an exception carrying only a detail message.
	 *
	 * @param message the detail message
	 */
	public MyRuntimeException(String message) {
		super(message);
	}

	/**
	 * Creates an exception carrying a detail message and a cause.
	 *
	 * @param message the detail message
	 * @param cause the throwable being wrapped
	 */
	public MyRuntimeException(String message, Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with full control over suppression and stack trace capture.
	 *
	 * @param message the detail message
	 * @param cause the throwable being wrapped
	 * @param enableSuppression whether suppressed exceptions may be attached
	 * @param writableStackTrace whether the stack trace is recorded
	 */
	public MyRuntimeException(
		String message,
		Throwable cause,
		boolean enableSuppression,
		boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/** Creates an exception with neither a detail message nor a cause. */
	public MyRuntimeException() {
		super();
	}
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.oa.graph.dump.exceptions;
/**
 * Checked exception type declared in the graph dump {@code exceptions} package.
 *
 * <p>The class adds no state or behaviour of its own: every constructor simply forwards
 * its arguments to the {@link Exception} constructor with the same signature.
 */
public class NoAvailableEntityTypeException extends Exception {

	/**
	 * Creates an exception that wraps another throwable.
	 *
	 * @param cause the throwable being wrapped
	 */
	public NoAvailableEntityTypeException(Throwable cause) {
		super(cause);
	}

	/**
	 * Creates an exception carrying only a detail message.
	 *
	 * @param message the detail message
	 */
	public NoAvailableEntityTypeException(String message) {
		super(message);
	}

	/**
	 * Creates an exception carrying a detail message and a cause.
	 *
	 * @param message the detail message
	 * @param cause the throwable being wrapped
	 */
	public NoAvailableEntityTypeException(String message, Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception with full control over suppression and stack trace capture.
	 *
	 * @param message the detail message
	 * @param cause the throwable being wrapped
	 * @param enableSuppression whether suppressed exceptions may be attached
	 * @param writableStackTrace whether the stack trace is recorded
	 */
	public NoAvailableEntityTypeException(
		String message,
		Throwable cause,
		boolean enableSuppression,
		boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	/** Creates an exception with neither a detail message nor a cause. */
	public NoAvailableEntityTypeException() {
		super();
	}
}

View File

@ -10,7 +10,6 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
@ -18,10 +17,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC

View File

@ -10,7 +10,6 @@ import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
@ -19,13 +18,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
* Selects the results linked to projects. Only for these results the dump will be performed.

View File

@ -8,17 +8,14 @@ import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
@ -248,14 +245,14 @@ public class PrepareResultProjectJobTest {
org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));
assertEquals(2, verificationDataset.count() );
assertEquals(2, verificationDataset.count());
assertEquals(
1,
verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
1,
verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
assertEquals(
1,
verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
1,
verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
verificationDataset.createOrReplaceTempView("dataset");
@ -269,59 +266,59 @@ public class PrepareResultProjectJobTest {
assertEquals(3, resultExplodedProvenance.count());
assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count());
assertEquals(
2,
resultExplodedProvenance
.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
.count());
2,
resultExplodedProvenance
.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
.count());
assertEquals(
1,
resultExplodedProvenance
.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
.count());
1,
resultExplodedProvenance
.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
.count());
assertEquals(
2,
resultExplodedProvenance
.filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
.count());
2,
resultExplodedProvenance
.filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
.count());
assertEquals(
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
"and validatedByFunder = true " +
"and validationDate = '2021-08-06'")
.count());
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
"and validatedByFunder = true " +
"and validationDate = '2021-08-06'")
.count());
assertEquals(
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
"and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " +
"and validatedByFunder = true and validationDate = '2021-08-04'")
.count());
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
"and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " +
"and validatedByFunder = true and validationDate = '2021-08-04'")
.count());
assertEquals(
1,
resultExplodedProvenance
.filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
.count());
1,
resultExplodedProvenance
.filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
.count());
assertEquals(
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " +
"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
"and validatedByFunder = true and validationDate = '2021-08-05'")
.count());
1,
resultExplodedProvenance
.filter(
"project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " +
"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
"and validatedByFunder = true and validationDate = '2021-08-05'")
.count());
assertEquals(
3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
}
}

View File

@ -6,6 +6,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@ -98,7 +99,7 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpProjectTest() {
public void dumpProjectTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
@ -127,7 +128,7 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpDatasourceTest() {
public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
.getPath();

View File

@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{
assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy"))
assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN"))
assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access"))
assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid))
assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold))
assertTrue(pa.getBestaccessright.getClassid.equals("OPEN"))
assertTrue(pa.getBestaccessright.getClassname.equals("Open Access"))