Compare commits

..

10 Commits

9 changed files with 253 additions and 35 deletions

View File

@ -19,7 +19,7 @@ import java.io.Serializable;
*/ */
public class ResearchInitiative implements Serializable { public class ResearchInitiative implements Serializable {
private String id; // openaireId private String id; // openaireId
private String originalId; // context id private String acronym; // context id
private String name; // context name private String name; // context name
private String type; // context type: research initiative or research community private String type; // context type: research initiative or research community
private String description; private String description;
@ -57,12 +57,12 @@ public class ResearchInitiative implements Serializable {
this.name = label; this.name = label;
} }
public String getOriginalId() { public String getAcronym() {
return originalId; return acronym;
} }
public void setOriginalId(String originalId) { public void setAcronym(String acronym) {
this.originalId = originalId; this.acronym = acronym;
} }
public String getDescription() { public String getDescription() {

View File

@ -1,8 +1,6 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import java.text.ParseException; import java.text.ParseException;
@ -10,6 +8,8 @@ import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.common.ModelSupport;
/** /**
* Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to
* graph node identifiers and it is further characterised by the semantic of the link through the fields relType, * graph node identifiers and it is further characterised by the semantic of the link through the fields relType,
@ -137,7 +137,10 @@ public class Relation extends Oaf {
try { try {
setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate())); setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate()));
} catch (ParseException e) { } catch (ParseException e) {
throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate())); throw new IllegalArgumentException(String
.format(
"invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(),
getValidationDate()));
} }
super.mergeFrom(r); super.mergeFrom(r);

View File

@ -4,6 +4,11 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -22,11 +27,6 @@ import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
public class PrepareResultInstRepoAssociation { public class PrepareResultInstRepoAssociation {
private static final Logger log = LoggerFactory.getLogger(PrepareResultInstRepoAssociation.class); private static final Logger log = LoggerFactory.getLogger(PrepareResultInstRepoAssociation.class);
@ -56,9 +56,10 @@ public class PrepareResultInstRepoAssociation {
final String alreadyLinkedPath = parser.get("alreadyLinkedPath"); final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
log.info("alreadyLinkedPath {}: ", alreadyLinkedPath); log.info("alreadyLinkedPath {}: ", alreadyLinkedPath);
List<String> blacklist = Optional.ofNullable(parser.get("blacklist")) List<String> blacklist = Optional
.map(v -> Arrays.asList(v.split(";"))) .ofNullable(parser.get("blacklist"))
.orElse(new ArrayList<>()); .map(v -> Arrays.asList(v.split(";")))
.orElse(new ArrayList<>());
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
@ -91,14 +92,13 @@ public class PrepareResultInstRepoAssociation {
private static void prepareDatasourceOrganization( private static void prepareDatasourceOrganization(
SparkSession spark, String datasourceOrganizationPath, List<String> blacklist) { SparkSession spark, String datasourceOrganizationPath, List<String> blacklist) {
String blacklisted = ""; String blacklisted = "";
if(blacklist.size() > 0 ){ if (blacklist.size() > 0) {
blacklisted = " AND d.id != '" + blacklist.get(0) + "'"; blacklisted = " AND d.id != '" + blacklist.get(0) + "'";
for (int i = 1; i < blacklist.size(); i++) { for (int i = 1; i < blacklist.size(); i++) {
blacklisted += " AND d.id != '" + blacklist.get(i) + "'"; blacklisted += " AND d.id != '" + blacklist.get(i) + "'";
} }
} }
String query = "SELECT source datasourceId, target organizationId " String query = "SELECT source datasourceId, target organizationId "
+ "FROM ( SELECT id " + "FROM ( SELECT id "
+ "FROM datasource " + "FROM datasource "

View File

@ -70,10 +70,10 @@ public class CreateContextRelation implements Serializable {
cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
log.info("Creating relations for projects... "); log.info("Creating relations for projects... ");
// cce cce
// .execute( .execute(
// Process::getRelation, CONTEX_RELATION_PROJECT, Process::getRelation, CONTEX_RELATION_PROJECT,
// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
cce.close(); cce.close();

View File

@ -147,7 +147,7 @@ public class Extractor implements Serializable {
.map( .map(
paction -> Provenance paction -> Provenance
.newInstance( .newInstance(
paction.getClassid(), paction.getClassname(),
dinfo.getTrust())) dinfo.getTrust()))
.orElse( .orElse(
Provenance Provenance

View File

@ -35,7 +35,7 @@ public class Process implements Serializable {
ri.setType(Constants.RESEARCH_INFRASTRUCTURE); ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
} }
ri.setId(Utils.getContextId(ci.getId())); ri.setId(Utils.getContextId(ci.getId()));
ri.setOriginalId(ci.getId()); ri.setAcronym(ci.getId());
ri.setDescription(ci.getDescription()); ri.setDescription(ci.getDescription());
ri.setName(ci.getName()); ri.setName(ci.getName());

View File

@ -12,6 +12,7 @@ import org.dom4j.Node;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -113,14 +114,72 @@ public class QueryInformationSystem {
@NotNull @NotNull
private List<String> getCategoryList(Element el, String prefix) { private List<String> getCategoryList(Element el, String prefix) {
List<String> datasourceList = new ArrayList<>(); List<String> datasourceList = new ArrayList<>();
for (Object node : el.selectNodes(".//param")) { for (Object node : el.selectNodes(".//concept")) {
Node n = (Node) node; String oid = getOpenaireId((Node) node, prefix);
if (n.valueOf("./@name").equals("openaireId")) { if (oid != null)
datasourceList.add(prefix + "|" + n.getText()); datasourceList.add(oid);
}
} }
return datasourceList; return datasourceList;
} }
private String getOpenaireId(Node el, String prefix) {
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
if (n.valueOf("./@name").equals("openaireId")) {
return prefix + "|" + n.getText();
}
}
return makeOpenaireId(el, prefix);
}
private String makeOpenaireId(Node el, String prefix) {
String funder = null;
String grantId = null;
String funding = null;
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
switch (n.valueOf("./@name")) {
case "funding":
funding = n.getText();
break;
case "funder":
funder = n.getText();
break;
case "CD_PROJECT_NUMBER":
grantId = n.getText();
break;
}
}
String nsp = null;
switch (funder.toLowerCase()) {
case "ec":
if (funding == null) {
return null;
}
if (funding.toLowerCase().startsWith("h2020")) {
nsp = "corda__h2020::";
} else {
nsp = "corda_______::";
}
break;
case "tubitak":
nsp = "tubitakf____::";
break;
case "dfg":
nsp = "dfgf________::";
break;
default:
nsp = funder.toLowerCase();
for (int i = funder.length(); i < 12; i++)
nsp += "_";
nsp += "::";
}
return prefix + "|" + nsp + DHPUtils.md5(grantId);
}
} }

View File

@ -97,7 +97,7 @@ public class CreateEntityTest {
Assertions.assertEquals(12, riList.size()); Assertions.assertEquals(12, riList.size());
riList.stream().forEach(c -> { riList.stream().forEach(c -> {
switch (c.getOriginalId()) { switch (c.getAcronym()) {
case "mes": case "mes":
Assertions Assertions
.assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY));
@ -115,9 +115,9 @@ public class CreateEntityTest {
String String
.format( .format(
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5(c.getOriginalId())))); DHPUtils.md5(c.getAcronym()))));
Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes"));
Assertions.assertTrue("mes".equals(c.getOriginalId())); Assertions.assertTrue("mes".equals(c.getAcronym()));
break; break;
case "clarin": case "clarin":
Assertions Assertions
@ -130,9 +130,9 @@ public class CreateEntityTest {
String String
.format( .format(
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5(c.getOriginalId())))); DHPUtils.md5(c.getAcronym()))));
Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin"));
Assertions.assertTrue("clarin".equals(c.getOriginalId())); Assertions.assertTrue("clarin".equals(c.getAcronym()));
break; break;
} }
// TODO add check for all the others Entities // TODO add check for all the others Entities

View File

@ -9,11 +9,14 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class CreateRelationTest { public class CreateRelationTest {
@ -203,6 +206,7 @@ public class CreateRelationTest {
" <param name=\"suggestedAcknowledgement\"/>\n" + " <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oac_ni</param>\n" + " <param name=\"zenodoCommunity\">oac_ni</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" + " <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"ni::projects\" label=\"NI Content providers\"/>\n" +
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" + " <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" + " <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" + " <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
@ -437,7 +441,65 @@ public class CreateRelationTest {
" <param name=\"suggestedAcknowledgement\"/>\n" + " <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" + " <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" + " <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\"/>\n" + " <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\">\n" +
" <concept claim=\"false\" id=\"ni::projects::12\" label=\"\">\n" +
" <param name=\"projectfullname\">BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1U24RR025736-01</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::13\" label=\"\">\n" +
" <param name=\"projectfullname\">COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0223843</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::14\" label=\"\">\n" +
" <param name=\"projectfullname\">The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">5R01MH082795-05</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::15\" label=\"\">\n" +
" <param name=\"projectfullname\">Fragmented early life environmental and emotional / cognitive vulnerabilities</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1P50MH096889-01A1</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::16\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancement of the 1000 Functional Connectome Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1R03MH096321-01A1</param>\n" +
" <param name=\"funder\">TUBITAK</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::17\" label=\"\">\n" +
" <param name=\"projectfullname\">CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1131441</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0121950</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Transforming statistical methodology for neuroimaging meta-analysis.</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">100309</param>\n" +
" <param name=\"funder\">WT</param>\n" +
" </concept>\n" +
" </category>" +
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n" " <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
+ +
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" + " <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
@ -566,4 +628,98 @@ public class CreateRelationTest {
tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05")); tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));
} }
@Test
public void test2() {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem
.getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class));
cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));
List<Relation> rList = new ArrayList<>();
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
Assertions.assertEquals(44, rList.size());
Assertions
.assertFalse(
rList
.stream()
.map(r -> r.getSource().getId())
.collect(Collectors.toSet())
.contains(
String
.format(
"%s|%s::%s", Constants.CONTEXT_ID,
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))));
Assertions
.assertEquals(
2,
rList
.stream()
.filter(
r -> r
.getSource()
.getId()
.equals(
String
.format(
"%s|%s::%s", Constants.CONTEXT_ID,
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
.collect(Collectors.toList())
.size());
Assertions
.assertEquals(
2,
rList
.stream()
.filter(
r -> r
.getTarget()
.getId()
.equals(
String
.format(
"%s|%s::%s", Constants.CONTEXT_ID,
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
.collect(Collectors.toList())
.size());
Set<String> tmp = rList
.stream()
.filter(
r -> r
.getSource()
.getId()
.equals(
String
.format(
"%s|%s::%s", Constants.CONTEXT_ID,
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
.map(r -> r.getTarget().getId())
.collect(Collectors.toSet());
Assertions
.assertTrue(
tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") &&
tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4"));
rList.forEach(rel -> {
if (rel.getSource().getId().startsWith("40|")) {
String proj = rel.getSource().getId().substring(3);
Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12);
}
});
}
} }