diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 327c33d6f..8bae191d3 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 873046e08..ad8cd57b4 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 8099a72e4..08f5de9ee 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a700a2918..369e25b24 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c7cb11b08..60e66f45a 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e474f2f3..1909ddcca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.raw.common; +package eu.dnetlib.dhp.common; import java.nio.charset.StandardCharsets; import java.text.Normalizer; diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index fe5d0c431..5e864cf94 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java new file mode 100644 index 000000000..db523ad1a --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.dhp.schema.common; + +import java.util.Comparator; + +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +public class LicenseComparator implements Comparator { + + @Override + public int compare(Qualifier left, Qualifier right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + String lClass = left.getClassid(); + String rClass = right.getClassid(); + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals("OPEN SOURCE")) + return -1; + if (rClass.equals("OPEN SOURCE")) + return 1; + + if (lClass.equals("OPEN")) + return -1; + if (rClass.equals("OPEN")) + return 1; + + if (lClass.equals("6MONTHS")) + return -1; + if (rClass.equals("6MONTHS")) + return 1; + + if (lClass.equals("12MONTHS")) + return -1; + if (rClass.equals("12MONTHS")) + return 1; + + if (lClass.equals("EMBARGO")) + return -1; + if (rClass.equals("EMBARGO")) + return 1; + + if (lClass.equals("RESTRICTED")) + return -1; + if (rClass.equals("RESTRICTED")) + return 1; + + if (lClass.equals("CLOSED")) + return -1; + if (rClass.equals("CLOSED")) + return 1; + + if (lClass.equals("UNKNOWN")) + return -1; + if (rClass.equals("UNKNOWN")) + return 1; + + // Else (but unlikely), lexicographical ordering will do. + return lClass.compareTo(rClass); + } +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java index cc77e1ea0..9d572ee30 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java @@ -8,7 +8,7 @@ public class DataInfo implements Serializable { private Boolean invisible = false; private Boolean inferred; - private Boolean deletedbyinference; + private Boolean deletedbyinference = false; private String trust; private String inferenceprovenance; private Qualifier provenanceaction; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java index 1a85c6842..8358bc4b3 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.Objects; public class Field implements Serializable { @@ -39,6 +40,6 @@ public class Field implements Serializable { if (getClass() != obj.getClass()) return false; Field other = (Field) obj; - return getValue().equals(other.getValue()); + return Objects.equals(getValue(), other.getValue()); } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index 09742748d..2823ee49d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -106,6 +106,7 @@ public abstract class OafEntity extends Oaf implements Serializable { .stream(lists) .filter(Objects::nonNull) .flatMap(List::stream) + .filter(Objects::nonNull) .distinct() .collect(Collectors.toList()); } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java new file mode 100644 index 000000000..c5259d07e --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.schema.oaf; + +import java.io.Serializable; +import java.util.Objects; + +public class Programme implements Serializable { + private String code; + private String description; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + Programme programme = (Programme) o; + return Objects.equals(code, programme.code); + } + + +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java index 924c08cc9..1fcfb305e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java @@ -58,6 +58,8 @@ public class Project extends OafEntity implements Serializable { private Float fundedamount; + private List programme; + public Field getWebsiteurl() { return websiteurl; } @@ -266,6 +268,14 @@ public class Project extends OafEntity implements Serializable { this.fundedamount = fundedamount; } + public List getProgramme() { + return programme; + } + + public void setProgramme(List programme) { + this.programme = programme; + } + @Override public void mergeFrom(OafEntity e) { super.mergeFrom(e); @@ -320,6 +330,9 @@ public class Project extends OafEntity implements Serializable { fundedamount = p.getFundedamount() != null && compareTrust(this, e) < 0 ? p.getFundedamount() : fundedamount; + + programme = mergeLists(programme, p.getProgramme()); + mergeOAFDataInfo(e); } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 11316f36e..9d723f998 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -256,7 +256,25 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); + // merge title lists: main title with higher trust and distinct between the others + StructuredProperty baseMainTitle = null; + if (title != null) { + baseMainTitle = getMainTitle(title); + title.remove(baseMainTitle); + } + + StructuredProperty newMainTitle = null; + if (r.getTitle() != null) { + newMainTitle = getMainTitle(r.getTitle()); + r.getTitle().remove(newMainTitle); + } + + if (newMainTitle != null && compareTrust(this, r) < 0) + baseMainTitle = newMainTitle; + title = mergeLists(title, r.getTitle()); + if (title != null && baseMainTitle != null) + title.add(baseMainTitle); relevantdate = mergeLists(relevantdate, r.getRelevantdate()); @@ -306,4 +324,15 @@ public class Result extends OafEntity implements Serializable { } return a.size() > b.size() ? a : b; } + + private StructuredProperty getMainTitle(List titles) { + // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which + // main title select in the list) + for (StructuredProperty title : titles) { + if (title.getQualifier() != null && title.getQualifier().getClassid() != null) + if (title.getQualifier().getClassid().equals("main title")) + return title; + } + return null; + } } diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 22a81f7da..ec6247102 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 1c5465c14..282ca476d 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-aggregation @@ -38,48 +38,6 @@ ${project.version} - - eu.dnetlib - dnet-actionmanager-common - - - eu.dnetlib - dnet-openaireplus-mapping-utils - - - saxonica - saxon - - - saxonica - saxon-dom - - - jgrapht - jgrapht - - - net.sf.ehcache - ehcache - - - org.springframework - spring-test - - - org.apache.* - * - - - apache - * - - - - - eu.dnetlib - dnet-openaire-data-protos - net.sf.saxon @@ -100,11 +58,15 @@ jaxen + - org.apache.hadoop - hadoop-distcp + org.apache.commons + commons-csv + 1.8 + + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java new file mode 100644 index 000000000..c6dab13a0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -0,0 +1,123 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.HashMap; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import scala.Tuple2; + +public class PrepareProgramme { + + private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareProgramme.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String programmePath = parser.get("programmePath"); + log.info("programmePath {}: ", programmePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + exec(spark, programmePath, outputPath); + }); + } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void exec(SparkSession spark, String programmePath, String outputPath) { + Dataset programme = readPath(spark, programmePath, CSVProgramme.class); + + programme + .toJavaRDD() + .filter(p -> !p.getCode().contains("FP7")) + .mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme)) + .reduceByKey((a, b) -> { + if (StringUtils.isEmpty(a.getShortTitle())) { + if (StringUtils.isEmpty(b.getShortTitle())) { + if (StringUtils.isEmpty(a.getTitle())) { + if (StringUtils.isNotEmpty(b.getTitle())) { + a.setShortTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); + } + } else {// notIsEmpty a.getTitle + if (StringUtils.isEmpty(b.getTitle())) { + a.setShortTitle(a.getTitle()); + } else { + if (b.getLanguage().equalsIgnoreCase("en")) { + a.setShortTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); + } else { + a.setShortTitle(a.getTitle()); + } + } + } + } else {// not isEmpty b.getShortTitle + a.setShortTitle(b.getShortTitle()); + // a.setLanguage(b.getLanguage()); + } + } + return a; + + }) + .map(p -> { + CSVProgramme csvProgramme = p._2(); + if (StringUtils.isEmpty(csvProgramme.getShortTitle())) { + csvProgramme.setShortTitle(csvProgramme.getTitle()); + } + return OBJECT_MAPPER.writeValueAsString(csvProgramme); + }) + .saveAsTextFile(outputPath); + + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java new file mode 100644 index 000000000..3d8226f4d --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -0,0 +1,137 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.*; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import scala.Tuple2; + +public class PrepareProjects { + + private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final HashMap programmeMap = new HashMap<>(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareProjects.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String projectPath = parser.get("projectPath"); + log.info("projectPath {}: ", projectPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final String dbProjectPath = parser.get("dbProjectPath"); + log.info("dbProjectPath {}: ", dbProjectPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + exec(spark, projectPath, dbProjectPath, outputPath); + }); + } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void exec(SparkSession spark, String progjectPath, String dbProjectPath, String outputPath) { + Dataset project = readPath(spark, progjectPath, CSVProject.class); + Dataset dbProjects = readPath(spark, dbProjectPath, ProjectSubset.class); + + dbProjects.joinWith(project, dbProjects.col("code").equalTo(project.col("id")), "left") + .flatMap((FlatMapFunction, CSVProject>) value -> { + Optional csvProject = Optional.ofNullable(value._2()); + if(! csvProject.isPresent()){ + return null; + } + List csvProjectList = new ArrayList<>(); + String[] programme = csvProject.get().getProgramme().split(";"); + Arrays + .stream(programme) + .forEach(p -> { + CSVProject proj = new CSVProject(); + proj.setProgramme(p); + proj.setId(csvProject.get().getId()); + csvProjectList.add(proj); + }); + + return csvProjectList.iterator(); + }, Encoders.bean(CSVProject.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); +// +// .map(value -> { +// Optional csvProject = Optional.ofNullable(value._2()); +// }, Encoders.bean(CSVProject.class)) +// .filter(Objects::nonNull) +// .toJavaRDD() +// .flatMap(p -> { +// List csvProjectList = new ArrayList<>(); +// String[] programme = p.getProgramme().split(";"); +// Arrays +// .stream(programme) +// .forEach(value -> { +// CSVProject csvProject = new CSVProject(); +// csvProject.setProgramme(value); +// csvProject.setId(p.getId()); +// csvProjectList.add(csvProject); +// }); +// +// return csvProjectList.iterator(); +// }) +// .map(p -> OBJECT_MAPPER.writeValueAsString(p)) +// .saveAsTextFile(outputPath); + + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java new file mode 100644 index 000000000..cfbb62f21 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java @@ -0,0 +1,16 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.Serializable; + +public class ProjectSubset implements Serializable { + + private String code; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java new file mode 100644 index 000000000..0015dc60f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -0,0 +1,113 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.DbClient; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.util.Arrays; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; + +public class ReadProjectsFromDB implements Closeable { + + private final DbClient dbClient; + private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private final static String query = "SELECT code " + + "from projects where id like 'corda__h2020%' " ; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ReadProjectsFromDB.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json"))); + + parser.parseArgument(args); + + final String dbUrl = parser.get("postgresUrl"); + final String dbUser = parser.get("postgresUser"); + final String dbPassword = parser.get("postgresPassword"); + final String hdfsPath = parser.get("hdfsPath") ; + final String hdfsNameNode = parser.get("hdfsNameNode"); + + try (final ReadProjectsFromDB rbl = new ReadProjectsFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser, + dbPassword)) { + + log.info("Processing blacklist..."); + rbl.execute(query, rbl::processProjectsEntry); + + } + } + public void execute(final String sql, final Function> producer) throws Exception { + + final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + + dbClient.processResults(sql, consumer); + } + + public List processProjectsEntry(ResultSet rs) { + try { + ProjectSubset p = new ProjectSubset(); + p.setCode(rs.getString("code")); + + return Arrays.asList(p); + + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + protected void writeProject(final ProjectSubset r) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(r)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public ReadProjectsFromDB( + final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) + throws Exception { + + this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + + this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + } + + @Override + public void close() throws IOException { + dbClient.close(); + writer.close(); + } +} + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java new file mode 100644 index 000000000..1023e2d19 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -0,0 +1,161 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Objects; +import java.util.Optional; +import java.util.function.Consumer; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TextOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.rdd.SequenceFileRDDFunctions; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Programme; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Function1; +import scala.Tuple2; +import scala.runtime.BoxedUnit; + +public class SparkAtomicActionJob { + private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final HashMap programmeMap = new HashMap<>(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkAtomicActionJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String projectPath = parser.get("projectPath"); + log.info("projectPath: {}", projectPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final String programmePath = parser.get("programmePath"); + log.info("programmePath {}: ", programmePath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + getAtomicActions( + spark, + projectPath, + programmePath, + outputPath); + }); + } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void getAtomicActions(SparkSession spark, String projectPatH, + String programmePath, + String outputPath) { + + Dataset project = readPath(spark, projectPatH, CSVProject.class); + Dataset programme = readPath(spark, programmePath, CSVProgramme.class); + + project + .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") + .map(c -> { + CSVProject csvProject = c._1(); + Optional csvProgramme = Optional.ofNullable(c._2()); + if (csvProgramme.isPresent()) { + Project p = new Project(); + p + .setId( + createOpenaireId( + ModelSupport.entityIdPrefix.get("project"), + "corda__h2020", csvProject.getId())); + Programme pm = new Programme(); + pm.setCode(csvProject.getProgramme()); + pm.setDescription(csvProgramme.get().getShortTitle()); + p.setProgramme(Arrays.asList(pm)); + return p; + } + + return null; + }, Encoders.bean(Project.class)) + .filter(Objects::nonNull) + .groupByKey( + (MapFunction) p -> p.getId(), + Encoders.STRING()) + .mapGroups((MapGroupsFunction) (s, it) -> { + Project first = it.next(); + it.forEachRemaining(p -> { + first.mergeFrom(p); + }); + return first; + }, Encoders.bean(Project.class)) + .toJavaRDD() + .map(p -> new AtomicAction(Project.class, p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + + public static String createOpenaireId( + final String prefix, final String nsPrefix, final String id) { + + return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(id)); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java new file mode 100644 index 000000000..ef29a6b6a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; +import org.apache.commons.lang.reflect.FieldUtils; + +public class CSVParser { + + public List parse(String csvFile, String classForName) + throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { + final CSVFormat format = CSVFormat.EXCEL + .withHeader() + .withDelimiter(';') + .withQuote('"') + .withTrim(); + List ret = new ArrayList<>(); + final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); + final Set headers = parser.getHeaderMap().keySet(); + Class clazz = Class.forName(classForName); + for (CSVRecord csvRecord : parser.getRecords()) { + final Object cc = clazz.newInstance(); + for (String header : headers) { + FieldUtils.writeField(cc, header, csvRecord.get(header), true); + + } + ret.add((R) cc); + } + + return ret; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java new file mode 100644 index 000000000..a9069e510 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.Serializable; + +public class CSVProgramme implements Serializable { + private String rcn; + private String code; + private String title; + private String shortTitle; + private String language; + + public String getRcn() { + return rcn; + } + + public void setRcn(String rcn) { + this.rcn = rcn; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getShortTitle() { + return shortTitle; + } + + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java new file mode 100644 index 000000000..ff18c6260 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java @@ -0,0 +1,197 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.Serializable; + +public class CSVProject implements Serializable { + private String rcn; + private String id; + private String acronym; + private String status; + private String programme; + private String topics; + private String frameworkProgramme; + private String title; + private String startDate; + private String endDate; + private String projectUrl; + private String objective; + private String totalCost; + private String ecMaxContribution; + private String call; + private String fundingScheme; + private String coordinator; + private String coordinatorCountry; + private String participants; + private String participantCountries; + private String subjects; + + public String getRcn() { + return rcn; + } + + public void setRcn(String rcn) { + this.rcn = rcn; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getAcronym() { + return acronym; + } + + public void setAcronym(String acronym) { + this.acronym = acronym; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + public String getFrameworkProgramme() { + return frameworkProgramme; + } + + public void setFrameworkProgramme(String frameworkProgramme) { + this.frameworkProgramme = frameworkProgramme; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getStartDate() { + return startDate; + } + + public void setStartDate(String startDate) { + this.startDate = startDate; + } + + public String getEndDate() { + return endDate; + } + + public void setEndDate(String endDate) { + this.endDate = endDate; + } + + public String getProjectUrl() { + return projectUrl; + } + + public void setProjectUrl(String projectUrl) { + this.projectUrl = projectUrl; + } + + public String getObjective() { + return objective; + } + + public void setObjective(String objective) { + this.objective = objective; + } + + public String getTotalCost() { + return totalCost; + } + + public void setTotalCost(String totalCost) { + this.totalCost = totalCost; + } + + public String getEcMaxContribution() { + return ecMaxContribution; + } + + public void setEcMaxContribution(String ecMaxContribution) { + this.ecMaxContribution = ecMaxContribution; + } + + public String getCall() { + return call; + } + + public void setCall(String call) { + this.call = call; + } + + public String getFundingScheme() { + return fundingScheme; + } + + public void setFundingScheme(String fundingScheme) { + this.fundingScheme = fundingScheme; + } + + public String getCoordinator() { + return coordinator; + } + + public void setCoordinator(String coordinator) { + this.coordinator = coordinator; + } + + public String getCoordinatorCountry() { + return coordinatorCountry; + } + + public void setCoordinatorCountry(String coordinatorCountry) { + this.coordinatorCountry = coordinatorCountry; + } + + public String getParticipants() { + return participants; + } + + public void setParticipants(String participants) { + this.participants = participants; + } + + public String getParticipantCountries() { + return participantCountries; + } + + public void setParticipantCountries(String participantCountries) { + this.participantCountries = participantCountries; + } + + public String getSubjects() { + return subjects; + } + + public void setSubjects(String subjects) { + this.subjects = subjects; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java new file mode 100644 index 000000000..2b72b229a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java @@ -0,0 +1,97 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class ReadCSV implements Closeable { + private static final Log log = LogFactory.getLog(ReadCSV.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private String csvFile; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ReadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + + log.info("Getting CSV file..."); + readCSV.execute(classForName); + + } + } + + public void execute(final String classForName) throws Exception { + CSVParser csvParser = new CSVParser(); + csvParser + .parse(csvFile, classForName) + .stream() + .forEach(p -> write(p)); + + } + + @Override + public void close() throws IOException { + writer.close(); + } + + public ReadCSV( + final String hdfsPath, + final String hdfsNameNode, + final String fileURL) + throws Exception { + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + HttpConnector httpConnector = new HttpConnector(); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.csvFile = httpConnector.getInputSource(fileURL); + ; + } + + protected void write(final Object p) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(p)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java new file mode 100644 index 000000000..9d3f88265 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.actionmanager.project.httpconnector; + +import java.util.LinkedList; + +public class CollectorPluginErrorLogList extends LinkedList { + + private static final long serialVersionUID = -6925786561303289704L; + + @Override + public String toString() { + String log = new String(); + int index = 0; + for (String errorMessage : this) { + log += String.format("Retry #%s: %s / ", index++, errorMessage); + } + return log; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java new file mode 100644 index 000000000..9167d97b4 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.actionmanager.project.httpconnector; + +public class CollectorServiceException extends Exception { + + private static final long serialVersionUID = 7523999812098059764L; + + public CollectorServiceException(String string) { + super(string); + } + + public CollectorServiceException(String string, Throwable exception) { + super(string, exception); + } + + public CollectorServiceException(Throwable exception) { + super(exception); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java new file mode 100644 index 000000000..e20518b55 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java @@ -0,0 +1,240 @@ + +package eu.dnetlib.dhp.actionmanager.project.httpconnector; + +import java.io.IOException; +import java.io.InputStream; +import java.net.*; +import java.security.GeneralSecurityException; +import java.security.cert.X509Certificate; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.HttpsURLConnection; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * @author jochen, michele, andrea + */ +public class HttpConnector { + + private static final Log log = LogFactory.getLog(HttpConnector.class); + + private int maxNumberOfRetry = 6; + private int defaultDelay = 120; // seconds + private int readTimeOut = 120; // seconds + + private String responseType = null; + + private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + + public HttpConnector() { + CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @return the content of the downloaded resource + * @throws CollectorServiceException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl) throws CollectorServiceException { + return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList()); + } + + /** + * Given the URL returns the content as a stream via HTTP GET + * + * @param requestUrl the URL + * @return the content of the downloaded resource as InputStream + * @throws CollectorServiceException when retrying more than maxNumberOfRetry times + */ + public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException { + return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); + } + + private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, + final CollectorPluginErrorLogList errorList) + throws CollectorServiceException { + try { + InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); + try { + return IOUtils.toString(s); + } catch (IOException e) { + log.error("error while retrieving from http-connection occured: " + requestUrl, e); + Thread.sleep(defaultDelay * 1000); + errorList.add(e.getMessage()); + return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList); + } finally { + IOUtils.closeQuietly(s); + } + } catch (InterruptedException e) { + throw new CollectorServiceException(e); + } + } + + private InputStream attemptDownload(final String requestUrl, final int retryNumber, + final CollectorPluginErrorLogList errorList) + throws CollectorServiceException { + + if (retryNumber > maxNumberOfRetry) { + throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); + } + + log.debug("Downloading " + requestUrl + " - try: " + retryNumber); + try { + InputStream input = null; + + try { + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); + urlConn.setInstanceFollowRedirects(false); + urlConn.setReadTimeout(readTimeOut * 1000); + urlConn.addRequestProperty("User-Agent", userAgent); + + if (log.isDebugEnabled()) { + logHeaderFields(urlConn); + } + + int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { + log.warn("waiting and repeating request after " + retryAfter + " sec."); + Thread.sleep(retryAfter * 1000); + errorList.add("503 Service Unavailable"); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) + || (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) { + final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); + log.debug("The requested url has been moved to " + newUrl); + errorList + .add( + String + .format( + "%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), + newUrl)); + urlConn.disconnect(); + return attemptDownload(newUrl, retryNumber + 1, errorList); + } else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) { + log + .error( + String + .format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + Thread.sleep(defaultDelay * 1000); + errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } else { + input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + return input; + } + } catch (IOException e) { + log.error("error while retrieving from http-connection occured: " + requestUrl, e); + Thread.sleep(defaultDelay * 1000); + errorList.add(e.getMessage()); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } + } catch (InterruptedException e) { + throw new CollectorServiceException(e); + } + } + + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { + log.debug("StatusCode: " + urlConn.getResponseMessage()); + + for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { + if (e.getKey() != null) { + for (String v : e.getValue()) { + log.debug(" key: " + e.getKey() + " - value: " + v); + } + } + } + } + + private int obtainRetryAfter(final Map> headerMap) { + for (String key : headerMap.keySet()) { + if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) + && NumberUtils.isCreatable(headerMap.get(key).get(0))) { + return Integer + .parseInt(headerMap.get(key).get(0)) + 10; + } + } + return -1; + } + + private String obtainNewLocation(final Map> headerMap) throws CollectorServiceException { + for (String key : headerMap.keySet()) { + if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { + return headerMap.get(key).get(0); + } + } + throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING"); + } + + /** + * register for https scheme; this is a workaround and not intended for the use in trusted environments + */ + public void initTrustManager() { + final X509TrustManager tm = new X509TrustManager() { + + @Override + public void checkClientTrusted(final X509Certificate[] xcs, final String string) { + } + + @Override + public void checkServerTrusted(final X509Certificate[] xcs, final String string) { + } + + @Override + public X509Certificate[] getAcceptedIssuers() { + return null; + } + }; + try { + final SSLContext ctx = SSLContext.getInstance("TLS"); + ctx.init(null, new TrustManager[] { + tm + }, null); + HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory()); + } catch (GeneralSecurityException e) { + log.fatal(e); + throw new IllegalStateException(e); + } + } + + public int getMaxNumberOfRetry() { + return maxNumberOfRetry; + } + + public void setMaxNumberOfRetry(final int maxNumberOfRetry) { + this.maxNumberOfRetry = maxNumberOfRetry; + } + + public int getDefaultDelay() { + return defaultDelay; + } + + public void setDefaultDelay(final int defaultDelay) { + this.defaultDelay = defaultDelay; + } + + public int getReadTimeOut() { + return readTimeOut; + } + + public void setReadTimeOut(final int readTimeOut) { + this.readTimeOut = readTimeOut; + } + + public String getResponseType() { + return responseType; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json new file mode 100644 index 000000000..a0856e10e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ +"paramName": "pjp", +"paramLongName": "projectPath", +"paramDescription": "the URL from where to get the projects file", +"paramRequired": true +}, +{ +"paramName": "pp", +"paramLongName": "programmePath", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml new file mode 100644 index 000000000..fe82ae194 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml @@ -0,0 +1,54 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml new file mode 100644 index 000000000..ca0a73b97 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -0,0 +1,145 @@ + + + + projectFileURL + the url where to get the projects file + + + + programmeFileURL + the url where to get the programme file + + + + outputPath + path where to store the action set + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV + --hdfsNameNode${nameNode} + --fileURL${projectFileURL} + --hdfsPath${workingDir}/projects + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject + + + + + + + + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV + --hdfsNameNode${nameNode} + --fileURL${programmeFileURL} + --hdfsPath${workingDir}/programme + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme + + + + + + + + eu.dnetlib.dhp.actionmanager.project.ReadProjectsFromDB + --hdfsPath${workingDir}/dbProjects + --hdfsNameNode${nameNode} + --postgresUrl${postgresURL} + --postgresUser${postgresUser} + --postgresPassword${postgresPassword} + + + + + + + + yarn + cluster + PrepareProgramme + eu.dnetlib.dhp.actionmanager.project.PrepareProgramme + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --programmePath${workingDir}/programme + --outputPath${workingDir}/preparedProgramme + + + + + + + + yarn + cluster + PrepareProgramme + eu.dnetlib.dhp.actionmanager.project.PrepareProjects + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/projects + --outputPath${workingDir}/preparedProjects + + + + + + + + yarn + cluster + ProjectProgrammeAS + eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/preparedProjects + --programmePath${workingDir}/preparedProgramme + --outputPath${outputPath} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json new file mode 100644 index 000000000..dd3de70f6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -0,0 +1,29 @@ +[ + + { + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + "paramRequired" : true + }, + { + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true + }, + { + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" : true +} + + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json new file mode 100644 index 000000000..54083e108 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ +"paramName": "pp", +"paramLongName": "programmePath", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json new file mode 100644 index 000000000..49f9c7306 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ +"paramName": "pjp", +"paramLongName": "projectPath", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +}, + { + "paramName": "dbp", + "paramLongName": "dbProjectPath", + "paramDescription": "the path of the project code read from db", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json new file mode 100644 index 000000000..9a2eadaa7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "p", + "paramLongName": "hdfsPath", + "paramDescription": "the path where storing the sequential file", + "paramRequired": true + }, + { + "paramName": "nn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the name node on hdfs", + "paramRequired": true + }, + { + "paramName": "pgurl", + "paramLongName": "postgresUrl", + "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", + "paramRequired": true + }, + { + "paramName": "pguser", + "paramLongName": "postgresUser", + "paramDescription": "postgres user", + "paramRequired": false + }, + { + "paramName": "pgpasswd", + "paramLongName": "postgresPassword", + "paramDescription": "postgres password", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java new file mode 100644 index 000000000..17fdd4511 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVParser; + +public class CSVParserTest { + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + + } + + @Test + public void readProgrammeTest() throws Exception { + + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); + + CSVParser csvParser = new CSVParser(); + + List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme"); + + System.out.println(pl.size()); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java new file mode 100644 index 000000000..7f890a8a3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -0,0 +1,94 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; + +public class PrepareProgrammeTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareProgrammeTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + PrepareProgramme + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz").getPath(), + "-outputPath", + workingDir.toString() + "/preparedProgramme" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedProgramme") + .map(item -> OBJECT_MAPPER.readValue(item, CSVProgramme.class)); + + Assertions.assertEquals(277, tmp.count()); + + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); + + Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java new file mode 100644 index 000000000..73bedb741 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -0,0 +1,96 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; + +public class PrepareProjectTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = PrepareProjectTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(PrepareProjectTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PrepareProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + PrepareProjects + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-projectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/projects_subset.json").getPath(), + "-outputPath", + workingDir.toString() + "/preparedProjects" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedProjects") + .map(item -> OBJECT_MAPPER.readValue(item, CSVProject.class)); + + Assertions.assertEquals(20, tmp.count()); + + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProject.class)); + + Assertions.assertEquals(0, verificationDataset.filter("length(id) = 0").count()); + Assertions.assertEquals(0, verificationDataset.filter("length(programme) = 0").count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java new file mode 100644 index 000000000..64c6ac32f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -0,0 +1,98 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import eu.dnetlib.dhp.schema.action.AtomicAction; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Project; + +public class SparkUpdateProjectTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SparkUpdateProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + SparkAtomicActionJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz") + .getPath(), + "-projectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Project)aa.getPayload())) + ; + + Assertions.assertEquals(14, tmp.count()); + +// Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); +// +// Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java new file mode 100644 index 000000000..51a7019ca --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.actionmanager.project.httpconnector; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.ssl.SSLContextBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class HttpConnectorTest { + + private static final Log log = LogFactory.getLog(HttpConnectorTest.class); + private static HttpConnector connector; + + private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv"; + private static final String URL_MISCONFIGURED_SERVER = "https://www.alexandria.unisg.ch/cgi/oai2?verb=Identify"; + private static final String URL_GOODSNI_SERVER = "https://air.unimi.it/oai/openaire?verb=Identify"; + + private static final SSLContextBuilder sslContextBuilder = new SSLContextBuilder(); + private static SSLConnectionSocketFactory sslSocketFactory; + + @BeforeAll + public static void setUp() { + connector = new HttpConnector(); + } + + @Test + + public void testGetInputSource() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL)); + } + + @Test + public void testGoodServers() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL_GOODSNI_SERVER)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz new file mode 100644 index 000000000..1afa73061 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json new file mode 100644 index 000000000..b8805b2db --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json @@ -0,0 +1,16 @@ +{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} +{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} +{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":null,"frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv new file mode 100644 index 000000000..6a9c855a0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv @@ -0,0 +1,25 @@ +rcn;code;title;shortTitle;language +664331;H2020-EU.3.3.2.;Un approvisionnement en électricité à faible coût et à faibles émissions de carbone;Low-cost, low-carbon energy supply;fr +664355;H2020-EU.3.3.7.;Absorción por el mercado de la innovación energética - explotación del Programa Energía Inteligente - Europa Europe;Market uptake of energy innovation;es +664323;H2020-EU.3.3.1.;Ridurre il consumo di energia e le emissioni di carbonio grazie all'uso intelligente e sostenibile;Reducing energy consumption and carbon footprint;it +664233;H2020-EU.2.3.2.3.;Wsparcie innowacji rynkowych;Supporting market-driven innovation;pl +664199;H2020-EU.2.1.5.1.;Tecnologías para las fábricas del futuro;Technologies for Factories of the Future;es +664235;H2020-EU.3.;PRIORITÉ «Défis de société»;Societal Challenges;fr +664355;H2020-EU.3.3.7.;"Assorbimento di mercato dell'innovazione energetica - iniziative fondate sul programma ""Energia intelligente - Europa""";Market uptake of energy innovation;it +664355;H2020-EU.3.3.7.;"Markteinführung von Energieinnovationen – Aufbau auf ""Intelligente Energie – Europa";Market uptake of energy innovation;de +664235;H2020-EU.3.;"PRIORIDAD ""Retos de la sociedad""";Societal Challenges;es +664231;H2020-EU.2.3.2.2.;Mejorar la capacidad de innovación de las PYME;Enhancing the innovation capacity of SMEs;es +664223;H2020-EU.2.3.;LIDERAZGO INDUSTRIAL - Innovación en la pequeña y mediana empresa;Innovation in SMEs;es +664323;H2020-EU.3.3.1.;Réduire la consommation d'énergie et l'empreinte carbone en utilisant l'énergie de manière intelligente et durable;Reducing energy consumption and carbon footprint;fr +664323;H2020-EU.3.3.1.;Reducir el consumo de energía y la huella de carbono mediante un uso inteligente y sostenible;Reducing energy consumption and carbon footprint;es +664215;H2020-EU.2.1.6.4.;Beitrag der europäischen Forschung zu internationalen Weltraumpartnerschaften;Research in support of international space partnerships;de +664213;H2020-EU.2.1.6.3.;Permettere lo sfruttamento dei dati spaziali;;it +664213;H2020-EU.2.1.6.3.;Permettre l'exploitation des données spatiales;Enabling exploitation of space data;fr +664231;H2020-EU.2.3.2.2.;Zwiększenie zdolności MŚP pod względem innowacji;Enhancing the innovation capacity of SMEs;pl +664231;H2020-EU.2.3.2.2.;Rafforzare la capacità di innovazione delle PMI;Enhancing the innovation capacity of SMEs;it +664213;H2020-EU.2.1.6.3.;Grundlagen für die Nutzung von Weltraumdaten;Enabling exploitation of space data;de +664211;H2020-EU.2.1.6.2.;Favorecer los avances en las tecnologías espaciales;Enabling advances in space technology;es +664209;H2020-EU.2.1.6.1.;Assurer la compétitivité et l'indépendance de l'Europe et promouvoir l'innovation dans le secteur spatial européen;Competitiveness, non-dependence and innovation;fr +664231;H2020-EU.2.3.2.2.;Renforcement de la capacité d'innovation des PME;Enhancing the innovation capacity of SMEs;fr +664203;H2020-EU.2.1.5.3.;Tecnologías sostenibles, eficientes en su utilización de recursos y de baja emisión de carbono en las industrias de transformación de gran consumo energético;Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries;es +664103;H2020-EU.1.2.1.;FET Open;FET Open;es \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json new file mode 100644 index 000000000..edf83fbc8 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -0,0 +1,16 @@ +{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} +{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} +{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} +{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz new file mode 100644 index 000000000..85440ceed Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz differ diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 37abc22f6..a3cc15b74 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 92289ec2d..91bcb9d1c 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -79,8 +79,6 @@ public class SparkRemoveBlacklistedRelationJob { Dataset inputRelation = readRelations(spark, inputPath); Dataset mergesRelation = readRelations(spark, mergesPath); - log.info("InputRelationCount: {}", inputRelation.count()); - Dataset dedupSource = blackListed .joinWith( mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")), @@ -103,11 +101,6 @@ public class SparkRemoveBlacklistedRelationJob { return c._1(); }, Encoders.bean(Relation.class)); - dedupBL - .write() - .mode(SaveMode.Overwrite) - .json(blacklistPath + "/deduped"); - inputRelation .joinWith( dedupBL, (inputRelation diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index bbfd15674..585848589 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -61,12 +61,6 @@ public class BlackListTest { spark.stop(); } - /* - * String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath = - * parser.get("outputPath"); log.info("outputPath {}: ", outputPath); final String blacklistPath = - * parser.get("hdfsPath"); log.info("blacklistPath {}: ", blacklistPath); final String mergesPath = - * parser.get("mergesPath"); log.info("mergesPath {}: ", mergesPath); - */ @Test public void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 8b7ec3851..0e84d99a4 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 @@ -57,7 +57,7 @@ eu.dnetlib dnet-openaire-broker-common - [1.0.0,2.0.0) + [2.0.0,3.0.0) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java index 29f6cbe3a..0716bd98d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -20,7 +20,7 @@ public enum Topic { // ENRICHMENT MORE ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( "ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( - "ENRICH/MORE/PROJECT"), ENRICH_MORE_SUBJECT_MESHEUROPMC( + "ENRICH/MORE/PROJECT"), ENRICH_MORE_SOFTWARE("ENRICH/MORE/SOFTWARE"), ENRICH_MORE_SUBJECT_MESHEUROPMC( "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( @@ -28,7 +28,21 @@ public enum Topic { "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), // ADDITION - ADD_BY_PROJECT("ADD/BY_PROJECT"); + ADD_BY_PROJECT("ADD/BY_PROJECT"), + + // OTHER RELS + ENRICH_MISSING_PUBLICATION_IS_RELATED_TO( + "ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), ENRICH_MISSING_PUBLICATION_REFERENCES( + "ENRICH/MISSING/PUBLICATION/REFERENCES"), ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY( + "ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), + + ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), ENRICH_MISSING_DATASET_REFERENCES( + "ENRICH/MISSING/DATASET/REFERENCES"), ENRICH_MISSING_DATASET_IS_REFERENCED_BY( + "ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"),; Topic(final String path) { this.path = path; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 43ebd6dd8..d5e577972 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -4,48 +4,101 @@ package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; +import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; public class GenerateEventsApplication { private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); - private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); - private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); - private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); - private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); - private static final UpdateMatcher enrichMissingProject = new EnrichMissingProject(); - private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); - private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); - private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); - private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); - private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + // Simple Matchers + private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); + private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); + private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); + private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); + private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); + private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); + private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); + private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); + private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + + // Advanced matchers + private static final UpdateMatcher>, ?> enrichMissingProject = new EnrichMissingProject(); + private static final UpdateMatcher>, ?> enrichMoreProject = new EnrichMoreProject(); + + private static final UpdateMatcher>, ?> enrichMissingSoftware = new EnrichMissingSoftware(); + private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); + + private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); + + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -68,9 +121,19 @@ public class GenerateEventsApplication { log.info("eventsPath: {}", eventsPath); final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); removeOutputDir(spark, eventsPath); - generateEvents(spark, graphPath, eventsPath); + + final JavaRDD eventsRdd = sc.emptyRDD(); + + eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class)); + + eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class); }); } @@ -79,11 +142,34 @@ public class GenerateEventsApplication { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static void generateEvents(final SparkSession spark, final String graphPath, final String eventsPath) { - // TODO + private static JavaRDD generateSimpleEvents(final SparkSession spark, + final String graphPath, + final Class resultClazz) { + + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + .filter(r -> r.getDataInfo().getDeletedbyinference()); + + final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) + .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); + + final Column c = null; // TODO + + final Dataset aa = results + .joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") + .groupBy(rels.col("target")) + .agg(c) + .filter(x -> x.size() > 1) + // generateSimpleEvents(...) + // flatMap() + // toRdd() + ; + + return null; + } - private List generateEvents(final Result... children) { + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); for (final Result target : children) { @@ -91,7 +177,6 @@ public class GenerateEventsApplication { list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children)); - list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children)); list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children)); @@ -102,4 +187,94 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } + private List generateProjectsEvents(final Collection>> childrenWithProjects) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithProjects) { + list.addAll(enrichMissingProject.searchUpdatesForRecord(target, childrenWithProjects)); + list.addAll(enrichMoreProject.searchUpdatesForRecord(target, childrenWithProjects)); + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithSoftwares) { + list.addAll(enrichMissingSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + list.addAll(enrichMoreSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + } + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generatePublicationRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + + private List generateDatasetRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + + public static Dataset readPath( + final SparkSession spark, + final String inputPath, + final Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java deleted file mode 100644 index b6e5b3b57..000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java +++ /dev/null @@ -1,35 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import eu.dnetlib.broker.objects.Project; -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingProject extends UpdateMatcher { - - public EnrichMissingProject() { - super(true); - } - - @Override - protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); - } - - @Override - public UpdateInfo generateUpdateInfo(final Project highlightValue, - final Result source, - final Result target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PROJECT, - highlightValue, source, target, - (p, prj) -> p.getProjects().add(prj), - prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index b8b6132cd..5bfe108a5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -12,9 +12,8 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Result; -public abstract class UpdateMatcher { +public abstract class UpdateMatcher { private final boolean multipleUpdate; @@ -22,11 +21,11 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final Result res, final Result... others) { + public Collection> searchUpdatesForRecord(final K res, final Collection others) { final Map> infoMap = new HashMap<>(); - for (final Result source : others) { + for (final K source : others) { if (source != res) { for (final UpdateInfo info : findUpdates(source, res)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); @@ -52,13 +51,18 @@ public abstract class UpdateMatcher { } } - protected abstract List> findUpdates(Result source, Result target); + protected abstract List> findUpdates(K source, K target); - protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, final Result source, - final Result target); + protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, + final K source, + final K target); protected static boolean isMissing(final List> list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); } + protected boolean isMissing(final Field field) { + return field == null || StringUtils.isBlank(field.getValue()); + } + } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java new file mode 100644 index 000000000..321fd4318 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingDataset + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + private final Topic topic; + + public AbstractEnrichMissingDataset(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingDatasets = target + .getRight() + .stream() + .map(Dataset::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingDatasets.contains(d.getId())) + .map(ConversionUtils::oafDatasetToBrokerDataset) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java new file mode 100644 index 000000000..74ce761f4 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsReferencedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java new file mode 100644 index 000000000..05a891059 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsRelatedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java new file mode 100644 index 000000000..23bd68fa1 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java new file mode 100644 index 000000000..03160b6f0 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java new file mode 100644 index 000000000..bf1df053d --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetReferences() { + super(Topic.ENRICH_MISSING_DATASET_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java new file mode 100644 index 000000000..461266d56 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingProject + extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMissingProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java new file mode 100644 index 000000000..d9bfb62d5 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreProject extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMoreProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java new file mode 100644 index 000000000..75e77b3c6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingPublication + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + private final Topic topic; + + public AbstractEnrichMissingPublication(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingPublications = target + .getRight() + .stream() + .map(Publication::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingPublications.contains(d.getId())) + .map(ConversionUtils::oafPublicationToBrokerPublication) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getPublications().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java new file mode 100644 index 000000000..73fa8a45f --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsReferencedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java new file mode 100644 index 000000000..361ea3b34 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsRelatedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java new file mode 100644 index 000000000..7e8863b1e --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java new file mode 100644 index 000000000..dc4e51377 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java new file mode 100644 index 000000000..5198098bc --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationReferences() { + super(Topic.ENRICH_MISSING_PUBLICATION_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java similarity index 79% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index 43cf738f8..a418b633e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -1,15 +1,16 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAbstract extends UpdateMatcher { +public class EnrichMissingAbstract extends UpdateMatcher { public EnrichMissingAbstract() { super(false); @@ -24,7 +25,8 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_ABSTRACT, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java similarity index 77% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index beeccdbe8..b5c2f7e72 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,10 +7,11 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAuthorOrcid extends UpdateMatcher> { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { public EnrichMissingAuthorOrcid() { super(true); @@ -24,7 +25,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher @Override public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { + final Result source, + final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index a4a2ea0c6..c7e9dcbc1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,12 +7,13 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingOpenAccess extends UpdateMatcher { +public class EnrichMissingOpenAccess extends UpdateMatcher { public EnrichMissingOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java similarity index 85% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index a8df62541..522d46d40 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,11 +7,12 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPid extends UpdateMatcher { +public class EnrichMissingPid extends UpdateMatcher { public EnrichMissingPid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java similarity index 62% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index e9ec082c4..197ace97c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -1,14 +1,16 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPublicationDate extends UpdateMatcher { +public class EnrichMissingPublicationDate extends UpdateMatcher { public EnrichMissingPublicationDate() { super(false); @@ -16,12 +18,15 @@ public class EnrichMissingPublicationDate extends UpdateMatcher { @Override protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); + if (isMissing(target.getDateofacceptance()) && !isMissing(source.getDateofacceptance())) { + return Arrays.asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target)); + } + return new ArrayList<>(); } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PUBLICATION_DATE, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java new file mode 100644 index 000000000..4fcba43a4 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.simple; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMissingSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMissingSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index 79e9d469b..290bad48b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,13 +8,14 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class EnrichMissingSubject extends UpdateMatcher> { +public class EnrichMissingSubject extends UpdateMatcher> { public EnrichMissingSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 40c9b0500..c376da44d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,12 +7,13 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreOpenAccess extends UpdateMatcher { +public class EnrichMoreOpenAccess extends UpdateMatcher { public EnrichMoreOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java similarity index 87% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 0e7b7766a..2ee327c83 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,11 +7,12 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMorePid extends UpdateMatcher { +public class EnrichMorePid extends UpdateMatcher { public EnrichMorePid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java new file mode 100644 index 000000000..a1affff62 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.simple; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMoreSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMoreSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java similarity index 87% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index e6374479b..b38445e88 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,11 +8,12 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreSubject extends UpdateMatcher> { +public class EnrichMoreSubject extends UpdateMatcher> { public EnrichMoreSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index d61d5bfb7..8e97192bf 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -4,4 +4,6 @@ package eu.dnetlib.dhp.broker.oa.util; public class BrokerConstants { public final static String OPEN_ACCESS = "OPEN"; + public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 2e2ce202a..2f87d0ee7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -7,6 +7,8 @@ import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ConversionUtils { @@ -33,4 +35,15 @@ public class ConversionUtils { return Pair.of(sp.getQualifier().getClassid(), sp.getValue()); } + public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) { + final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset(); + // TODO + return res; + } + + public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) { + final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication(); + // TODO + return res; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index fcc356ac0..44cf9e67c 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java new file mode 100644 index 000000000..ee5fd5165 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -0,0 +1,170 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import java.text.Normalizer; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import com.wcohen.ss.JaroWinkler; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.pace.model.Person; +import scala.Tuple2; + +public class AuthorMerger { + + private static final Double THRESHOLD = 0.95; + + public static List merge(List> authors) { + + authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); + + List author = new ArrayList<>(); + + for (List a : authors) { + author = mergeAuthor(author, a); + } + + return author; + + } + + public static List mergeAuthor(final List a, final List b) { + int pa = countAuthorsPids(a); + int pb = countAuthorsPids(b); + List base, enrich; + int sa = authorsSize(a); + int sb = authorsSize(b); + + if (pa == pb) { + base = sa > sb ? a : b; + enrich = sa > sb ? b : a; + } else { + base = pa > pb ? a : b; + enrich = pa > pb ? b : a; + } + enrichPidFromList(base, enrich); + return base; + } + + private static void enrichPidFromList(List base, List enrich) { + if (base == null || enrich == null) + return; + final Map basePidAuthorMap = base + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .map(p -> new Tuple2<>(pidToComparableString(p), a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + + final List> pidToEnrich = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) + .map(p -> new Tuple2<>(p, a))) + .collect(Collectors.toList()); + + pidToEnrich + .forEach( + a -> { + Optional> simAuthor = base + .stream() + .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) + .max(Comparator.comparing(Tuple2::_1)); + + if (simAuthor.isPresent()) { + double th = THRESHOLD; + // increase the threshold if the surname is too short + if (simAuthor.get()._2().getSurname() != null + && simAuthor.get()._2().getSurname().length() <= 3) + th = 0.99; + + if (simAuthor.get()._1() > th) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + r.getPid().add(a._1()); + } + } + }); + } + + public static String pidToComparableString(StructuredProperty pid) { + return (pid.getQualifier() != null + ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" + : "") + + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + } + + public static int countAuthorsPids(List authors) { + if (authors == null) + return 0; + + return (int) authors.stream().filter(AuthorMerger::hasPid).count(); + } + + private static int authorsSize(List authors) { + if (authors == null) + return 0; + return authors.size(); + } + + private static Double sim(Author a, Author b) { + + final Person pa = parse(a); + final Person pb = parse(b); + + // if both are accurate (e.g. they have name and surname) + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 + + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; + } else { + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + } + } + + private static boolean hasPid(Author a) { + if (a == null || a.getPid() == null || a.getPid().size() == 0) + return false; + return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + } + + private static Person parse(Author author) { + if (StringUtils.isNotBlank(author.getSurname())) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } + + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index fa06424d7..8028d5a94 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,8 +1,10 @@ package eu.dnetlib.dhp.oa.dedup; +import java.io.Serializable; import java.util.Collection; import java.util.Iterator; +import java.util.List; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; @@ -67,16 +69,19 @@ public class DedupRecordFactory { (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, - values) -> entityMerger(key, values, ts, dataInfo), + values) -> entityMerger(key, values, ts, dataInfo, clazz), Encoders.bean(clazz)); } - private static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo) { + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) + throws IllegalAccessException, InstantiationException { - T entity = entities.next()._2(); + T entity = clazz.newInstance(); final Collection dates = Lists.newArrayList(); + final List> authors = Lists.newArrayList(); + entities .forEachRemaining( t -> { @@ -84,17 +89,17 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - Result er = (Result) entity; - er.setAuthor(DedupUtility.mergeAuthor(er.getAuthor(), r1.getAuthor())); - - if (r1.getDateofacceptance() != null) { + if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + authors.add(r1.getAuthor()); + if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); - } } }); + // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); + ((Result) entity).setAuthor(AuthorMerger.merge(authors)); } entity.setId(id); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index d3ae8ee4f..222794d64 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -32,7 +32,6 @@ import eu.dnetlib.pace.model.Person; import scala.Tuple2; public class DedupUtility { - private static final Double THRESHOLD = 0.95; public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -82,58 +81,6 @@ public class DedupUtility { } } - public static List mergeAuthor(final List a, final List b) { - int pa = countAuthorsPids(a); - int pb = countAuthorsPids(b); - List base, enrich; - int sa = authorsSize(a); - int sb = authorsSize(b); - - if (pa == pb) { - base = sa > sb ? a : b; - enrich = sa > sb ? b : a; - } else { - base = pa > pb ? a : b; - enrich = pa > pb ? b : a; - } - enrichPidFromList(base, enrich); - return base; - } - - private static void enrichPidFromList(List base, List enrich) { - if (base == null || enrich == null) - return; - final Map basePidAuthorMap = base - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap(a -> a.getPid().stream().map(p -> new Tuple2<>(p.toComparableString(), a))) - .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); - - final List> pidToEnrich = enrich - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .filter(p -> !basePidAuthorMap.containsKey(p.toComparableString())) - .map(p -> new Tuple2<>(p, a))) - .collect(Collectors.toList()); - - pidToEnrich - .forEach( - a -> { - Optional> simAuhtor = base - .stream() - .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) - .max(Comparator.comparing(Tuple2::_1)); - if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) { - Author r = simAuhtor.get()._2(); - r.getPid().add(a._1()); - } - }); - } - public static String createDedupRecordPath( final String basePath, final String actionSetId, final String entityType) { return String.format("%s/%s/%s_deduprecord", basePath, actionSetId, entityType); @@ -153,65 +100,6 @@ public class DedupUtility { return String.format("%s/%s/%s_mergerel", basePath, actionSetId, entityType); } - private static Double sim(Author a, Author b) { - - final Person pa = parse(a); - final Person pb = parse(b); - - if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler() - .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); - } else { - return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); - } - } - - private static String normalize(final String s) { - return nfd(s) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError - // in case - // of large input strings - .replaceAll("(\\W)+", " ") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } - - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } - - private static Person parse(Author author) { - if (StringUtils.isNotBlank(author.getSurname())) { - return new Person(author.getSurname() + ", " + author.getName(), false); - } else { - return new Person(author.getFullname(), false); - } - } - - private static int countAuthorsPids(List authors) { - if (authors == null) - return 0; - - return (int) authors.stream().filter(DedupUtility::hasPid).count(); - } - - private static int authorsSize(List authors) { - if (authors == null) - return 0; - return authors.size(); - } - - private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) - return false; - return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); - } - public static List getConfigurations(String isLookUpUrl, String orchestrator) throws ISLookUpException, DocumentException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java new file mode 100644 index 000000000..b8ccb038d --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -0,0 +1,160 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.io.Serializable; +import java.nio.file.Paths; +import java.util.*; + +import org.codehaus.jackson.map.ObjectMapper; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; + +public class EntityMergerTest implements Serializable { + + List> publications; + List> publications2; + + String testEntityBasePath; + DataInfo dataInfo; + String dedupId = "dedup_id"; + Publication pub_top; + + @BeforeEach + public void setUp() throws Exception { + + testEntityBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) + .toFile() + .getAbsolutePath(); + + publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); + publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class); + + pub_top = getTopPub(publications); + + dataInfo = setDI(); + + } + + @Test + public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); + + assertEquals(dedupId, pub_merged.getId()); + + assertEquals(pub_merged.getJournal(), pub_top.getJournal()); + assertEquals(pub_merged.getBestaccessright(), pub_top.getBestaccessright()); + assertEquals(pub_merged.getResulttype(), pub_top.getResulttype()); + assertEquals(pub_merged.getLanguage(), pub_merged.getLanguage()); + assertEquals(pub_merged.getPublisher(), pub_top.getPublisher()); + assertEquals(pub_merged.getEmbargoenddate(), pub_top.getEmbargoenddate()); + assertEquals(pub_merged.getResourcetype().getClassid(), "0004"); + assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation()); + assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance()); + assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection()); + assertEquals(pub_merged.getInstance().size(), 3); + assertEquals(pub_merged.getCountry().size(), 2); + assertEquals(pub_merged.getSubject().size(), 0); + assertEquals(pub_merged.getTitle().size(), 2); + assertEquals(pub_merged.getRelevantdate().size(), 0); + assertEquals(pub_merged.getDescription().size(), 0); + assertEquals(pub_merged.getSource().size(), 0); + assertEquals(pub_merged.getFulltext().size(), 0); + assertEquals(pub_merged.getFormat().size(), 0); + assertEquals(pub_merged.getContributor().size(), 0); + assertEquals(pub_merged.getCoverage().size(), 0); + assertEquals(pub_merged.getContext().size(), 0); + assertEquals(pub_merged.getExternalReference().size(), 0); + assertEquals(pub_merged.getOriginalId().size(), 3); + assertEquals(pub_merged.getCollectedfrom().size(), 3); + assertEquals(pub_merged.getPid().size(), 1); + assertEquals(pub_merged.getExtraInfo().size(), 0); + + // verify datainfo + assertEquals(pub_merged.getDataInfo(), dataInfo); + + // verify datepicker + assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30"); + + // verify authors + assertEquals(pub_merged.getAuthor().size(), 9); + assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); + + // verify title + int count = 0; + for (StructuredProperty title : pub_merged.getTitle()) { + if (title.getQualifier().getClassid().equals("main title")) + count++; + } + assertEquals(count, 1); + } + + @Test + public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); + + assertEquals(pub_merged.getAuthor().size(), 27); + // insert assertions here + + } + + public DataInfo setDI() { + DataInfo dataInfo = new DataInfo(); + dataInfo.setTrust("0.9"); + dataInfo.setDeletedbyinference(false); + dataInfo.setInferenceprovenance("testing"); + dataInfo.setInferred(true); + return dataInfo; + } + + public Publication getTopPub(List> publications) { + + Double maxTrust = 0.0; + Publication maxPub = new Publication(); + for (Tuple2 publication : publications) { + Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); + if (pubTrust > maxTrust) { + maxTrust = pubTrust; + maxPub = publication._2(); + } + } + return maxPub; + } + + public List> readSample(String path, Class clazz) { + List> res = new ArrayList<>(); + BufferedReader reader; + try { + reader = new BufferedReader(new FileReader(path)); + String line = reader.readLine(); + while (line != null) { + res + .add( + new Tuple2<>( + MapDocumentUtil.getJPathString("$.id", line), + new ObjectMapper().readValue(line, clazz))); + // read next line + line = reader.readLine(); + } + reader.close(); + } catch (IOException e) { + e.printStackTrace(); + } + + return res; + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java deleted file mode 100644 index a217a2657..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java +++ /dev/null @@ -1,54 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.codehaus.jackson.map.ObjectMapper; -import org.junit.jupiter.api.BeforeEach; - -import eu.dnetlib.dhp.schema.oaf.Publication; - -public class MergeAuthorTest { - - private List publicationsToMerge; - private final ObjectMapper mapper = new ObjectMapper(); - - @BeforeEach - public void setUp() throws Exception { - final String json = IOUtils - .toString( - this.getClass().getResourceAsStream("/eu/dnetlib/dhp/dedup/json/authors_merge.json")); - - publicationsToMerge = Arrays - .asList(json.split("\n")) - .stream() - .map( - s -> { - try { - return mapper.readValue(s, Publication.class); - } catch (IOException e) { - throw new RuntimeException(e); - } - }) - .collect(Collectors.toList()); - } - - // FIX ME Michele DB this tests doesn't work - // @Test - public void test() throws Exception { - Publication dedup = new Publication(); - - publicationsToMerge - .forEach( - p -> { - dedup.mergeFrom(p); - dedup.setAuthor(DedupUtility.mergeAuthor(dedup.getAuthor(), p.getAuthor())); - }); - - System.out.println(mapper.writeValueAsString(dedup)); - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json new file mode 100644 index 000000000..28548c532 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json @@ -0,0 +1,3 @@ +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": {"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "test title", "classname": "test title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json new file mode 100644 index 000000000..a7937c287 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json @@ -0,0 +1,4 @@ +{"id":"50|doajarticles::842fa3b99fcdccafb4d5c8a815f56efa","dateofcollection":"2020-04-06T12:22:31.216Z","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":null,"fullname":"Seok Joong Yun","name":null,"pid":[],"rank":1,"surname":null},{"affiliation":null,"fullname":"Pildu Jeong","name":null,"pid":[],"rank":2,"surname":null},{"affiliation":null,"fullname":"Ho Won Kang","name":null,"pid":[],"rank":3,"surname":null},{"affiliation":null,"fullname":"Helen Ki Shinn","name":null,"pid":[],"rank":4,"surname":null},{"affiliation":null,"fullname":"Ye-Hwan Kim","name":null,"pid":[],"rank":5,"surname":null},{"affiliation":null,"fullname":"Chunri Yan","name":null,"pid":[],"rank":6,"surname":null},{"affiliation":null,"fullname":"Young-Ki Choi","name":null,"pid":[],"rank":7,"surname":null},{"affiliation":null,"fullname":"Dongho Kim","name":null,"pid":[],"rank":8,"surname":null},{"affiliation":null,"fullname":"Dong Hee Ryu","name":null,"pid":[],"rank":9,"surname":null},{"affiliation":null,"fullname":"Yun-Sok Ha","name":null,"pid":[],"rank":10,"surname":null},{"affiliation":null,"fullname":"Tae-Hwan Kim","name":null,"pid":[],"rank":11,"surname":null},{"affiliation":null,"fullname":"Tae Gyun Kwon","name":null,"pid":[],"rank":12,"surname":null},{"affiliation":null,"fullname":"Jung Min Kim","name":null,"pid":[],"rank":13,"surname":null},{"affiliation":null,"fullname":"Sang Heon Suh","name":null,"pid":[],"rank":14,"surname":null},{"affiliation":null,"fullname":"Seon-Kyu Kim","name":null,"pid":[],"rank":15,"surname":null},{"affiliation":null,"fullname":"Seon-Young Kim","name":null,"pid":[],"rank":16,"surname":null},{"affiliation":null,"fullname":"Sang Tae Kim","name":null,"pid":[],"rank":17,"surname":null},{"affiliation":null,"fullname":"Won Tae Kim","name":null,"pid":[],"rank":18,"surname":null},{"affiliation":null,"fullname":"Ok-Jun Lee","name":null,"pid":[],"rank":19,"surname":null},{"affiliation":null,"fullname":"Sung-Kwon Moon","name":null,"pid":[],"rank":20,"surname":null},{"affiliation":null,"fullname":"Nam-Hyung Kim","name":null,"pid":[],"rank":21,"surname":null},{"affiliation":null,"fullname":"Isaac Yi Kim","name":null,"pid":[],"rank":22,"surname":null},{"affiliation":null,"fullname":"Jayoung Kim","name":null,"pid":[],"rank":23,"surname":null},{"affiliation":null,"fullname":"Hee-Jae Cha","name":null,"pid":[],"rank":24,"surname":null},{"affiliation":null,"fullname":"Yung-Hyun Choi","name":null,"pid":[],"rank":25,"surname":null},{"affiliation":null,"fullname":"Eun-Jong Cha","name":null,"pid":[],"rank":26,"surname":null},{"affiliation":null,"fullname":"Wun-Jae Kim","name":null,"pid":[],"rank":27,"surname":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Diseases of the genitourinary system. Urology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"RC870-923"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0","value":"International Neurourology Journal"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.einj.org/upload/pdf/inj-1632552-276.pdf","https://doaj.org/toc/2093-4777","https://doaj.org/toc/2093-6931"]}]} +{"id":"50|od_______267::b5f5da11a8239ef57655cea8675cb466","dateofcollection":"","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmc","classname":"pmc","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"PMC4932644"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmid","classname":"pmid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"27377944"}],"author":[{"affiliation":null,"fullname":"Yun, Seok Joong","name":"Seok Joong","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-7737-4746"}],"rank":1,"surname":"Yun"},{"affiliation":null,"fullname":"Jeong, Pildu","name":"Pildu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-5602-5376"}],"rank":2,"surname":"Jeong"},{"affiliation":null,"fullname":"Kang, Ho Won","name":"Ho Won","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8164-4427"}],"rank":3,"surname":"Kang"},{"affiliation":null,"fullname":"Shinn, Helen Ki","name":"Helen Ki","pid":[],"rank":4,"surname":"Shinn"},{"affiliation":null,"fullname":"Kim, Ye-Hwan","name":"Ye-Hwan","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8676-7119"}],"rank":5,"surname":"Kim"},{"affiliation":null,"fullname":"Yan, Chunri","name":"Chunri","pid":[],"rank":6,"surname":"Yan"},{"affiliation":null,"fullname":"Choi, Young-Ki","name":"Young-Ki","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1894-9869"}],"rank":7,"surname":"Choi"},{"affiliation":null,"fullname":"Kim, Dongho","name":"Dongho","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1409-3311"}],"rank":8,"surname":"Kim"},{"affiliation":null,"fullname":"Ryu, Dong Hee","name":"Dong Hee","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6088-298X"}],"rank":9,"surname":"Ryu"},{"affiliation":null,"fullname":"Ha, Yun-Sok","name":"Yun-Sok","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-3732-9814"}],"rank":10,"surname":"Ha"},{"affiliation":null,"fullname":"Kim, Tae-Hwan","name":"Tae-Hwan","pid":[],"rank":11,"surname":"Kim"},{"affiliation":null,"fullname":"Kwon, Tae Gyun","name":"Tae Gyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4390-0952"}],"rank":12,"surname":"Kwon"},{"affiliation":null,"fullname":"Kim, Jung Min","name":"Jung Min","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6319-0217"}],"rank":13,"surname":"Kim"},{"affiliation":null,"fullname":"Suh, Sang Heon","name":"Sang Heon","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-4560-8880"}],"rank":14,"surname":"Suh"},{"affiliation":null,"fullname":"Kim, Seon-Kyu","name":"Seon-Kyu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4176-5187"}],"rank":15,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Seon-Young","name":"Seon-Young","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1030-7730"}],"rank":16,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Sang Tae","name":"Sang Tae","pid":[],"rank":17,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Won Tae","name":"Won Tae","pid":[],"rank":18,"surname":"Kim"},{"affiliation":null,"fullname":"Lee, Ok-Jun","name":"Ok-Jun","pid":[],"rank":19,"surname":"Lee"},{"affiliation":null,"fullname":"Moon, Sung-Kwon","name":"Sung-Kwon","pid":[],"rank":20,"surname":"Moon"},{"affiliation":null,"fullname":"Kim, Nam-Hyung","name":"Nam-Hyung","pid":[],"rank":21,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Isaac Yi","name":"Isaac Yi","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1967-5281"}],"rank":22,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Jayoung","name":"Jayoung","pid":[],"rank":23,"surname":"Kim"},{"affiliation":null,"fullname":"Cha, Hee-Jae","name":"Hee-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6963-2685"}],"rank":24,"surname":"Cha"},{"affiliation":null,"fullname":"Choi, Yung-Hyun","name":"Yung-Hyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1454-3124"}],"rank":25,"surname":"Choi"},{"affiliation":null,"fullname":"Cha, Eun-Jong","name":"Eun-Jong","pid":[],"rank":26,"surname":"Cha"},{"affiliation":null,"fullname":"Kim, Wun-Jae","name":"Wun-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8060-8926"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Original Article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Fundamental Science for Neurourology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://europepmc.org/articles/PMC4932644"]}]} +{"id":"50|doiboost____::0ca46ff10b2b4c756191719d85302b14","dateofcollection":"2019-02-15","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Seok Joong Yun","name":"Seok Joong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2105974574"}],"rank":1,"surname":"Yun"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Pildu Jeong","name":"Pildu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2041919263"}],"rank":2,"surname":"Jeong"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ho Won Kang","name":"Ho Won","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2164408067"}],"rank":3,"surname":"Kang"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Inha University"}],"fullname":"Helen Ki Shinn","name":"Helen Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2045077081"}],"rank":4,"surname":"Shinn"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ye-Hwan Kim","name":"Ye-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2276303457"}],"rank":5,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Chunri Yan","name":"Chunri","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2186750404"}],"rank":6,"surname":"Yan"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Young-Ki Choi","name":"Young-Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2311466124"}],"rank":7,"surname":"Choi"},{"affiliation":[],"fullname":"Dongho Kim","name":"Dongho","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2644843893"}],"rank":8,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Dong Hee Ryu","name":"Dong Hee","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2117604941"}],"rank":9,"surname":"Ryu"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Yun-Sok Ha","name":"Yun-Sok","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2145233282"}],"rank":10,"surname":"Ha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae-Hwan Kim","name":"Tae-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2509096378"}],"rank":11,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae Gyun Kwon","name":"Tae Gyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"1978978081"}],"rank":12,"surname":"Kwon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Daejeon University"}],"fullname":"Jung Min Kim","name":"Jung Min","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2265841962"}],"rank":13,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"KAIST"}],"fullname":"Sang Heon Suh","name":"Sang Heon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2890693470"}],"rank":14,"surname":"Suh"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Kyu Kim","name":"Seon-Kyu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2162364977"}],"rank":15,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Young Kim","name":"Seon-Young","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2344797375"}],"rank":16,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Seoul National University Bundang Hospital"}],"fullname":"Sang Tae Kim","name":"Sang Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2257827509"}],"rank":17,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Won Tae Kim","name":"Won Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2617237649"}],"rank":18,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ok-Jun Lee","name":"Ok-Jun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2112231548"}],"rank":19,"surname":"Lee"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chung-Ang University"}],"fullname":"Sung-Kwon Moon","name":"Sung-Kwon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2796689429"}],"rank":20,"surname":"Moon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Nam-Hyung Kim","name":"Nam-Hyung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2136287741"}],"rank":21,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Rutgers University"}],"fullname":"Isaac Yi Kim","name":"Isaac Yi","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2015295992"}],"rank":22,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Harvard University"}],"fullname":"Jayoung Kim","name":"Jayoung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2130848131"}],"rank":23,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kosin University"}],"fullname":"Hee-Jae Cha","name":"Hee-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113489867"}],"rank":24,"surname":"Cha"},{"affiliation":[],"fullname":"Yung-Hyun Choi","name":"Yung-Hyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2151282194"}],"rank":25,"surname":"Choi"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Eun-Jong Cha","name":"Eun-Jong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2109572239"}],"rank":26,"surname":"Cha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Wun-Jae Kim","name":"Wun-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113339670"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Purpose:"}],"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} +{"id":"Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue.","dateofcollection":"false\u0004\u0004false\u0004false\u0004\u0005\u0005\u0005\u0004\u00032016-6-30","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":{"classid":"","classname":null,"schemeid":null,"schemename":null},"value":"false"},{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":null,"value":null}],"publisher":{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"value":""},"bestaccessright":{"classid":"","classname":null,"schemeid":null,"schemename":null},"dataInfo":{"deletedbyinference":null,"inferenceprovenance":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::55045bd2a65019fd8e6741a755395c8c\u0005Unknown Repository\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://einj.org/upload/pdf/inj-1632552-276.pdf","inferred":null,"invisible":null,"provenanceaction":{"classid":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes","classname":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\u0005Microsoft Academic Graph","schemeid":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005","schemename":""},"trust":"RESTRICTED\u0005Restricted\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0\u0005International Neurourology Journal\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://dx.doi.org/10.5213/inj.1632552.276"},"collectedfrom":null,"pid":null,"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"description":null,"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index dff376c2d..429c8a648 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c13bec8e6..8454c29a4 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index fe9833e3e..2dc0f2436 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1c65e8ade..4800def0a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -100,6 +101,7 @@ public class SparkBulkTagJob { ResultTagger resultTagger = new ResultTagger(); readPath(spark, inputPath, resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) .map( (MapFunction) value -> resultTagger .enrichContextCriteria( @@ -119,4 +121,17 @@ public class SparkBulkTagJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + // TODO remove this hack as soon as the values fixed by this method will be provided as NON null + private static MapFunction patchResult() { + return (MapFunction) r -> { + if (r.getDataInfo().getDeletedbyinference() == null) { + r.getDataInfo().setDeletedbyinference(false); + } + if (r.getContext() == null) { + r.setContext(new ArrayList<>()); + } + return r; + }; + } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 29ddde15f..844fe2962 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable { p -> { if (p.getSnd() == null) return p.getFst(); - if (((SelectionConstraints) p.getSnd()).verifyCriteria(param)) + if (p.getSnd().verifyCriteria(param)) return p.getFst(); else return null; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 3d0db2063..f54a1ceba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -34,7 +34,7 @@ public class VerbResolver implements Serializable { .collect( Collectors .toMap( - value -> (String) ((ClassInfo) value) + value -> (String) value .getAnnotationInfo() .get(0) .getParameterValues() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 98b573102..f28c5aa06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = ""; - for (String i : whitelist) { - whitelisted += " OR id = '" + i + "'"; + String whitelisted = " d.id = '" + whitelist.get(0) + "'"; + for (int i = 1; i < whitelist.size(); i++) { + whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; + } + + String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + + for (int i = 1; i < allowedtypes.size(); i++) { + allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; } Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); @@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation { relation.createOrReplaceTempView("relation"); organization.createOrReplaceTempView("organization"); - String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " - + "FROM ( SELECT id " - + " FROM datasource " - + " WHERE (datainfo.deletedbyinference = false " - + whitelisted - + ") " - + getConstraintList("datasourcetype.classid = '", allowedtypes) - + ") d " - + "JOIN ( SELECT source, target " - + " FROM relation " - + " WHERE relclass = '" - + ModelConstants.IS_PROVIDED_BY - + "' " - + " AND datainfo.deletedbyinference = false ) rel " - + "ON d.id = rel.source " - + "JOIN (SELECT id, country " - + " FROM organization " - + " WHERE datainfo.deletedbyinference = false " - + " AND length(country.classid) > 0) o " - + "ON o.id = rel.target"; +// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " +// + "FROM ( SELECT id " +// + " FROM datasource " +// + " WHERE (datainfo.deletedbyinference = false " +// + whitelisted +// + ") " +// + getConstraintList("datasourcetype.classid = '", allowedtypes) +// + ") d " +// + "JOIN ( SELECT source, target " +// + " FROM relation " +// + " WHERE relclass = '" +// + ModelConstants.IS_PROVIDED_BY +// + "' " +// + " AND datainfo.deletedbyinference = false ) rel " +// + "ON d.id = rel.source " +// + "JOIN (SELECT id, country " +// + " FROM organization " +// + " WHERE datainfo.deletedbyinference = false " +// + " AND length(country.classid) > 0) o " +// + "ON o.id = rel.target"; + + String query = "SELECT source dataSourceId, " + + "named_struct('classid', country.classid, 'classname', country.classname) country " + + "FROM datasource d " + + "JOIN relation rel " + + "ON d.id = rel.source " + + "JOIN organization o " + + "ON o.id = rel.target " + + "WHERE rel.datainfo.deletedbyinference = false " + + "and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" + + "and o.datainfo.deletedbyinference = false " + + "and length(o.country.classid) > 0 " + + "and (" + allowed + " or " + whitelisted + ")"; spark .sql(query) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 34b376413..8d0d6c48b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.util.ArrayList; +import java.util.Set; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; @@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class PrepareResultCountrySet { private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class); @@ -60,6 +66,7 @@ public class PrepareResultCountrySet { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); getPotentialResultToUpdate( spark, inputPath, @@ -89,10 +96,33 @@ public class PrepareResultCountrySet { spark .sql(RESULT_COUNTRYSET_QUERY) .as(Encoders.bean(ResultCountrySet.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList countryList = a.getCountrySet(); + Set countryCodes = countryList + .stream() + .map(country -> country.getClassid()) + .collect(Collectors.toSet()); + b + .getCountrySet() + .stream() + .forEach(c -> { + if (!countryCodes.contains(c.getClassid())) { + countryList.add(c); + countryCodes.add(c.getClassid()); + } + + }); + a.setCountrySet(countryList); + return a; + }) + .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index bea847ca7..3fc127064 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -121,30 +122,39 @@ public class SparkOrcidToResultFromSemRelJob { } private static void enrichAuthor(Author a, List au) { + PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a)) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) { return; } } } - private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { + private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, + String author_name, + String author_surname) { boolean toaddpid = false; if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { if (StringUtils.isNotEmpty(author.getSurname())) { + author_surname = author.getSurname(); + } + if (StringUtils.isNotEmpty(author_surname)) { if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author.getSurname().trim())) { + .equalsIgnoreCase(author_surname.trim())) { // have the same surname. Check the name if (StringUtils.isNotEmpty(autoritative_author.getName())) { if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author.getName().trim())) { + .equalsIgnoreCase(author_name.trim())) { toaddpid = true; } // they could be differently written (i.e. only the initials of the name @@ -154,7 +164,7 @@ public class SparkOrcidToResultFromSemRelJob { .getName() .trim() .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1f6264c18..0791fd68c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - if (potential_update - .getProjectSet() - .contains(p)) { - potential_update.getProjectSet().remove(p); - } + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5574aad75..750d333e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; @@ -19,6 +20,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; public class PrepareResultCommunitySet { @@ -93,10 +95,24 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(outputPath); } private static MapFunction mapResultCommunityFn( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 723aa8960..09340369d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -53,9 +53,7 @@ public class PrepareResultCommunitySetStep2 { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } + removeOutputDir(spark, outputPath); mergeInfo(spark, inputPath, outputPath); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0ce741b87..ff34bd42a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -84,7 +84,7 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .stream() .forEach( rId -> { - if (organization_list.contains(rId)) { - organization_list.remove(rId); - } + organization_list.remove(rId); }); } String resultId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index 435b76605..cfcccc5f0 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -99,6 +99,7 @@ public class ResultToOrganizationJobTest { .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Assertions.assertEquals(0, tmp.count()); + FileUtils.deleteDirectory(workingDir.toFile()); } /** @@ -171,6 +172,7 @@ public class ResultToOrganizationJobTest { + "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' " + "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test @@ -266,5 +268,6 @@ public class ResultToOrganizationJobTest { "relclass = 'isAuthorInstitutionOf' and " + "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } } diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 62968c410..aee3d27c1 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index b9c4e6c80..ab1e89187 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -10,7 +10,19 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; import java.util.ArrayList; import java.util.Arrays; @@ -18,6 +30,7 @@ import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -25,6 +38,8 @@ import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; +import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -46,10 +61,14 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public abstract class AbstractMdRecordToOafMapper { - protected final Map code2name; + protected final VocabularyGroup vocs; protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -66,8 +85,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); - protected AbstractMdRecordToOafMapper(final Map code2name) { - this.code2name = code2name; + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs) { + this.vocs = vocs; } public List processMdRecord(final String xml) { @@ -75,8 +94,7 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = getProvenanceDatasource( @@ -103,7 +121,7 @@ public abstract class AbstractMdRecordToOafMapper { } } - private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) { + private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); @@ -111,9 +129,7 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - return keyValue( - createOpenaireId(10, dsId, true), - dsName); + return keyValue(createOpenaireId(10, dsId, true), dsName); } protected List createOafs( @@ -211,8 +227,14 @@ public abstract class AbstractMdRecordToOafMapper { return res; } - protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, - KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { + protected Relation getRelation(final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); @@ -243,10 +265,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); - r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + r.setPid(prepareResultPids(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -269,9 +288,13 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES - r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy)); + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + r.setInstance(instances); + r.setBestaccessright(getBestAccessRights(instances)); } + protected abstract List prepareResultPids(Document doc, DataInfo info); + private List prepareContexts(final Document doc, final DataInfo info) { final List list = new ArrayList<>(); for (final Object o : doc.selectNodes("//oaf:concept")) { @@ -289,7 +312,10 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( - Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); + Document doc, + DataInfo info, + KeyValue collectedfrom, + KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -314,13 +340,16 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareAuthors(Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductTools( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactGroups( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactPersons( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); @@ -329,7 +358,8 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List> prepareSoftwareDocumentationUrls( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); @@ -345,6 +375,34 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); + protected static Qualifier getBestAccessRights(final List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new LicenseComparator()); + + final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } + private Journal prepareJournal(final Document doc, final DataInfo info) { final Node n = doc.selectSingleNode("//oaf:journal"); if (n != null) { @@ -358,29 +416,18 @@ public abstract class AbstractMdRecordToOafMapper { final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); if (StringUtils.isNotBlank(name)) { - return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - null, - null, - info); + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } - protected Qualifier prepareQualifier( - final Node node, final String xpath, final String schemeId, final String schemeName) { - final String classId = node.valueOf(xpath); - final String className = code2name.get(classId); - return qualifier(classId, className, schemeId, schemeName); + protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) { + return prepareQualifier(node.valueOf(xpath).trim(), schemeId); + } + + protected Qualifier prepareQualifier(final String classId, final String schemeId) { + return vocs.getTermAsQualifier(schemeId, classId); } protected List prepareListStructProps( @@ -388,20 +435,40 @@ public abstract class AbstractMdRecordToOafMapper { final String xpath, final String xpathClassId, final String schemeId, - final String schemeName, final DataInfo info) { final List res = new ArrayList<>(); + for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId); - final String className = code2name.get(classId); - res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info)); + final String classId = n.valueOf(xpathClassId).trim(); + res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); + } + return res; + } + + protected List prepareListStructPropsWithValidQualifier( + final Node node, + final String xpath, + final String xpathClassId, + final String schemeId, + final DataInfo info) { + final List res = new ArrayList<>(); + + for (final Object o : node.selectNodes(xpath)) { + final Node n = (Node) o; + final String classId = n.valueOf(xpathClassId).trim(); + if (vocs.termExists(schemeId, classId)) { + res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info)); + } } return res; } protected List prepareListStructProps( - final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { + final Node node, + final String xpath, + final Qualifier qualifier, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -411,19 +478,17 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res .add( structuredProperty( - n.getText(), - n.valueOf("@classid"), - n.valueOf("@classname"), - n.valueOf("@schemeid"), - n.valueOf("@schemename"), - info)); + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -449,8 +514,7 @@ public abstract class AbstractMdRecordToOafMapper { final Node n = doc.selectSingleNode("//oaf:datainfo"); if (n == null) { - return dataInfo( - false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); @@ -464,12 +528,8 @@ public abstract class AbstractMdRecordToOafMapper { final String trust = n.valueOf("./oaf:trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), - trust); + deletedbyinference, inferenceprovenance, inferred, false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { @@ -477,7 +537,9 @@ public abstract class AbstractMdRecordToOafMapper { } protected List> prepareListFields( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { return listFields(info, prepareListString(node, xpath)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 739c7a462..3becdec44 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -4,8 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; -import java.sql.SQLException; -import java.util.*; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -24,10 +26,22 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class GenerateEntitiesApplication { @@ -40,13 +54,12 @@ public class GenerateEntitiesApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); @@ -55,29 +68,28 @@ public class GenerateEntitiesApplication { final String sourcePaths = parser.get("sourcePaths"); final String targetPath = parser.get("targetPath"); - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - final String dbPassword = parser.get("postgresPassword"); + // final String dbUrl = parser.get("postgresUrl"); + // final String dbUser = parser.get("postgresUser"); + // final String dbPassword = parser.get("postgresPassword"); - final Map code2name = loadClassNames(dbUrl, dbUser, dbPassword); + final String isLookupUrl = parser.get("isLookupUrl"); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, targetPath); - generateEntities(spark, code2name, sourcePaths, targetPath); - }); + final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc + + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, targetPath); + generateEntities(spark, vocs, sourcePaths, targetPath); + }); } private static void generateEntities( final SparkSession spark, - final Map code2name, + final VocabularyGroup vocs, final String sourcePaths, final String targetPath) { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final List existingSourcePaths = Arrays .stream(sourcePaths.split(",")) .filter(p -> exists(sc, p)) @@ -94,7 +106,7 @@ public class GenerateEntitiesApplication { sc .sequenceFile(sp, Text.class, Text.class) .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .map(k -> convertToListOaf(k._1(), k._2(), vocs)) .filter(Objects::nonNull) .flatMap(list -> list.iterator())); } @@ -110,7 +122,7 @@ public class GenerateEntitiesApplication { .saveAsTextFile(targetPath, GzipCodec.class); } - private static Oaf merge(Oaf o1, Oaf o2) { + private static Oaf merge(final Oaf o1, final Oaf o2) { if (ModelSupport.isSubClass(o1, OafEntity.class)) { ((OafEntity) o1).mergeFrom((OafEntity) o2); } else if (ModelSupport.isSubClass(o1, Relation.class)) { @@ -122,14 +134,16 @@ public class GenerateEntitiesApplication { } private static List convertToListOaf( - final String id, final String s, final Map code2name) { + final String id, + final String s, + final VocabularyGroup vocs) { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); + return new OafToOafMapper(vocs).processMdRecord(s); case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); + return new OdfToOafMapper(vocs).processMdRecord(s); case "datasource": return Arrays.asList(convertFromJson(s, Datasource.class)); case "organization": @@ -151,29 +165,33 @@ public class GenerateEntitiesApplication { } } - private static Map loadClassNames( - final String dbUrl, final String dbUser, final String dbPassword) throws IOException { + private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - log.info("Loading vocabulary terms from db..."); + final String xquery = IOUtils + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); - final Map map = new HashMap<>(); + final VocabularyGroup vocs = new VocabularyGroup(); - try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { - dbClient - .processResults( - "select code, name from class", - rs -> { - try { - map.put(rs.getString("code"), rs.getString("name")); - } catch (final SQLException e) { - e.printStackTrace(); - } - }); + for (final String s : isLookUpService.quickSearchProfile(xquery)) { + final String[] arr = s.split("@=@"); + if (arr.length == 4) { + final String vocId = arr[0].trim(); + final String vocName = arr[1].trim(); + final String termId = arr[2].trim(); + final String termName = arr[3].trim(); + + if (!vocs.vocabularyExists(vocId)) { + vocs.addVocabulary(vocId, vocName); + } + + vocs.addTerm(vocId, termId, termName); + } } - log.info("Found " + map.size() + " terms."); - - return map; + return vocs; } private static Oaf convertFromJson(final String s, final Class clazz) { @@ -196,7 +214,7 @@ public class GenerateEntitiesApplication { } } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 54594cb80..6f91ce733 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,10 +1,19 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; -import java.util.*; +import java.util.ArrayList; +import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -14,14 +23,22 @@ import org.dom4j.Node; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { - public OafToOafMapper(final Map code2name) { - super(code2name); + public OafToOafMapper(final VocabularyGroup vocs) { + super(vocs); } @Override @@ -39,14 +56,25 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(p.getNormalisedSurname()); } - final String pid = e.attributeValue("nameIdentifier"); - final String pidType = e.attributeValue("nameIdentifierScheme"); + final String pid = e.valueOf("./@nameIdentifier"); + final String type = e + .valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", "") + .replaceAll("_", ""); - if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { - author.setPid(new ArrayList<>()); - author - .getPid() - .add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + author.setPid(new ArrayList<>()); + + if (StringUtils.isNotBlank(pid)) { + if (type.startsWith("ORCID")) { + final String cleanedId = pid + .replaceAll("http://orcid.org/", "") + .replaceAll("https://orcid.org/", ""); + author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); + } } res.add(author); @@ -56,7 +84,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES, DNET_LANGUAGES); + return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES); } @Override @@ -103,29 +131,21 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier( - doc, - "//dr:CobjCategory", - DNET_PUBLICATION_RESOURCE, - DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance - .setProcessingchargeamount( - field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); - List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); + final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance .setUrl( nodes @@ -158,19 +178,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @@ -182,13 +205,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @@ -216,19 +241,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @@ -269,4 +297,10 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return null; // NOT PRESENT IN OAF } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + return prepareListStructPropsWithValidQualifier( + doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 30b980c42..bbd9442e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,16 +4,32 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PART; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; @@ -22,15 +38,14 @@ import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; - public OdfToOafMapper(final Map code2name) { - super(code2name); + public OdfToOafMapper(final VocabularyGroup vocs) { + super(vocs); } @Override @@ -48,7 +63,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final String fullname = n.valueOf("./datacite:creatorName"); author.setFullname(fullname); - PacePerson pp = new PacePerson(fullname, false); + final PacePerson pp = new PacePerson(fullname, false); final String name = n.valueOf("./datacite:givenName"); if (StringUtils.isBlank(name) & pp.isAccurate()) { author.setName(pp.getNormalisedFirstName()); @@ -63,6 +78,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(surname); } + if (StringUtils.isBlank(author.getFullname())) { + author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); + } + author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setPid(preparePids(n, info)); author.setRank(pos++); @@ -74,13 +93,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { - res - .add( - structuredProperty( - ((Node) o).getText(), - prepareQualifier( - (Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), - info)); + + final String id = ((Node) o).getText(); + final String type = ((Node) o) + .valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", "") + .replaceAll("_", ""); + + if (type.startsWith("ORCID")) { + final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + res.add(structuredProperty(id, MAG_PID_TYPE, info)); + } } return res; } @@ -94,22 +121,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier( - doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); final Set url = new HashSet<>(); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { @@ -149,11 +172,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .add( structuredProperty( - ((Node) o).getText(), - "UNKNOWN", - "UNKNOWN", - DNET_DATA_CITE_DATE, - DNET_DATA_CITE_DATE, + ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } } @@ -192,58 +211,57 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES, DNET_LANGUAGES); + return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES); } @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", - info); + doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", - info); + doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); + return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages"); } @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", - info); + doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -264,13 +282,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareField(doc, "//datacite:date[@dateType='Updated']", info); } @@ -346,9 +366,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return prepareQualifier( - doc, - "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", - DNET_DATA_CITE_RESOURCE, - DNET_DATA_CITE_RESOURCE); + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE); } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + final List res = new ArrayList<>(); + res + .addAll( + prepareListStructPropsWithValidQualifier( + doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info)); + res + .addAll( + prepareListStructPropsWithValidQualifier( + doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", DNET_PID_TYPES, info)); + res + .addAll( + prepareListStructPropsWithValidQualifier( + doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", + "@alternateIdentifierType", DNET_PID_TYPES, info)); + return res; + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java new file mode 100644 index 000000000..7714f6d90 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +import java.util.HashMap; +import java.util.Map; + +public class Vocabulary { + + private final String id; + private final String name; + + private final Map terms = new HashMap<>(); + + public Vocabulary(final String id, final String name) { + this.id = id; + this.name = name; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + + protected Map getTerms() { + return terms; + } + + public VocabularyTerm getTerm(final String id) { + return terms.get(id.toLowerCase()); + } + + protected void addTerm(final String id, final String name) { + terms.put(id.toLowerCase(), new VocabularyTerm(id, name)); + } + + protected boolean termExists(final String id) { + return terms.containsKey(id.toLowerCase()); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java new file mode 100644 index 000000000..127f73e22 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -0,0 +1,49 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +import java.util.HashMap; +import java.util.Map; + +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +public class VocabularyGroup { + + private final Map vocs = new HashMap<>(); + + public void addVocabulary(final String id, final String name) { + vocs.put(id.toLowerCase(), new Vocabulary(id, name)); + } + + public void addTerm(final String vocId, final String id, final String name) { + if (vocabularyExists(vocId)) { + vocs.get(vocId.toLowerCase()).addTerm(id, name); + } + } + + public VocabularyTerm getTerm(final String vocId, final String id) { + if (termExists(vocId, id)) { + return vocs.get(vocId.toLowerCase()).getTerm(id); + } else { + return new VocabularyTerm(id, id); + } + } + + public Qualifier getTermAsQualifier(final String vocId, final String id) { + if (termExists(vocId, id)) { + final Vocabulary v = vocs.get(vocId.toLowerCase()); + final VocabularyTerm t = v.getTerm(id); + return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); + } else { + return OafMapperUtils.qualifier(id, id, vocId, vocId); + } + } + + public boolean termExists(final String vocId, final String id) { + return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id); + } + + public boolean vocabularyExists(final String vocId) { + return vocs.containsKey(vocId.toLowerCase()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java new file mode 100644 index 000000000..b3c785923 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java @@ -0,0 +1,22 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class VocabularyTerm { + + private final String id; + private final String name; + + public VocabularyTerm(final String id, final String name) { + this.id = id; + this.name = name; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index 293bf041b..9e3992bcf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -18,22 +18,9 @@ "paramRequired": true }, { - "paramName": "pgurl", - "paramLongName": "postgresUrl", - "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", + "paramName": "islookup", + "paramLongName": "islookup", + "paramDescription": "the url of the ISLookupService", "paramRequired": true - }, - { - "paramName": "pguser", - "paramLongName": "postgresUser", - "paramDescription": "postgres user", - "paramRequired": false - }, - { - "paramName": "pgpasswd", - "paramLongName": "postgresPassword", - "paramDescription": "postgres password", - "paramRequired": false } - ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index fa015499c..f8426c35f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -34,6 +34,10 @@ mongoDb mongo database + + isLookupUrl + the address of the lookUp service + sparkDriverMemory @@ -233,9 +237,7 @@ --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims --targetPath${workingDir}/entities_claim - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} + --islookup${isLookupUrl} @@ -282,9 +284,7 @@ --sourcePaths${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records --targetPath${workingDir}/entities - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} + --islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml index cd0a4025e..f6485ea9c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml @@ -9,17 +9,10 @@ the temporary path to store entities before dispatching - postgresURL - the postgres URL to access to the database - - - postgresUser - the user postgres - - - postgresPassword - the password postgres + isLookupUrl + the address of the lookUp service + sparkDriverMemory memory for driver process @@ -62,9 +55,7 @@ -mt yarn-cluster -s${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records -t${migrationPathStep2}/all_entities - -pgurl${postgresURL} - -pguser${postgresUser} - -pgpasswd${postgresPassword} + --islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index aeb04aff9..d13bd4342 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -22,12 +22,13 @@ SELECT '' AS inferenceprovenance, d.id AS collectedfromid, d.officialname AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, + o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, - ARRAY[]::text[] AS pid + FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) + LEFT OUTER JOIN class cntr ON (cntr.code = o.country) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery new file mode 100644 index 000000000..4938c0aa4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery @@ -0,0 +1,5 @@ +for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') + let $vocid := $x//VOCABULARY_NAME/@code + let $vocname := $x//VOCABULARY_NAME/text() + for $term in ($x//TERM) + return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5a006e351..dad427ce4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -9,7 +9,6 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.List; -import java.util.Map; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -20,25 +19,43 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @ExtendWith(MockitoExtension.class) public class MappersTest { @Mock - private Map code2name; + private VocabularyGroup vocs; @BeforeEach public void setUp() throws Exception { - when(code2name.get(anyString())).thenAnswer(invocation -> invocation.getArgument(0)); + when(vocs.getTermAsQualifier(anyString(), anyString())) + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); + + when(vocs.termExists(anyString(), anyString())).thenReturn(true); + } @Test void testPublication() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); - final List list = new OafToOafMapper(code2name).processMdRecord(xml); + final List list = new OafToOafMapper(vocs).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Publication); @@ -54,13 +71,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(p.getAuthor().size() > 0); - Optional author = p + final Optional author = p .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -68,7 +85,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Votsi,Nefta", author.get().getFullname()); @@ -78,8 +95,23 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); - assertTrue(p.getInstance().size() > 0); + assertTrue(p.getPid().size() > 0); + assertEquals(p.getPid().get(0).getValue(), "10.3897/oneeco.2.e13718"); + assertEquals(p.getPid().get(0).getQualifier().getClassid(), "doi"); + + assertNotNull(p.getInstance()); + assertTrue(p.getInstance().size() > 0); + p + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + + assertNotNull(p.getBestaccessright()); + assertEquals("OPEN", p.getBestaccessright().getClassid()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); @@ -97,6 +129,7 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); + // System.out.println(new ObjectMapper().writeValueAsString(p)); // System.out.println(new ObjectMapper().writeValueAsString(r1)); // System.out.println(new ObjectMapper().writeValueAsString(r2)); } @@ -105,7 +138,7 @@ public class MappersTest { void testDataset() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); - final List list = new OdfToOafMapper(code2name).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Dataset); @@ -121,13 +154,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); - Optional author = d + final Optional author = d .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -135,7 +168,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Baracchini, Theo", author.get().getFullname()); @@ -143,13 +176,13 @@ public class MappersTest { assertEquals("Theo", author.get().getName()); assertEquals(1, author.get().getAffiliation().size()); - Optional> opAff = author + final Optional> opAff = author .get() .getAffiliation() .stream() .findFirst(); assertTrue(opAff.isPresent()); - Field affiliation = opAff.get(); + final Field affiliation = opAff.get(); assertEquals("ISTI-CNR", affiliation.getValue()); assertTrue(d.getSubject().size() > 0); @@ -157,6 +190,16 @@ public class MappersTest { assertTrue(d.getContext().size() > 0); assertTrue(d.getContext().get(0).getId().length() > 0); + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() > 0); + d + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); @@ -177,7 +220,7 @@ public class MappersTest { void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); - final List list = new OdfToOafMapper(code2name).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs).processMdRecord(xml); assertEquals(1, list.size()); assertTrue(list.get(0) instanceof Software); diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index e0ee03660..e0ce739cf 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 39699b3b6..62bf7186c 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index dbdc54fc0..72d68a389 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -3,7 +3,9 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.HashSet; import java.util.Optional; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -19,8 +21,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; +import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; @@ -58,6 +62,8 @@ public class PrepareRelationsJob { public static final int MAX_RELS = 100; + public static final int DEFAULT_NUM_PARTITIONS = 3000; + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -79,6 +85,24 @@ public class PrepareRelationsJob { String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + int relPartitions = Optional + .ofNullable(parser.get("relPartitions")) + .map(Integer::valueOf) + .orElse(DEFAULT_NUM_PARTITIONS); + log.info("relPartitions: {}", relPartitions); + + Set relationFilter = Optional + .ofNullable(parser.get("relationFilter")) + .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) + .orElse(new HashSet<>()); + log.info("relationFilter: {}", relationFilter); + + int maxRelations = Optional + .ofNullable(parser.get("maxRelations")) + .map(Integer::valueOf) + .orElse(MAX_RELS); + log.info("maxRelations: {}", maxRelations); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -86,25 +110,74 @@ public class PrepareRelationsJob { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareRelationsFromPaths(spark, inputRelationsPath, outputPath); + prepareRelationsRDD( + spark, inputRelationsPath, outputPath, relationFilter, relPartitions, maxRelations); }); } - private static void prepareRelationsFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath) { + /** + * Dataset based implementation that prepares the graph relations by limiting the number of outgoing links and + * filtering the relation types according to the given criteria. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + private static void prepareRelations( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, + int maxRelations) { readPathRelation(spark, inputRelationsPath) .filter("dataInfo.deletedbyinference == false") + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) value -> value.getSource(), Encoders.STRING()) .flatMapGroups( (FlatMapGroupsFunction) (key, values) -> Iterators - .limit(values, MAX_RELS), + .limit(values, maxRelations), Encoders.bean(SortableRelation.class)) .write() .mode(SaveMode.Overwrite) .parquet(outputPath); } + /** + * RDD based implementation that prepares the graph relations by limiting the number of outgoing links and filtering + * the relation types according to the given criteria. Moreover, outgoing links kept within the given limit are + * prioritized according to the weights indicated in eu.dnetlib.dhp.oa.provision.model.SortableRelation. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + // TODO work in progress + private static void prepareRelationsRDD( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, int relPartitions, + int maxRelations) { + JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(relPartitions); + RelationPartitioner partitioner = new RelationPartitioner(rels.getNumPartitions()); + + // only consider those that are not virtually deleted + RDD d = rels + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(rel.getRelClass())) + .mapToPair( + (PairFunction) rel -> new Tuple2<>(rel, rel)) + .groupByKey(partitioner) + .map(group -> Iterables.limit(group._2(), maxRelations)) + .flatMap(group -> group.iterator()) + .rdd(); + + spark + .createDataset(d, Encoders.bean(SortableRelation.class)) + .write() + .mode(SaveMode.Overwrite) + .parquet(outputPath); + } + /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text * file, @@ -123,31 +196,6 @@ public class PrepareRelationsJob { Encoders.bean(SortableRelation.class)); } - // TODO work in progress - private static void prepareRelationsRDDFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath, int numPartitions) { - JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(numPartitions); - - RDD d = rels - .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) // only - // consider - // those - // that are not virtually - // deleted - .mapToPair( - (PairFunction) rel -> new Tuple2<>(rel, rel)) - .groupByKey(new RelationPartitioner(rels.getNumPartitions())) - .map(p -> Iterables.limit(p._2(), MAX_RELS)) - .flatMap(p -> p.iterator()) - .rdd(); - - spark - .createDataset(d, Encoders.bean(SortableRelation.class)) - .write() - .mode(SaveMode.Overwrite) - .parquet(outputPath); - } - private static JavaRDD readPathRelationRDD( SparkSession spark, final String inputPath) { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java index 7c866001b..b6571b9bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java @@ -16,10 +16,10 @@ public class SortableRelation extends Relation implements Comparable, static { weights.put("outcome", 0); weights.put("supplement", 1); - weights.put("publicationDataset", 2); + weights.put("affiliation", 2); weights.put("relationship", 3); - weights.put("similarity", 4); - weights.put("affiliation", 5); + weights.put("publicationDataset", 4); + weights.put("similarity", 5); weights.put("provision", 6); weights.put("participation", 7); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 6cb025b4f..21b526ab1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; @@ -95,7 +96,7 @@ public class TemplateFactory { .add("metadata", instancemetadata) .add( "webresources", - webresources + (webresources != null ? webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) .map(w -> getWebResource(w)) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index ce1c71312..f99298130 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -174,6 +174,7 @@ public class XmlRecordFactory implements Serializable { entity .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { entity .getOriginalId() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); } @@ -192,6 +194,7 @@ public class XmlRecordFactory implements Serializable { entity .getPid() .stream() + .filter(Objects::nonNull) .map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p)) .collect(Collectors.toList())); } @@ -213,6 +216,7 @@ public class XmlRecordFactory implements Serializable { r .getTitle() .stream() + .filter(Objects::nonNull) .map(t -> XmlSerializationUtils.mapStructuredProperty("title", t)) .collect(Collectors.toList())); } @@ -225,6 +229,7 @@ public class XmlRecordFactory implements Serializable { r .getAuthor() .stream() + .filter(Objects::nonNull) .map( a -> { final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), + p -> p, + (p1, p2) -> p1)) + .values() .forEach( sp -> { - String pidType = XmlSerializationUtils - .escapeXml( - sp.getQualifier().getClassid()) - .replaceAll("\\W", ""); + String pidType = getAuthorPidType(sp.getQualifier().getClassid()); String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - // ugly hack: some records - // provide swapped pidtype and - // pidvalue + // ugly hack: some records provide swapped pidtype and pidvalue if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); } else { - pidType = pidType.replaceAll("\\W", "").replaceAll("\\d", ""); if (isNotBlank(pidType)) { sb .append( @@ -285,6 +292,7 @@ public class XmlRecordFactory implements Serializable { r .getContributor() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue())) .collect(Collectors.toList())); } @@ -294,6 +302,7 @@ public class XmlRecordFactory implements Serializable { r .getCountry() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.mapQualifier("country", c)) .collect(Collectors.toList())); } @@ -303,6 +312,7 @@ public class XmlRecordFactory implements Serializable { r .getCoverage() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue())) .collect(Collectors.toList())); } @@ -319,6 +329,7 @@ public class XmlRecordFactory implements Serializable { r .getDescription() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue())) .collect(Collectors.toList())); } @@ -333,6 +344,7 @@ public class XmlRecordFactory implements Serializable { r .getSubject() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s)) .collect(Collectors.toList())); } @@ -345,6 +357,7 @@ public class XmlRecordFactory implements Serializable { r .getRelevantdate() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("relevantdate", s)) .collect(Collectors.toList())); } @@ -357,6 +370,7 @@ public class XmlRecordFactory implements Serializable { r .getSource() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue())) .collect(Collectors.toList())); } @@ -366,6 +380,7 @@ public class XmlRecordFactory implements Serializable { r .getFormat() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue())) .collect(Collectors.toList())); } @@ -429,6 +444,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactperson() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue())) .collect(Collectors.toList())); } @@ -439,6 +455,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactgroup() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue())) .collect(Collectors.toList())); } @@ -448,6 +465,7 @@ public class XmlRecordFactory implements Serializable { orp .getTool() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue())) .collect(Collectors.toList())); } @@ -461,6 +479,7 @@ public class XmlRecordFactory implements Serializable { s .getDocumentationUrl() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue())) .collect(Collectors.toList())); } @@ -470,6 +489,7 @@ public class XmlRecordFactory implements Serializable { s .getLicense() .stream() + .filter(Objects::nonNull) .map(l -> XmlSerializationUtils.mapStructuredProperty("license", l)) .collect(Collectors.toList())); } @@ -576,6 +596,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdlanguages() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .collect(Collectors.toList())); } @@ -585,6 +606,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdcontenttypes() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue())) .collect(Collectors.toList())); } @@ -697,6 +719,7 @@ public class XmlRecordFactory implements Serializable { ds .getPolicies() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("policies", kv)) .collect(Collectors.toList())); } @@ -709,6 +732,7 @@ public class XmlRecordFactory implements Serializable { ds .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) .collect(Collectors.toList())); } @@ -735,6 +759,7 @@ public class XmlRecordFactory implements Serializable { o .getAlternativeNames() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue())) .collect(Collectors.toList())); } @@ -744,7 +769,7 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { - metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue())); + metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); } if (o.getEclegalbody() != null) { @@ -776,13 +801,13 @@ public class XmlRecordFactory implements Serializable { .asXmlElement( "echighereducation", o.getEchighereducation().getValue())); } - if (o.getEcinternationalorganization() != null) { + if (o.getEcinternationalorganizationeurinterests() != null) { metadata .add( XmlSerializationUtils .asXmlElement( "ecinternationalorganizationeurinterests", - o.getEcinternationalorganization().getValue())); + o.getEcinternationalorganizationeurinterests().getValue())); } if (o.getEcinternationalorganization() != null) { metadata @@ -862,6 +887,7 @@ public class XmlRecordFactory implements Serializable { p .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subject", sp)) .collect(Collectors.toList())); } @@ -912,7 +938,12 @@ public class XmlRecordFactory implements Serializable { if (p.getFundingtree() != null) { metadata .addAll( - p.getFundingtree().stream().map(ft -> ft.getValue()).collect(Collectors.toList())); + p + .getFundingtree() + .stream() + .filter(Objects::nonNull) + .map(ft -> ft.getValue()) + .collect(Collectors.toList())); } break; @@ -923,6 +954,17 @@ public class XmlRecordFactory implements Serializable { return metadata; } + private String getAuthorPidType(String s) { + return XmlSerializationUtils + .escapeXml(s) + .replaceAll("\\W", "") + .replaceAll("\\d", ""); + } + + private static boolean kvNotBlank(KeyValue kv) { + return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); + } + private void mapDatasourceType(List metadata, final Qualifier dsType) { metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); @@ -960,7 +1002,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } - if (re.getResulttype() != null & re.getResulttype().isBlank()) { + if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); } if (re.getCollectedfrom() != null) { @@ -969,6 +1011,7 @@ public class XmlRecordFactory implements Serializable { re .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -986,10 +1029,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getOfficialname())) { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } - if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) { + if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { mapDatasourceType(metadata, re.getDatasourcetype()); } - if (re.getOpenairecompatibility() != null & !re.getOpenairecompatibility().isBlank()) { + if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils @@ -1006,7 +1049,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } - if (re.getCountry() != null & !re.getCountry().isBlank()) { + if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); } break; @@ -1020,10 +1063,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getAcronym())) { metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); } - if (re.getContracttype() != null & !re.getContracttype().isBlank()) { + if (re.getContracttype() != null && !re.getContracttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype())); } - if (re.getFundingtree() != null & contexts != null) { + if (re.getFundingtree() != null && contexts != null) { metadata .addAll( re @@ -1091,12 +1134,12 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } - if (instance.getCollectedfrom() != null) { + if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields .add( XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } - if (instance.getHostedby() != null) { + if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); } if (instance.getDateofacceptance() != null diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json index bfb248d01..71b2becc4 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json @@ -21,6 +21,18 @@ "paramName": "rp", "paramLongName": "relPartitions", "paramDescription": "number or partitions for the relations Dataset", - "paramRequired": true + "paramRequired": false + }, + { + "paramName": "rf", + "paramLongName": "relationFilter", + "paramDescription": "filter applied reading relations (by relClass)", + "paramRequired": false + }, + { + "paramName": "mr", + "paramLongName": "maxRelations", + "paramDescription": "maximum number of relations allowed for a each entity", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 298ac7589..6983ecf53 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,30 @@ isLookupUrl URL for the isLookup service + + relPartitions + number or partitions for the relations Dataset + + + relationFilter + filter applied reading relations (by relClass) + + + maxRelations + maximum number of relations allowed for a each entity + + + otherDsTypeId + mapping used to populate datasourceTypeUi field + + + format + metadata format name (DMF|TMF) + + + batchSize + number of records to be included in each indexing request + sparkDriverMemoryForJoining @@ -56,6 +80,10 @@ spark2EventLogDir spark 2.* event log dir location + + sparkNetworkTimeout + configures spark.network.timeout + @@ -69,12 +97,16 @@ - + - + - ${wf:conf('reuseRecords') eq false} - ${wf:conf('reuseRecords') eq true} + ${wf:conf('resumeFrom') eq 'prepare_relations'} + ${wf:conf('resumeFrom') eq 'fork_join_related_entities'} + ${wf:conf('resumeFrom') eq 'join_all_entities'} + ${wf:conf('resumeFrom') eq 'adjancency_lists'} + ${wf:conf('resumeFrom') eq 'convert_to_xml'} + ${wf:conf('resumeFrom') eq 'to_solr_index'} @@ -309,7 +341,6 @@ - yarn @@ -419,4 +450,5 @@ + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st index 1d3cffea0..0af39f230 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st @@ -1,3 +1,3 @@ <$name$$if(hasId)$ objidentifier="$id$"$else$$endif$> - $metadata:{$it$}$ + $metadata:{ it | $it$ }$ \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java new file mode 100644 index 000000000..f485ea680 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -0,0 +1,47 @@ +package eu.dnetlib.dhp.oa.provision; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; +import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import org.apache.commons.io.IOUtils; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.SAXReader; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.*; + +public class XmlRecordFactoryTest { + + private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; + + @Test + public void testXMLRecordFactory() throws IOException, DocumentException { + + String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json")); + + assertNotNull(json); + JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); + assertNotNull(je); + + ContextMapper contextMapper = new ContextMapper(); + + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + + System.out.println(doc.asXML()); + + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json new file mode 100644 index 000000000..c51264698 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json @@ -0,0 +1,1551 @@ +{ + "links": [ + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmc", + "classname": "pmc", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "PMC17177" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "11005843" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": null, + "value": "PubMed Central", + "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357" + } + ], + "id": "50|od_______267::4d85ada0191a351f529d1e8ace1a7117", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "The National Academy of Sciences", + "instances": [ + { + "refereed": null, + "hostedby": { + "dataInfo": null, + "value": "Europe PubMed Central", + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c" + }, + "processingchargeamount": null, + "license": null, + "processingchargecurrency": null, + "distributionlocation": "", + "url": [ + "https://europepmc.org/articles/PMC17177/" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "value": "2000-09-26" + }, + "collectedfrom": { + "dataInfo": null, + "value": "PubMed Central", + "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0038", + "classname": "Other literature type", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-09-26", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|od_______267::4d85ada0191a351f529d1e8ace1a7117", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "11005843" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "scholExplorer", + "key": "10|openaire____::e034d6a11054f5ade9221ebac484e864" + } + ], + "id": "50|scholexplore::ef20f9d1cd983037b45cccce4e3f5f8a", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo." + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://www.ncbi.nlm.nih.gov/pubmed/11005843" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "scholExplorer", + "key": "10|openaire____::e034d6a11054f5ade9221ebac484e864" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0000", + "classname": "Unknown", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|scholexplore::ef20f9d1cd983037b45cccce4e3f5f8a", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "ORCID", + "key": "10|openaire____::806360c771262b4d6770e7cdf04b5c5a" + } + ], + "id": "50|orcid_______::631fd913d925af01a94ea70aa3cec3d6", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "2000-01-01" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "ORCID", + "key": "10|openaire____::806360c771262b4d6770e7cdf04b5c5a" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-01-01", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|orcid_______::631fd913d925af01a94ea70aa3cec3d6", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "mag_id", + "classname": "Microsoft Academic Graph Identifier", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "https://academic.microsoft.com/#/detail/145311948" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "urn", + "classname": "urn", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "http://en.wikipedia.org/wiki/Johns_Hopkins_University" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "grid", + "classname": "grid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "grid.21107.35" + } + ], + "projectTitle": null, + "websiteurl": "http://www.jhu.edu/", + "resulttype": null, + "legalname": "Johns Hopkins University", + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "GRID - Global Research Identifier Database", + "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977" + }, + { + "dataInfo": null, + "value": "Registry of Research Data Repository", + "key": "10|openaire____::21f8a223b9925c2f87c404096080b046" + }, + { + "dataInfo": null, + "value": "Research Councils UK", + "key": "10|openaire____::ab2d3310741ea80d3b8726f651502858" + }, + { + "dataInfo": null, + "value": "CORDA - COmmon Research DAta Warehouse", + "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f" + }, + { + "dataInfo": null, + "value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", + "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078" + }, + { + "dataInfo": null, + "value": "NSF - National Science Foundation", + "key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8" + }, + { + "dataInfo": null, + "value": "DOAJ-Articles", + "key": "10|driver______::bee53aa31dc2cbb538c10c2b65fa5824" + }, + { + "dataInfo": null, + "value": "OpenDOAR", + "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb" + } + ], + "id": "20|dedup_wf_001::8c05abe4d8f889305207a845e9e31d9d", + "title": null, + "fundingtree": null, + "contracttype": null, + "type": "organization", + "acronym": null, + "openairecompatibility": null, + "publisher": null, + "instances": null, + "legalshortname": "JHU", + "country": { + "classid": "US", + "classname": "United States", + "schemename": "dnet:countries", + "schemeid": "dnet:countries" + }, + "dateofacceptance": null, + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "affiliation", + "relClass": "hasAuthorInstitution", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "target": "20|dedup_wf_001::8c05abe4d8f889305207a845e9e31d9d", + "lastupdatetimestamp": 0, + "relType": "resultOrganization", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + } + ], + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "10.1073/pnas.200372597" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "UnpayWall", + "key": "10|openaire____::8ac8380272269217cb09a928c8caa993" + } + ], + "id": "50|doiboost____::4317e3fa670267960efa09c1fd8339c9", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "http://www.pnas.org/content/97/21/11198.full.pdf" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "UnpayWall", + "key": "10|openaire____::8ac8380272269217cb09a928c8caa993" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://syndication.highwire.org/content/doi/10.1073/pnas.200372597" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://academic.microsoft.com/#/detail/2045899555" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "http://dx.doi.org/10.1073/pnas.200372597" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + "accessright": { + "classid": "RESTRICTED", + "classname": "Restricted", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-9-26", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|doiboost____::4317e3fa670267960efa09c1fd8339c9", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + } + ], + "entity": { + "deleted": false, + "oaf": "{\"collectedfrom\":[{\"key\":\"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357\",\"value\":\"PubMed Central\",\"dataInfo\":null},{\"key\":\"10|openaire____::e034d6a11054f5ade9221ebac484e864\",\"value\":\"scholExplorer\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\":\"ORCID\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\",\"value\":\"Microsoft Academic Graph\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::8ac8380272269217cb09a928c8caa993\",\"value\":\"UnpayWall\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"dataInfo\":{\"invisible\":false,\"inferred\":true,\"deletedbyinference\":false,\"trust\":\"0.8\",\"inferenceprovenance\":\"decisiontree-dedup-test\",\"provenanceaction\":{\"classid\":\"sysimport:dedup\",\"classname\":\"sysimport:dedup\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}},\"lastupdatetimestamp\":1589967085191,\"id\":\"50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10\",\"originalId\":[\"od_______267::4d85ada0191a351f529d1e8ace1a7117\",\"38908045\",\"10.1073/pnas.200372597\"],\"pid\":[{\"value\":\"PMC17177\",\"qualifier\":{\"classid\":\"pmc\",\"classname\":\"pmc\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"11005843\",\"qualifier\":{\"classid\":\"pmid\",\"classname\":\"pmid\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"10.1073/pnas.200372597\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"dateofcollection\":\"2019-07-01T18:50:57Z\",\"dateoftransformation\":\"\",\"extraInfo\":[],\"oaiprovenance\":{\"originDescription\":{\"harvestDate\":\"2020-05-10T12:23:13.896Z\",\"altered\":true,\"baseURL\":\"mongodb%3A%2F%2Fservices.openaire.eu\",\"identifier\":\"\",\"datestamp\":\"\",\"metadataNamespace\":\"\"}},\"author\":[{\"fullname\":\"S. Kim\",\"name\":\"S.\",\"surname\":\"Kim\",\"rank\":1,\"pid\":[],\"affiliation\":[]},{\"fullname\":\"Q. Li\",\"name\":\"Q.\",\"surname\":\"Li\",\"rank\":2,\"pid\":[{\"value\":\"2719402459\",\"qualifier\":{\"classid\":\"MAG Identifier\",\"classname\":\"MAG Identifier\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null}],\"affiliation\":[{\"value\":\"Johns Hopkins University\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}]},{\"fullname\":\"C. V. Dang\",\"name\":\"C. V.\",\"surname\":\"Dang\",\"rank\":3,\"pid\":[{\"value\":\"2250822210\",\"qualifier\":{\"classid\":\"MAG Identifier\",\"classname\":\"MAG Identifier\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null},{\"value\":\"0000-0002-4031-2522\",\"qualifier\":{\"classid\":\"ORCID\",\"classname\":\"ORCID\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null}],\"affiliation\":[]},{\"fullname\":\"L. A. Lee\",\"name\":\"L. A.\",\"surname\":\"Lee\",\"rank\":4,\"pid\":[],\"affiliation\":[]}],\"resulttype\":{\"classid\":\"publication\",\"classname\":\"publication\",\"schemeid\":\"dnet:result_typologies\",\"schemename\":\"dnet:result_typologies\"},\"language\":{\"classid\":\"eng\",\"classname\":\"English\",\"schemeid\":\"dnet:languages\",\"schemename\":\"dnet:languages\"},\"country\":[],\"subject\":[{\"value\":\"Biological Sciences\",\"qualifier\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"Multidisciplinary\",\"qualifier\":{\"classid\":\"keyword\",\"classname\":\"keyword\",\"schemeid\":\"dnet:subject\",\"schemename\":\"dnet:subject\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"title\":[{\"value\":\"Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo\",\"qualifier\":{\"classid\":\"main title\",\"classname\":\"main title\",\"schemeid\":\"dnet:dataCite_title\",\"schemename\":\"dnet:dataCite_title\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo.\",\"qualifier\":{\"classid\":\"main title\",\"classname\":\"main title\",\"schemeid\":\"dnet:dataCite_title\",\"schemename\":\"dnet:dataCite_title\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"relevantdate\":[{\"value\":\"2018-11-13\",\"qualifier\":{\"classid\":\"dnet:date\",\"classname\":\"dnet:date\",\"schemeid\":\"dnet:date\",\"schemename\":\"dnet:date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-01-01\",\"qualifier\":{\"classid\":\"issued\",\"classname\":\"issued\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-9-26\",\"qualifier\":{\"classid\":\"published-online\",\"classname\":\"published-online\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-10-10\",\"qualifier\":{\"classid\":\"published-print\",\"classname\":\"published-print\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"description\":[{\"value\":\"Overexpression of c-Myc in immortalized cells increases cell\\n proliferation, inhibits cell differentiation, and promotes cell\\n transformation. Recent evidence suggests that these effects, however,\\n do not necessarily occur when c-Myc is overexpressed in primary\\n mammalian cells. We sought to determine the immediate effects of\\n transient overexpression of c-Myc in primary cells in\\n vivo by using recombinant adenovirus to overexpress human\\n MYC in mouse liver. Mice were intravenously injected\\n with adenoviruses encoding MYC (Ad/Myc), E2F-1\\n (Ad/E2F-1), or \u03b2-galactosidase (Ad/LacZ). Transgene expression\\n was detectable 4 days after injection. Expression of ectopic c-Myc was\\n immediately accompanied by enlarged and dysmorphic hepatocytes in the\\n absence of significant cell proliferation or apoptosis. These\\n findings were not present in the livers of mice injected with\\n Ad/E2F-1 or Ad/LacZ. Prominent hepatocyte nuclei and nucleoli were\\n associated with the up-regulation of large- and small-subunit ribosomal\\n and nucleolar genes, suggesting that c-Myc may induce their expression\\n to increase cell mass. Our studies support a role for c-Myc in the\\n in vivo control of vertebrate cell size and metabolism\\n independent of cell proliferation.\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}}],\"dateofacceptance\":{\"value\":\"2000-01-01\",\"dataInfo\":null},\"publisher\":{\"value\":\"The National Academy of Sciences\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},\"embargoenddate\":null,\"source\":[{\"value\":\"Scopus - Elsevier\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"fulltext\":[],\"format\":[],\"contributor\":[],\"resourcetype\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"},\"coverage\":[],\"bestaccessright\":null,\"context\":[],\"externalReference\":[{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"membrane\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"membrane\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"tek\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"tek\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome biogenesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome biogenesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"bn51\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"bn51\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"c-myc\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"c-myc\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"nucleolin\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"nucleolin\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"uptake\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"uptake\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell growth\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell growth\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"\u03b2-galactosidase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"\u03b2-galactosidase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"estrogen receptor\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"estrogen receptor\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell differentiation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell differentiation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pathogenesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pathogenesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"localization\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"localization\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"p19arf\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"p19arf\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"glucose metabolism\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"glucose metabolism\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle regulation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle regulation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mrdb\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mrdb\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"gene expression\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"gene expression\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-2a\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-2a\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclin d2\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclin d2\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pseudouridylation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pseudouridylation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2f-1\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2f-1\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"keratinocyte proliferation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"keratinocyte proliferation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"biosynthesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"biosynthesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleolus\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleolus\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"metabolism\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"metabolism\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"dihydroorotase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"dihydroorotase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"liver regeneration\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"liver regeneration\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome assembly\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome assembly\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"protein translation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"protein translation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"translation initiation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"translation initiation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleus\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleus\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mdm2\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mdm2\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"ornithine decarboxylase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"ornithine decarboxylase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"https://www.targetvalidation.org\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"https://www.targetvalidation.org\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mfl\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mfl\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell proliferation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell proliferation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"chromatin\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"chromatin\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-4e\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-4e\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2a\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2a\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell development\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell development\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"myc\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"myc\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell-cycle phase\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell-cycle phase\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclins d1\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclins d1\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosomal subunit\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosomal subunit\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null}],\"instance\":[{\"license\":null,\"accessright\":{\"classid\":\"OPEN\",\"classname\":\"Open Access\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0038\",\"classname\":\"Other literature type\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c\",\"value\":\"Europe PubMed Central\",\"dataInfo\":null},\"url\":[\"https://europepmc.org/articles/PMC17177/\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357\",\"value\":\"PubMed Central\",\"dataInfo\":null},\"dateofacceptance\":{\"value\":\"2000-09-26\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},\"processingchargeamount\":null,\"processingchargecurrency\":null,\"refereed\":null},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0000\",\"classname\":\"Unknown\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://www.ncbi.nlm.nih.gov/pubmed/11005843\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::e034d6a11054f5ade9221ebac484e864\",\"value\":\"scholExplorer\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"UNKNOWN\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\":\"ORCID\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"2000-01-01\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"OPEN\",\"classname\":\"Open Access\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"http://www.pnas.org/content/97/21/11198.full.pdf\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::8ac8380272269217cb09a928c8caa993\",\"value\":\"UnpayWall\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://syndication.highwire.org/content/doi/10.1073/pnas.200372597\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://academic.microsoft.com/#/detail/2045899555\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\",\"value\":\"Microsoft Academic Graph\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"RESTRICTED\",\"classname\":\"Restricted\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"http://dx.doi.org/10.1073/pnas.200372597\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}}],\"journal\":null}", + "type": "publication", + "id": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10" + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 06408937b..d6ec4e6ab 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index 5f99cdc8d..cb20db57e 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml index 7c918a0d7..487afee4f 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml @@ -521,6 +521,8 @@ { 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', 'workingDir' : '/tmp/beta_provision/working_dir/country', 'allowedtypes' : 'pubsrepository::institutional', 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 271c66939..cf9753da4 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/pom.xml b/pom.xml index 419de3540..e0ee18900 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT pom