Miriam Baglioni 2023-07-17 16:24:57 +02:00
commit 0482648131
58 changed files with 3058 additions and 9 deletions

View File

@ -0,0 +1,5 @@
id name acronym description
04a00617ca659adc944977ac700ea14b Digital Humanities and Cultural Heritage dh-ch This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.
3ee95893613de7450247d7fef747136f DARIAH EU dariah The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents.
5fde864866ea5ded4cc873b3170b63c3 Transport Research beopen Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research. The TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)
aa0e56dd2e9d2a0be749f5debdd2b3d8 Energy Research enermaps <p>EnerMaps Open Data Management Tool aims to&nbsp; <strong>improve data management</strong>&nbsp; and&nbsp; <strong>accessibility</strong>&nbsp; in the field of&nbsp; <strong>energy research</strong>&nbsp; for the&nbsp; <strong>renewable energy industry</strong>.</p> <p>EnerMaps&rsquo; tool accelerates and facilitates the energy transition offering a qualitative and user-friendly digital platform to the energy professionals.</p> <p>The project is based on the&nbsp; <strong>FAIR data principle</strong>&nbsp; which requires data to be&nbsp; <strong>F</strong>indable,&nbsp; <strong>A</strong>ccessible,&nbsp; <strong>I</strong>nteroperable and&nbsp; <strong>R</strong>eusable.</p> <p><strong>EnerMaps project</strong>&nbsp; coordinates and enriches existing energy databases to promote&nbsp; <strong>trans-disciplinary research</strong>&nbsp; and to develop partnerships between researchers and the energy professionals.</p> <p>The EnerMaps&nbsp;project has received funding from the European Union&rsquo;s Horizon 2020 research and innovation program under &nbsp; <a href="https://cordis.europa.eu/project/id/884161?WT.mc_id=RSS-Feed&amp;WT.rss_f=project&amp;WT.rss_a=227144&amp;WT.rss_ev=a" rel="noopener noreferrer" target="_blank">grant agreement N&deg;884161</a>.&nbsp;</p> <p>&nbsp;</p> <p>Website:<a href="https://enermaps.eu/">&nbsp; https://enermaps.eu/&nbsp;</a></p>
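The id column of this community table is not arbitrary: as the QueryInformationSystem and DumpCommunities changes later in this commit show, it is the MD5 digest of the community context id (the acronym column). A minimal, self-contained sketch, assuming DHPUtils.md5 (not shown in this diff) is a plain lowercase-hex MD5; the class name is made up for illustration:

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class CommunityIdSketch {

	// lowercase hex MD5, assumed to behave like DHPUtils.md5
	static String md5(String s) throws Exception {
		MessageDigest md = MessageDigest.getInstance("MD5");
		StringBuilder sb = new StringBuilder();
		for (byte b : md.digest(s.getBytes(StandardCharsets.UTF_8))) {
			sb.append(String.format("%02x", b));
		}
		return sb.toString();
	}

	public static void main(String[] args) throws Exception {
		// expected to reproduce the id column of the dh-ch row above
		System.out.println(md5("dh-ch"));
	}
}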

View File

@ -2,15 +2,22 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -18,6 +25,8 @@ public class QueryInformationSystem {
private ISLookUpService isLookUp;
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " +
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
@ -71,4 +80,31 @@ public class QueryInformationSystem {
return map;
}
public List<String> getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException {
List<String> communities = new ArrayList<>();
for (String xml : isLookUp.quickSearchProfile(toString)) {
log.info(xml);
final Document doc;
final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
doc = reader.read(new StringReader(xml));
Element root = doc.getRootElement();
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(root.attribute("id").getValue()));
builder.append(Constants.SEP);
builder.append(root.attribute("label").getValue());
builder.append(Constants.SEP);
builder.append(root.attribute("id").getValue());
builder.append(Constants.SEP);
builder
.append(
((Node) (root.selectNodes("//description").get(0)))
.getText()
.replace("\n", " ")
.replace("\t", " "));
communities.add(builder.toString());
}
return communities;
}
}

View File

@ -21,6 +21,7 @@ import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.Subject;
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;
import eu.dnetlib.dhp.oa.model.community.CommunityInstance;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;

View File

@ -9,9 +9,9 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
public class SendToZenodoHDFS implements Serializable {
@ -81,8 +81,10 @@ public class SendToZenodoHDFS implements Serializable {
String pString = p.toString();
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen());
try (FSDataInputStream inputStream = fileSystem.open(p)) {
zenodoApiClient.uploadIS(inputStream, name);
}
}
}
@ -90,9 +92,9 @@ public class SendToZenodoHDFS implements Serializable {
zenodoApiClient.sendMretadata(metadata);
}
// if (Boolean.TRUE.equals(publish)) {
// zenodoApiClient.publish();
// }
if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish();
}
}
}

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import java.io.Serializable;
import eu.dnetlib.dhp.utils.DHPUtils;
/**
* @author miriam.baglioni
* @Date 05/05/23
*/
public class AuthorResult implements Serializable {
private String authorId;
private String firstName;
private String lastName;
private String fullName;
private String orcid;
private String resultId;
private String rank;
private Boolean fromOrcid;
public Boolean getFromOrcid() {
return fromOrcid;
}
public void setFromOrcid(Boolean fromOrcid) {
this.fromOrcid = fromOrcid;
}
public String getFullName() {
return fullName;
}
public void setFullName(String fullName) {
this.fullName = fullName;
}
public String getAuthorId() {
return authorId;
}
public void setAuthorId(String authorId) {
this.authorId = authorId;
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public String getRank() {
return rank;
}
public void setRank(String rank) {
this.rank = rank;
}
public String getId() {
return authorId;
}
public void setId(String id) {
this.authorId = id;
}
public String getFirstName() {
return firstName;
}
public void setFirstName(String firstName) {
this.firstName = firstName;
}
public String getLastName() {
return lastName;
}
public void setLastName(String lastName) {
this.lastName = lastName;
}
public String getOrcid() {
return orcid;
}
public void setOrcid(String orcid) {
this.orcid = orcid;
}
public void autosetId() {
if (orcid != null) {
authorId = DHPUtils.md5(orcid);
} else {
authorId = DHPUtils.md5(resultId + rank);
}
}
}
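Illustrative usage of autosetId() above, placed in the same package as AuthorResult and using a hypothetical result id: the author id is md5(orcid) when an ORCID is known, and md5(resultId + rank) otherwise.

public class AuthorIdSketch {

	public static void main(String[] args) {
		AuthorResult withOrcid = new AuthorResult();
		withOrcid.setOrcid("0000-0002-1825-0097"); // example ORCID
		withOrcid.autosetId(); // authorId == DHPUtils.md5("0000-0002-1825-0097")

		AuthorResult withoutOrcid = new AuthorResult();
		withoutOrcid.setResultId("50|someresult::1"); // hypothetical result id
		withoutOrcid.setRank("3");
		withoutOrcid.autosetId(); // authorId == DHPUtils.md5("50|someresult::1" + "3")

		System.out.println(withOrcid.getAuthorId() + " " + withoutOrcid.getAuthorId());
	}
}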

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import org.apache.commons.lang.StringUtils;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 10/05/23
*/
public class Constants implements Serializable {
public final static String SEP = "\t";
public static final String addQuotes(String id) {
if (StringUtils.isNotEmpty(id))
return "\"" + id + "\"";
return id;
}
}
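For reference, a small sketch of how addQuotes behaves on the three kinds of input it can receive; the DOI-like value is purely illustrative.

import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;

public class AddQuotesSketch {

	public static void main(String[] args) {
		System.out.println(Constants.addQuotes("10.1234/abcd")); // prints "10.1234/abcd" wrapped in double quotes
		System.out.println(Constants.addQuotes("")); // prints an empty line: empty strings are not quoted
		System.out.println(Constants.addQuotes(null)); // prints null: null is returned unchanged
	}
}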

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.commons.lang3.StringUtils.split;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
* @author miriam.baglioni
* @Date 09/05/23
*/
//STEP 1
public class DumpCommunities implements Serializable {
private static final Logger log = LoggerFactory.getLogger(DumpCommunities.class);
private final BufferedWriter writer;
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
+ " description \n";
private final transient QueryInformationSystem queryInformationSystem;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
DumpCommunities.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String nameNode = parser.get("nameNode");
log.info("nameNode: {}", nameNode);
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl"));
dc.writeCommunity(communities);
}
private void writeCommunity(List<String> communities)
throws IOException, ISLookUpException, DocumentException, SAXException {
writer.write(HEADER);
writer.flush();
String a = IOUtils
.toString(
DumpCommunities.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq"));
final String xquery = String
.format(
a,
communities
.stream()
.map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t))
.collect(Collectors.joining(" or ")));
for (String community : queryInformationSystem.getCommunityCsv(xquery)) {
writer.write(community);
writer.write("\n");
}
writer.close();
}
public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
final Configuration conf = new Configuration();
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath);
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true);
}
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
}
}
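A minimal sketch of the context filter that writeCommunity() above builds and injects into the set_of_communities.xq template (the template itself is not part of this commit view); the community ids are taken from the TSV at the top of the commit.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class CommunityFilterSketch {

	public static void main(String[] args) {
		List<String> communities = Arrays.asList("dh-ch", "enermaps");
		String filter = communities
			.stream()
			.map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t))
			.collect(Collectors.joining(" or "));
		// $x//CONFIGURATION/context[./@id= 'dh-ch'] or $x//CONFIGURATION/context[./@id= 'enermaps']
		System.out.println(filter);
	}
}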

View File

@ -0,0 +1,365 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.commons.lang3.StringUtils.remove;
import static org.apache.commons.lang3.StringUtils.split;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 04/05/23
*/
//STEP 3
public class SparkDumpResults implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpResults.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkDumpResults.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String resultType = parser.get("resultType");
log.info("resultType: {}", resultType);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String workingPath = parser.get("workingPath");
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
// Utils.removeOutputDir(spark, outputPath);
run(spark, inputPath, inputClazz, resultType, workingPath);
});
}
private static <R extends Result> void run(SparkSession spark, String inputPath,
Class<R> inputClazz, String resultType, String workingPath) {
Dataset<String> resultIds = spark.read().textFile(workingPath + "/resultIds");
// resultIds.foreach((ForeachFunction<String>) r -> System.out.println(r));
Dataset<R> results = Utils
.readPath(spark, inputPath + "/" + resultType, inputClazz)
.filter(
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible());
resultIds
.joinWith(results, resultIds.col("value").equalTo(results.col("id")))
.map((MapFunction<Tuple2<String, R>, R>) t2 -> t2._2(), Encoders.bean(inputClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/" + resultType + "/temp/result");
// map results
results = Utils.readPath(spark, workingPath + "/" + resultType + "/temp/result", inputClazz);
results
.map(
(MapFunction<R, CSVResult>) r -> mapResultInfo(r),
Encoders.bean(CSVResult.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(workingPath + "/" + resultType + "/result");
// map relations between pid and result
results
.flatMap((FlatMapFunction<R, CSVPid>) r -> {
List<CSVPid> pids = new ArrayList<>();
if (Optional.ofNullable(r.getPid()).isPresent() && r.getPid().size() > 0) {
pids.addAll(mapPid(r.getPid(), r.getId()));
}
return pids.iterator();
}, Encoders.bean(CSVPid.class))
.filter(Objects::nonNull)
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(workingPath + "/" + resultType + "/result_pid");
// map authors from the result
// for each author in the result:
// if the author has an ORCID, their id is derived from the ORCID (i.e. md5(orcid))
// if they have no ORCID, their id is built from result_id + author rank (when the rank is missing,
// the author's position within the author list is used instead), again hashed with md5
results
.flatMap((FlatMapFunction<R, AuthorResult>) r -> {
int count = 0;
List<AuthorResult> arl = new ArrayList<>();
Set<String> authorIds = new HashSet<>();
if (Optional.ofNullable(r.getAuthor()).isPresent()) {
for (Author a : r.getAuthor()) {
count += 1;
AuthorResult ar = new AuthorResult();
ar.setResultId(r.getId());
if (Optional.ofNullable(a.getRank()).isPresent()) {
if (a.getRank() > 0) {
ar.setRank(String.valueOf(a.getRank()));
} else {
ar.setRank(String.valueOf(count));
}
}
ar.setFirstName(replace(a.getName()));
ar.setLastName(replace(a.getSurname()));
ar.setFullName(replace(a.getFullname()));
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
if (Optional.ofNullable(orcid).isPresent()) {
ar.setOrcid(orcid._1());
ar.setFromOrcid(orcid._2());
}
ar.autosetId();
if (!authorIds.contains(ar.getAuthorId())) {
arl.add(ar);
authorIds.add(ar.getAuthorId());
}
}
}
return arl.iterator();
}, Encoders.bean(AuthorResult.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/" + resultType + "/temp/authorresult");
Dataset<AuthorResult> authorResult = Utils
.readPath(spark, workingPath + "/" + resultType + "/temp/authorresult", AuthorResult.class);
// map the relation between author and result
authorResult
.map(
(MapFunction<AuthorResult, CSVRelResAut>) ar -> {
CSVRelResAut ret = new CSVRelResAut();
ret.setResult_id(ar.getResultId());
ret.setAuthor_id(ar.getAuthorId());
return ret;
},
Encoders.bean(CSVRelResAut.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(workingPath + "/" + resultType + "/result_author");
// map the authors in the working dir. They must not be repeated: when grouping by author id, the entry
// coming from ORCID is preferred, if any
authorResult
.groupByKey((MapFunction<AuthorResult, String>) ar -> ar.getAuthorId(), Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, AuthorResult, CSVAuthor>) (k, it) -> {
AuthorResult first = it.next();
if (!Optional.ofNullable(first.getFromOrcid()).isPresent() || first.getFromOrcid())
return getAuthorDump(first);
while (it.hasNext()) {
AuthorResult ar = it.next();
if (ar.getFromOrcid())
return getAuthorDump(ar);
}
return getAuthorDump(first);
},
Encoders.bean(CSVAuthor.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(workingPath + "/" + resultType + "/author");
}
private static String replace(String input) {
if (Optional.ofNullable(input).isPresent())
return input.replace("\t", " ").replace("\n", " ").replace("\r", " ").replace("\"", " ");
else
return "";
}
private static List<CSVPid> mapPid(List<StructuredProperty> pid, String resultId) {
return pid
.stream()
.map(p -> p.getQualifier().getClassid().toLowerCase() + "@" + p.getValue().toLowerCase())
.distinct()
.map(p -> {
CSVPid ret = new CSVPid();
ret.setId(DHPUtils.md5(p + "@" + resultId));
ret.setResult_id(resultId);
ret.setPid(split(p, "@")[1]);
ret.setType(split(p, "@")[0]);
return ret;
})
.collect(Collectors.toList());
}
private static CSVAuthor getAuthorDump(AuthorResult ar) {
CSVAuthor ret = new CSVAuthor();
ret.setFirstname(ar.getFirstName());
ret.setId(ar.getAuthorId());
ret.setLastname(ar.getLastName());
ret.setFullname(ar.getFullName());
if (ar.getOrcid() != null) {
ret.setOrcid(ar.getOrcid());
ret.setFromOrcid(ar.getFromOrcid());
} else {
ret.setOrcid("");
}
return ret;
}
private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
private static String getFieldValue(Field<String> input) {
if (input != null &&
StringUtils.isNotEmpty(input.getValue())) {
return removeBreaks(input.getValue());
} else {
return "";
}
}
private static <R extends Result> CSVResult mapResultInfo(R r) {
CSVResult ret = new CSVResult();
ret.setId(removeBreaks(r.getId()));
ret.setType(removeBreaks(r.getResulttype().getClassid()));
ret.setTitle(getTitle(r.getTitle()));
ret.setDescription(getAbstract(r.getDescription()));
ret.setAccessright(removeBreaks(r.getBestaccessright().getClassid()));
ret.setPublication_date(removeBreaks(getFieldValue(r.getDateofacceptance())));
ret.setPublisher(removeBreaks(getFieldValue(r.getPublisher())));
if (Optional.ofNullable(r.getSubject()).isPresent())
ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> {
if (StringUtils.isNotEmpty(s.getValue()))
return removeBreaks(s.getValue().toLowerCase());
else
return null;
}).filter(Objects::nonNull).distinct().collect(Collectors.toList())));
else
ret.setKeywords("");
if (Optional.ofNullable(r.getCountry()).isPresent())
ret
.setCountry(
String.join(", ", r.getCountry().stream().map(Country::getClassid).collect(Collectors.toList())));
else
ret.setCountry("");
if (Optional.ofNullable(r.getLanguage()).isPresent() && StringUtils.isNotEmpty(r.getLanguage().getClassid())) {
ret.setLanguage(r.getLanguage().getClassid());
} else {
ret.setLanguage("");
}
return ret;
}
private static String getAbstract(List<Field<String>> description) {
if (description == null)
return "";
for (Field<String> abs : description) {
if (StringUtils.isNotEmpty(abs.getValue())) {
return removeBreaks(abs.getValue());
}
}
return "";
}
private static String getTitle(List<StructuredProperty> titles) {
String firstTitle = null;
for (StructuredProperty title : titles) {
if (StringUtils.isEmpty(firstTitle)) {
if (StringUtils.isNotEmpty(title.getValue()))
firstTitle = removeBreaks(title.getValue());
}
if (title.getQualifier().getClassid().equals(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) {
if (StringUtils.isNotEmpty(title.getValue()))
return removeBreaks(title.getValue());
}
}
if (firstTitle != null) {
return removeBreaks(firstTitle);
}
return "";
}
private static String removeBreaks(String input) {
return input.replace("\n", " ").replace("\t", " ")
.replace("\r", " ")
.replace("\\\"", " ")
.replace("\"", " ")
;
}
}
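Illustrative only: how mapPid() above derives one result_pid row for a DOI, with a hypothetical result id. The classid and value are lowercased and joined with '@', the row id is the md5 of that string plus the result id, and the pair is then split back into type and pid.

import static org.apache.commons.lang3.StringUtils.split;

import eu.dnetlib.dhp.utils.DHPUtils;

public class PidRowSketch {

	public static void main(String[] args) {
		String resultId = "50|someresult::1"; // hypothetical result id
		String p = "doi" + "@" + "10.1234/abcd"; // lowercased classid @ lowercased value
		String rowId = DHPUtils.md5(p + "@" + resultId); // id column of result_pid
		String type = split(p, "@")[0]; // "doi"
		String pid = split(p, "@")[1]; // "10.1234/abcd"
		System.out.println(rowId + "\t" + resultId + "\t" + pid + "\t" + type);
	}
}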

View File

@ -0,0 +1,133 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* @author miriam.baglioni
* @Date 10/05/23
*/
//STEP 4
public class SparkMoveOnSigleDir implements Serializable {
// All the products saved in different directories are put under the same one.
// For the authors a reconciliation step must also be performed, since the same author id can be saved in more than
// one directory
private static final Logger log = LoggerFactory.getLogger(SparkMoveOnSigleDir.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkMoveOnSigleDir.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
// Utils.removeOutputDir(spark, outputPath);
run(spark, outputPath, workingPath);
});
}
private static <R extends Result> void run(SparkSession spark, String outputPath,
String workingPath) {
Utils
.readPath(spark, workingPath + "/publication/result", CSVResult.class)
.union(Utils.readPath(spark, workingPath + "/dataset/result", CSVResult.class))
.union(Utils.readPath(spark, workingPath + "/software/result", CSVResult.class))
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result", CSVResult.class))
.write()
.mode(SaveMode.Overwrite)
.option("header", "true")
.option("delimiter", Constants.SEP)
.option("compression", "gzip")
.csv(outputPath + "/result");
Utils
.readPath(spark, workingPath + "/publication/result_pid", CSVPid.class)
.union(Utils.readPath(spark, workingPath + "/dataset/result_pid", CSVPid.class))
.union(Utils.readPath(spark, workingPath + "/software/result_pid", CSVPid.class))
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_pid", CSVPid.class))
.write()
.mode(SaveMode.Overwrite)
.option("header", "true")
.option("delimiter", Constants.SEP)
.option("compression", "gzip")
.csv(outputPath + "/result_pid");
Utils
.readPath(spark, workingPath + "/publication/result_author", CSVRelResAut.class)
.union(Utils.readPath(spark, workingPath + "/dataset/result_author", CSVRelResAut.class))
.union(Utils.readPath(spark, workingPath + "/software/result_author", CSVRelResAut.class))
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_author", CSVRelResAut.class))
.write()
.mode(SaveMode.Overwrite)
.option("header", "true")
.option("delimiter", Constants.SEP)
.option("compression", "gzip")
.csv(outputPath + "/result_author");
Utils
.readPath(spark, workingPath + "/publication/author", CSVAuthor.class)
.union(Utils.readPath(spark, workingPath + "/dataset/author", CSVAuthor.class))
.union(Utils.readPath(spark, workingPath + "/software/author", CSVAuthor.class))
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/author", CSVAuthor.class))
.groupByKey((MapFunction<CSVAuthor, String>) r -> r.getId(), Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, CSVAuthor, CSVAuthor>) (k, it) -> it.next(), Encoders.bean(CSVAuthor.class))
.write()
.mode(SaveMode.Overwrite)
.option("header", "true")
.option("delimiter", Constants.SEP)
.option("compression", "gzip")
.csv(outputPath + "/author");
}
}
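A hedged sketch of reading back one of the merged tables written by run() above, assuming a local SparkSession and the same outputPath; the header and tab-delimiter options mirror the write options used in this class.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;

public class ReadBackSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("ReadBackSketch").master("local[*]").getOrCreate();
		String outputPath = args[0]; // same outputPath passed to SparkMoveOnSigleDir
		Dataset<Row> authors = spark
			.read()
			.option("header", "true")
			.option("delimiter", Constants.SEP)
			.csv(outputPath + "/author");
		authors.show(5, false);
		spark.stop();
	}
}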

View File

@ -0,0 +1,227 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVCitation;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRELCommunityResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 04/05/23
*/
//STEP 2
public class SparkSelectResultsAndDumpRelations implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class);
private static String RESULT_COMMUNITY_TABLE = "/result_community";
private static String COMMUNITY_RESULT_IDS = "/communityResultIds";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkSelectResultsAndDumpRelations.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingPath = parser.get("workingPath");
List<String> communityList = null;
Optional<String> communities = Optional.ofNullable(parser.get("communities"));
if (communities.isPresent()) {
communityList = Arrays.asList(communities.get().split(";"));
}
SparkConf conf = new SparkConf();
List<String> finalCommunityList = communityList;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
// Utils.removeOutputDir(spark, outputPath);
run(spark, inputPath, outputPath, workingPath, finalCommunityList);
});
}
private static void run(SparkSession spark, String inputPath, String outputPath,
String workingPath,
List<String> communityList) {
// select the result ids related to the set of communities considered
writeCommunityRelatedIds(
spark, inputPath + "/publication", Publication.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
writeCommunityRelatedIds(
spark, inputPath + "/dataset", Dataset.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
writeCommunityRelatedIds(
spark, inputPath + "/software", Software.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
writeCommunityRelatedIds(
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, communityList,
workingPath + COMMUNITY_RESULT_IDS);
// write the relations result communities
writeCommunityResultRelations(
spark, inputPath + "/publication", Publication.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
writeCommunityResultRelations(
spark, inputPath + "/dataset", Dataset.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
writeCommunityResultRelations(
spark, inputPath + "/software", Software.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
writeCommunityResultRelations(
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, communityList,
outputPath + RESULT_COMMUNITY_TABLE);
// select the relations with semantics cites
org.apache.spark.sql.Dataset<Relation> relations = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
.filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equals(ModelConstants.CITES));
// select the results target of the selected relations having as source one of the results related to the
// communities
org.apache.spark.sql.Dataset<String> resultIds = spark
.read()
.textFile(workingPath + COMMUNITY_RESULT_IDS)
.distinct();
resultIds
.joinWith(relations, resultIds.col("value").equalTo(relations.col("source")), "left")
.flatMap((FlatMapFunction<Tuple2<String, Relation>, String>) t2 -> {
if (Optional.ofNullable(t2._2()).isPresent()) {
return Arrays.asList(t2._1(), t2._2().getTarget()).iterator();
} else {
return Arrays.asList(t2._1()).iterator();
}
}, Encoders.STRING())
.distinct()
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(workingPath + "/resultIds");
resultIds
.joinWith(relations, resultIds.col("value").equalTo(relations.col("source")))
.map(
(MapFunction<Tuple2<String, Relation>, CSVCitation>) t2 -> mapToCitation(t2._2()),
Encoders.bean(CSVCitation.class))
.write()
.option("compression", "gzip")
.option("header", "true")
.option("delimiter", Constants.SEP)
.mode(SaveMode.Overwrite)
.csv(outputPath + "/relation");
}
private static CSVCitation mapToCitation(Relation relation) {
CSVCitation ret = new CSVCitation();
ret.setId(DHPUtils.md5(relation.getSource() + relation.getRelClass().toLowerCase() + relation.getTarget()));
ret.setResult_id_cites(relation.getSource());
ret.setResult_id_cited(relation.getTarget());
return ret;
}
private static <R extends Result> void writeCommunityResultRelations(SparkSession spark, String inputPath,
Class<R> clazz, List<String> communityList, String outputPath) {
Utils
.readPath(spark, inputPath, clazz)
.filter(
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
!p.getDataInfo().getInvisible())
.flatMap((FlatMapFunction<R, CSVRELCommunityResult>) p -> {
Set<String> inserted = new HashSet<>();
List<CSVRELCommunityResult> ret = new ArrayList<>();
for (String context : p
.getContext()
.stream()
.map(Context::getId)
.distinct()
.collect(Collectors.toList())) {
String cId = context.contains("::")
? context.substring(0, context.indexOf("::"))
: context;
if (communityList.contains(cId) && !inserted.contains(cId)) {
CSVRELCommunityResult crc = new CSVRELCommunityResult();
crc.setResult_id(p.getId());
crc.setCommunity_id(DHPUtils.md5(cId));
ret.add(crc);
inserted.add(cId);
}
}
return ret.iterator();
}, Encoders.bean(CSVRELCommunityResult.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(outputPath);
}
private static <R extends Result> void writeCommunityRelatedIds(SparkSession spark, String inputPath,
Class<R> clazz, List<String> communityList, String outputPath) {
Utils
.readPath(spark, inputPath, clazz)
.filter(
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
!p.getDataInfo().getInvisible() &&
isRelatedToCommunities(p, communityList))
.map((MapFunction<R, String>) Result::getId, Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.text(outputPath);
}
private static <R extends Result> boolean isRelatedToCommunities(R p, List<String> communityList) {
return p
.getContext()
.stream()
.anyMatch(
c -> communityList.contains(c.getId()) ||
(c.getId().contains("::")
&& communityList.contains(c.getId().substring(0, c.getId().indexOf("::")))));
}
}
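Illustrative only: the context-id normalisation applied in writeCommunityResultRelations() and isRelatedToCommunities() above, where a concept id (hypothetical here) is reduced to its community prefix before being checked against communityList.

public class ContextPrefixSketch {

	public static void main(String[] args) {
		String context = "dh-ch::subcommunity"; // hypothetical concept id within a community context
		String cId = context.contains("::")
			? context.substring(0, context.indexOf("::")) // -> "dh-ch"
			: context;
		System.out.println(cId);
	}
}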

View File

@ -0,0 +1,68 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVAuthor implements Serializable {
private String id;
private String firstname;
private String lastname;
private String fullname;
private String orcid;
private Boolean fromOrcid;
public Boolean getFromOrcid() {
return fromOrcid;
}
public void setFromOrcid(Boolean fromOrcid) {
this.fromOrcid = fromOrcid;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = Constants.addQuotes(id);
}
public String getFirstname() {
return firstname;
}
public void setFirstname(String firstname) {
this.firstname = Constants.addQuotes(firstname);
}
public String getLastname() {
return lastname;
}
public void setLastname(String lastname) {
this.lastname = Constants.addQuotes(lastname);
}
public String getFullname() {
return fullname;
}
public void setFullname(String fullname) {
this.fullname = Constants.addQuotes(fullname);
}
public String getOrcid() {
return orcid;
}
public void setOrcid(String orcid) {
this.orcid = Constants.addQuotes(orcid);
}
}

View File

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVCitation implements Serializable {
private String id;
private String result_id_cites;
private String result_id_cited;
public String getId() {
return id;
}
public void setId(String id) {
this.id = Constants.addQuotes(id);
}
public String getResult_id_cites() {
return result_id_cites;
}
public void setResult_id_cites(String result_id_cites) {
this.result_id_cites = Constants.addQuotes(result_id_cites);
}
public String getResult_id_cited() {
return result_id_cited;
}
public void setResult_id_cited(String result_id_cited) {
this.result_id_cited = Constants.addQuotes(result_id_cited);
}
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVPid implements Serializable {
private String id;
private String result_id;
private String pid;
private String type;
public String getResult_id() {
return result_id;
}
public void setResult_id(String result_id) {
this.result_id = Constants.addQuotes(result_id);
}
public String getPid() {
return pid;
}
public void setPid(String pid) {
this.pid = Constants.addQuotes(pid);
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = Constants.addQuotes(type);
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = Constants.addQuotes(id);
}
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVRELCommunityResult implements Serializable {
private String result_id;
private String community_id;
public String getResult_id() {
return result_id;
}
public void setResult_id(String result_id) {
this.result_id = Constants.addQuotes(result_id);
}
public String getCommunity_id() {
return community_id;
}
public void setCommunity_id(String community_id) {
this.community_id = Constants.addQuotes(community_id);
}
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVRelResAut implements Serializable {
private String result_id;
private String author_id;
public String getResult_id() {
return result_id;
}
public void setResult_id(String result_id) {
this.result_id = Constants.addQuotes(result_id);
}
public String getAuthor_id() {
return author_id;
}
public void setAuthor_id(String author_id) {
this.author_id = Constants.addQuotes(author_id);
}
}

View File

@ -0,0 +1,116 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import java.io.Serializable;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import org.apache.commons.lang.StringUtils;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class CSVResult implements Serializable {
private String id;
private String type;
private String title;
private String description;
private String accessright;
private String publication_date;
private String publisher;
private String keywords;
private String country;
private String language;
public String getId() {
return id;
}
public void setId(String id) {
this.id = Constants.addQuotes(id);
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = Constants.addQuotes(type);
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = Constants.addQuotes(title);
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = Constants.addQuotes(description);
}
public String getAccessright() {
return accessright;
}
public void setAccessright(String accessright) {
this.accessright = Constants.addQuotes(accessright);
}
public String getPublication_date() {
return publication_date;
}
public void setPublication_date(String publication_date) {
this.publication_date = Constants.addQuotes(publication_date);
}
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = Constants.addQuotes(publisher);
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = Constants.addQuotes(keywords);
}
public String getCountry() {
return country;
}
public void setCountry(String country) {
this.country = Constants.addQuotes(country);
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = Constants.addQuotes(language);
}
}

View File

@ -0,0 +1,241 @@
package eu.dnetlib.dhp.oa.graph.dump.serafeim;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 04/05/23
*/
//STEP 2
public class SparkSelectResultsAndDumpRelations implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class);
private static String RESULT_COMMUNITY_TABLE = "/result_community";
private static String COMMUNITY_RESULT_IDS = "/communityResultIds";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkSelectResultsAndDumpRelations.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingPath = parser.get("workingPath");
List<String> communityList = null;
Optional<String> communities = Optional.ofNullable(parser.get("communities"));
if (communities.isPresent()) {
communityList = Arrays.asList(communities.get().split(";"));
}
SparkConf conf = new SparkConf();
List<String> finalCommunityList = communityList;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
run(spark, inputPath, outputPath, workingPath, finalCommunityList);
});
}
private static void run(SparkSession spark, String inputPath, String outputPath,
String workingPath,
List<String> communityList) {
// select the result ids related to the set of communities considered
writeCommunityRelatedIds(
spark, inputPath, Publication.class, communityList, workingPath, "publication");
writeCommunityRelatedIds(
spark, inputPath, Dataset.class, communityList, workingPath, "dataset");
writeCommunityRelatedIds(
spark, inputPath, Software.class, communityList, workingPath, "software");
writeCommunityRelatedIds(
spark, inputPath, OtherResearchProduct.class, communityList,
workingPath, "otherresearchproduct");
// select the relations with semantics cites
org.apache.spark.sql.Dataset<Relation> relations = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
.filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equals(ModelConstants.CITES));
// select the relations having as source one of the results related to the
// communities
org.apache.spark.sql.Dataset<String> communityResultIds = spark
.read()
.textFile(workingPath + COMMUNITY_RESULT_IDS)
.distinct();
Utils
.readPath(spark, inputPath + "/publication", Publication.class)
.filter(
(FilterFunction<Publication>) p -> !p.getDataInfo().getDeletedbyinference()
&& !p.getDataInfo().getInvisible())
.map((MapFunction<Publication, String>) p -> p.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, inputPath + "/dataset", Dataset.class)
.filter(
(FilterFunction<Dataset>) p -> !p.getDataInfo().getDeletedbyinference()
&& !p.getDataInfo().getInvisible())
.map((MapFunction<Dataset, String>) p -> p.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/software", Software.class)
.filter(
(FilterFunction<Software>) p -> !p.getDataInfo().getDeletedbyinference()
&& !p.getDataInfo().getInvisible())
.map((MapFunction<Software, String>) p -> p.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
.filter(
(FilterFunction<OtherResearchProduct>) p -> !p.getDataInfo().getDeletedbyinference()
&& !p.getDataInfo().getInvisible())
.map((MapFunction<OtherResearchProduct, String>) p -> p.getId(), Encoders.STRING()))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(workingPath + "/resultIds");
org.apache.spark.sql.Dataset<String> resultIds = spark.read().textFile(workingPath + "/resultIds");
org.apache.spark.sql.Dataset<Relation> oksource = communityResultIds
.joinWith(relations, communityResultIds.col("value").equalTo(relations.col("source")))
.map(
(MapFunction<Tuple2<String, Relation>, Relation>) t2 -> t2._2(),
Encoders.bean(Relation.class));
oksource
.joinWith(resultIds, oksource.col("target").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/relation");
writeNodes(
spark, inputPath + "/publication", Publication.class, outputPath + "/publication",
outputPath + "/relation", workingPath);
writeNodes(
spark, inputPath + "/dataset", Dataset.class, outputPath + "/dataset", outputPath + "/relation",
workingPath);
writeNodes(
spark, inputPath + "/software", Software.class, outputPath + "/software", outputPath + "/relation",
workingPath);
writeNodes(
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class,
outputPath + "/otherresearchproduct", outputPath + "/relation", workingPath);
}
private static <R extends Result> void writeNodes(SparkSession spark, String inputPath, Class<R> clazz,
String outputPath, String relationPath, String workingPath) {
org.apache.spark.sql.Dataset<Relation> citingRelations = Utils.readPath(spark, relationPath, Relation.class);
org.apache.spark.sql.Dataset<R> result = Utils
.readPath(spark, inputPath, clazz)
.filter(
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
!p.getDataInfo().getInvisible());
// take the distinct result id for source and target of the relations
citingRelations
.flatMap(
(FlatMapFunction<Relation, String>) r -> Arrays
.asList(r.getSource(), r.getTarget())
.iterator(),
Encoders.STRING())
.distinct()
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.text(workingPath + "/relationIds");
org.apache.spark.sql.Dataset<String> relationIds = spark.read().textFile(workingPath + "/relationIds");
relationIds
.joinWith(result, relationIds.col("value").equalTo(result.col("id")))
.map((MapFunction<Tuple2<String, R>, R>) t2 -> t2._2(), Encoders.bean(clazz))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
}
private static <R extends Result> void writeCommunityRelatedIds(SparkSession spark, String inputPath,
Class<R> clazz, List<String> communityList, String outputPath, String resultType) {
org.apache.spark.sql.Dataset<R> results = Utils
.readPath(spark, inputPath + "/" + resultType, clazz)
.filter(
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
!p.getDataInfo().getInvisible() &&
isRelatedToCommunities(p, communityList));
results
.map((MapFunction<R, String>) Result::getId, Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.text(outputPath + COMMUNITY_RESULT_IDS);
// results
// // .repartition(10000)
// .write()
// .option("compression", "gzip")
// .mode(SaveMode.Append)
// .json(outputPath + "/" + resultType);
}
private static <R extends Result> boolean isRelatedToCommunities(R p, List<String> communityList) {
return p
.getContext()
.stream()
.anyMatch(
c -> communityList.contains(c.getId()) ||
(c.getId().contains("::")
&& communityList.contains(c.getId().substring(0, c.getId().indexOf("::")))));
}
}

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,289 @@
<workflow-app name="dump_graph_csv" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communities</name>
<description>the communities whose products should be dumped</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reset_outputpath" />
<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
</fs>
<ok to="dump_communities"/>
<error to="Kill"/>
</action>
<action name="dump_communities">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</java>
<ok to="select_result_dump_relation"/>
<error to="Kill"/>
</action>
<action name="select_result_dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results ids connected to communities and dump relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkSelectResultsAndDumpRelations</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</spark>
<ok to="fork_dump_result_author_pid"/>
<error to="Kill"/>
</action>
<fork name="fork_dump_result_author_pid">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_other"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=9G
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>dataset</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_other">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from other </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from software</name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>software</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="dump_single_results"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_single_results">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>move the dumped results to a single directory</name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkMoveOnSigleDir</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,30 @@
[
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "nn",
"paramLongName": "nameNode",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": true
},
{
"paramName":"ilu",
"paramLongName":"isLookUpUrl",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"c",
"paramLongName":"communities",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]
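
Parameter specifications such as the one above list the command-line arguments accepted by the corresponding job. The following is a minimal sketch of how such a specification is typically consumed, assuming the ArgumentApplicationParser helper from dhp-common and a hypothetical resource path for the JSON file; neither is shown in this diff.

import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class DumpCommunitiesArgsExample {

	public static void main(String[] args) throws Exception {
		// the resource path below is hypothetical: the diff does not show where this JSON lives
		String jsonConfiguration = IOUtils
			.toString(
				DumpCommunitiesArgsExample.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/csv/input_parameters.json"),
				StandardCharsets.UTF_8);

		ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		// e.g. --outputPath /tmp/dump --nameNode hdfs://nameservice1 --isLookUpUrl http://... --communities enermaps;dh-ch
		parser.parseArgument(args);

		String outputPath = parser.get("outputPath");
		String nameNode = parser.get("nameNode");
		String isLookUpUrl = parser.get("isLookUpUrl");
		String communities = parser.get("communities");
		System.out.println(outputPath + " " + nameNode + " " + isLookUpUrl + " " + communities);
	}
}
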

View File

@ -0,0 +1,38 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"c",
"paramLongName":"communities",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -0,0 +1,36 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rtn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -0,0 +1,25 @@
[
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"o",
"paramLongName":"outputPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,102 @@
<workflow-app name="dump_graph_csv" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communities</name>
<description>the communities whose products should be dumped</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="select_result_dump_relation" />
<action name="select_result_dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select result ids connected to communities and dump relations</name>
<class>eu.dnetlib.dhp.oa.graph.dump.serafeim.SparkSelectResultsAndDumpRelations</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=10G
--executor-cores=3
--driver-memory=10G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--workingPath</arg><arg>${workingDir}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,8 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and ($x//context/param[./@name = 'status']/text() = 'all')
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -0,0 +1,11 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and (%s)
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
<description>
{$x//CONFIGURATION/context/param[@name='description']/text()}
</description>
</community>
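
The %s placeholder in the query above is filled at run time with a predicate restricting the contexts to the requested communities. The sketch below illustrates that substitution for a semicolon-separated list such as enermaps;dh-ch (the format used in the tests); the helper name and the exact predicate are assumptions, not taken from this commit.

import java.util.Arrays;
import java.util.stream.Collectors;

public class CommunityFilterExample {

	// hypothetical helper: turns "enermaps;dh-ch" into a predicate usable for the %s placeholder
	static String buildFilter(String communities) {
		return Arrays
			.stream(communities.split(";"))
			.map(id -> "$x//CONFIGURATION/context[./@id='" + id + "']")
			.collect(Collectors.joining(" or "));
	}

	public static void main(String[] args) {
		System.out.println(buildFilter("enermaps;dh-ch"));
		// prints: $x//CONFIGURATION/context[./@id='enermaps'] or $x//CONFIGURATION/context[./@id='dh-ch']
	}
}
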

View File

@ -0,0 +1,8 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and $x//CONFIGURATION/context[./@id=%s]
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -0,0 +1,9 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class DumpCommunitiesTest {
}

View File

@ -0,0 +1,350 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static org.apache.commons.lang3.StringUtils.split;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Function1;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class DumpResultTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(DumpResultTest.class);
private static HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(DumpResultTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpResultTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpResultTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testDumpResult() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/")
.getPath();
spark
.read()
.text(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds")
.getPath())
.write()
.text(workingDir.toString() + "/working/resultIds/");
SparkDumpResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-workingPath", workingDir.toString() + "/working",
"-resultType", "publication",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Dataset<CSVResult> tmp = Utils
.readPath(spark, workingDir.toString() + "/working/publication/result", CSVResult.class);
tmp.show(false);
Assertions.assertEquals(5, tmp.count());
CSVResult row = tmp
.filter(
(FilterFunction<CSVResult>) r -> r.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
.first();
Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAccessright());
Assertions.assertEquals("FI", row.getCountry());
Assertions.assertEquals("Lit.opg., bijl.", row.getDescription());
Assertions.assertEquals(3, split(row.getKeywords(), ", ").length);
Assertions.assertTrue(row.getKeywords().toString().contains("archeologie"));
Assertions.assertTrue(row.getKeywords().toString().contains("prospectie"));
Assertions.assertTrue(row.getKeywords().toString().contains("archaeology"));
Assertions.assertEquals("nl", row.getLanguage());
Assertions.assertEquals("2007-01-01", row.getPublication_date());
Assertions.assertEquals("FakePublisher1", row.getPublisher());
Assertions
.assertEquals(
"Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel",
row.getTitle());
Assertions.assertEquals("publication", row.getType());
row = tmp
.filter(
(FilterFunction<CSVResult>) r -> r.getId().equals("50|doi_________::715fec7723208e6f17e855c204656e2f"))
.first();
System.out.println(row.getPublisher());
String a = row.getPublisher().replace("\\n", " ");
System.out.println(a);
// row = tmp
// .where("id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'")
// .first();
// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright"));
// Assertions.assertEquals(2, split(row.getAs("country"), ", ").length);
// Assertions.assertNull(row.getAs("description"));
// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length);
// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie"));
// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology"));
// Assertions.assertEquals("UNKNOWN", row.getAs("language"));
// Assertions.assertNull(row.getAs("publication_date"));
// Assertions.assertNull(row.getAs("publisher"));
// Assertions.assertEquals("None", row.getAs("title"));
// Assertions.assertEquals("publication", row.getAs("type"));
//
// row = tmp
// .where("id = '50|DansKnawCris::26780065282e607306372abd0d808245'")
// .first();
// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright"));
// Assertions.assertNull(row.getAs("country"));
// Assertions.assertNull(row.getAs("description"));
// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length);
// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie"));
// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology"));
// Assertions.assertEquals("UNKNOWN", row.getAs("language"));
// Assertions.assertNull(row.getAs("publication_date"));
// Assertions.assertNull(row.getAs("publisher"));
// Assertions.assertEquals("None", row.getAs("title"));
// Assertions.assertEquals("publication", row.getAs("type"));
}
@Test
public void testDumpAuthor() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/")
.getPath();
spark
.read()
.text(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds")
.getPath())
.write()
.text(workingDir.toString() + "/working/resultIds/");
SparkDumpResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-workingPath", workingDir.toString() + "/working",
"-resultType", "publication",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Dataset<CSVAuthor> tmp = Utils
.readPath(spark, workingDir.toString() + "/working/publication/author", CSVAuthor.class);
Assertions.assertEquals(13, tmp.count());
Assertions.assertEquals(1, tmp.where("firstName == 'Maryam'").count());
Assertions
.assertEquals(
DHPUtils.md5("50|DansKnawCris::0224aae28af558f21768dbc6439c7a951"),
tmp.where("firstName == 'Maryam'").first().getId());
Assertions
.assertEquals(DHPUtils.md5("0000-0003-2914-2734"), tmp.where("firstName == 'Michael'").first().getId());
Assertions
.assertEquals(
DHPUtils.md5("0000-0002-6660-5673"),
tmp.where("firstName == 'Mikhail'").first().getId());
}
@Test
public void testDumpResultAuthorRelations() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/")
.getPath();
spark
.read()
.text(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds")
.getPath())
.write()
.text(workingDir.toString() + "/working/resultIds/");
SparkDumpResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/output",
"-workingPath", workingDir.toString() + "/working",
"-resultType", "publication",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Dataset<Row> tmp = spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/working/publication/result_author");
Assertions.assertEquals(6, tmp.count());
Assertions.assertEquals(2, tmp.where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'").count());
Assertions
.assertEquals(
1, tmp
.where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'")
.where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95'")
.count());
Assertions
.assertEquals(
1, tmp
.where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'")
.where("result_id == '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'")
.count());
}
@Test
public void testDumpResultPid() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/")
.getPath();
spark
.read()
.text(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds")
.getPath())
.write()
.text(workingDir.toString() + "/working/resultIds/");
SparkDumpResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/output",
"-workingPath", workingDir.toString() + "/working",
"-resultType", "publication",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Dataset<Row> tmp = spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/working/publication/result_pid");
tmp.show(false);
Assertions.assertEquals(4, tmp.count());
Assertions
.assertEquals(2, tmp.where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95'").count());
Assertions
.assertEquals(
"10.1023/fakedoi",
tmp
.where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' and type == 'doi'")
.first()
.getAs("pid"));
}
@Test
public void prova() throws DocumentException {
String input = "<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\">" +
" <description>This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</description>"
+
"</community>";
final Document doc;
final SAXReader reader = new SAXReader();
doc = reader.read(new StringReader(input));
Element root = doc.getRootElement();
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(root.attribute("id").getValue()));
builder.append(Constants.SEP);
builder.append(root.attribute("label").getValue());
builder.append(Constants.SEP);
builder.append(root.attribute("id").getValue());
builder.append(Constants.SEP);
builder.append(((Node) (root.selectNodes("//description").get(0))).getText());
System.out.println(builder.toString());
}
}

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import static org.apache.commons.lang3.StringUtils.split;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
/**
* @author miriam.baglioni
* @Date 25/05/23
*/
public class MoveOnSingleDirTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(MoveOnSingleDirTest.class);
private static HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(MoveOnSingleDirTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(MoveOnSingleDirTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(MoveOnSingleDirTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testDMoveSingleDir() throws Exception {
final String workingPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working")
.getPath();
spark
.read()
.text(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds")
.getPath())
.write()
.text(workingDir.toString() + "/working/resultIds/");
SparkMoveOnSigleDir.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/output",
"-workingPath", workingPath
});
Dataset<Row> tmp = spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/output/result");
Assertions.assertEquals(22, tmp.count());
Assertions.assertEquals(12, tmp.filter("type == 'dataset'").count());
Assertions.assertEquals(4, tmp.filter("type == 'other'").count());
Assertions.assertEquals(5, tmp.filter("type == 'publication'").count());
Assertions.assertEquals(1, tmp.filter("type == 'software'").count());
tmp.filter("type == 'publication'").show(false);
Assertions
.assertEquals(
8, spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/output/author")
.count());
}
}

View File

@ -0,0 +1,221 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRELCommunityResult;
import eu.dnetlib.dhp.utils.DHPUtils;
/**
* @author miriam.baglioni
* @Date 11/05/23
*/
public class SelectResultAndDumpRelationTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(SelectResultAndDumpRelationTest.class);
private static HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(SelectResultAndDumpRelationTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(SelectResultAndDumpRelationTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(SelectResultAndDumpRelationTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void test1() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/")
.getPath();
SparkSelectResultsAndDumpRelations.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/output",
"-workingPath", workingDir.toString() + "/working",
"-communities", "enermaps;dh-ch",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Assertions.assertEquals(2, sc.textFile(workingDir.toString() + "/working/communityResultIds").count());
Assertions
.assertEquals(
1, sc
.textFile(workingDir.toString() + "/working/communityResultIds")
.filter(v -> v.equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
.count());
Assertions
.assertEquals(
1, sc
.textFile(workingDir.toString() + "/working/communityResultIds")
.filter(v -> v.equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.count());
// verify that the association is correct with the communityid and result id
spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/output/result_community")
.createOrReplaceTempView("result_community");
Assertions.assertEquals(3, spark.sql("SELECT * FROM result_community").count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * " +
"FROM result_community " +
"WHERE community_id = '" + DHPUtils.md5("dh-ch") + "'")
.count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * " +
"FROM result_community" +
" WHERE result_id = '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' " +
"AND community_id = '" + DHPUtils.md5("dh-ch") + "'")
.count());
Assertions
.assertEquals(
2, spark
.sql(
"SELECT * " +
"FROM result_community " +
"WHERE community_id = '" + DHPUtils.md5("enermaps") + "'")
.count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * " +
"FROM result_community " +
"WHERE result_id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9' " +
"AND community_id = '" + DHPUtils.md5("enermaps") + "'")
.count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * " +
"FROM result_community " +
"WHERE result_id = '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' " +
"AND community_id = '" + DHPUtils.md5("enermaps") + "'")
.count());
Assertions.assertEquals(3, spark.read().textFile(workingDir.toString() + "/working/resultIds").count());
Assertions
.assertEquals(
1, sc
.textFile(workingDir.toString() + "/working/resultIds")
.filter(v -> v.equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
.count());
Assertions
.assertEquals(
1, sc
.textFile(workingDir.toString() + "/working/resultIds")
.filter(v -> v.equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.count());
Assertions
.assertEquals(
1, sc
.textFile(workingDir.toString() + "/working/resultIds")
.filter(v -> v.equals("50|DansKnawCris::26780065282e607306372abd0d808245"))
.count());
spark
.read()
.option("header", "true")
.option("delimiter", Constants.SEP)
.csv(workingDir.toString() + "/output/relation")
.createOrReplaceTempView("relation");
Assertions.assertEquals(2, spark.sql("SELECT * FROM relation").count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * FROM relation WHERE id = '" +
DHPUtils
.md5(
("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9cites50|DansKnawCris::26780065282e607306372abd0d808245"))
+ "'")
.count());
Assertions
.assertEquals(
1, spark
.sql(
"SELECT * FROM relation WHERE id = '" +
DHPUtils
.md5(
("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9cites50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
+ "'")
.count());
}
}
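
The relation identifiers asserted in this test are consistent with hashing the concatenation of the source id, the lower-cased relation class and the target id. Below is a minimal sketch of that inference, reusing the DHPUtils.md5 helper already imported by the test; it is derived from the assertions, not from the production implementation.

import eu.dnetlib.dhp.utils.DHPUtils;

public class RelationIdExample {

	public static void main(String[] args) {
		String source = "50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9";
		String relClass = "Cites"; // as it appears in the relation input data
		String target = "50|DansKnawCris::26780065282e607306372abd0d808245";
		// matches the id expected by SelectResultAndDumpRelationTest
		System.out.println(DHPUtils.md5(source + relClass.toLowerCase() + target));
	}
}
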

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::26780065282e607306372abd0d808245","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808246"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808245"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|doi_________::715fec7723208e6f17e855c204656e2f"}

View File

@ -0,0 +1,4 @@
{"fullname":"Giovanni Aloisio","id":"5ac035663df4d9099cf92d0e3f22a964","orcid":""}
{"fullname":"Cosimo Palazzo","id":"9f0d3123b6390dd7b2f3cee66c6bc926","orcid":""}
{"firstname":"L","fullname":"L, Issel-Tarver","id":"bafb7637b5f1c692419e55b13bf719a3","lastname":"Issel-Tarver","orcid":""}
{"firstname":"Voula","fullname":"Giouli, Voula","id":"c80f55a9afb32ffc4bc6bb67b6e0df33","lastname":"Giouli","orcid":""}

View File

@ -0,0 +1,12 @@
{"accessright":"UNKNOWN","country":"","description":"Absidiole NE_face ext","id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","keywords":"chevet, fenêtre, façade","language":"und","publication_date":"2019-01-01","publisher":"Nakala by Huma-Num","title":"QS83_17_Absidiole NE_face ext.jpg","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","keywords":"kiu38; http://sith.huma-num.fr/karnak/38","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 69534. Karnak, KIU 38 / stèle denceinte de ramsès iii XXe dynastie / Ramses III","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","keywords":"kiu2869; http://sith.huma-num.fr/karnak/2869","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 8263. Karnak, KIU 2869 / Cour à portique de Thoutmosis IV, Scene, piliers, pilier 03 est : accolade XVIIIe dynastie / Thoutmosis IV","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","keywords":"kiu4635; http://sith.huma-num.fr/karnak/4635","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 171030. Karnak, KIU 4635 / Cour nord du IVe pylône porte sud-est, face nord, montants est XVIIIe dynastie / Thoutmosis III","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","keywords":"kiu4225; http://sith.huma-num.fr/karnak/4225, kiu4217; http://sith.huma-num.fr/karnak/4217, kiu4218; http://sith.huma-num.fr/karnak/4218","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 151603. Karnak, KIU 4217 / Temple dOpet, Soubassement, face extérieure est, soubassement, 1er registre sud 10.n (opet 213 gauche) Romains / Auguste","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","keywords":"kiu3479; http://sith.huma-num.fr/karnak/3479","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 198480. Karnak, KIU 3479 / VIe pylône, Scene, mur intérieur est, partie nord 3.s annales (vi) : XVIIIe dynastie / Thoutmosis III","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","keywords":"op179; http://sith.huma-num.fr/karnak/op179, kiu1114; http://sith.huma-num.fr/karnak/1114","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 135670. Karnak, KIU 1114 / Temple de Ptah, Objet, objet(s) découvert(s) porte de grenier XVIIe dynastie / SenakhtenRe","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","keywords":"kiu7329; http://sith.huma-num.fr/karnak/7329, kiu7330; http://sith.huma-num.fr/karnak/7330, kiu7331; http://sith.huma-num.fr/karnak/7331","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 169666. Karnak, KIU 7330 / Salle hypostyle colonnes, côté sud, colonne 017, fût frise XXe dynastie / Ramses IV","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","keywords":"kiu2185; http://sith.huma-num.fr/karnak/2185","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 128938. Karnak, KIU 2185 / « Magasin pur » de Khonsou, Objet porte fragmentaire du « magasin pur » de khonsou Ptolemees / Ptolemee Evergete Ier","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","keywords":"kiu4212; http://sith.huma-num.fr/karnak/4212","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 151470. Karnak, KIU 4212 / Temple dOpet, Scene, face extérieure est, soubassement, 1er registre sud 04.n (opet 210 gauche) : procession de nils Romains / Auguste","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","keywords":"kiu3939; http://sith.huma-num.fr/karnak/3939, kiu3822; http://sith.huma-num.fr/karnak/3822, kiu3823; http://sith.huma-num.fr/karnak/3823, kiu3825; http://sith.huma-num.fr/karnak/3825","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 141190. Karnak, KIU 3939 / Temple dOpet face extérieure sud, soubassement, 1er registre bandeau (opet 266-267) Romains / Auguste","type":"dataset"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","keywords":"kiu5592; http://sith.huma-num.fr/karnak/5592, kiu8128; http://sith.huma-num.fr/karnak/8128, kiu8129; http://sith.huma-num.fr/karnak/8129, kiu8130; http://sith.huma-num.fr/karnak/8130","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 167789. Karnak","type":"dataset"}

View File

@ -0,0 +1,19 @@
{"author_id":"54ecb1d939e05ac0542d6af377100e67","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816"}
{"author_id":"06706770e1fb3b89fea4d0a8a60e7809","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a"}
{"author_id":"3afe02a6563ca7c30df007d69645f730","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070"}
{"author_id":"440464bc227f8371c905779a4641d49a","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1"}
{"author_id":"3d0c4aa051cdc1cc71907a973f616767","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9"}
{"author_id":"874398e3c71ba2e8cf76de4ba458d5fb","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c"}
{"author_id":"fe165c3a039f1cc4301c9dbd7c7f2247","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345"}
{"author_id":"b3b2b99a02b1bbd8d4b5a1994b8d60fe","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb"}
{"author_id":"be12aee5482275608067a3cab9e8beb6","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35"}
{"author_id":"dde164aefcd3aebafec84feedd999170","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2"}
{"author_id":"3a55a188e8a23e645752055ff18d4720","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5"}
{"author_id":"a0bcddc2a41a4cc0dd768eced4dd0939","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b"}
{"author_id":"51b2a67f20cdfd9628233ebf04158468","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6"}
{"author_id":"dfad2f4b741f4fbac8f504dd0088db06","result_id":"50|r3f5b9831893::f82af1f6dfd2b8644ba3ab799285849f"}
{"author_id":"b52f90003de8e73f2f704ced12b83bba","result_id":"50|r3f5b9831893::fb7cf14ef55474c3b745262fea21d4c0"}
{"author_id":"08e7328f7c44b32e1203374aadbedf0c","result_id":"50|doi_dedup___::c7a29e095e1763e09af2eb0e2ffbb717"}
{"author_id":"c8c6c6273e798cf408f848afd8ca13f8","result_id":"50|r3f5b9831893::0bc48082a3803d837098447a4f8fb28d"}
{"author_id":"16d0306f0af215d9ec8f70660026d585","result_id":"50|r3f5b9831893::1a372b7640db956b13716fc5e7b455b7"}
{"author_id":"c0a97e8f55967dedb4a57125e3174816","result_id":"50|r3f5b9831893::1b8dec9230423314146858112059845d"}

View File

@ -0,0 +1,33 @@
{"id":"58c75fe64b4df0126e0e4fdfafb8be18","pid":"http://hdl.handle.net/11280/86e6ac0d","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"handle"}
{"id":"45c62956554c7d3e7f9708bce5c9a086","pid":"11280/86e6ac0d","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"handle"}
{"id":"312a5c89fa6d82ccc66c1b9615d3d364","pid":"10.34847/nkl.7f846pnw","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"doi"}
{"id":"cb29ee70d77746445ca5ce5f121bc473","pid":"http://hdl.handle.net/11280/747fab4a","result_id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","type":"handle"}
{"id":"45a465d38aabff009c0fcf41c2f08c67","pid":"11280/747fab4a","result_id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","type":"handle"}
{"id":"cc956040bd5031ecec943d91e8b764fb","pid":"11280/51909d00","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","type":"handle"}
{"id":"726c5eef33521e505ef9cb48fe75d596","pid":"http://hdl.handle.net/11280/51909d00","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","type":"handle"}
{"id":"32429dfa16fa2847b0286efaf0a0dce8","pid":"11280/fc581aa4","result_id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","type":"handle"}
{"id":"554994db0c44fe13283444e190ac9607","pid":"http://hdl.handle.net/11280/fc581aa4","result_id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","type":"handle"}
{"id":"88a301e2cadf5e691ebb6a5665eb78f4","pid":"http://hdl.handle.net/11280/1cfc2896","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","type":"handle"}
{"id":"2f15200f24a870ff9edb3913e292d61f","pid":"11280/1cfc2896","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","type":"handle"}
{"id":"027c0e2083ab8ea468469a34fe9d46e1","pid":"http://hdl.handle.net/11280/3b2225c5","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","type":"handle"}
{"id":"8466cbb68b2d1c541b056006b7f27ea4","pid":"11280/3b2225c5","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","type":"handle"}
{"id":"bac82482f2dba75f8e34802ed7789554","pid":"http://hdl.handle.net/11280/f3911908","result_id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","type":"handle"}
{"id":"8cd4bb9ef9c8007155a95ee9df90ea69","pid":"11280/f3911908","result_id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","type":"handle"}
{"id":"ba83be852322c4c86ed6b3ab0610987d","pid":"11280/65056b94","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","type":"handle"}
{"id":"93cd2ffff769223cf04034e0db0f6284","pid":"http://hdl.handle.net/11280/65056b94","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","type":"handle"}
{"id":"c5dcb6dab6f53a281f96bfbe048858ce","pid":"http://hdl.handle.net/11280/dac5fe22","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","type":"handle"}
{"id":"999076fd410cdb0c1599b7d5e355b94a","pid":"11280/dac5fe22","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","type":"handle"}
{"id":"ef68e036a7e753da17a2794ccf1b8ce5","pid":"http://hdl.handle.net/11280/446e3387","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","type":"handle"}
{"id":"5377b0f0143c324176bbee897d9d966c","pid":"11280/446e3387","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","type":"handle"}
{"id":"9e588201f52f05fca56efc43583ca615","pid":"http://hdl.handle.net/11280/969ae30a","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","type":"handle"}
{"id":"f64681856cadef587b4c34396e9e6861","pid":"11280/969ae30a","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","type":"handle"}
{"id":"4ad4d6c56ce6e206c42849df92d894f5","pid":"http://hdl.handle.net/11280/dddf5851","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","type":"handle"}
{"id":"0b3ea2f9c96eb9593fd9b21363b7d9f6","pid":"11280/dddf5851","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","type":"handle"}
{"id":"45dc28539b305d186f51d5ee9465aee0","pid":"http://hdl.handle.net/11280/3f2679d9","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2","type":"handle"}
{"id":"b9c5beb054f3ca72477cb1b07351196a","pid":"11280/3f2679d9","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2","type":"handle"}
{"id":"ee0120c72b2f9c1fc1dd3cf47c98ac9d","pid":"http://hdl.handle.net/11280/d957e9f3","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5","type":"handle"}
{"id":"4770ff66784a0b9470551d46e7a0aaa0","pid":"11280/d957e9f3","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5","type":"handle"}
{"id":"3cf2316ff497fda37d07757e72307173","pid":"11280/e8d8ed9f","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b","type":"handle"}
{"id":"5a9092d335d45be6d01f9d6af99c9d86","pid":"http://hdl.handle.net/11280/e8d8ed9f","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b","type":"handle"}
{"id":"37018c7be9823e3c49aeff0e9ae69054","pid":"http://hdl.handle.net/11280/9ff65944","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6","type":"handle"}
{"id":"c372305e06eacc7855c7de0e3fc6df07","pid":"11280/9ff65944","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6","type":"handle"}

View File

@ -0,0 +1,2 @@
{"firstname":"Taal En Spraaktechnologie","fullname":"LS OZ Taal en spraaktechnologie","id":"60fa4ab9fa107f5281b91c1db2885bf9","lastname":"Ls Oz","orcid":""}
{"fullname":"Nispen, van, Annelies","id":"1279ef1ced7366cc6af25a2079ab4554","orcid":""}

View File

@ -0,0 +1,4 @@
{"accessright":"OPEN","country":"","description":"","id":"50|core_ac_uk__::15d72bdde1addf525170aa61664f8daf","keywords":"","language":"eng","publication_date":"","publisher":"Springer International Publishing","title":"Reengineering and Reinventing both Democracy and the Concept of Life in the Digital Era","type":"other"}
{"accessright":"OPEN","country":"IT","description":"","id":"50|od______3686::b0cb086c9a0222684d48b3e355eba1c8","keywords":"","language":"und","publication_date":"2002-01-01","publisher":"","title":"Progetto dellimpianto eolico di Pescopagano (Potenza), progetto secondo classificato al Concorso nazionale “Paesaggi del Vento”, progetto pubblicato in: E. Zanchini , a cura di, Paesaggi del vento, Meltemi, Roma 2002 , pp.84-89","type":"other"}
{"accessright":"OPEN","country":"NL","description":"This article reports about the on-going work on a new version of the metadata framework Component Metadata Infrastructure (CMDI), central to the CLARIN infrastructure. Version 1.2 introduces a number of important changes based on the experience gathered in the last five years of intensive use of CMDI by the digital humanities community, addressing problems encountered, but also introducing new functionality. Next to the consolidation of the structure of the model and schema sanity, new means for lifecycle management have been introduced aimed at combatting the observed proliferation of components, new mechanism for use of external vocabularies will contribute to more consistent use of controlled values and cues for tools will allow improved presentation of the metadata records to the human users. The feature set has been frozen and approved, and the infrastructure is now entering a transition phase, in which all the tools and data need to be migrated to the new version.","id":"50|narcis______::07cab979c27c9240f7ef5d80d752679b","keywords":"","language":"eng","publication_date":"2015-08-26","publisher":"Linköping University Electronic Press, Linköpings universitet","title":"CMDI 1.2: Improvements in the CLARIN Component Metadata Infrastructure","type":"other"}
{"accessright":"OPEN","country":"NL","description":"This paper describes what the CLARIN infrastructure is and how it can be used, with a focus on the Netherlands part of the CLARIN infrastructure. It aims to explain how a humanities researcher can use the CLARIN infrastructure.","id":"50|narcis______::655f9ef445ffa66a1782f29208cc1569","keywords":"","language":"eng","publication_date":"2014-08-20","publisher":"UiL OTS","title":"The CLARIN infrastructure in the Netherlands: What is it and how can you use it?","type":"other"}

View File

@ -0,0 +1,17 @@
{"author_id":"af07dd90a1f0be8159e52f7f572d1c5c","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"}
{"author_id":"9f24c2ed6e1cb057772b641806ae77ec","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"}
{"author_id":"9ad1701184de323823fc1a858a868ac2","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"}
{"author_id":"de106449e38166d8cf2ac7bb7bb6c5d8","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"8a157b06eaaf9fbca8b67011bc374744","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"10bffdada7578cec278ba1a5e3d63da5","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"d2a8ebfa553c4f6ff90998bd1c58fbcc","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"86b929edfab2d532f075506559a6ac76","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"478c134423c1afa8bb2ee174014726af","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"ba92d49768133c928d102eb86cb3690c","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"d590f7127b93a0b6003cbed3bd20983b","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"c146c73851641e52e6ea1adc6f271fd1","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"e3e6238baf917a025bcbff8be9288393","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"}
{"author_id":"e1a361a13f6595628524b87b6fa29918","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"}
{"author_id":"5764f46e7ded9260eadea13e81fdf0fe","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"}
{"author_id":"b56a640d36a2dc9e3dc88401edb61149","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"}
{"author_id":"e08632d458b519b66e575dd5b7eb54e9","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"}

View File

@ -0,0 +1,5 @@
{"id":"3ff0ab5e679c5320381c857d8699cd4a","pid":"10.5281/zenodo.2657248","result_id":"50|doi_dedup___::84db353272d83833fa76ec87fc540e63","type":"doi"}
{"id":"935716d050a36d36f797e843187b8192","pid":"https://hdl.handle.net/21.11115/0000-000e-0ff1-2","result_id":"50|r369162d0a40::da892118ba0be7a5cf695ad54ae5147e","type":"handle"}
{"id":"133b9dd1a59099adc577004209e83c52","pid":"21.11115/0000-000e-0ff1-2","result_id":"50|r369162d0a40::da892118ba0be7a5cf695ad54ae5147e","type":"handle"}
{"id":"8e17b86e61db6c34ec741eabe947ea9f","pid":"https://hdl.handle.net/21.11115/0000-000e-ce31-3","result_id":"50|r369162d0a40::b69a5145a8e41bdaa33c24be67c209f1","type":"handle"}
{"id":"b7cc730f4cbb6d379d5c4f57369978b3","pid":"21.11115/0000-000e-ce31-3","result_id":"50|r369162d0a40::b69a5145a8e41bdaa33c24be67c209f1","type":"handle"}

View File

@ -0,0 +1,4 @@
{"fullname":"Giovanni Aloisio","id":"5ac035663df4d9099cf92d0e3f22a964","orcid":""}
{"fullname":"Cosimo Palazzo","id":"9f0d3123b6390dd7b2f3cee66c6bc926","orcid":""}
{"firstname":"L","fullname":"L, Issel-Tarver","id":"bafb7637b5f1c692419e55b13bf719a3","lastname":"Issel-Tarver","orcid":""}
{"firstname":"Voula","fullname":"Giouli, Voula","id":"c80f55a9afb32ffc4bc6bb67b6e0df33","lastname":"Giouli","orcid":""}

View File

@ -0,0 +1,5 @@
{"accessright":"OPEN","country":"","description":"We describe the CoNLL-2002 shared task: language-independent named entity recognition. We give background information on the data sets and the evaluation method, present a general overview of the systems that have taken part in the task and discuss their performance.","id":"50|doi_dedup___::13b14c741a7b3420591c161f54ed5c80","keywords":"computer science - computation and language, i.2.7, computation and language (cs.cl), fos: computer and information sciences","language":"eng","publication_date":"2002-09-05","publisher":"","title":"Introduction to the CoNLL-2002 Shared Task: Language-Independent Named Entity Recognition","type":"publication"}
{"accessright":"OPEN","country":"GB","description":"Following a strategy similar to that used in baker's yeast (Herrgård et al. Nat Biotechnol 26:1155-1160, 2008). A consensus yeast metabolic network obtained from a community approach to systems biology (Herrgård et al. 2008; Dobson et al. BMC Syst Biol 4:145, 2010). Further developments towards a genome-scale metabolic model of yeast (Dobson et al. 2010; Heavner et al. BMC Syst Biol 6:55, 2012). Yeast 5-an expanded reconstruction of the Saccharomyces cerevisiae metabolic network (Heavner et al. 2012) and in Salmonella typhimurium (Thiele et al. BMC Syst Biol 5:8, 2011). A community effort towards a knowledge-base and mathematical model of the human pathogen Salmonellatyphimurium LT2 (Thiele et al. 2011), a recent paper (Thiele et al. Nat Biotechnol 31:419-425, 2013). A community-driven global reconstruction of human metabolism (Thiele et al. 2013) described a much improved 'community consensus' reconstruction of the human metabolic network, called Recon 2, and the authors (that include the present ones) have made it freely available via a database at http://humanmetabolism.org/ and in SBML format at Biomodels (http://identifiers.org/biomodels.db/MODEL1109130000. This short analysis summarises the main findings, and suggests some approaches that will be able to exploit the availability of this model to advantage. © 2013 The Author(s).","id":"50|doi_dedup___::e0392f427fea9a701aa469e6f24bdf93","keywords":"review article, metabolism, modelling, systems biology, networks, metabolic networks, clinical biochemistry, biochemistry, endocrinology, diabetes and metabolism, community approach, operations research, metabolic network, human metabolism, metabolic model, biology, computational biology, sbml, 03 medical and health sciences, 0302 clinical medicine, 0303 health sciences, 030220 oncology & carcinogenesis, 030304 developmental biology, researchinstitutes_networks_beacons/manchester_institute_of_biotechnology, manchester institute of biotechnology","language":"eng","publication_date":"2013-08-01","publisher":"Springer US","title":"An analysis of a community-driven reconstruction of the human metabolic network","type":"publication"}
{"accessright":"OPEN","country":"","description":"Current machine learning systems operate, almost exclusively, in a statistical, or model-free mode, which entails severe theoretical limits on their power and performance. Such systems cannot reason about interventions and retrospection and, therefore, cannot serve as the basis for strong AI. To achieve human level intelligence, learning machines need the guidance of a model of reality, similar to the ones used in causal inference tasks. To demonstrate the essential role of such models, I will present a summary of seven tasks which are beyond reach of current machine learning systems and which have been accomplished using the tools of causal modeling.","id":"50|doi_dedup___::2436e90941a664931b54b956ade5b77b","keywords":"machine learning (cs.lg), artificial intelligence (cs.ai), machine learning (stat.ml), fos: computer and information sciences, mode (statistics), causal inference, artificial intelligence, business.industry, business, power (physics), computer science, machine learning, computer.software_genre, computer, basis (linear algebra), 03 medical and health sciences, 02 engineering and technology, 0202 electrical engineering, electronic engineering, information engineering, 0301 basic medicine, 020201 artificial intelligence & image processing, 030104 developmental biology, computer science - learning, computer science - artificial intelligence, statistics - machine learning","language":"und","publication_date":"2018-02-02","publisher":"arXiv","title":"Theoretical Impediments to Machine Learning With Seven Sparks from the Causal Revolution","type":"publication"}
{"accessright":"OPEN","country":"","description":"In most natural and engineered systems, a set of entities interact with each other in complicated patterns that can encompass multiple types of relationships, change in time, and include other types of complications. Such systems include multiple subsystems and layers of connectivity, and it is important to take such \"multilayer\" features into account to try to improve our understanding of complex systems. Consequently, it is necessary to generalize \"traditional\" network theory by developing (and validating) a framework and associated tools to study multilayer systems in a comprehensive fashion. The origins of such efforts date back several decades and arose in multiple disciplines, and now the study of multilayer networks has become one of the most important directions in network science. In this paper, we discuss the history of multilayer networks (and related concepts) and review the exploding body of work on such networks. To unify the disparate terminology in the large body of recent work, we discuss a general framework for multilayer networks, construct a dictionary of terminology to relate the numerous existing concepts to each other, and provide a thorough discussion that compares, contrasts, and translates between related notions such as multilayer networks, multiplex networks, interdependent networks, networks of networks, and many others. We also survey and discuss existing data sets that can be represented as multilayer networks. We review attempts to generalize single-layer-network diagnostics to multilayer networks. We also discuss the rapidly expanding research on multilayer-network models and notions like community structure, connected components, tensor decompositions, and various types of dynamical processes on multilayer networks. We conclude with a summary and an outlook.","id":"50|doi_dedup___::c5a574592f2e347f27be49d2c20a5558","keywords":"applied mathematics, computational mathematics, control and optimization, management science and operations research, computer networks and communications, data science, connected component, terminology, complex system, network theory, network science, construct (philosophy), computer science, interdependent networks, set (psychology), 01 natural sciences, 0103 physical sciences, 010306 general physics, 010305 fluids & plasmas, physics - physics and society, computer science - social and information networks, physics and society (physics.soc-ph), social and information networks (cs.si), fos: physical sciences, fos: computer and information sciences","language":"und","publication_date":"2013-09-27","publisher":"Oxford University Press (OUP)","title":"Multilayer networks","type":"publication"}
{"accessright":"UNKNOWN","country":"","description":"","id":"50|doi_________::715fec7723208e6f17e855c204656e2f","keywords":"","language":"und","publication_date":"1998-10-19","publisher":"American Mathematical\\n Society","title":"Good encodings for DNA-based solutions to combinatorial problems","type":"publication"}

@ -0,0 +1,17 @@
{"author_id":"6fa85e5d3da0c5ed3ab65e4423481714","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"dad3b6e22750b26a27296cd1c98565d1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"121d8003d3895905cfd67b9b69ac99e1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"91d3d8c07152d64fbf1c059940211334","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"a25d1cc688c34c0458a4b00b48bc4cdc","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"968ad30220675afb7a0b2b583b35c3a1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"a55af296962dfb58977aabcb3cf6a8d9","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"5a344a09dab274779fd8e34654fd3541","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"77104c891595df750391d710280da022","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"}
{"author_id":"148f572c63c1f22386c1cae02e5bae2d","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"8e571c27bc66cf96051302db9aa903dc","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"175e45bf98e2b74df9c888598bb917fc","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"bcdeabeece29231977e580b8f417ea82","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"11cea0826b37ff58aa2f4c12ec42695e","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"faf54def0161659b903f58ab4ce8bfae","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"088daddc0f62bc2b8700a4e66a399d5f","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}
{"author_id":"0b78df096d451535b5b8f7f4f47a6433","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"}

@ -0,0 +1,12 @@
{"id":"94c1431ed983f9ea9996650e2d2205cc","pid":"10.5281/zenodo.3529160","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"}
{"id":"f2328b2e830ee5c03945f65ab1802af7","pid":"10.3389/fphar.2019.01303","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"}
{"id":"53511fa534223420fb925c58051725d6","pid":"31749705","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"pmid"}
{"id":"0e254059fe10cf07df8dbae2cfe5797e","pid":"pmc6848277","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"pmc"}
{"id":"a6181896a32edebf1c45649b894e5886","pid":"10.5281/zenodo.3529159","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"}
{"id":"6e2dc8a4fd3523656a5abd3c0e090a18","pid":"10.7287/peerj.preprints.2711v2","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"}
{"id":"2072bbca91cb3f3a05b2454edce57f6f","pid":"10.1371/journal.pbio.1002614","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"}
{"id":"a4e63567711400f9526cc46ca84d2bc1","pid":"pmc5655613","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"pmc"}
{"id":"477cabc52ec11dfaec8631ee1073376d","pid":"29065148","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"pmid"}
{"id":"27285b8c2487b534fc2196d27ad4cf0d","pid":"10.7287/peerj.preprints.2711v3","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"}
{"id":"056a211b8f85fe3058825df170960c06","pid":"10.1111/cgf.13610","result_id":"50|doi_dedup___::32c3649d7aa266f3d754463d6194ebd5","type":"doi"}
{"id":"79c575556941fbb62d9eee77b97fd0e4","pid":"1902.06815","result_id":"50|doi_dedup___::32c3649d7aa266f3d754463d6194ebd5","type":"arxiv"}

@ -0,0 +1 @@
50|doi_________::715fec7723208e6f17e855c204656e2f

@ -0,0 +1 @@
50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9

@ -0,0 +1 @@
50|DansKnawCris::0224aae28af558f21768dbc6439c7a95

@ -0,0 +1,2 @@
50|DansKnawCris::26780065282e607306372abd0d808245
50|doi_________::16e142b54fbddb2cf1c71ff7460e2792

@ -0,0 +1,2 @@
{"firstname":"Maurizio","fullname":"Toscano, Maurizio","id":"045bdce3ee24842af4eb4a7f89a44adb","lastname":"Toscano","orcid":""}
{"firstname":"","fullname":"Aitor Díaz","id":"25fc898122164b69f56f08a8545804d3","lastname":"","orcid":""}

@ -0,0 +1 @@
{"accessright":"OPEN","country":"","description":"<p>Mapping digital humanities in Spain (1993-2019)</p> <p>This dataset has been&nbsp;extensively analysed in the following paper&nbsp;<a href=\"https://doi.org/10.3145/epi.2020.nov.01\">https://doi.org/10.3145/epi.2020.nov.01</a>&nbsp;and has also been used for the following poster&nbsp;<a href=\"https://doi.org/10.5281/zenodo.4256689\">https://doi.org/10.5281/zenodo.4256689</a></p>","id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","keywords":"","language":"esl/spa","publication_date":"2020-06-14","publisher":"Zenodo","title":"Mapping digital humanities in Spain - 1993-2019","type":"software"}

@ -0,0 +1,2 @@
{"author_id":"045bdce3ee24842af4eb4a7f89a44adb","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423"}
{"author_id":"25fc898122164b69f56f08a8545804d3","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423"}

@ -0,0 +1,2 @@
{"id":"cb7d0c2e4660c784cb647060974dbee7","pid":"10.5281/zenodo.3893545","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","type":"doi"}
{"id":"19703b43918fc184698f6e0298bf2fc8","pid":"10.5281/zenodo.3893546","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","type":"doi"}

@ -102,8 +102,7 @@
<junit-jupiter.version>5.6.1</junit-jupiter.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version>
<!-- <dhp-schemas.version>[2.13.2-SNAPSHOT]</dhp-schemas.version>-->
<dhp-schemas.version>[2.13.1-patched]</dhp-schemas.version>
<dhp-schemas.version>[3.17.1]</dhp-schemas.version>
</properties>
</project>