diff --git a/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org b/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org new file mode 100644 index 0000000..41a91b3 --- /dev/null +++ b/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org @@ -0,0 +1,5 @@ +id name acronym description +04a00617ca659adc944977ac700ea14b Digital Humanities and Cultural Heritage dh-ch This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields. +3ee95893613de7450247d7fef747136f DARIAH EU dariah The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. +5fde864866ea5ded4cc873b3170b63c3 Transport Research beopen Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research. The TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323) +aa0e56dd2e9d2a0be749f5debdd2b3d8 Energy Research enermaps

The EnerMaps Open Data Management Tool aims to improve data management and accessibility in the field of energy research for the renewable energy industry.

EnerMaps’ tool accelerates and facilitates the energy transition by offering a high-quality and user-friendly digital platform to energy professionals.

The project is based on the FAIR data principles, which require data to be Findable, Accessible, Interoperable and Reusable.

The EnerMaps project coordinates and enriches existing energy databases to promote trans-disciplinary research and to develop partnerships between researchers and energy professionals.

The EnerMaps project has received funding from the European Union’s Horizon 2020 research and innovation program under grant agreement N°884161.

 

Website: https://enermaps.eu/

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index b972de6..8ca73ea 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -2,15 +2,22 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.StringReader; +import java.util.ArrayList; import java.util.List; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; +import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities; +import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -18,6 +25,8 @@ public class QueryInformationSystem { private ISLookUpService isLookUp; + private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class); + private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + @@ -71,4 +80,31 @@ public class QueryInformationSystem { return map; } + public List getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException { + List communities = new ArrayList<>(); + + for (String xml : isLookUp.quickSearchProfile(toString)) { + log.info(xml); + final Document doc; + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); + Element root = doc.getRootElement(); + StringBuilder builder = 
new StringBuilder(); + builder.append(DHPUtils.md5(root.attribute("id").getValue())); + builder.append(Constants.SEP); + builder.append(root.attribute("label").getValue()); + builder.append(Constants.SEP); + builder.append(root.attribute("id").getValue()); + builder.append(Constants.SEP); + builder + .append( + ((Node) (root.selectNodes("//description").get(0))) + .getText() + .replace("\n", " ") + .replace("\t", " ")); + communities.add(builder.toString()); + } + return communities; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 9414cda..625d6b0 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -21,6 +21,7 @@ import eu.dnetlib.dhp.oa.model.Indicator; import eu.dnetlib.dhp.oa.model.Instance; import eu.dnetlib.dhp.oa.model.OpenAccessRoute; import eu.dnetlib.dhp.oa.model.Result; +import eu.dnetlib.dhp.oa.model.Subject; import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue; import eu.dnetlib.dhp.oa.model.community.CommunityInstance; import eu.dnetlib.dhp.oa.model.community.CommunityResult; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index bf90722..71c10be 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -9,9 +9,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.api.MissingConceptDoiException; +import eu.dnetlib.dhp.common.api.ZenodoAPIClient; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; -import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException; -import 
eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; public class SendToZenodoHDFS implements Serializable { @@ -81,8 +81,10 @@ public class SendToZenodoHDFS implements Serializable { String pString = p.toString(); if (!pString.endsWith("_SUCCESS")) { String name = pString.substring(pString.lastIndexOf("/") + 1); - FSDataInputStream inputStream = fileSystem.open(p); - zenodoApiClient.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen()); + + try (FSDataInputStream inputStream = fileSystem.open(p)) { + zenodoApiClient.uploadIS(inputStream, name); + } } } @@ -90,9 +92,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } -// if (Boolean.TRUE.equals(publish)) { -// zenodoApiClient.publish(); -// } + if (Boolean.TRUE.equals(publish)) { + zenodoApiClient.publish(); + } } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/AuthorResult.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/AuthorResult.java new file mode 100644 index 0000000..1628fdd --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/AuthorResult.java @@ -0,0 +1,102 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import java.io.Serializable; + +import eu.dnetlib.dhp.utils.DHPUtils; + +/** + * @author miriam.baglioni + * @Date 05/05/23 + */ +public class AuthorResult implements Serializable { + private String authorId; + private String firstName; + private String lastName; + private String fullName; + private String orcid; + private String resultId; + private String rank; + private Boolean fromOrcid; + + public Boolean getFromOrcid() { + return fromOrcid; + } + + public void setFromOrcid(Boolean fromOrcid) { + this.fromOrcid = fromOrcid; + } + + public String getFullName() { + return fullName; + } + + public void setFullName(String fullName) { + this.fullName = fullName; + } + + public String getAuthorId() { + return authorId; + } + + public void setAuthorId(String authorId) { + this.authorId = authorId; + } 
+ + public String getResultId() { + return resultId; + } + + public void setResultId(String resultId) { + this.resultId = resultId; + } + + public String getRank() { + return rank; + } + + public void setRank(String rank) { + this.rank = rank; + } + + public String getId() { + return authorId; + } + + public void setId(String id) { + this.authorId = id; + } + + public String getFirstName() { + return firstName; + } + + public void setFirstName(String firstName) { + this.firstName = firstName; + } + + public String getLastName() { + return lastName; + } + + public void setLastName(String lastName) { + this.lastName = lastName; + } + + public String getOrcid() { + return orcid; + } + + public void setOrcid(String orcid) { + this.orcid = orcid; + } + + public void autosetId() { + if (orcid != null) { + authorId = DHPUtils.md5(orcid); + } else { + authorId = DHPUtils.md5(resultId + rank); + } + + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/Constants.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/Constants.java new file mode 100644 index 0000000..24fff80 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/Constants.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import org.apache.commons.lang.StringUtils; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 10/05/23 + */ +public class Constants implements Serializable { + public final static String SEP = "\t"; + + public static final String addQuotes(String id) { + if(StringUtils.isNotEmpty(id)) + return "\"" + id + "\""; + return id; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java new file mode 100644 index 0000000..ebbadaa --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java @@ -0,0 +1,119 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.commons.lang3.StringUtils.split; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem; +import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +/** + * @author miriam.baglioni + * @Date 09/05/23 + */ +//STEP 1 +public class DumpCommunities implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(DumpCommunities.class); + private final BufferedWriter writer; + private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP + + " description \n"; + private final transient QueryInformationSystem queryInformationSystem; + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + DumpCommunities.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final String 
outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String nameNode = parser.get("nameNode"); + log.info("nameNode: {}", nameNode); + + final List communities = Arrays.asList(split(parser.get("communities"), ";")); + + final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl")); + + dc.writeCommunity(communities); + + } + + private void writeCommunity(List communities) + throws IOException, ISLookUpException, DocumentException, SAXException { + writer.write(HEADER); + writer.flush(); + String a = IOUtils + .toString( + DumpCommunities.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq")); + + final String xquery = String + .format( + a, + communities + .stream() + .map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t)) + .collect(Collectors.joining(" or "))); + + for (String community : queryInformationSystem + .getCommunityCsv(xquery)) { + writer + .write( + community); + writer.write("\n"); + + } + writer.close(); + } + + public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception { + final Configuration conf = new Configuration(); + queryInformationSystem = new QueryInformationSystem(); + queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + + conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, true); + } + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); + + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java new file mode 100644 index 0000000..f2d1e73 --- /dev/null +++ 
b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java @@ -0,0 +1,365 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.commons.lang3.StringUtils.remove; +import static org.apache.commons.lang3.StringUtils.split; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collector; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 04/05/23 + */ +//STEP 3 +public class SparkDumpResults implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkDumpResults.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkDumpResults.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + 
.ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String resultType = parser.get("resultType"); + log.info("resultType: {}", resultType); + + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); + + final String workingPath = parser.get("workingPath"); + + Class inputClazz = (Class) Class.forName(resultClassName); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + // Utils.removeOutputDir(spark, outputPath); + run(spark, inputPath, inputClazz, resultType, workingPath); + + }); + + } + + private static void run(SparkSession spark, String inputPath, + Class inputClazz, String resultType, String workingPath) { + + Dataset resultIds = spark.read().textFile(workingPath + "/resultIds"); + // resultIds.foreach((ForeachFunction) r -> System.out.println(r)); + Dataset results = Utils + .readPath(spark, inputPath + "/" + resultType, inputClazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible()); + + resultIds + .joinWith(results, resultIds.col("value").equalTo(results.col("id"))) + .map((MapFunction, R>) t2 -> t2._2(), Encoders.bean(inputClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath + "/" + resultType + "/temp/result"); + + // map results + results = Utils.readPath(spark, workingPath + "/" + resultType + "/temp/result", inputClazz); + results + .map( + (MapFunction) r -> mapResultInfo(r), + Encoders.bean(CSVResult.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingPath + "/" + resultType + "/result"); + + // map relations between pid and result + results + .flatMap((FlatMapFunction) r 
-> { + List pids = new ArrayList<>(); + if (Optional.ofNullable(r.getPid()).isPresent() && r.getPid().size() > 0) { + pids.addAll(mapPid(r.getPid(), r.getId())); + } + return pids.iterator(); + }, Encoders.bean(CSVPid.class)) + .filter(Objects::nonNull) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingPath + "/" + resultType + "/result_pid"); + + // map authors from the result + // per ogni autore nel result + // se l'autore ha un orcid il suo id dipende dall'orcid (tipo md5(orcid)) + // se non ha orcid il suo id si costruisce come result_id + authorrank ( se non ha il rank si sua + // la sua posizione nell'insieme degli autori) sempre con md5 + results + .flatMap((FlatMapFunction) r -> { + int count = 0; + List arl = new ArrayList<>(); + Set authorIds = new HashSet(); + if (Optional.ofNullable(r.getAuthor()).isPresent()) { + for (Author a : r.getAuthor()) { + count += 1; + AuthorResult ar = new AuthorResult(); + ar.setResultId(r.getId()); + if (Optional.ofNullable(a.getRank()).isPresent()) { + if (a.getRank() > 0) { + ar.setRank(String.valueOf(a.getRank())); + } else { + ar.setRank(String.valueOf(count)); + } + } + ar.setFirstName(replace(a.getName())); + ar.setLastName(replace(a.getSurname())); + ar.setFullName(replace(a.getFullname())); + Tuple2 orcid = getOrcid(a.getPid()); + if (Optional.ofNullable(orcid).isPresent()) { + ar.setOrcid(orcid._1()); + ar.setFromOrcid(orcid._2()); + } + + ar.autosetId(); + + if (!authorIds.contains(ar.getAuthorId())) { + arl.add(ar); + authorIds.add(ar.getAuthorId()); + } + + } + } + + return arl.iterator(); + }, Encoders.bean(AuthorResult.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath + "/" + resultType + "/temp/authorresult"); + + Dataset authorResult = Utils + .readPath(spark, workingPath + "/" + resultType + "/temp/authorresult", AuthorResult.class); + // map the relation between author and result + authorResult + .map( + (MapFunction) 
ar -> { + CSVRelResAut ret = new CSVRelResAut(); + ret.setResult_id(ar.getResultId()); + ret.setAuthor_id(ar.getAuthorId()); + return ret; + }, + Encoders.bean(CSVRelResAut.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingPath + "/" + resultType + "/result_author"); + + // ma the authors in the working dir. I do not want to have them repeated. If I have an orcid as id, I choose + // the one from orcid if any + authorResult + .groupByKey((MapFunction) ar -> ar.getAuthorId(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, it) -> { + AuthorResult first = it.next(); + if (!Optional.ofNullable(first.getFromOrcid()).isPresent() || first.getFromOrcid()) + return getAuthorDump(first); + while (it.hasNext()) { + AuthorResult ar = it.next(); + if (ar.getFromOrcid()) + return getAuthorDump(ar); + } + return getAuthorDump(first); + }, + Encoders.bean(CSVAuthor.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingPath + "/" + resultType + "/author"); + + } + + private static String replace(String input) { + if (Optional.ofNullable(input).isPresent()) + return input.replace("\t", " ").replace("\n", " ").replace("\r", " ").replace("\"", " "); + else + return ""; + } + + private static List mapPid(List pid, String resultId) { + return pid + .stream() + .map(p -> p.getQualifier().getClassid().toLowerCase() + "@" + p.getValue().toLowerCase()) + .distinct() + .map(p -> { + CSVPid ret = new CSVPid(); + ret.setId(DHPUtils.md5(p + "@" + resultId)); + ret.setResult_id(resultId); + ret.setPid(split(p, "@")[1]); + ret.setType(split(p, "@")[0]); + + return ret; + }) + .collect(Collectors.toList()); + + } + + private static CSVAuthor getAuthorDump(AuthorResult ar) { + CSVAuthor ret = new CSVAuthor(); + ret.setFirstname(ar.getFirstName()); + + ret.setId(ar.getAuthorId()); + ret.setLastname(ar.getLastName()); + + ret.setFullname(ar.getFullName()); + + if (ar.getOrcid() != null) { + 
ret.setOrcid(ar.getOrcid()); + ret.setFromOrcid(ar.getFromOrcid()); + } else { + ret.setOrcid(""); + } + + return ret; + } + + private static Tuple2 getOrcid(List pid) { + if (!Optional.ofNullable(pid).isPresent()) + return null; + if (pid.size() == 0) + return null; + for (StructuredProperty p : pid) { + if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) { + return new Tuple2<>(p.getValue(), Boolean.TRUE); + } + } + for (StructuredProperty p : pid) { + if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) { + return new Tuple2<>(p.getValue(), Boolean.FALSE); + } + } + return null; + } + + private static String getFieldValue(Field input) { + if (input != null && + StringUtils.isNotEmpty(input.getValue())) { + return removeBreaks(input.getValue()); + } else { + return ""; + } + } + + private static CSVResult mapResultInfo(R r) { + CSVResult ret = new CSVResult(); + ret.setId(removeBreaks(r.getId())); + ret.setType(removeBreaks(r.getResulttype().getClassid())); + ret.setTitle(getTitle(r.getTitle())); + ret.setDescription(getAbstract(r.getDescription())); + ret.setAccessright(removeBreaks(r.getBestaccessright().getClassid())); + ret.setPublication_date(removeBreaks(getFieldValue(r.getDateofacceptance()))); + ret.setPublisher(removeBreaks(getFieldValue(r.getPublisher()))); + + if (Optional.ofNullable(r.getSubject()).isPresent()) + ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> { + if (StringUtils.isNotEmpty(s.getValue())) + return removeBreaks(s.getValue().toLowerCase()); + else + return null; + }).filter(Objects::nonNull).distinct().collect(Collectors.toList()))); + else + ret.setKeywords(""); + + if (Optional.ofNullable(r.getCountry()).isPresent()) + ret + .setCountry( + String.join(", ", r.getCountry().stream().map(Country::getClassid).collect(Collectors.toList()))); + else + ret.setCountry(""); + + if (Optional.ofNullable(r.getLanguage()).isPresent() && StringUtils.isNotEmpty(r.getLanguage().getClassid())) { + 
ret.setLanguage(r.getLanguage().getClassid()); + } else { + ret.setLanguage(""); + } + + return ret; + } + + private static String getAbstract(List> description) { + if (description == null) + return ""; + for (Field abs : description) { + if (StringUtils.isNotEmpty(abs.getValue())) { + return removeBreaks(abs.getValue()); + } + } + return ""; + } + + private static String getTitle(List titles) { + String firstTitle = null; + for (StructuredProperty title : titles) { + if (StringUtils.isEmpty(firstTitle)) { + if (StringUtils.isNotEmpty(title.getValue())) + firstTitle = removeBreaks(title.getValue()); + } + if (title.getQualifier().getClassid().equals(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) { + if (StringUtils.isNotEmpty(title.getValue())) + return removeBreaks(title.getValue()); + } + } + if (firstTitle != null) { + return removeBreaks(firstTitle); + } + return ""; + } + + private static String removeBreaks(String input) { + return input.replace("\n", " ").replace("\t", " ") + .replace("\r", " ") + .replace("\\\"", " ") + .replace("\"", " ") + ; + + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkMoveOnSigleDir.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkMoveOnSigleDir.java new file mode 100644 index 0000000..5a41ae8 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkMoveOnSigleDir.java @@ -0,0 +1,133 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult; +import eu.dnetlib.dhp.schema.oaf.*; + +/** + * @author miriam.baglioni + * @Date 10/05/23 + */ +//STEP 4 +public class SparkMoveOnSigleDir implements Serializable { + + // All the products saved in different directories are put under the same one. + // For the authors also a step of reconciliation mast be done, since the same author id can be saved in more than + // one directory + + private static final Logger log = LoggerFactory.getLogger(SparkMoveOnSigleDir.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkMoveOnSigleDir.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + // Utils.removeOutputDir(spark, outputPath); + run(spark, outputPath, workingPath); + + }); + + } + + private static void run(SparkSession spark, String outputPath, + String workingPath) { + + Utils + .readPath(spark, workingPath + "/publication/result", 
CSVResult.class) + .union(Utils.readPath(spark, workingPath + "/dataset/result", CSVResult.class)) + .union(Utils.readPath(spark, workingPath + "/software/result", CSVResult.class)) + .union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result", CSVResult.class)) + .write() + .mode(SaveMode.Overwrite) + .option("header", "true") + .option("delimiter", Constants.SEP) + .option("compression", "gzip") + .csv(outputPath + "/result"); + + Utils + .readPath(spark, workingPath + "/publication/result_pid", CSVPid.class) + .union(Utils.readPath(spark, workingPath + "/dataset/result_pid", CSVPid.class)) + .union(Utils.readPath(spark, workingPath + "/software/result_pid", CSVPid.class)) + .union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_pid", CSVPid.class)) + .write() + .mode(SaveMode.Overwrite) + .option("header", "true") + .option("delimiter", Constants.SEP) + .option("compression", "gzip") + .csv(outputPath + "/result_pid"); + + Utils + .readPath(spark, workingPath + "/publication/result_author", CSVRelResAut.class) + .union(Utils.readPath(spark, workingPath + "/dataset/result_author", CSVRelResAut.class)) + .union(Utils.readPath(spark, workingPath + "/software/result_author", CSVRelResAut.class)) + .union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_author", CSVRelResAut.class)) + .write() + .mode(SaveMode.Overwrite) + .option("header", "true") + .option("delimiter", Constants.SEP) + .option("compression", "gzip") + .csv(outputPath + "/result_author"); + + Utils + .readPath(spark, workingPath + "/publication/author", CSVAuthor.class) + .union(Utils.readPath(spark, workingPath + "/dataset/author", CSVAuthor.class)) + .union(Utils.readPath(spark, workingPath + "/software/author", CSVAuthor.class)) + .union(Utils.readPath(spark, workingPath + "/otherresearchproduct/author", CSVAuthor.class)) + .groupByKey((MapFunction) r -> r.getId(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, it) -> it.next(), 
Encoders.bean(CSVAuthor.class)) + .write() + .mode(SaveMode.Overwrite) + .option("header", "true") + .option("delimiter", Constants.SEP) + .option("compression", "gzip") + .csv(outputPath + "/author"); + + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkSelectResultsAndDumpRelations.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkSelectResultsAndDumpRelations.java new file mode 100644 index 0000000..30772fa --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkSelectResultsAndDumpRelations.java @@ -0,0 +1,227 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVCitation; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRELCommunityResult; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 04/05/23 + */ +//STEP 2 +public class SparkSelectResultsAndDumpRelations implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class); + private static String RESULT_COMMUNITY_TABLE = "/result_community"; + private static String COMMUNITY_RESULT_IDS = 
"/communityResultIds"; + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkSelectResultsAndDumpRelations.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String workingPath = parser.get("workingPath"); + + List communityList = null; + Optional communities = Optional.ofNullable(parser.get("communities")); + if (communities.isPresent()) { + communityList = Arrays.asList(communities.get().split(";")); + } + + SparkConf conf = new SparkConf(); + + List finalCommunityList = communityList; + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + // Utils.removeOutputDir(spark, outputPath); + run(spark, inputPath, outputPath, workingPath, finalCommunityList); + + }); + + } + + private static void run(SparkSession spark, String inputPath, String outputPath, + String workingPath, + List communityList) { + + // select the result ids related to the set of communities considered + writeCommunityRelatedIds( + spark, inputPath + "/publication", Publication.class, communityList, workingPath + COMMUNITY_RESULT_IDS); + writeCommunityRelatedIds( + spark, inputPath + "/dataset", Dataset.class, communityList, workingPath + COMMUNITY_RESULT_IDS); + writeCommunityRelatedIds( + spark, inputPath + "/software", Software.class, communityList, workingPath + COMMUNITY_RESULT_IDS); + writeCommunityRelatedIds( + spark, inputPath + 
"/otherresearchproduct", OtherResearchProduct.class, communityList, + workingPath + COMMUNITY_RESULT_IDS); + + // write the relations result communities + writeCommunityResultRelations( + spark, inputPath + "/publication", Publication.class, communityList, outputPath + RESULT_COMMUNITY_TABLE); + writeCommunityResultRelations( + spark, inputPath + "/dataset", Dataset.class, communityList, outputPath + RESULT_COMMUNITY_TABLE); + writeCommunityResultRelations( + spark, inputPath + "/software", Software.class, communityList, outputPath + RESULT_COMMUNITY_TABLE); + writeCommunityResultRelations( + spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, communityList, + outputPath + RESULT_COMMUNITY_TABLE); + + // select the relations with semantics cites + org.apache.spark.sql.Dataset relations = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.CITES)); + + // select the results target of the selected relations having as source one of the results related to the + // communities + org.apache.spark.sql.Dataset resultIds = spark + .read() + .textFile(workingPath + COMMUNITY_RESULT_IDS) + .distinct(); + + resultIds + .joinWith(relations, resultIds.col("value").equalTo(relations.col("source")), "left") + .flatMap((FlatMapFunction, String>) t2 -> { + if (Optional.ofNullable(t2._2()).isPresent()) { + return Arrays.asList(t2._1(), t2._2().getTarget()).iterator(); + } else { + return Arrays.asList(t2._1()).iterator(); + } + }, Encoders.STRING()) + .distinct() + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + + .text(workingPath + "/resultIds"); + + resultIds + .joinWith(relations, resultIds.col("value").equalTo(relations.col("source"))) + .map( + (MapFunction, CSVCitation>) t2 -> mapToCitation(t2._2()), + Encoders.bean(CSVCitation.class)) + .write() + .option("compression", "gzip") + .option("header", 
"true") + .option("delimiter", Constants.SEP) + .mode(SaveMode.Overwrite) + .csv(outputPath + "/relation"); + + } + + private static CSVCitation mapToCitation(Relation relation) { + CSVCitation ret = new CSVCitation(); + ret.setId(DHPUtils.md5(relation.getSource() + relation.getRelClass().toLowerCase() + relation.getTarget())); + ret.setResult_id_cites(relation.getSource()); + ret.setResult_id_cited(relation.getTarget()); + return ret; + } + + private static void writeCommunityResultRelations(SparkSession spark, String inputPath, + Class clazz, List communityList, String outputPath) { + Utils + .readPath(spark, inputPath, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible()) + .flatMap((FlatMapFunction) p -> { + Set inserted = new HashSet<>(); + List ret = new ArrayList<>(); + + for (String context : p + .getContext() + .stream() + .map(Context::getId) + .distinct() + .collect(Collectors.toList())) { + String cId = context.contains("::") + ? 
context.substring(0, context.indexOf("::")) + : context; + if (communityList.contains(cId) && !inserted.contains(cId)) { + CSVRELCommunityResult crc = new CSVRELCommunityResult(); + crc.setResult_id(p.getId()); + crc.setCommunity_id(DHPUtils.md5(cId)); + ret.add(crc); + inserted.add(cId); + } + } + return ret.iterator(); + }, Encoders.bean(CSVRELCommunityResult.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(outputPath); + } + + private static void writeCommunityRelatedIds(SparkSession spark, String inputPath, + Class clazz, List communityList, String outputPath) { + Utils + .readPath(spark, inputPath, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible() && + isRelatedToCommunities(p, communityList)) + .map((MapFunction) Result::getId, Encoders.STRING()) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(outputPath); + + } + + private static boolean isRelatedToCommunities(R p, List communityList) { + return p + .getContext() + .stream() + .anyMatch( + c -> communityList.contains(c.getId()) || + (c.getId().contains("::") + && communityList.contains(c.getId().substring(0, c.getId().indexOf("::"))))); + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVAuthor.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVAuthor.java new file mode 100644 index 0000000..c3057e9 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVAuthor.java @@ -0,0 +1,68 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVAuthor implements Serializable { + private String id; + private String firstname; + private String lastname; + private String fullname; + private 
String orcid; + private Boolean fromOrcid; + + public Boolean getFromOrcid() { + return fromOrcid; + } + + public void setFromOrcid(Boolean fromOrcid) { + this.fromOrcid = fromOrcid; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = Constants.addQuotes(id); + } + + public String getFirstname() { + return firstname; + } + + public void setFirstname(String firstname) { + this.firstname = Constants.addQuotes(firstname); + } + + public String getLastname() { + return lastname; + } + + public void setLastname(String lastname) { + this.lastname = Constants.addQuotes(lastname); + } + + public String getFullname() { + return fullname; + } + + public void setFullname(String fullname) { + this.fullname = Constants.addQuotes(fullname); + } + + public String getOrcid() { + return orcid; + } + + public void setOrcid(String orcid) { + this.orcid = Constants.addQuotes(orcid); + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVCitation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVCitation.java new file mode 100644 index 0000000..9b0fe14 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVCitation.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVCitation implements Serializable { + private String id; + private String result_id_cites; + private String result_id_cited; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = Constants.addQuotes(id); + } + + public String getResult_id_cites() { + return result_id_cites; + } + + public void setResult_id_cites(String result_id_cites) { + this.result_id_cites = Constants.addQuotes(result_id_cites); + } + + public String getResult_id_cited() { + return result_id_cited; + } + + public void 
setResult_id_cited(String result_id_cited) { + this.result_id_cited = Constants.addQuotes(result_id_cited); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVPid.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVPid.java new file mode 100644 index 0000000..c61ae0c --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVPid.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVPid implements Serializable { + + private String id; + private String result_id; + private String pid; + private String type; + + public String getResult_id() { + return result_id; + } + + public void setResult_id(String result_id) { + this.result_id = Constants.addQuotes(result_id); + } + + public String getPid() { + return pid; + } + + public void setPid(String pid) { + this.pid = Constants.addQuotes(pid); + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = Constants.addQuotes(type); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = Constants.addQuotes(id); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRELCommunityResult.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRELCommunityResult.java new file mode 100644 index 0000000..bbc0a9a --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRELCommunityResult.java @@ -0,0 +1,31 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVRELCommunityResult implements Serializable { + private String result_id; + private String community_id; + + public String 
getResult_id() { + return result_id; + } + + public void setResult_id(String result_id) { + this.result_id = Constants.addQuotes(result_id); + } + + public String getCommunity_id() { + return community_id; + } + + public void setCommunity_id(String community_id) { + this.community_id = Constants.addQuotes(community_id); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRelResAut.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRelResAut.java new file mode 100644 index 0000000..1b334c8 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVRelResAut.java @@ -0,0 +1,31 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVRelResAut implements Serializable { + private String result_id; + private String author_id; + + public String getResult_id() { + return result_id; + } + + public void setResult_id(String result_id) { + this.result_id = Constants.addQuotes(result_id); + } + + public String getAuthor_id() { + return author_id; + } + + public void setAuthor_id(String author_id) { + this.author_id = Constants.addQuotes(author_id); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVResult.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVResult.java new file mode 100644 index 0000000..da8a78c --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/model/CSVResult.java @@ -0,0 +1,116 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv.model; + +import java.io.Serializable; + +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import org.apache.commons.lang.StringUtils; + +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; + +import eu.dnetlib.dhp.schema.oaf.Country; 
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import sun.swing.StringUIClientPropertyKey; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class CSVResult implements Serializable { + private String id; + private String type; + private String title; + private String description; + private String accessright; + private String publication_date; + private String publisher; + private String keywords; + private String country; + private String language; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = Constants.addQuotes(id); + } + + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = Constants.addQuotes(type); + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = + Constants.addQuotes(title); + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = + Constants.addQuotes(description); + } + + public String getAccessright() { + return accessright; + } + + public void setAccessright(String accessright) { + this.accessright = Constants.addQuotes(accessright); + } + + public String getPublication_date() { + return publication_date; + } + + public void setPublication_date(String publication_date) { + this.publication_date = Constants.addQuotes(publication_date); + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = Constants.addQuotes(publisher); + } + + public String getKeywords() { + return keywords; + } + + public void setKeywords(String keywords) { + this.keywords = Constants.addQuotes(keywords); + } + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = Constants.addQuotes(country); + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String 
language) { + this.language = Constants.addQuotes(language); + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java new file mode 100644 index 0000000..1f31c3c --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java @@ -0,0 +1,241 @@ + +package eu.dnetlib.dhp.oa.graph.dump.serafeim; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 04/05/23 + */ +//STEP 2 +public class SparkSelectResultsAndDumpRelations implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class); + private static String RESULT_COMMUNITY_TABLE = "/result_community"; + private static String COMMUNITY_RESULT_IDS = "/communityResultIds"; + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkSelectResultsAndDumpRelations.class + .getResourceAsStream( + 
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String workingPath = parser.get("workingPath"); + + List communityList = null; + Optional communities = Optional.ofNullable(parser.get("communities")); + if (communities.isPresent()) { + communityList = Arrays.asList(communities.get().split(";")); + } + + SparkConf conf = new SparkConf(); + + List finalCommunityList = communityList; + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + run(spark, inputPath, outputPath, workingPath, finalCommunityList); + + }); + + } + + private static void run(SparkSession spark, String inputPath, String outputPath, + String workingPath, + List communityList) { + + // select the result ids related to the set of communities considered + writeCommunityRelatedIds( + spark, inputPath, Publication.class, communityList, workingPath, "publication"); + writeCommunityRelatedIds( + spark, inputPath, Dataset.class, communityList, workingPath, "dataset"); + writeCommunityRelatedIds( + spark, inputPath, Software.class, communityList, workingPath, "software"); + writeCommunityRelatedIds( + spark, inputPath, OtherResearchProduct.class, communityList, + workingPath, "otherresearchproduct"); + + // select the relations with semantics cites + org.apache.spark.sql.Dataset relations = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> 
!r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.CITES)); + + // select the relations having as source one of the results related to the + // communities + org.apache.spark.sql.Dataset communityResultIds = spark + .read() + .textFile(workingPath + COMMUNITY_RESULT_IDS) + .distinct(); + + Utils + .readPath(spark, inputPath + "/publication", Publication.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING()) + .union( + Utils + .readPath(spark, inputPath + "/dataset", Dataset.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/software", Software.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingPath + "/resultIds"); + + org.apache.spark.sql.Dataset resultIds = spark.read().textFile(workingPath + "/resultIds"); + + org.apache.spark.sql.Dataset oksource = communityResultIds + .joinWith(relations, communityResultIds.col("value").equalTo(relations.col("source"))) + .map( + (MapFunction, Relation>) t2 -> t2._2(), + Encoders.bean(Relation.class)); + oksource + .joinWith(resultIds, oksource.col("target").equalTo(resultIds.col("value"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + 
.mode(SaveMode.Overwrite) + .json(outputPath + "/relation"); + + writeNodes( + spark, inputPath + "/publication", Publication.class, outputPath + "/publication", + outputPath + "/relation", workingPath); + writeNodes( + spark, inputPath + "/dataset", Dataset.class, outputPath + "/dataset", outputPath + "/relation", + workingPath); + writeNodes( + spark, inputPath + "/software", Software.class, outputPath + "/software", outputPath + "/relation", + workingPath); + writeNodes( + spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, + outputPath + "/otherresearchproduct", outputPath + "/relation", workingPath); + + } + + private static void writeNodes(SparkSession spark, String inputPath, Class clazz, + String outputPath, String relationPath, String workingPath) { + org.apache.spark.sql.Dataset citingRelations = Utils.readPath(spark, relationPath, Relation.class); + org.apache.spark.sql.Dataset result = Utils + .readPath(spark, inputPath, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible()); + + // take the distinct result id for source and target of the relations + citingRelations + .flatMap( + (FlatMapFunction) r -> Arrays + .asList(r.getSource(), r.getTarget()) + .iterator(), + Encoders.STRING()) + .distinct() + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .text(workingPath + "/relationIds"); + + org.apache.spark.sql.Dataset relationIds = spark.read().textFile(workingPath + "/relationIds"); + + relationIds + .joinWith(result, relationIds.col("value").equalTo(result.col("id"))) + .map((MapFunction, R>) t2 -> t2._2(), Encoders.bean(clazz)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + } + + private static void writeCommunityRelatedIds(SparkSession spark, String inputPath, + Class clazz, List communityList, String outputPath, String resultType) { + org.apache.spark.sql.Dataset results = Utils + 
.readPath(spark, inputPath + "/" + resultType, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible() && + isRelatedToCommunities(p, communityList)); + results + .map((MapFunction) Result::getId, Encoders.STRING()) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(outputPath + COMMUNITY_RESULT_IDS); + +// results +// // .repartition(10000) +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath + "/" + resultType); + + } + + private static boolean isRelatedToCommunities(R p, List communityList) { + return p + .getContext() + .stream() + .anyMatch( + c -> communityList.contains(c.getId()) || + (c.getId().contains("::") + && communityList.contains(c.getId().substring(0, c.getId().indexOf("::"))))); + } + +} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml new file mode 100644 index 0000000..bacf0a3 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml @@ -0,0 +1,289 @@ + + + + sourcePath + the source path + + + outputPath + the output path + + + communities + the communities whose products 
should be dumped + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities + --outputPath${outputPath}/community + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + --communities${communities} + + + + + + + + + yarn + cluster + select results ids connected to communities and dump relation + eu.dnetlib.dhp.oa.graph.dump.csv.SparkSelectResultsAndDumpRelations + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --workingPath${outputPath}/workingDir + --outputPath${outputPath} + --communities${communities} + + + + + + + + + + + + + + + + yarn + cluster + select results from publication + eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults 
+ dump-${projectVersion}.jar + + --executor-memory=9G + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + --workingPath${outputPath}/workingDir + --resultTypepublication + + + + + + + yarn + cluster + select results from dataset + eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + --workingPath${outputPath}/workingDir + --resultTypedataset + + + + + + + yarn + cluster + select results from other + eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + 
--sourcePath${sourcePath} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + --workingPath${outputPath}/workingDir + --resultTypeotherresearchproduct + + + + + + + yarn + cluster + select results from software + eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + --workingPath${outputPath}/workingDir + --resultTypesoftware + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + yarn + cluster + Dump single results + eu.dnetlib.dhp.oa.graph.dump.csv.SparkMoveOnSigleDir + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --workingPath${outputPath}/workingDir + + --outputPath${outputPath} + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath} + --nameNode${nameNode} + --sourcePath${workingDir}/tar + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS + --hdfsPath${outputPath} + --nameNode${nameNode} + --accessToken${accessToken} + --connectionUrl${connectionUrl} + --metadata${metadata} + 
--conceptRecordId${conceptRecordId} + --depositionType${depositionType} + --depositionId${depositionId} + + + + + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json new file mode 100644 index 0000000..2f89c84 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json @@ -0,0 +1,30 @@ +[ + + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName":"ilu", + "paramLongName":"isLookUpUrl", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"c", + "paramLongName":"communities", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json new file mode 100644 index 0000000..a78b1be --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json @@ -0,0 +1,38 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"wp", + 
"paramLongName":"workingPath", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"c", + "paramLongName":"communities", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] + + + + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json new file mode 100644 index 0000000..1aceb18 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json @@ -0,0 +1,36 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"rt", + "paramLongName":"resultType", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"rtn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json new file mode 100644 index 0000000..706e7e9 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json @@ -0,0 +1,25 @@ +[ + + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + 
"paramRequired": false + }, + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + + { + "paramName":"o", + "paramLongName":"outputPath", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml new file mode 100644 index 0000000..dc9ead6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml @@ -0,0 +1,102 @@ + + + + sourcePath + the source path + + + outputPath + the output path + + + communities + the communities whose products should be dumped + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + 
com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + yarn + cluster + select results ids connected to communities and dump relation + eu.dnetlib.dhp.oa.graph.dump.serafeim.SparkSelectResultsAndDumpRelations + dump-${projectVersion}.jar + + --executor-memory=10G + --executor-cores=3 + --driver-memory=10G + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath} + --workingPath${workingDir} + --outputPath${outputPath} + --communities${communities} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq new file mode 100644 index 0000000..620955c --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq @@ -0,0 +1,8 @@ +for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') +where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] +and ($x//context/param[./@name = 'status']/text() = 'all') +return + +{$x//CONFIGURATION/context/@id} +{$x//CONFIGURATION/context/@label} + \ No newline at end of file diff --git 
a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq new file mode 100644 index 0000000..7b470ca --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq @@ -0,0 +1,11 @@ +for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') +where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] +and (%s) +return + +{$x//CONFIGURATION/context/@id} +{$x//CONFIGURATION/context/@label} + +{$x//CONFIGURATION/context/param[@name='description']/text()} + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq new file mode 100644 index 0000000..4f257a6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq @@ -0,0 +1,8 @@ +for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') +where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] +and $x//CONFIGURATION/context[./@id=%s] +return + +{$x//CONFIGURATION/context/@id} +{$x//CONFIGURATION/context/@label} + \ No newline at end of file diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunitiesTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunitiesTest.java new file mode 100644 index 0000000..605f1ec --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunitiesTest.java @@ -0,0 +1,9 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class DumpCommunitiesTest { +} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java new file mode 100644 index 0000000..e285098 --- /dev/null +++ 
b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java @@ -0,0 +1,350 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static org.apache.commons.lang3.StringUtils.split; + +import java.io.IOException; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Element; +import org.dom4j.Node; +import org.dom4j.io.SAXReader; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Function1; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class DumpResultTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(DumpResultTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + 
.createTempDirectory(DumpResultTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(DumpResultTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(DumpResultTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testDumpResult() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/") + .getPath(); + + spark + .read() + .text( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds") + .getPath()) + .write() + .text(workingDir.toString() + "/working/resultIds/"); + + SparkDumpResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + + "-workingPath", workingDir.toString() + "/working", + "-resultType", "publication", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Dataset tmp = Utils + .readPath(spark, workingDir.toString() + "/working/publication/result", CSVResult.class); + + tmp.show(false); + + Assertions.assertEquals(5, tmp.count()); + CSVResult row = tmp + .filter( + (FilterFunction) r -> r.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .first(); + Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAccessright()); + Assertions.assertEquals("FI", row.getCountry()); + 
Assertions.assertEquals("Lit.opg., bijl.", row.getDescription()); + Assertions.assertEquals(3, split(row.getKeywords(), ", ").length); + Assertions.assertTrue(row.getKeywords().toString().contains("archeologie")); + Assertions.assertTrue(row.getKeywords().toString().contains("prospectie")); + Assertions.assertTrue(row.getKeywords().toString().contains("archaeology")); + Assertions.assertEquals("nl", row.getLanguage()); + Assertions.assertEquals("2007-01-01", row.getPublication_date()); + Assertions.assertEquals("FakePublisher1", row.getPublisher()); + Assertions + .assertEquals( + "Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel", + row.getTitle()); + Assertions.assertEquals("publication", row.getType()); + + row = tmp + .filter( + (FilterFunction) r -> r.getId().equals("50|doi_________::715fec7723208e6f17e855c204656e2f")) + .first(); + + System.out.println(row.getPublisher()); + String a = row.getPublisher().replace("\\n", " "); + System.out.println(a); +// row = tmp +// .where("id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'") +// .first(); +// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); +// Assertions.assertEquals(2, split(row.getAs("country"), ", ").length); +// Assertions.assertNull(row.getAs("description")); +// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); +// Assertions.assertEquals("UNKNOWN", row.getAs("language")); +// Assertions.assertNull(row.getAs("publication_date")); +// Assertions.assertNull(row.getAs("publisher")); +// Assertions.assertEquals("None", row.getAs("title")); +// Assertions.assertEquals("publication", row.getAs("type")); +// +// row = tmp +// .where("id = '50|DansKnawCris::26780065282e607306372abd0d808245'") +// 
.first(); +// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); +// Assertions.assertNull(row.getAs("country")); +// Assertions.assertNull(row.getAs("description")); +// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); +// Assertions.assertEquals("UNKNOWN", row.getAs("language")); +// Assertions.assertNull(row.getAs("publication_date")); +// Assertions.assertNull(row.getAs("publisher")); +// Assertions.assertEquals("None", row.getAs("title")); +// Assertions.assertEquals("publication", row.getAs("type")); + + } + + @Test + public void testDumpAuthor() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/") + .getPath(); + + spark + .read() + .text( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds") + .getPath()) + .write() + .text(workingDir.toString() + "/working/resultIds/"); + + SparkDumpResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + + "-workingPath", workingDir.toString() + "/working", + "-resultType", "publication", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Dataset tmp = Utils + .readPath(spark, workingDir.toString() + "/working/publication/author", CSVAuthor.class); + + Assertions.assertEquals(13, tmp.count()); + + Assertions.assertEquals(1, tmp.where("firstName == 'Maryam'").count()); + + Assertions + .assertEquals( + DHPUtils.md5("50|DansKnawCris::0224aae28af558f21768dbc6439c7a951"), + tmp.where("firstName == 'Maryam'").first().getId()); + Assertions + .assertEquals(DHPUtils.md5("0000-0003-2914-2734"), tmp.where("firstName == 
'Michael'").first().getId()); + Assertions + .assertEquals( + DHPUtils.md5("0000-0002-6660-5673"), + tmp.where("firstName == 'Mikhail'").first().getId()); + + } + + @Test + public void testDumpResultAuthorRelations() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/") + .getPath(); + + spark + .read() + .text( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds") + .getPath()) + .write() + .text(workingDir.toString() + "/working/resultIds/"); + + SparkDumpResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/output", + "-workingPath", workingDir.toString() + "/working", + "-resultType", "publication", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Dataset tmp = spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/working/publication/result_author"); + + Assertions.assertEquals(6, tmp.count()); + + Assertions.assertEquals(2, tmp.where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'").count()); + Assertions + .assertEquals( + 1, tmp + .where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'") + .where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95'") + .count()); + Assertions + .assertEquals( + 1, tmp + .where("author_id == '" + DHPUtils.md5("0000-0003-2914-2734") + "'") + .where("result_id == '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'") + .count()); + + } + + @Test + public void testDumpResultPid() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/") + .getPath(); + + spark + .read() + .text( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds") + .getPath()) 
+ .write() + .text(workingDir.toString() + "/working/resultIds/"); + + SparkDumpResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/output", + "-workingPath", workingDir.toString() + "/working", + "-resultType", "publication", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Dataset tmp = spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/working/publication/result_pid"); + + tmp.show(false); + Assertions.assertEquals(4, tmp.count()); + + Assertions + .assertEquals(2, tmp.where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95'").count()); + Assertions + .assertEquals( + "10.1023/fakedoi", + tmp + .where("result_id == '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' and type == 'doi'") + .first() + .getAs("pid")); + + } + + @Test + public void prova() throws DocumentException { + String input = "" + + " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields." 
+ + + ""; + + final Document doc; + final SAXReader reader = new SAXReader(); + + doc = reader.read(new StringReader(input)); + Element root = doc.getRootElement(); + StringBuilder builder = new StringBuilder(); + builder.append(DHPUtils.md5(root.attribute("id").getValue())); + builder.append(Constants.SEP); + builder.append(root.attribute("label").getValue()); + builder.append(Constants.SEP); + builder.append(root.attribute("id").getValue()); + builder.append(Constants.SEP); + builder.append(((Node) (root.selectNodes("//description").get(0))).getText()); + System.out.println(builder.toString()); + } +} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java new file mode 100644 index 0000000..279ba40 --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java @@ -0,0 +1,119 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import static org.apache.commons.lang3.StringUtils.split; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; + +/** + * @author miriam.baglioni + * @Date 25/05/23 + */ +public class MoveOnSingleDirTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static 
final Logger log = LoggerFactory + .getLogger(MoveOnSingleDirTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(MoveOnSingleDirTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(MoveOnSingleDirTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(MoveOnSingleDirTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testDMoveSingleDir() throws Exception { + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working") + .getPath(); + + spark + .read() + .text( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds") + .getPath()) + .write() + .text(workingDir.toString() + "/working/resultIds/"); + + SparkMoveOnSigleDir.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/output", + "-workingPath", workingPath + }); + + Dataset tmp = spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/output/result"); + + Assertions.assertEquals(22, tmp.count()); + Assertions.assertEquals(12, tmp.filter("type == 'dataset'").count()); + Assertions.assertEquals(4, tmp.filter("type == 'other'").count()); + Assertions.assertEquals(5, tmp.filter("type == 'publication'").count()); + 
Assertions.assertEquals(1, tmp.filter("type == 'software'").count()); + + tmp.filter("type == 'publication'").show(false); + + Assertions + .assertEquals( + 8, spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/output/author") + .count()); + + } +} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/SelectResultAndDumpRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/SelectResultAndDumpRelationTest.java new file mode 100644 index 0000000..a4eed68 --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/SelectResultAndDumpRelationTest.java @@ -0,0 +1,221 @@ + +package eu.dnetlib.dhp.oa.graph.dump.csv; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRELCommunityResult; +import eu.dnetlib.dhp.utils.DHPUtils; + +/** + * @author miriam.baglioni + * @Date 11/05/23 + */ +public class SelectResultAndDumpRelationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(SelectResultAndDumpRelationTest.class); + + private static HashMap 
map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(SelectResultAndDumpRelationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(SelectResultAndDumpRelationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SelectResultAndDumpRelationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void test1() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/csv/input/") + .getPath(); + + SparkSelectResultsAndDumpRelations.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/output", + "-workingPath", workingDir.toString() + "/working", + "-communities", "enermaps;dh-ch", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Assertions.assertEquals(2, sc.textFile(workingDir.toString() + "/working/communityResultIds").count()); + + Assertions + .assertEquals( + 1, sc + .textFile(workingDir.toString() + "/working/communityResultIds") + .filter(v -> v.equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .count()); + + Assertions + .assertEquals( + 1, sc + .textFile(workingDir.toString() + "/working/communityResultIds") + .filter(v -> v.equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .count()); + + // 
verify that the association is correct with the communityid and result id + spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/output/result_community") + .createOrReplaceTempView("result_community"); + + Assertions.assertEquals(3, spark.sql("SELECT * FROM result_community").count()); + + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * " + + "FROM result_community " + + "WHERE community_id = '" + DHPUtils.md5("dh-ch") + "'") + .count()); + + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * " + + "FROM result_community" + + " WHERE result_id = '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' " + + "AND community_id = '" + DHPUtils.md5("dh-ch") + "'") + .count()); + + Assertions + .assertEquals( + 2, spark + .sql( + "SELECT * " + + "FROM result_community " + + "WHERE community_id = '" + DHPUtils.md5("enermaps") + "'") + .count()); + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * " + + "FROM result_community " + + "WHERE result_id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9' " + + "AND community_id = '" + DHPUtils.md5("enermaps") + "'") + .count()); + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * " + + "FROM result_community " + + "WHERE result_id = '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95' " + + "AND community_id = '" + DHPUtils.md5("enermaps") + "'") + .count()); + + Assertions.assertEquals(3, spark.read().textFile(workingDir.toString() + "/working/resultIds").count()); + + Assertions + .assertEquals( + 1, sc + .textFile(workingDir.toString() + "/working/resultIds") + .filter(v -> v.equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .count()); + + Assertions + .assertEquals( + 1, sc + .textFile(workingDir.toString() + "/working/resultIds") + .filter(v -> v.equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .count()); + + Assertions + .assertEquals( + 1, sc + .textFile(workingDir.toString() + "/working/resultIds") 
+ .filter(v -> v.equals("50|DansKnawCris::26780065282e607306372abd0d808245")) + .count()); + + spark + .read() + .option("header", "true") + .option("delimiter", Constants.SEP) + .csv(workingDir.toString() + "/output/relation") + .createOrReplaceTempView("relation"); + + Assertions.assertEquals(2, spark.sql("SELECT * FROM relation").count()); + + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * FROM relation WHERE id = '" + + DHPUtils + .md5( + ("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9cites50|DansKnawCris::26780065282e607306372abd0d808245")) + + "'") + .count()); + + Assertions + .assertEquals( + 1, spark + .sql( + "SELECT * FROM relation WHERE id = '" + + DHPUtils + .md5( + ("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9cites50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + + "'") + .count()); + + } + +} diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/dataset b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/dataset new file mode 100644 index 0000000..e69de29 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/otherresearchproduct b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/otherresearchproduct new file mode 100644 index 0000000..e69de29 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication new file mode 100644 index 0000000..0148148 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication @@ -0,0 +1,6 @@ +{"author":[{"affiliation":[],"fullname":"Alrasheed, Maryam","name":"Maryam","pid":[],"rank":1,"surname":"Alrasheed"},{"affiliation":[],"fullname":"Blondin, 
Michael","name":"Michael","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-2914-2734"}],"rank":1,"surname":"Blondin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}, {"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[{"classid":"FI","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysim
port:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1023/fakedoi"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"FakePublisher1"},"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"t
rust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Blondin, Michael","name":"Michael","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and 
Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-2914-2734"}],"rank":1,"surname":"Blondin"},{"affiliation":[],"fullname":"Raskin, Mikhail","name":"Mikhail","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6660-5673"}],"rank":2,"surname":"Raskin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}],"contributor":[],"country":[{"classid":"IT","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"},{"classid":"FI","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":
"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Ward, Mark Daniel","name":"Mark Daniel","pid":[],"rank":1,"surname":"Ward"},{"affiliation":[],"fullname":"Szpankowski, Wojciech","name":"Wojciech","pid":[],"rank":2,"surname":"Szpankowski"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d808245","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","sch
emename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d80fake","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCit
e_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", 
"schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0013", "classname": "Part of book or chapter of book", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1090/dimacs/044/20"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemename": "dnet:dataCite_date", "schemeid": "dnet:dataCite_date"}, "value": "2017-04-27"}, {"qualifier": {"classid": "published-print", "classname": "published-print", "schemename": "dnet:dataCite_date", "schemeid": "dnet:dataCite_date"}, "value": "1998-10-19"}], "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "id":"50|doi_________::715fec7723208e6f17e855c204656e2f", "subject": [], "dateofacceptance": {"value": "1998-10-19"}, "lastupdatetimestamp": 1675978002598, "author": [{"surname": "Deaton", "fullname": "R. Deaton", "pid": [], "name": "R.", "rank": 1}, {"surname": "Murphy", "fullname": "R. Murphy", "pid": [], "name": "R.", "rank": 2}, {"surname": "Garzon", "fullname": "M. Garzon", "pid": [], "name": "M.", "rank": 3}, {"surname": "Franceschetti", "fullname": "D. Franceschetti", "pid": [], "name": "D.", "rank": 4}, {"surname": "Stevens", "fullname": "S. 
Stevens", "pid": [], "name": "S.", "rank": 5}], "instance": [{"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemename": "dnet:review_levels", "schemeid": "dnet:review_levels"}, "collectedfrom": {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, "hostedby": {"dataInfo": {"invisible": false, "deletedbyinference": false}, "value": "Unknown Repository", "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "dateofacceptance": {"value": "1998-10-19"}, "url": ["https://doi.org/10.1090/dimacs/044/20"], "measures": [{"id": "influence", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "1.9184702E-8", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "5.79069E-9", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": 
""}, "value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "51", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "0.8491071", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "2", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, 
"value": "C", "key": "class"}]}], "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1090/dimacs/044/20"}], "instancetype": {"classid": "0013", "classname": "Part of book or chapter of book", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "dateofcollection": "2023-02-09T21:26:42Z", "fulltext": [], "description": [], "format": [], "measures": [{"id": "influence", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "1.7008906E-8", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "5.1452145E-9", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": 
"dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "51", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "0.50946426", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "2", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}], "coverage": [], "externalReference": [], "publisher": {"value": "American Mathematical\\n Society"}, "eoscifguidelines": [], "language": {"classid": "und", "classname": 
"Undetermined", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1090/dimacs/044/20", "50|doiboost____::715fec7723208e6f17e855c204656e2f"], "source": [{"value": "Crossref"}], "context": [], "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Good encodings for DNA-based solutions to combinatorial problems"}]} +{"author":[{"affiliation":[{"value":"Royal Institute of Technology"}],"fullname":"Athina Tympakianaki","pid":[{"qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://academic.microsoft.com/#/detail/2746890261"}],"rank":1},{"affiliation":[{"value":"Northeastern University"}],"fullname":"Haris N. Koutsopoulos","pid":[{"qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://academic.microsoft.com/#/detail/1543483944"}],"rank":2},{"affiliation":[{"value":"Royal Institute of Technology"}],"fullname":"Haris N. 
Koutsopoulos","pid":[{"qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://academic.microsoft.com/#/detail/1543483944"}],"rank":2},{"affiliation":[{"value":"Royal Institute of Technology"}],"fullname":"Erik Jenelius","pid":[{"qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://academic.microsoft.com/#/detail/44823834"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4106-3126"}],"rank":3}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"Open Researcher and Contributor ID Registry"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"value":"2018-01-01"},"dateofcollection":"2023-05-12T17:10:38Z","description":[{"value":"The Simultaneous Perturbation Stochastic Approximation (SPSA) algorithm has been used for solving the off-line dynamic origin-destination (OD) estimation problem. 
While the algorithm can be used wi ..."}],"eoscifguidelines":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________::16e142b54fbddb2cf1c71ff7460e2792","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","openAccessRoute":"gold","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"value":"2018-01-01"},"hostedby":{"key":"10|issn___print::8e5fa0b3dde7aa9c08716c4705189ead","value":"Procedia Computer Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/"},"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.5640458E-9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.2657568E-8"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceAction
s"},"trust":""},"key":"class","value":"C"}]},{"id":"influence_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"11"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.9216"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"impulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.procs.2018.04.012"}],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://doi.org/10.1016/j.procs.2018.04.012"]}],"journal":{"ep":"64","issnPrinted":"1877-0509","name":"Procedia Computer Science","sp":"57","vol":"130"},"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1683911438408,"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.819284E-9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"1.9254836E-8"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"influence_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid
":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"11"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.35296"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]},{"id":"impulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"class","value":"C"}]}],"originalId":["S1877050918303624","10.1016/j.procs.2018.04.012","50|doiboost____::16e142b54fbddb2cf1c71ff7460e2792","2786369073"],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.procs.2018.04.012"}],"publisher":{"value":"Elsevier 
BV"},"relevantdate":[{"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-04-24"},{"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-01-01"}],"resourcetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"value":"Crossref"},{"value":"ANT/SEIT"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"050210 logistics & transportation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"021103 operations research"},{"qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Computer 
science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"05 social sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.891"},"qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"MathematicsofComputing_NUMERICALANALYSIS"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0211 other engineering and technologies"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"02 engineering and technology"},{"qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Matrix estimation"},{"qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Simultaneous perturbation stochastic approximation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0502 economics and business"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"General Earth and Planetary Sciences"},{"qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Algorithm"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7245"},"qualifier":{"classid":"arxiv","classname":"arXiv","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Computer Science::Databases"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"General Environmental Science"}],"title":[{"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Robust SPSA algorithms for dynamic OD matrix estimation"},{"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Robust SPSA algorithms for dynamic OD matrix estimation"}]}' \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation new file mode 100644 index 0000000..9987812 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation @@ -0,0 +1,5 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::26780065282e607306372abd0d808245","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808246"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808245"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|doi_________::715fec7723208e6f17e855c204656e2f"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/software b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/software new file mode 100644 index 0000000..e69de29 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/author/part0 new file mode 100644 index 0000000..e66ef9c --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/author/part0 @@ -0,0 +1,4 @@ +{"fullname":"Giovanni Aloisio","id":"5ac035663df4d9099cf92d0e3f22a964","orcid":""} +{"fullname":"Cosimo Palazzo","id":"9f0d3123b6390dd7b2f3cee66c6bc926","orcid":""} +{"firstname":"L","fullname":"L, Issel-Tarver","id":"bafb7637b5f1c692419e55b13bf719a3","lastname":"Issel-Tarver","orcid":""} +{"firstname":"Voula","fullname":"Giouli, Voula","id":"c80f55a9afb32ffc4bc6bb67b6e0df33","lastname":"Giouli","orcid":""} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result/part0 new file mode 100644 index 0000000..077a321 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result/part0 @@ -0,0 +1,12 @@ 
+{"accessright":"UNKNOWN","country":"","description":"Absidiole NE_face ext","id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","keywords":"chevet, fenêtre, façade","language":"und","publication_date":"2019-01-01","publisher":"Nakala by Huma-Num","title":"QS83_17_Absidiole NE_face ext.jpg","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","keywords":"kiu38; http://sith.huma-num.fr/karnak/38","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 69534. Karnak, KIU 38 / stèle d’enceinte de ramsès iii XXe dynastie / Ramses III","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","keywords":"kiu2869; http://sith.huma-num.fr/karnak/2869","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 8263. Karnak, KIU 2869 / Cour à portique de Thoutmosis IV, Scene, piliers, pilier 03 est : accolade XVIIIe dynastie / Thoutmosis IV","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","keywords":"kiu4635; http://sith.huma-num.fr/karnak/4635","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 171030. Karnak, KIU 4635 / Cour nord du IVe pylône porte sud-est, face nord, montants est XVIIIe dynastie / Thoutmosis III","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","keywords":"kiu4225; http://sith.huma-num.fr/karnak/4225, kiu4217; http://sith.huma-num.fr/karnak/4217, kiu4218; http://sith.huma-num.fr/karnak/4218","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 151603. 
Karnak, KIU 4217 / Temple d’Opet, Soubassement, face extérieure est, soubassement, 1er registre sud 10.n (opet 213 gauche) Romains / Auguste","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","keywords":"kiu3479; http://sith.huma-num.fr/karnak/3479","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 198480. Karnak, KIU 3479 / VIe pylône, Scene, mur intérieur est, partie nord 3.s annales (vi) : XVIIIe dynastie / Thoutmosis III","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","keywords":"op179; http://sith.huma-num.fr/karnak/op179, kiu1114; http://sith.huma-num.fr/karnak/1114","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 135670. Karnak, KIU 1114 / Temple de Ptah, Objet, objet(s) découvert(s) porte de grenier XVIIe dynastie / SenakhtenRe","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","keywords":"kiu7329; http://sith.huma-num.fr/karnak/7329, kiu7330; http://sith.huma-num.fr/karnak/7330, kiu7331; http://sith.huma-num.fr/karnak/7331","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 169666. Karnak, KIU 7330 / Salle hypostyle colonnes, côté sud, colonne 017, fût frise XXe dynastie / Ramses IV","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","keywords":"kiu2185; http://sith.huma-num.fr/karnak/2185","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 128938. 
Karnak, KIU 2185 / « Magasin pur » de Khonsou, Objet porte fragmentaire du « magasin pur » de khonsou Ptolemees / Ptolemee Evergete Ier","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","keywords":"kiu4212; http://sith.huma-num.fr/karnak/4212","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 151470. Karnak, KIU 4212 / Temple d’Opet, Scene, face extérieure est, soubassement, 1er registre sud 04.n (opet 210 gauche) : procession de nils Romains / Auguste","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","keywords":"kiu3939; http://sith.huma-num.fr/karnak/3939, kiu3822; http://sith.huma-num.fr/karnak/3822, kiu3823; http://sith.huma-num.fr/karnak/3823, kiu3825; http://sith.huma-num.fr/karnak/3825","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 141190. Karnak, KIU 3939 / Temple d’Opet face extérieure sud, soubassement, 1er registre bandeau (opet 266-267) Romains / Auguste","type":"dataset"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","keywords":"kiu5592; http://sith.huma-num.fr/karnak/5592, kiu8128; http://sith.huma-num.fr/karnak/8128, kiu8129; http://sith.huma-num.fr/karnak/8129, kiu8130; http://sith.huma-num.fr/karnak/8130","language":"und","publication_date":"","publisher":"Nakala by Huma-Num","title":"CNRS-CFEETK 167789. 
Karnak","type":"dataset"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_author/part0 new file mode 100644 index 0000000..12baaf5 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_author/part0 @@ -0,0 +1,19 @@ +{"author_id":"54ecb1d939e05ac0542d6af377100e67","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816"} +{"author_id":"06706770e1fb3b89fea4d0a8a60e7809","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a"} +{"author_id":"3afe02a6563ca7c30df007d69645f730","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070"} +{"author_id":"440464bc227f8371c905779a4641d49a","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1"} +{"author_id":"3d0c4aa051cdc1cc71907a973f616767","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9"} +{"author_id":"874398e3c71ba2e8cf76de4ba458d5fb","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c"} +{"author_id":"fe165c3a039f1cc4301c9dbd7c7f2247","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345"} +{"author_id":"b3b2b99a02b1bbd8d4b5a1994b8d60fe","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb"} +{"author_id":"be12aee5482275608067a3cab9e8beb6","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35"} +{"author_id":"dde164aefcd3aebafec84feedd999170","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2"} +{"author_id":"3a55a188e8a23e645752055ff18d4720","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5"} +{"author_id":"a0bcddc2a41a4cc0dd768eced4dd0939","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b"} +{"author_id":"51b2a67f20cdfd9628233ebf04158468","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6"} 
+{"author_id":"dfad2f4b741f4fbac8f504dd0088db06","result_id":"50|r3f5b9831893::f82af1f6dfd2b8644ba3ab799285849f"} +{"author_id":"b52f90003de8e73f2f704ced12b83bba","result_id":"50|r3f5b9831893::fb7cf14ef55474c3b745262fea21d4c0"} +{"author_id":"08e7328f7c44b32e1203374aadbedf0c","result_id":"50|doi_dedup___::c7a29e095e1763e09af2eb0e2ffbb717"} +{"author_id":"c8c6c6273e798cf408f848afd8ca13f8","result_id":"50|r3f5b9831893::0bc48082a3803d837098447a4f8fb28d"} +{"author_id":"16d0306f0af215d9ec8f70660026d585","result_id":"50|r3f5b9831893::1a372b7640db956b13716fc5e7b455b7"} +{"author_id":"c0a97e8f55967dedb4a57125e3174816","result_id":"50|r3f5b9831893::1b8dec9230423314146858112059845d"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_pid/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_pid/part0 new file mode 100644 index 0000000..9a4ac23 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/dataset/result_pid/part0 @@ -0,0 +1,33 @@ +{"id":"58c75fe64b4df0126e0e4fdfafb8be18","pid":"http://hdl.handle.net/11280/86e6ac0d","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"handle"} +{"id":"45c62956554c7d3e7f9708bce5c9a086","pid":"11280/86e6ac0d","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"handle"} +{"id":"312a5c89fa6d82ccc66c1b9615d3d364","pid":"10.34847/nkl.7f846pnw","result_id":"50|doi_dedup___::f126b46ff3cea748ffbda3ae4e9ce816","type":"doi"} +{"id":"cb29ee70d77746445ca5ce5f121bc473","pid":"http://hdl.handle.net/11280/747fab4a","result_id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","type":"handle"} +{"id":"45a465d38aabff009c0fcf41c2f08c67","pid":"11280/747fab4a","result_id":"50|r3f5b9831893::0676bf8b1f33afc121ac4f28e1c3d8ad","type":"handle"} 
+{"id":"cc956040bd5031ecec943d91e8b764fb","pid":"11280/51909d00","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","type":"handle"} +{"id":"726c5eef33521e505ef9cb48fe75d596","pid":"http://hdl.handle.net/11280/51909d00","result_id":"50|r3f5b9831893::0b92f33d78d42f54084145b91500941a","type":"handle"} +{"id":"32429dfa16fa2847b0286efaf0a0dce8","pid":"11280/fc581aa4","result_id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","type":"handle"} +{"id":"554994db0c44fe13283444e190ac9607","pid":"http://hdl.handle.net/11280/fc581aa4","result_id":"50|r3f5b9831893::157349520d61226da5d85e0856bdae3e","type":"handle"} +{"id":"88a301e2cadf5e691ebb6a5665eb78f4","pid":"http://hdl.handle.net/11280/1cfc2896","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","type":"handle"} +{"id":"2f15200f24a870ff9edb3913e292d61f","pid":"11280/1cfc2896","result_id":"50|r3f5b9831893::18b2aa2b1b9a2a11da84bc8e1f662070","type":"handle"} +{"id":"027c0e2083ab8ea468469a34fe9d46e1","pid":"http://hdl.handle.net/11280/3b2225c5","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","type":"handle"} +{"id":"8466cbb68b2d1c541b056006b7f27ea4","pid":"11280/3b2225c5","result_id":"50|r3f5b9831893::31f713b5670d801de154453ea68ff4e1","type":"handle"} +{"id":"bac82482f2dba75f8e34802ed7789554","pid":"http://hdl.handle.net/11280/f3911908","result_id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","type":"handle"} +{"id":"8cd4bb9ef9c8007155a95ee9df90ea69","pid":"11280/f3911908","result_id":"50|r3f5b9831893::358b383fd0975b292edafd6b1d1fe9a2","type":"handle"} +{"id":"ba83be852322c4c86ed6b3ab0610987d","pid":"11280/65056b94","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","type":"handle"} +{"id":"93cd2ffff769223cf04034e0db0f6284","pid":"http://hdl.handle.net/11280/65056b94","result_id":"50|r3f5b9831893::4cc834e3443c27cb7b0100a470c5c7f9","type":"handle"} 
+{"id":"c5dcb6dab6f53a281f96bfbe048858ce","pid":"http://hdl.handle.net/11280/dac5fe22","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","type":"handle"} +{"id":"999076fd410cdb0c1599b7d5e355b94a","pid":"11280/dac5fe22","result_id":"50|r3f5b9831893::516950eba1c6737cbe26a52401b3fb2c","type":"handle"} +{"id":"ef68e036a7e753da17a2794ccf1b8ce5","pid":"http://hdl.handle.net/11280/446e3387","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","type":"handle"} +{"id":"5377b0f0143c324176bbee897d9d966c","pid":"11280/446e3387","result_id":"50|r3f5b9831893::71182950600db8e6aff20566f9df0345","type":"handle"} +{"id":"9e588201f52f05fca56efc43583ca615","pid":"http://hdl.handle.net/11280/969ae30a","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","type":"handle"} +{"id":"f64681856cadef587b4c34396e9e6861","pid":"11280/969ae30a","result_id":"50|r3f5b9831893::99592f6a6bc8b9b67b0d8f1612e310bb","type":"handle"} +{"id":"4ad4d6c56ce6e206c42849df92d894f5","pid":"http://hdl.handle.net/11280/dddf5851","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","type":"handle"} +{"id":"0b3ea2f9c96eb9593fd9b21363b7d9f6","pid":"11280/dddf5851","result_id":"50|r3f5b9831893::abd19ac4153416d0eb73b8f2e7612d35","type":"handle"} +{"id":"45dc28539b305d186f51d5ee9465aee0","pid":"http://hdl.handle.net/11280/3f2679d9","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2","type":"handle"} +{"id":"b9c5beb054f3ca72477cb1b07351196a","pid":"11280/3f2679d9","result_id":"50|r3f5b9831893::b26848c3000fbd7153e2fdeaf3d70bd2","type":"handle"} +{"id":"ee0120c72b2f9c1fc1dd3cf47c98ac9d","pid":"http://hdl.handle.net/11280/d957e9f3","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5","type":"handle"} +{"id":"4770ff66784a0b9470551d46e7a0aaa0","pid":"11280/d957e9f3","result_id":"50|r3f5b9831893::b94d49cfb4ea230b784be1fe24f0edd5","type":"handle"} 
+{"id":"3cf2316ff497fda37d07757e72307173","pid":"11280/e8d8ed9f","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b","type":"handle"} +{"id":"5a9092d335d45be6d01f9d6af99c9d86","pid":"http://hdl.handle.net/11280/e8d8ed9f","result_id":"50|r3f5b9831893::ef9f1724cef04a9f62bdf90d9084d70b","type":"handle"} +{"id":"37018c7be9823e3c49aeff0e9ae69054","pid":"http://hdl.handle.net/11280/9ff65944","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6","type":"handle"} +{"id":"c372305e06eacc7855c7de0e3fc6df07","pid":"11280/9ff65944","result_id":"50|r3f5b9831893::f349ea5bdd91d846e70e6a4a3c71ccd6","type":"handle"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/author/part1 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/author/part1 new file mode 100644 index 0000000..28a7797 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/author/part1 @@ -0,0 +1,2 @@ +{"firstname":"Taal En Spraaktechnologie","fullname":"LS OZ Taal en spraaktechnologie","id":"60fa4ab9fa107f5281b91c1db2885bf9","lastname":"Ls Oz","orcid":""} +{"fullname":"Nispen, van, Annelies","id":"1279ef1ced7366cc6af25a2079ab4554","orcid":""} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result/part0 new file mode 100644 index 0000000..963e4ab --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result/part0 @@ -0,0 +1,4 @@ +{"accessright":"OPEN","country":"","description":"","id":"50|core_ac_uk__::15d72bdde1addf525170aa61664f8daf","keywords":"","language":"eng","publication_date":"","publisher":"Springer International Publishing","title":"Reengineering and Reinventing both Democracy and the Concept of 
Life in the Digital Era","type":"other"} +{"accessright":"OPEN","country":"IT","description":"","id":"50|od______3686::b0cb086c9a0222684d48b3e355eba1c8","keywords":"","language":"und","publication_date":"2002-01-01","publisher":"","title":"Progetto dell’impianto eolico di Pescopagano (Potenza), progetto secondo classificato al Concorso nazionale “Paesaggi del Vento”, progetto pubblicato in: E. Zanchini , a cura di, Paesaggi del vento, Meltemi, Roma 2002 , pp.84-89","type":"other"} +{"accessright":"OPEN","country":"NL","description":"This article reports about the on-going work on a new version of the metadata framework Component Metadata Infrastructure (CMDI), central to the CLARIN infrastructure. Version 1.2 introduces a number of important changes based on the experience gathered in the last five years of intensive use of CMDI by the digital humanities community, addressing problems encountered, but also introducing new functionality. Next to the consolidation of the structure of the model and schema sanity, new means for lifecycle management have been introduced aimed at combatting the observed proliferation of components, new mechanism for use of external vocabularies will contribute to more consistent use of controlled values and cues for tools will allow improved presentation of the metadata records to the human users. 
The feature set has been frozen and approved, and the infrastructure is now entering a transition phase, in which all the tools and data need to be migrated to the new version.","id":"50|narcis______::07cab979c27c9240f7ef5d80d752679b","keywords":"","language":"eng","publication_date":"2015-08-26","publisher":"Linköping University Electronic Press, Linköpings universitet","title":"CMDI 1.2: Improvements in the CLARIN Component Metadata Infrastructure","type":"other"} +{"accessright":"OPEN","country":"NL","description":"This paper describes what the CLARIN infrastructure is and how it can be used, with a focus on the Netherlands part of the CLARIN infrastructure. It aims to explain how a humanities researcher can use the CLARIN infrastructure.","id":"50|narcis______::655f9ef445ffa66a1782f29208cc1569","keywords":"","language":"eng","publication_date":"2014-08-20","publisher":"UiL OTS","title":"The CLARIN infrastructure in the Netherlands: What is it and how can you use it?","type":"other"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_author/part0 new file mode 100644 index 0000000..ac79f71 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_author/part0 @@ -0,0 +1,17 @@ +{"author_id":"af07dd90a1f0be8159e52f7f572d1c5c","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"} +{"author_id":"9f24c2ed6e1cb057772b641806ae77ec","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"} +{"author_id":"9ad1701184de323823fc1a858a868ac2","result_id":"50|narcis______::14afd8c5c46d17af87ceef410ab25e01"} +{"author_id":"de106449e38166d8cf2ac7bb7bb6c5d8","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} 
+{"author_id":"8a157b06eaaf9fbca8b67011bc374744","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"10bffdada7578cec278ba1a5e3d63da5","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"d2a8ebfa553c4f6ff90998bd1c58fbcc","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"86b929edfab2d532f075506559a6ac76","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"478c134423c1afa8bb2ee174014726af","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"ba92d49768133c928d102eb86cb3690c","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"d590f7127b93a0b6003cbed3bd20983b","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"c146c73851641e52e6ea1adc6f271fd1","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"e3e6238baf917a025bcbff8be9288393","result_id":"50|r3730f562f9e::36d61b2d7feb632e94e4f8113b890c6d"} +{"author_id":"e1a361a13f6595628524b87b6fa29918","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"} +{"author_id":"5764f46e7ded9260eadea13e81fdf0fe","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"} +{"author_id":"b56a640d36a2dc9e3dc88401edb61149","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"} +{"author_id":"e08632d458b519b66e575dd5b7eb54e9","result_id":"50|dedup_wf_001::e9457bd83cfd425b8779f239c96e0ffe"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_pid/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_pid/part0 new file mode 100644 index 0000000..cd92ae4 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/otherresearchproduct/result_pid/part0 @@ -0,0 +1,5 @@ 
+{"id":"3ff0ab5e679c5320381c857d8699cd4a","pid":"10.5281/zenodo.2657248","result_id":"50|doi_dedup___::84db353272d83833fa76ec87fc540e63","type":"doi"} +{"id":"935716d050a36d36f797e843187b8192","pid":"https://hdl.handle.net/21.11115/0000-000e-0ff1-2","result_id":"50|r369162d0a40::da892118ba0be7a5cf695ad54ae5147e","type":"handle"} +{"id":"133b9dd1a59099adc577004209e83c52","pid":"21.11115/0000-000e-0ff1-2","result_id":"50|r369162d0a40::da892118ba0be7a5cf695ad54ae5147e","type":"handle"} +{"id":"8e17b86e61db6c34ec741eabe947ea9f","pid":"https://hdl.handle.net/21.11115/0000-000e-ce31-3","result_id":"50|r369162d0a40::b69a5145a8e41bdaa33c24be67c209f1","type":"handle"} +{"id":"b7cc730f4cbb6d379d5c4f57369978b3","pid":"21.11115/0000-000e-ce31-3","result_id":"50|r369162d0a40::b69a5145a8e41bdaa33c24be67c209f1","type":"handle"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/author/part0 new file mode 100644 index 0000000..e66ef9c --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/author/part0 @@ -0,0 +1,4 @@ +{"fullname":"Giovanni Aloisio","id":"5ac035663df4d9099cf92d0e3f22a964","orcid":""} +{"fullname":"Cosimo Palazzo","id":"9f0d3123b6390dd7b2f3cee66c6bc926","orcid":""} +{"firstname":"L","fullname":"L, Issel-Tarver","id":"bafb7637b5f1c692419e55b13bf719a3","lastname":"Issel-Tarver","orcid":""} +{"firstname":"Voula","fullname":"Giouli, Voula","id":"c80f55a9afb32ffc4bc6bb67b6e0df33","lastname":"Giouli","orcid":""} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 new file mode 100644 index 0000000..704eacd --- /dev/null +++ 
b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 @@ -0,0 +1,5 @@ +{"accessright":"OPEN","country":"","description":"We describe the CoNLL-2002 shared task: language-independent named entity recognition. We give background information on the data sets and the evaluation method, present a general overview of the systems that have taken part in the task and discuss their performance.","id":"50|doi_dedup___::13b14c741a7b3420591c161f54ed5c80","keywords":"computer science - computation and language, i.2.7, computation and language (cs.cl), fos: computer and information sciences","language":"eng","publication_date":"2002-09-05","publisher":"","title":"Introduction to the CoNLL-2002 Shared Task: Language-Independent Named Entity Recognition","type":"publication"} +{"accessright":"OPEN","country":"GB","description":"Following a strategy similar to that used in baker's yeast (Herrgård et al. Nat Biotechnol 26:1155-1160, 2008). A consensus yeast metabolic network obtained from a community approach to systems biology (Herrgård et al. 2008; Dobson et al. BMC Syst Biol 4:145, 2010). Further developments towards a genome-scale metabolic model of yeast (Dobson et al. 2010; Heavner et al. BMC Syst Biol 6:55, 2012). Yeast 5-an expanded reconstruction of the Saccharomyces cerevisiae metabolic network (Heavner et al. 2012) and in Salmonella typhimurium (Thiele et al. BMC Syst Biol 5:8, 2011). A community effort towards a knowledge-base and mathematical model of the human pathogen Salmonellatyphimurium LT2 (Thiele et al. 2011), a recent paper (Thiele et al. Nat Biotechnol 31:419-425, 2013). A community-driven global reconstruction of human metabolism (Thiele et al. 
2013) described a much improved 'community consensus' reconstruction of the human metabolic network, called Recon 2, and the authors (that include the present ones) have made it freely available via a database at http://humanmetabolism.org/ and in SBML format at Biomodels (http://identifiers.org/biomodels.db/MODEL1109130000. This short analysis summarises the main findings, and suggests some approaches that will be able to exploit the availability of this model to advantage. © 2013 The Author(s).","id":"50|doi_dedup___::e0392f427fea9a701aa469e6f24bdf93","keywords":"review article, metabolism, modelling, systems biology, networks, metabolic networks, clinical biochemistry, biochemistry, endocrinology, diabetes and metabolism, community approach, operations research, metabolic network, human metabolism, metabolic model, biology, computational biology, sbml, 03 medical and health sciences, 0302 clinical medicine, 0303 health sciences, 030220 oncology & carcinogenesis, 030304 developmental biology, researchinstitutes_networks_beacons/manchester_institute_of_biotechnology, manchester institute of biotechnology","language":"eng","publication_date":"2013-08-01","publisher":"Springer US","title":"An analysis of a ‘community-driven’ reconstruction of the human metabolic network","type":"publication"} +{"accessright":"OPEN","country":"","description":"Current machine learning systems operate, almost exclusively, in a statistical, or model-free mode, which entails severe theoretical limits on their power and performance. Such systems cannot reason about interventions and retrospection and, therefore, cannot serve as the basis for strong AI. To achieve human level intelligence, learning machines need the guidance of a model of reality, similar to the ones used in causal inference tasks. 
To demonstrate the essential role of such models, I will present a summary of seven tasks which are beyond reach of current machine learning systems and which have been accomplished using the tools of causal modeling.","id":"50|doi_dedup___::2436e90941a664931b54b956ade5b77b","keywords":"machine learning (cs.lg), artificial intelligence (cs.ai), machine learning (stat.ml), fos: computer and information sciences, mode (statistics), causal inference, artificial intelligence, business.industry, business, power (physics), computer science, machine learning, computer.software_genre, computer, basis (linear algebra), 03 medical and health sciences, 02 engineering and technology, 0202 electrical engineering, electronic engineering, information engineering, 0301 basic medicine, 020201 artificial intelligence & image processing, 030104 developmental biology, computer science - learning, computer science - artificial intelligence, statistics - machine learning","language":"und","publication_date":"2018-02-02","publisher":"arXiv","title":"Theoretical Impediments to Machine Learning With Seven Sparks from the Causal Revolution","type":"publication"} +{"accessright":"OPEN","country":"","description":"In most natural and engineered systems, a set of entities interact with each other in complicated patterns that can encompass multiple types of relationships, change in time, and include other types of complications. Such systems include multiple subsystems and layers of connectivity, and it is important to take such \"multilayer\" features into account to try to improve our understanding of complex systems. Consequently, it is necessary to generalize \"traditional\" network theory by developing (and validating) a framework and associated tools to study multilayer systems in a comprehensive fashion. 
The origins of such efforts date back several decades and arose in multiple disciplines, and now the study of multilayer networks has become one of the most important directions in network science. In this paper, we discuss the history of multilayer networks (and related concepts) and review the exploding body of work on such networks. To unify the disparate terminology in the large body of recent work, we discuss a general framework for multilayer networks, construct a dictionary of terminology to relate the numerous existing concepts to each other, and provide a thorough discussion that compares, contrasts, and translates between related notions such as multilayer networks, multiplex networks, interdependent networks, networks of networks, and many others. We also survey and discuss existing data sets that can be represented as multilayer networks. We review attempts to generalize single-layer-network diagnostics to multilayer networks. We also discuss the rapidly expanding research on multilayer-network models and notions like community structure, connected components, tensor decompositions, and various types of dynamical processes on multilayer networks. 
We conclude with a summary and an outlook.","id":"50|doi_dedup___::c5a574592f2e347f27be49d2c20a5558","keywords":"applied mathematics, computational mathematics, control and optimization, management science and operations research, computer networks and communications, data science, connected component, terminology, complex system, network theory, network science, construct (philosophy), computer science, interdependent networks, set (psychology), 01 natural sciences, 0103 physical sciences, 010306 general physics, 010305 fluids & plasmas, physics - physics and society, computer science - social and information networks, physics and society (physics.soc-ph), social and information networks (cs.si), fos: physical sciences, fos: computer and information sciences","language":"und","publication_date":"2013-09-27","publisher":"Oxford University Press (OUP)","title":"Multilayer networks","type":"publication"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|doi_________::715fec7723208e6f17e855c204656e2f","keywords":"","language":"und","publication_date":"1998-10-19","publisher":"American Mathematical\\n Society","title":"Good encodings for DNA-based solutions to combinatorial problems","type":"publication"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_author/part0 new file mode 100644 index 0000000..b386db3 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_author/part0 @@ -0,0 +1,17 @@ +{"author_id":"6fa85e5d3da0c5ed3ab65e4423481714","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"dad3b6e22750b26a27296cd1c98565d1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"121d8003d3895905cfd67b9b69ac99e1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} 
+{"author_id":"91d3d8c07152d64fbf1c059940211334","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"a25d1cc688c34c0458a4b00b48bc4cdc","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"968ad30220675afb7a0b2b583b35c3a1","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"a55af296962dfb58977aabcb3cf6a8d9","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"5a344a09dab274779fd8e34654fd3541","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"77104c891595df750391d710280da022","result_id":"50|doi_dedup___::b2ffae13a6f06b87539d538dc4919df7"} +{"author_id":"148f572c63c1f22386c1cae02e5bae2d","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"8e571c27bc66cf96051302db9aa903dc","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"175e45bf98e2b74df9c888598bb917fc","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"bcdeabeece29231977e580b8f417ea82","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"11cea0826b37ff58aa2f4c12ec42695e","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"faf54def0161659b903f58ab4ce8bfae","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"088daddc0f62bc2b8700a4e66a399d5f","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} +{"author_id":"0b78df096d451535b5b8f7f4f47a6433","result_id":"50|doi_dedup___::2cd92ff12dd2fa919308d9438d9058b6"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_pid/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_pid/part0 new file mode 100644 index 0000000..d969f29 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result_pid/part0 @@ -0,0 +1,12 
@@ +{"id":"94c1431ed983f9ea9996650e2d2205cc","pid":"10.5281/zenodo.3529160","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"} +{"id":"f2328b2e830ee5c03945f65ab1802af7","pid":"10.3389/fphar.2019.01303","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"} +{"id":"53511fa534223420fb925c58051725d6","pid":"31749705","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"pmid"} +{"id":"0e254059fe10cf07df8dbae2cfe5797e","pid":"pmc6848277","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"pmc"} +{"id":"a6181896a32edebf1c45649b894e5886","pid":"10.5281/zenodo.3529159","result_id":"50|doi_dedup___::564289a1b69707f216d73aafdd70b20e","type":"doi"} +{"id":"6e2dc8a4fd3523656a5abd3c0e090a18","pid":"10.7287/peerj.preprints.2711v2","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"} +{"id":"2072bbca91cb3f3a05b2454edce57f6f","pid":"10.1371/journal.pbio.1002614","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"} +{"id":"a4e63567711400f9526cc46ca84d2bc1","pid":"pmc5655613","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"pmc"} +{"id":"477cabc52ec11dfaec8631ee1073376d","pid":"29065148","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"pmid"} +{"id":"27285b8c2487b534fc2196d27ad4cf0d","pid":"10.7287/peerj.preprints.2711v3","result_id":"50|doi_dedup___::612838ab331dcdfeb9862351bd3fb423","type":"doi"} +{"id":"056a211b8f85fe3058825df170960c06","pid":"10.1111/cgf.13610","result_id":"50|doi_dedup___::32c3649d7aa266f3d754463d6194ebd5","type":"doi"} +{"id":"79c575556941fbb62d9eee77b97fd0e4","pid":"1902.06815","result_id":"50|doi_dedup___::32c3649d7aa266f3d754463d6194ebd5","type":"arxiv"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 new file 
mode 100644 index 0000000..23c80f9 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 @@ -0,0 +1 @@ +50|doi_________::715fec7723208e6f17e855c204656e2f \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00049 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00049 new file mode 100644 index 0000000..6a5ffaf --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00049 @@ -0,0 +1 @@ +50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00089 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00089 new file mode 100644 index 0000000..07020d0 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00089 @@ -0,0 +1 @@ +50|DansKnawCris::0224aae28af558f21768dbc6439c7a95 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00169 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00169 new file mode 100644 index 0000000..e7e15ab --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00169 @@ -0,0 +1,2 @@ +50|DansKnawCris::26780065282e607306372abd0d808245 +50|doi_________::16e142b54fbddb2cf1c71ff7460e2792 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/author/part1 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/author/part1 new file mode 100644 index 0000000..6691cd2 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/author/part1 @@ -0,0 +1,2 @@ +{"firstname":"Maurizio","fullname":"Toscano, Maurizio","id":"045bdce3ee24842af4eb4a7f89a44adb","lastname":"Toscano","orcid":""} 
+{"firstname":"","fullname":"Aitor Díaz","id":"25fc898122164b69f56f08a8545804d3","lastname":"","orcid":""} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result/part0 new file mode 100644 index 0000000..c2501c3 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result/part0 @@ -0,0 +1 @@ +{"accessright":"OPEN","country":"","description":"

Mapping digital humanities in Spain (1993-2019)

This dataset has been extensively analysed in the following paper https://doi.org/10.3145/epi.2020.nov.01 and has also been used for the following poster https://doi.org/10.5281/zenodo.4256689

","id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","keywords":"","language":"esl/spa","publication_date":"2020-06-14","publisher":"Zenodo","title":"Mapping digital humanities in Spain - 1993-2019","type":"software"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_author/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_author/part0 new file mode 100644 index 0000000..fe7f499 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_author/part0 @@ -0,0 +1,2 @@ +{"author_id":"045bdce3ee24842af4eb4a7f89a44adb","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423"} +{"author_id":"25fc898122164b69f56f08a8545804d3","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_pid/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_pid/part0 new file mode 100644 index 0000000..b72038a --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/software/result_pid/part0 @@ -0,0 +1,2 @@ +{"id":"cb7d0c2e4660c784cb647060974dbee7","pid":"10.5281/zenodo.3893545","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","type":"doi"} +{"id":"19703b43918fc184698f6e0298bf2fc8","pid":"10.5281/zenodo.3893546","result_id":"50|doi_dedup___::57c23b72fc2da4d47b35e5b871c35423","type":"doi"} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 47aa521..00b5e97 100644 --- a/pom.xml +++ b/pom.xml @@ -102,8 +102,7 @@ 5.6.1 3.5 11.0.2 - - [2.13.1-patched] + [3.17.1] \ No newline at end of file