merging with branch beta
This commit is contained in:
commit
91d3a47110
|
@ -185,6 +185,22 @@ class OafMapperUtilsTest {
|
|||
.getClassid());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDelegatedAuthority() throws IOException {
|
||||
Dataset d1 = read("dataset_2.json", Dataset.class);
|
||||
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
||||
|
||||
assertEquals(1, d2.getCollectedfrom().size());
|
||||
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||
|
||||
Result res = OafMapperUtils.mergeResults(d1, d2);
|
||||
|
||||
assertEquals(d2, res);
|
||||
|
||||
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
|
||||
|
||||
}
|
||||
|
||||
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
|
|
|
@ -1 +1,140 @@
|
|||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
|
||||
{
|
||||
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
|
||||
"resuttype": {"classid": "dataset"},
|
||||
"pid": [
|
||||
{
|
||||
"qualifier": {"classid": "doi"},
|
||||
"value": "10.1016/j.cmet.2011.03.013"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "urn"},
|
||||
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "scp-number"},
|
||||
"value": "79953761260"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "pmc"},
|
||||
"value": "21459329"
|
||||
}
|
||||
],
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
|
||||
"value": "Repository B"
|
||||
}
|
||||
],
|
||||
"instance": [
|
||||
{
|
||||
"refereed": {
|
||||
"classid": "0000",
|
||||
"classname": "UNKNOWN",
|
||||
"schemeid": "dnet:review_levels",
|
||||
"schemename": "dnet:review_levels"
|
||||
},
|
||||
"hostedby": {
|
||||
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||
"value": "Zenodo"
|
||||
},
|
||||
"accessright": {
|
||||
"classid": "OPEN",
|
||||
"classname": "Open Access",
|
||||
"schemeid": "dnet:access_modes",
|
||||
"schemename": "dnet:access_modes"
|
||||
},
|
||||
"processingchargecurrency": {
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "EUR"
|
||||
},
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "Digital Object Identifier",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1371/journal.pone.0085605"
|
||||
}
|
||||
],
|
||||
"distributionlocation": "",
|
||||
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
|
||||
"alternateIdentifier": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "pmid",
|
||||
"classname": "PubMed ID",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "24454899.0"
|
||||
}
|
||||
],
|
||||
"collectedfrom": {
|
||||
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
|
||||
"value": "Repository B"
|
||||
},
|
||||
"processingchargeamount": {
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "1022.02"
|
||||
},
|
||||
"instancetype": {
|
||||
"classid": "0004",
|
||||
"classname": "Conference object",
|
||||
"schemeid": "dnet:publication_resource",
|
||||
"schemename": "dnet:publication_resource"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
{
|
||||
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
|
||||
"resuttype": {"classid": "dataset"},
|
||||
"pid": [
|
||||
{
|
||||
"qualifier": {"classid": "doi"},
|
||||
"value": "10.1016/j.cmet.2011.03.013"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "urn"},
|
||||
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "scp-number"},
|
||||
"value": "79953761260"
|
||||
},
|
||||
{
|
||||
"qualifier": {"classid": "pmc"},
|
||||
"value": "21459329"
|
||||
}
|
||||
],
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||
"value": "Zenodo"
|
||||
}
|
||||
],
|
||||
"instance": [
|
||||
{
|
||||
"refereed": {
|
||||
"classid": "0000",
|
||||
"classname": "UNKNOWN",
|
||||
"schemeid": "dnet:review_levels",
|
||||
"schemename": "dnet:review_levels"
|
||||
},
|
||||
"hostedby": {
|
||||
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||
"value": "Zenodo"
|
||||
},
|
||||
"accessright": {
|
||||
"classid": "OPEN",
|
||||
"classname": "Open Access",
|
||||
"schemeid": "dnet:access_modes",
|
||||
"schemename": "dnet:access_modes"
|
||||
},
|
||||
"processingchargecurrency": {
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "EUR"
|
||||
},
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "Digital Object Identifier",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1371/journal.pone.0085605"
|
||||
}
|
||||
],
|
||||
"distributionlocation": "",
|
||||
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
|
||||
"alternateIdentifier": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "pmid",
|
||||
"classname": "PubMed ID",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "24454899.0"
|
||||
}
|
||||
],
|
||||
"collectedfrom": {
|
||||
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||
"value": "Zenodo"
|
||||
},
|
||||
"processingchargeamount": {
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "Harvested",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": true,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "1022.02"
|
||||
},
|
||||
"instancetype": {
|
||||
"classid": "0004",
|
||||
"classname": "Conference object",
|
||||
"schemeid": "dnet:publication_resource",
|
||||
"schemename": "dnet:publication_resource"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
|||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -21,6 +22,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
|
@ -83,10 +85,13 @@ public class CreateActionSetSparkJob implements Serializable {
|
|||
private static void extractContent(SparkSession spark, String inputPath, String outputPath,
|
||||
boolean shouldDuplicateRels) {
|
||||
spark
|
||||
.sqlContext()
|
||||
.createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING())
|
||||
.read()
|
||||
.textFile(inputPath + "/*")
|
||||
.map(
|
||||
(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
|
||||
Encoders.bean(COCI.class))
|
||||
.flatMap(
|
||||
(FlatMapFunction<String, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
|
||||
(FlatMapFunction<COCI, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
|
||||
Encoders.bean(Relation.class))
|
||||
.filter((FilterFunction<Relation>) value -> value != null)
|
||||
.toJavaRDD()
|
||||
|
@ -98,26 +103,29 @@ public class CreateActionSetSparkJob implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
private static List<Relation> createRelation(String value, boolean duplicate) {
|
||||
String[] line = value.split(",");
|
||||
if (!line[1].startsWith("10.")) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
private static List<Relation> createRelation(COCI value, boolean duplicate) {
|
||||
|
||||
List<Relation> relationList = new ArrayList<>();
|
||||
|
||||
String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1]));
|
||||
final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2]));
|
||||
String citing = ID_PREFIX
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting()));
|
||||
final String cited = ID_PREFIX
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
|
||||
|
||||
relationList
|
||||
.addAll(
|
||||
getRelations(
|
||||
citing,
|
||||
cited));
|
||||
if(!citing.equals(cited)){
|
||||
relationList
|
||||
.addAll(
|
||||
getRelations(
|
||||
citing,
|
||||
cited));
|
||||
|
||||
if (duplicate && line[1].endsWith(".refs")) {
|
||||
citing = ID_PREFIX + IdentifierFactory
|
||||
.md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs"))));
|
||||
relationList.addAll(getRelations(citing, cited));
|
||||
if (duplicate && value.getCiting().endsWith(".refs")) {
|
||||
citing = ID_PREFIX + IdentifierFactory
|
||||
.md5(
|
||||
CleaningFunctions
|
||||
.normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
|
||||
relationList.addAll(getRelations(citing, cited));
|
||||
}
|
||||
}
|
||||
|
||||
return relationList;
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.opencitations;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class ReadCOCI implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ReadCOCI.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
ReadCOCI.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String[] inputFile = parser.get("inputFile").split(";");
|
||||
log.info("inputFile {}", inputFile.toString());
|
||||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String workingPath = parser.get("workingPath");
|
||||
log.info("workingPath {}", workingPath);
|
||||
|
||||
SparkConf sconf = new SparkConf();
|
||||
|
||||
final String delimiter = Optional
|
||||
.ofNullable(parser.get("delimiter"))
|
||||
.orElse(DEFAULT_DELIMITER);
|
||||
|
||||
runWithSparkSession(
|
||||
sconf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
doRead(
|
||||
spark,
|
||||
workingPath,
|
||||
inputFile,
|
||||
outputPath,
|
||||
delimiter);
|
||||
});
|
||||
}
|
||||
|
||||
private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
|
||||
String outputPath,
|
||||
String delimiter) throws IOException {
|
||||
|
||||
for(String inputFile : inputFiles){
|
||||
String p_string = workingPath + "/" + inputFile + ".gz";
|
||||
|
||||
Dataset<Row> cociData = spark
|
||||
.read()
|
||||
.format("csv")
|
||||
.option("sep", delimiter)
|
||||
.option("inferSchema", "true")
|
||||
.option("header", "true")
|
||||
.option("quotes", "\"")
|
||||
.load(p_string)
|
||||
.repartition(100);
|
||||
|
||||
cociData.map((MapFunction<Row, COCI>) row -> {
|
||||
COCI coci = new COCI();
|
||||
coci.setOci(row.getString(0));
|
||||
coci.setCiting(row.getString(1));
|
||||
coci.setCited(row.getString(2));
|
||||
return coci;
|
||||
}, Encoders.bean(COCI.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + inputFile);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.opencitations.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.opencsv.bean.CsvBindByPosition;
|
||||
|
||||
/**
 * Serializable bean carrying the three values read for a COCI record: oci, citing and cited.
 */
public class COCI implements Serializable {

	private String oci;
	private String citing;
	private String cited;

	/** @return the oci value */
	public String getOci() {
		return this.oci;
	}

	/** @return the citing value */
	public String getCiting() {
		return this.citing;
	}

	/** @return the cited value */
	public String getCited() {
		return this.cited;
	}

	/** @param oci the oci value to store */
	public void setOci(String oci) {
		this.oci = oci;
	}

	/** @param citing the citing value to store */
	public void setCiting(String citing) {
		this.citing = citing;
	}

	/** @param cited the cited value to store */
	public void setCited(String cited) {
		this.cited = cited;
	}

}
|
|
@ -0,0 +1,37 @@
|
|||
[
|
||||
{
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the path of the folder containing the compressed opencitations files",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
"paramName": "issm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "d",
|
||||
"paramLongName": "delimiter",
|
||||
"paramDescription": "the delimiter separating the fields of the csv input files",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "op",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path under which the json files are written",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "if",
|
||||
"paramLongName": "inputFile",
|
||||
"paramDescription": "the semicolon-separated names of the input files, without the .gz extension",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -26,6 +26,7 @@
|
|||
<switch>
|
||||
<case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
|
||||
<case to="extract">${wf:conf('resumeFrom') eq 'ExtractContent'}</case>
|
||||
<case to="read">${wf:conf('resumeFrom') eq 'ReadContent'}</case>
|
||||
<default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
|
||||
</switch>
|
||||
</decision>
|
||||
|
@ -60,6 +61,32 @@
|
|||
<arg>--inputFile</arg><arg>${inputFile}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
</java>
|
||||
<ok to="read"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="read">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the AS for OC</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPath}/COCI</arg>
|
||||
<arg>--outputPath</arg><arg>${workingPath}/COCI_JSON</arg>
|
||||
<arg>--delimiter</arg><arg>${delimiter}</arg>
|
||||
<arg>--inputFile</arg><arg>${inputFileCoci}</arg>
|
||||
</spark>
|
||||
<ok to="create_actionset"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
@ -81,7 +108,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${workingPath}/COCI</arg>
|
||||
<arg>--inputPath</arg><arg>${workingPath}/COCI_JSON</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -99,7 +99,7 @@ public class CreateOpenCitationsASTest {
|
|||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
assertEquals(60, tmp.count());
|
||||
assertEquals(62, tmp.count());
|
||||
|
||||
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -131,7 +131,7 @@ public class CreateOpenCitationsASTest {
|
|||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
assertEquals(44, tmp.count());
|
||||
assertEquals(46, tmp.count());
|
||||
|
||||
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
|
@ -142,7 +142,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -175,7 +175,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -215,7 +215,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -240,8 +240,8 @@ public class CreateOpenCitationsASTest {
|
|||
assertEquals("citation", r.getSubRelType());
|
||||
assertEquals("resultResult", r.getRelType());
|
||||
});
|
||||
assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
|
||||
assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
|
||||
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
|
||||
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
|
||||
|
||||
}
|
||||
|
||||
|
@ -250,7 +250,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
@ -295,7 +295,7 @@ public class CreateOpenCitationsASTest {
|
|||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.opencitations;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
|
||||
public class ReadCOCITest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(ReadCOCITest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(ReadCOCITest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(ReadCOCITest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(ReadCOCITest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadCOCI() throws Exception {
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
|
||||
.getPath();
|
||||
|
||||
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new org.apache.hadoop.fs.Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
|
||||
.getPath()),
|
||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
|
||||
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new org.apache.hadoop.fs.Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
|
||||
.getPath()),
|
||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
|
||||
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new org.apache.hadoop.fs.Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
|
||||
.getPath()),
|
||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
|
||||
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new org.apache.hadoop.fs.Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
|
||||
.getPath()),
|
||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
|
||||
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new org.apache.hadoop.fs.Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
|
||||
.getPath()),
|
||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
|
||||
|
||||
ReadCOCI
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-workingPath",
|
||||
workingDir.toString() + "/COCI",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/COCI_json/",
|
||||
"-inputFile", "input1;input2;input3;input4;input5"
|
||||
});
|
||||
|
||||
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<COCI> tmp = sc
|
||||
.textFile(workingDir.toString() + "/COCI_json/*/")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, COCI.class));
|
||||
|
||||
Assertions.assertEquals(24, tmp.count());
|
||||
|
||||
Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count());
|
||||
|
||||
Assertions.assertEquals(8, tmp.filter(c -> c.getCiting().indexOf(".refs") > -1).count());
|
||||
}
|
||||
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,8 +0,0 @@
|
|||
oci,citing,cited,creation,timespan,journal_sc,author_sc
|
||||
02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no
|
||||
02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no
|
||||
02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no
|
||||
02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no
|
||||
02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no
|
||||
02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no
|
||||
02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no
|
Binary file not shown.
|
@ -1,8 +0,0 @@
|
|||
oci,citing,cited,creation,timespan,journal_sc,author_sc
|
||||
02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no
|
||||
02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no
|
||||
02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no
|
||||
02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no
|
||||
02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no
|
||||
02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no
|
||||
02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no
|
Binary file not shown.
|
@ -1,9 +0,0 @@
|
|||
oci,citing,cited,creation,timespan,journal_sc,author_sc
|
||||
0200100000236090708010101090307000202023727141528-020050302063600040000010307,10.1002/9781119370222.refs,10.5326/0400137,2020-06-22,P16Y3M,no,no
|
||||
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020000073700000301093733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2007.00319.x,2020-06-22,P12Y8M,no,no
|
||||
0200100000236090708010101090307000202023727141528-0200101010136312830370102030509,10.1002/9781119370222.refs,10.1111/vsu.12359,2020-06-22,P4Y10M29D,no,no
|
||||
0200100000236090708010101090307000202023727141528-020050302063600030900020904,10.1002/9781119370222.refs,10.5326/0390294,2020-06-22,P17Y1M,no,no
|
||||
0200100000236090708010101090307000202023727141528-020050302063600040200030701,10.1002/9781119370222.refs,10.5326/0420371,2020-06-22,P13Y9M,no,no
|
||||
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020001033701020000003733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2013.12000.x,2020-06-22,P7Y2M,no,no
|
||||
0200100000236090708010101090307000202023727141528-020010008003600000408000106093702000006370306070200,10.1002/9781119370222.refs,10.1080/00480169.2006.36720,2020-06-22,P13Y6M,no,no
|
||||
0200100000236090708010101090307000202023727141528-0200101010136193701070501630008010337020000063700000003033733,10.1002/9781119370222.refs,10.1111/j.1751-0813.2006.00033.x,2020-06-22,P13Y8M,no,no
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -102,7 +102,8 @@ public class ResultTagger implements Serializable {
|
|||
// .flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
|
||||
// .map(s -> StringUtils.substringAfter(s, "|"))
|
||||
// .collect(Collectors.toCollection(HashSet::new))
|
||||
tmp.forEach(
|
||||
tmp
|
||||
.forEach(
|
||||
dsId -> datasources
|
||||
.addAll(
|
||||
conf.getCommunityForDatasource(dsId, param)));
|
||||
|
|
|
@ -347,6 +347,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
r.setCoverage(prepareCoverages(doc, info));
|
||||
r.setContext(prepareContexts(doc, info));
|
||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||
r
|
||||
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||
r
|
||||
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||
|
||||
r.setInstance(instances);
|
||||
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
|
||||
|
|
|
@ -814,6 +814,27 @@ class MappersTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testOpenAPC() throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_openapc.xml")));
|
||||
final List<Oaf> list = new OafToOafMapper(vocs, true, true).processMdRecord(xml);
|
||||
|
||||
System.out.println("***************");
|
||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||
System.out.println("***************");
|
||||
|
||||
final Publication p = (Publication) list.get(0);
|
||||
assertTrue(p.getInstance().size() > 0);
|
||||
|
||||
assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0));
|
||||
|
||||
assertTrue(p.getProcessingchargeamount() != null);
|
||||
assertTrue(p.getProcessingchargecurrency() != null);
|
||||
|
||||
assertEquals("1721.47", p.getProcessingchargeamount().getValue());
|
||||
assertEquals("EUR", p.getProcessingchargecurrency().getValue());
|
||||
}
|
||||
|
||||
private void assertValidId(final String id) {
|
||||
// System.out.println(id);
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<oai:record xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:date="http://exslt.org/dates-and-times"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oai="http://www.openarchives.org/OAI/2.0/">
|
||||
<oai:header>
|
||||
<dri:objIdentifier>openapc_____::000023f9cb6e3a247c764daec4273cbc</dri:objIdentifier>
|
||||
<dri:recordIdentifier>10.1155/2015/439379</dri:recordIdentifier>
|
||||
<dri:dateOfCollection>2022-02-01T15:26:33.817Z</dri:dateOfCollection>
|
||||
<oaf:datasourceprefix>openapc_____</oaf:datasourceprefix>
|
||||
<dr:dateOfTransformation>2022-02-02T15:45:32.502Z</dr:dateOfTransformation>
|
||||
</oai:header>
|
||||
<metadata xmlns="http://namespace.openaire.eu/">
|
||||
<dc:identifier>https://doi.org/10.1155/2015/439379</dc:identifier>
|
||||
<oaf:identifier identifierType="doi">10.1155/2015/439379</oaf:identifier>
|
||||
<oaf:identifier identifierType="pmcid">PMC4354964</oaf:identifier>
|
||||
<oaf:identifier identifierType="pmid">25811027.0</oaf:identifier>
|
||||
<datacite:affiliation affiliationIdentifier="grid.83440.3b"
|
||||
affiliationIdentifierScheme="GRID" schemeURI="https://www.grid.ac/">UCL</datacite:affiliation>
|
||||
<datacite:affiliation
|
||||
affiliationIdentifier="https://ror.org/02jx3x895" affiliationIdentifierScheme="ROR">UCL</datacite:affiliation>
|
||||
<oaf:processingchargeamount currency="EUR">1721.47</oaf:processingchargeamount>
|
||||
<oaf:journal issn="2314-6133">BioMed Research International</oaf:journal>
|
||||
<dc:license>http://creativecommons.org/licenses/by/3.0/</dc:license>
|
||||
<dc:date>2015</dc:date>
|
||||
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
<oaf:hostedBy id="apc_________::openapc" name="OpenAPC Global Initiative"/>
|
||||
<oaf:collectedFrom id="apc_________::openapc" name="OpenAPC Global Initiative"/>
|
||||
</metadata>
|
||||
<oaf:about xmlns:oai="http://wwww.openarchives.org/OAI/2.0/">
|
||||
<oaf:datainfo>
|
||||
<oaf:inferred>false</oaf:inferred>
|
||||
<oaf:deletedbyinference>false</oaf:deletedbyinference>
|
||||
<oaf:trust>0.9</oaf:trust>
|
||||
<oaf:inferenceprovenance/>
|
||||
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
|
||||
classname="sysimport:crosswalk:datasetarchive"
|
||||
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||
</oaf:datainfo>
|
||||
</oaf:about>
|
||||
</oai:record>
|
|
@ -398,6 +398,16 @@ public class XmlRecordFactory implements Serializable {
|
|||
if (r.getResourcetype() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
|
||||
}
|
||||
// Serialize the processing charge. The currency is emitted only together with an amount,
// but it is a separate field on the result and is null-checked on its own: the enclosing
// condition only guarantees that the amount is present.
if (r.getProcessingchargeamount() != null) {
	metadata
		.add(
			XmlSerializationUtils
				.asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue()));
	if (r.getProcessingchargecurrency() != null) {
		metadata
			.add(
				XmlSerializationUtils
					.asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue()));
	}
}
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
|
|
|
@ -66,6 +66,9 @@ public class XmlRecordFactoryTest {
|
|||
assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()"));
|
||||
|
||||
assertEquals(3, doc.selectNodes("//instance").size());
|
||||
|
||||
assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()"));
|
||||
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -1655,5 +1655,37 @@
|
|||
},
|
||||
"value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study"
|
||||
}
|
||||
]
|
||||
],
|
||||
"processingchargeamount": {
|
||||
"value": "1721.47",
|
||||
"dataInfo": {
|
||||
"invisible": true,
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.9",
|
||||
"inferenceprovenance": "",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
"processingchargecurrency": {
|
||||
"value": "EUR",
|
||||
"dataInfo": {
|
||||
"invisible": true,
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.9",
|
||||
"inferenceprovenance": "",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -14,7 +14,7 @@ LEFT OUTER JOIN
|
|||
(
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
|
@ -25,7 +25,7 @@ LEFT OUTER JOIN
|
|||
(
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
|
@ -36,7 +36,7 @@ LEFT OUTER JOIN
|
|||
(
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
|
@ -47,7 +47,7 @@ LEFT OUTER JOIN
|
|||
(
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
|
||||
SELECT * FROM ${stats_db_name}.publication_sources
|
||||
|
@ -76,8 +76,8 @@ join ${openaire_db_name}.result r1 on rel.source=r1.id
|
|||
join ${openaire_db_name}.result r2 on r2.id=rel.target
|
||||
where reltype='resultResult'
|
||||
and r1.resulttype.classname!=r2.resulttype.classname
|
||||
and r1.datainfo.deletedbyinference=false
|
||||
and r2.datainfo.deletedbyinference=false
|
||||
and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
|
||||
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
|
||||
and r1.resulttype.classname != 'other'
|
||||
and r2.resulttype.classname != 'other'
|
||||
and rel.datainfo.deletedbyinference=false;
|
||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
|
|
@ -8,22 +8,22 @@
|
|||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
||||
SELECT * FROM ${stats_db_name}.publication_licenses
|
||||
|
@ -46,7 +46,7 @@ FROM (
|
|||
LEFT OUTER JOIN (
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;
|
||||
|
|
|
@ -9,22 +9,22 @@
|
|||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false;
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false;
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false;
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false;
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
||||
select * from ${stats_db_name}.publication_refereed
|
||||
|
|
|
@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id,
|
|||
case when size(p.description) > 0 then true else false end as abstract,
|
||||
'publication' as type
|
||||
from ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_classifications AS
|
||||
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
||||
from ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_concepts AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
|
@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case
|
|||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
from ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_datasources as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
|
||||
from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
|
||||
where p.datainfo.deletedbyinference = false) p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||
LEFT OUTER JOIN (
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_languages AS
|
||||
select substr(p.id, 4) as id, p.language.classname as language
|
||||
FROM ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_oids AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_pids AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_topics as
|
||||
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_citations AS
|
||||
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.publication p
|
||||
lateral view explode(p.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and p.datainfo.deletedbyinference = false;
|
||||
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
|
@ -81,7 +81,11 @@ compute stats TARGET.result_sources;
|
|||
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.result_topics;
|
||||
|
||||
create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target);
|
||||
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
|
||||
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
|
||||
-- Keep the links whose source or target is in TARGET.result (union of foo1 and foo2, de-duplicated);
-- stored as parquet like the other create-table statements in this script.
create table TARGET.result_result stored as parquet as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
||||
drop view TARGET.foo1;
|
||||
drop view TARGET.foo2;
|
||||
compute stats TARGET.result_result;
|
||||
|
||||
-- datasources
|
||||
|
@ -126,7 +130,7 @@ compute stats TARGET.indi_result_has_cc_licence;
|
|||
create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_result_has_cc_licence_url;
|
||||
|
||||
create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab;
|
||||
-- Exposed as a plain view over the SOURCE table; CREATE VIEW takes no STORED AS clause.
create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
|
||||
|
||||
create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_result_with_orcid;
|
||||
|
|
|
@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id,
|
|||
CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract,
|
||||
'dataset' AS type
|
||||
FROM ${openaire_db_name}.dataset d
|
||||
WHERE d.datainfo.deletedbyinference = FALSE;
|
||||
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_citations AS
|
||||
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.dataset d
|
||||
LATERAL VIEW explode(d.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and d.datainfo.deletedbyinference = false;
|
||||
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_classifications AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
|
@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case
|
|||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
from ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources AS
|
||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
|
@ -68,31 +68,31 @@ FROM (
|
|||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.instance) instances AS instance
|
||||
where p.datainfo.deletedbyinference = false) p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||
LEFT OUTER JOIN (
|
||||
SELECT substr(d.id, 4) id
|
||||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_languages AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_oids AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_pids AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
|
@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id,
|
|||
CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
||||
'software' as type
|
||||
from ${openaire_db_name}.software s
|
||||
where s.datainfo.deletedbyinference = false;
|
||||
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_citations AS
|
||||
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.software s
|
||||
LATERAL VIEW explode(s.extrainfo) citations as citation
|
||||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and s.datainfo.deletedbyinference = false;
|
||||
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_classifications AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_concepts AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
|
@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case
|
|||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_datasources AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||
|
@ -68,31 +68,31 @@ FROM (
|
|||
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.instance) instances AS instance
|
||||
where p.datainfo.deletedbyinference = false) p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||
LEFT OUTER JOIN (
|
||||
SELECT substr(d.id, 4) id
|
||||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_languages AS
|
||||
select substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.software p
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_oids AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_pids AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_topics AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
|
@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id,
|
|||
CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
||||
'other' AS type
|
||||
FROM ${openaire_db_name}.otherresearchproduct o
|
||||
WHERE o.datainfo.deletedbyinference = FALSE;
|
||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
|
||||
|
||||
-- Otherresearchproduct_citations
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS
|
||||
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and o.datainfo.deletedbyinference = false;
|
||||
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
|
@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case
|
|||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
|
||||
where p.datainfo.deletedbyinference = false) p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||
LEFT OUTER JOIN(SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.otherresearchproduct p
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false;
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
|
@ -5,24 +5,26 @@
|
|||
------------------------------------------------------
|
||||
CREATE TABLE ${stats_db_name}.project_oids AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids;
|
||||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organizations AS
|
||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||
from ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'projectOrganization'
|
||||
and r.datainfo.deletedbyinference = false;
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultProject'
|
||||
and r.datainfo.deletedbyinference = false;
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
create table ${stats_db_name}.project_classification as
|
||||
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||
from ${openaire_db_name}.project p
|
||||
lateral view explode(p.h2020classification) classifs as class
|
||||
where p.datainfo.deletedbyinference=false and class.h2020programme is not null;
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_tmp
|
||||
(
|
||||
|
@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id,
|
|||
p.code.value AS code,
|
||||
p.totalcost AS totalcost
|
||||
FROM ${openaire_db_name}.project p
|
||||
WHERE p.datainfo.deletedbyinference = false;
|
||||
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
create table ${stats_db_name}.funder as
|
||||
select distinct xpath_string(fund, '//funder/id') as id,
|
||||
|
|
|
@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization AS
|
|||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultOrganization'
|
||||
and r.datainfo.deletedbyinference = false;
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_projects AS
|
||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
|
|
|
@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1
|
|||
LATERAL VIEW EXPLODE(originalid) temp AS originalidd
|
||||
WHERE originalidd like "piwik:%") AS d2
|
||||
ON d1.id = d2.id
|
||||
WHERE d1.datainfo.deletedbyinference = FALSE;
|
||||
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false;
|
||||
|
||||
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
|
||||
-- Creating a temporary dual table that will be removed after the following insert
|
||||
|
@ -82,24 +82,25 @@ WHERE yearofvalidation = '-1';
|
|||
|
||||
CREATE TABLE ${stats_db_name}.datasource_languages AS
|
||||
SELECT substr(d.id, 4) AS id, langs.languages AS language
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages;
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_oids AS
|
||||
SELECT substr(d.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids;
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_organizations AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'datasourceOrganization'
|
||||
and r.datainfo.deletedbyinference = false;
|
||||
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
-- datasource sources:
|
||||
-- where the datasource info has been collected from.
|
||||
create table if not exists ${stats_db_name}.datasource_sources AS
|
||||
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
|
||||
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
|
||||
where d.datainfo.deletedbyinference = false;
|
||||
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
||||
SELECT datasource AS id, id AS result
|
||||
|
|
|
@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id,
|
|||
o.legalshortname.value as legalshortname,
|
||||
o.country.classid as country
|
||||
FROM ${openaire_db_name}.organization o
|
||||
WHERE o.datainfo.deletedbyinference = FALSE;
|
||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
|
||||
SELECT organization AS id, id AS datasource
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
<pluginRepository>
|
||||
<id>iis-releases</id>
|
||||
<name>iis releases plugin repository</name>
|
||||
<url>http://maven.ceon.pl/artifactory/iis-releases</url>
|
||||
<url>https://maven.ceon.pl/artifactory/iis-releases</url>
|
||||
<layout>default</layout>
|
||||
</pluginRepository>
|
||||
</pluginRepositories>
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -797,7 +797,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.10.26]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.10.29]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue