diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java
index 01a99da1b6..d3020b92c3 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java
@@ -29,7 +29,7 @@ import java.util.List;
public class SparkCreateConnectedComponent {
public static void main(String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/dedup_parameters.json")));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateDedupRecord.java
index 8e60df945a..09c0ba89be 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateDedupRecord.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateDedupRecord.java
@@ -11,7 +11,7 @@ import org.apache.spark.sql.SparkSession;
public class SparkCreateDedupRecord {
public static void main(String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateDedupRecord.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/dedupRecord_parameters.json")));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateDedupRecord.class.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedupRecord_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java
index 2bdfa8759b..b847c80dc4 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java
@@ -29,7 +29,7 @@ import java.util.List;
public class SparkCreateSimRels {
public static void main(String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/dedup_parameters.json")));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
index 2896a2aa14..a725294430 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
@@ -23,7 +23,7 @@ public class SparkPropagateRelationsJob {
final static String TARGETJSONPATH = "$.target";
public static void main(String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkPropagateRelationsJob.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/dedup_propagate_relation_parameters.json")));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkPropagateRelationsJob.class.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_propagate_relation_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
index 6039e55262..44f7c551e5 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
@@ -26,7 +26,7 @@ public class SparkUpdateEntityJob {
final static String IDJSONPATH = "$.id";
public static void main(String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkUpdateEntityJob.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/dedup_delete_by_inference_parameters.json")));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkUpdateEntityJob.class.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_delete_by_inference_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedupRecord_parameters.json b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedupRecord_parameters.json
new file mode 100644
index 0000000000..de744dfb63
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedupRecord_parameters.json
@@ -0,0 +1,33 @@
+[
+ {
+ "paramName": "mt",
+ "paramLongName": "master",
+ "paramDescription": "should be local or yarn",
+ "paramRequired": true
+ },
+ {
+ "paramName": "s",
+ "paramLongName": "sourcePath",
+ "paramDescription": "the path of the sequential file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "e",
+ "paramLongName": "entity",
+ "paramDescription": "the type of entity to be deduped",
+ "paramRequired": true
+ },
+ {
+ "paramName": "c",
+ "paramLongName": "dedupConf",
+ "paramDescription": "dedup configuration to be used",
+ "compressed": true,
+ "paramRequired": true
+ },
+ {
+ "paramName": "d",
+ "paramLongName": "dedupPath",
+ "paramDescription": "dedup path to load mergeRelation",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedup_parameters.json b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedup_parameters.json
new file mode 100644
index 0000000000..8ba8515d0e
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/dedup_parameters.json
@@ -0,0 +1,33 @@
+[
+ {
+ "paramName": "mt",
+ "paramLongName": "master",
+ "paramDescription": "should be local or yarn",
+ "paramRequired": true
+ },
+ {
+ "paramName": "s",
+ "paramLongName": "sourcePath",
+ "paramDescription": "the path of the sequential file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "e",
+ "paramLongName": "entity",
+ "paramDescription": "the type of entity to be deduped",
+ "paramRequired": true
+ },
+ {
+ "paramName": "c",
+ "paramLongName": "dedupConf",
+ "paramDescription": "dedup configuration to be used",
+ "compressed": true,
+ "paramRequired": true
+ },
+ {
+ "paramName": "t",
+ "paramLongName": "targetPath",
+ "paramDescription": "target path to save dedup result",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/oozie_app/workflow.xml
index 6c8dba653b..2f22bb764d 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/dedup/oozie_app/workflow.xml
@@ -49,7 +49,7 @@
cluster
Create Similarity Relations
eu.dnetlib.dedup.SparkCreateSimRels
- dhp-dedup-${projectVersion}.jar
+ dhp-dedup-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
@@ -73,7 +73,7 @@
cluster
Create Connected Components
eu.dnetlib.dedup.SparkCreateConnectedComponent
- dhp-dedup-${projectVersion}.jar
+ dhp-dedup-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
@@ -97,7 +97,7 @@
cluster
Create Dedup Record
eu.dnetlib.dedup.SparkCreateDedupRecord
- dhp-dedup-${projectVersion}.jar
+ dhp-dedup-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
@@ -121,7 +121,7 @@
cluster
Propagate Dedup Relations
eu.dnetlib.dedup.sx.SparkPropagateRelationsJob
- dhp-dedup-${projectVersion}.jar
+ dhp-dedup-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
@@ -145,7 +145,7 @@
cluster
Update ${entity} and add DedupRecord
eu.dnetlib.dedup.sx.SparkUpdateEntityJob
- dhp-dedup-${projectVersion}.jar
+ dhp-dedup-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/Datacite2Scholix.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/Datacite2Scholix.java
new file mode 100644
index 0000000000..809186a502
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/Datacite2Scholix.java
@@ -0,0 +1,203 @@
+package eu.dnetlib.dhp.provision;
+
+import com.jayway.jsonpath.JsonPath;
+import eu.dnetlib.dhp.provision.scholix.*;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.scholexplorer.relation.RelInfo;
+import eu.dnetlib.scholexplorer.relation.RelationMapper;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class Datacite2Scholix {
+
+
+ final RelationMapper relationMapper;
+
+ public Datacite2Scholix(RelationMapper relationMapper) { // Stores the supplied mapper in the final relationMapper field; no null check is done here.
+ this.relationMapper = relationMapper;
+ }
+
+
+ public List generateScholixFromJson(final String dJson) {
+
+ List