diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkPropagateRelationsJob.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
similarity index 93%
rename from dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkPropagateRelationsJob.java
rename to dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
index 9f48ce521..2896a2aa1 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkPropagateRelationsJob.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkPropagateRelationsJob.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dedup;
+package eu.dnetlib.dedup.sx;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -7,14 +7,8 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.DHPUtils;
import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.Optional;
-import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.*;
import scala.Tuple2;
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkUpdateEntityJob.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
similarity index 98%
rename from dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkUpdateEntityJob.java
rename to dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
index 396349481..6039e5526 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkUpdateEntityJob.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/sx/SparkUpdateEntityJob.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dedup;
+package eu.dnetlib.dedup.sx;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -15,7 +15,6 @@ import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.*;
import scala.Tuple2;
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/dedup/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/dedup/oozie_app/workflow.xml
index ddbf39e5f..46f334b1b 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/dedup/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/resources/eu/dnetlib/dhp/dedup/oozie_app/workflow.xml
@@ -122,7 +122,7 @@
yarn-cluster
cluster
Propagate Dedup Relations
- eu.dnetlib.dedup.SparkPropagateRelationsJob
+ eu.dnetlib.dedup.sx.SparkPropagateRelationsJob
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
@@ -146,7 +146,7 @@
yarn-cluster
cluster
Update ${entity} and add DedupRecord
- eu.dnetlib.dedup.SparkUpdateEntityJob
+ eu.dnetlib.dedup.sx.SparkUpdateEntityJob
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/test/resources/eu/dnetlib/dedup/conf/pub_scholix.conf.json b/dhp-workflows/dhp-dedup-scholexplorer/src/test/resources/eu/dnetlib/dedup/sx/conf/pub_scholix.conf.json
similarity index 100%
rename from dhp-workflows/dhp-dedup-scholexplorer/src/test/resources/eu/dnetlib/dedup/conf/pub_scholix.conf.json
rename to dhp-workflows/dhp-dedup-scholexplorer/src/test/resources/eu/dnetlib/dedup/sx/conf/pub_scholix.conf.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/TargetFunction.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/TargetFunction.java
deleted file mode 100644
index 31a554a63..000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/TargetFunction.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
-
-
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.spark.api.java.function.MapFunction;
-
-public class TargetFunction implements MapFunction {
- @Override
- public Relation call(Relation relation) throws Exception {
- final String type = StringUtils.substringBefore(relation.getSource(), "|");
- relation.setTarget(String.format("%s|%s", type, StringUtils.substringAfter(relation.getTarget(),"::")));
- return relation;
- }
-}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/ImportDataFromMongo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/ImportDataFromMongo.java
similarity index 72%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/ImportDataFromMongo.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/ImportDataFromMongo.java
index 2357c3787..8994e9667 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/ImportDataFromMongo.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/ImportDataFromMongo.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
@@ -16,7 +16,6 @@ import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.bson.Document;
import org.bson.conversions.Bson;
-
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
@@ -26,14 +25,52 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
+/**
+ * This job is responsible for collecting
+ * data from the Mongo database and storing it in a sequence file on HDFS
+ * Mongo database contains information of each MDSTore in two collections:
+ * -metadata
+ * That contains info like:
+ * ID, format, layout, interpretation
+ * -metadataManager:
+ * that contains info :
+ * ID, mongoCollectionName
+ * from the metadata collection we filter the ids with Format, layout, and Interpretation
+ * from the metadataManager we get the current MONGO collection name which contains metadata XML
+ * see function getCurrentId
+ *
+ * This Job will be called multiple times, depending on the triple we want to import,
+ * and generates for each triple a sequence file of XML
+ *
+ */
+
public class ImportDataFromMongo {
+ /**
+ * It requires as input some parameters described in the file eu/dnetlib/dhp/graph/sx/import_from_mongo_parameters.json
+ *
+ * - the name node
+ * - the path where to store the HDFS file
+ * - the mongo host
+ * - the mongo port
+ * - the metadata format to import
+ * - the metadata layout to import
+ * - the metadata interpretation to import
+ * - the mongo database Name
+ *
+ * These params are encoded into args
+ *
+ *
-
+ *
+ *
+ * @param args
+ * @throws Exception
+ */
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
ImportDataFromMongo.class.getResourceAsStream(
- "/eu/dnetlib/dhp/graph/import_from_mongo_parameters.json")));
+ "/eu/dnetlib/dhp/sx/graph/argumentparser/import_from_mongo_parameters.json")));
parser.parseArgument(args);
final int port = Integer.parseInt(parser.get("dbport"));
final String host = parser.get("dbhost");
@@ -43,10 +80,7 @@ public class ImportDataFromMongo {
final String interpretation = parser.get("interpretation");
final String dbName = parser.get("dbName");
-
-
final MongoClient client = new MongoClient(host, port);
-
MongoDatabase database = client.getDatabase(dbName);
MongoCollection metadata = database.getCollection("metadata");
@@ -55,6 +89,8 @@ public class ImportDataFromMongo {
final List ids = new ArrayList<>();
metadata.find((Bson) query).forEach((Consumer) document -> ids.add(document.getString("mdId")));
List databaseId = ids.stream().map(it -> getCurrentId(it, metadataManager)).filter(Objects::nonNull).collect(Collectors.toList());
+
+
final String hdfsuri = parser.get("namenode");
// ====== Init HDFS File System Object
Configuration conf = new Configuration();
@@ -64,8 +100,6 @@ public class ImportDataFromMongo {
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
- System.setProperty("HADOOP_USER_NAME", parser.get("user"));
- System.setProperty("hadoop.home.dir", "/");
FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(parser.get("targetPath"));
@@ -92,13 +126,17 @@ public class ImportDataFromMongo {
throw new RuntimeException(e);
}
}
-
);
});
}
}
-
+ /**
+ * Return the name of the mongo collection given an MDStore ID
+ * @param mdId The id of the MDStore
+ * @param metadataManager The collection metadataManager on mongo which contains this information
+ * @return
+ */
private static String getCurrentId(final String mdId, final MongoCollection metadataManager) {
FindIterable result = metadataManager.find((Bson) QueryBuilder.start("mdId").is(mdId).get());
final Document item = result.first();
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkExtractEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkExtractEntitiesJob.java
similarity index 79%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkExtractEntitiesJob.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkExtractEntitiesJob.java
index cabca4e5c..9f5a91d3c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkExtractEntitiesJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkExtractEntitiesJob.java
@@ -1,8 +1,7 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.oa.graph.SparkGraphImporterJob;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.compress.GzipCodec;
@@ -16,6 +15,27 @@ import java.util.List;
import java.util.stream.Collectors;
+/**
+ * This Job extracts a typology of entity and stores it in a new RDD
+ * This job is called multiple times, once for each file generated by the Job {@link ImportDataFromMongo},
+ * and store the new RDD in a path that should be under a folder:
+ * extractedEntities/entity/version1
+ *
+ * at the end of this process we will have :
+ * extractedEntities/dataset/version1
+ * extractedEntities/dataset/version2
+ * extractedEntities/dataset/...
+ * extractedEntities/publication/version1
+ * extractedEntities/publication/version2
+ * extractedEntities/publication/...
+ * extractedEntities/unknown/version1
+ * extractedEntities/unknown/version2
+ * extractedEntities/unknown/...
+ * extractedEntities/relation/version1
+ * extractedEntities/relation/version2
+ * extractedEntities/relation/...
+ */
+
public class SparkExtractEntitiesJob {
final static String IDJSONPATH = "$.id";
final static String SOURCEJSONPATH = "$.source";
@@ -27,7 +47,7 @@ public class SparkExtractEntitiesJob {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkExtractEntitiesJob.class.getResourceAsStream(
- "/eu/dnetlib/dhp/graph/input_extract_entities_parameters.json")));
+ "/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGenerateSimRel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkSXGeneratePidSimlarity.java
similarity index 81%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGenerateSimRel.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkSXGeneratePidSimlarity.java
index aea763b85..c3e55ca2f 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGenerateSimRel.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkSXGeneratePidSimlarity.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.DHPUtils;
@@ -12,13 +12,24 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
-public class SparkScholexplorerGenerateSimRel {
+
+/**
+ * In some cases the identifier generated for the Entity in {@link SparkExtractEntitiesJob} is different from the identifier
+ * associated by the aggregator; this means that some relations point to a missing identifier.
+ * To avoid this problem we store in the model the Id and the originalObjIdentifier
+ * This jobs extract this pair and creates a Similar relation that will be used in SparkMergeEntities
+ *
+ */
+
+public class SparkSXGeneratePidSimlarity {
final static String IDJSONPATH = "$.id";
final static String OBJIDPATH = "$.originalObjIdentifier";
+
+
public static void generateDataFrame(final SparkSession spark, final JavaSparkContext sc, final String inputPath, final String targetPath) {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerCreateRawGraphJob.java
similarity index 81%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJob.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerCreateRawGraphJob.java
index 41ed137d6..5d8a35c1b 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerCreateRawGraphJob.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -11,6 +11,7 @@ import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
import eu.dnetlib.dhp.utils.DHPUtils;
import net.minidev.json.JSONArray;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -32,7 +33,27 @@ import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
-public class SparkScholexplorerMergeEntitiesJob {
+
+/**
+ * This job is responsible for the creation of the RAW Graph.
+ * It is applied to the different entities generated from {@link SparkExtractEntitiesJob}
+ * In case of dataset, publication and Unknown Entities
+ * we group all the entities of the same type by their identifier,
+ * and then in the reduce phase we merge all the entities.
+ * Merge means:
+ * -merge all the metadata
+ * -merge the collected From values
+ *
+ * In the case of relation we need to do different work:
+ * -Phase 1: Map reduce jobs
+ * Map: Get all Relation and emit a key constructed by (source, relType, Target) and the relation itself
+ * Reduce: Merge all relations
+ * Looking at the javadoc of {@link SparkSXGeneratePidSimlarity} we take the dataset of pid relation
+ * and joining by source and target we replace the wrong identifier in the relation with the correct ones.
+ * At the end we replace the new Dataset of Relation
+ */
+
+public class SparkScholexplorerCreateRawGraphJob {
final static String IDJSONPATH = "$.id";
final static String SOURCEJSONPATH = "$.source";
@@ -44,22 +65,20 @@ public class SparkScholexplorerMergeEntitiesJob {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
- SparkScholexplorerMergeEntitiesJob.class.getResourceAsStream(
- "/eu/dnetlib/dhp/graph/merge_entities_scholix_parameters.json")));
+ SparkScholexplorerCreateRawGraphJob.class.getResourceAsStream(
+ "/eu/dnetlib/dhp/sx/graph/argumentparser/merge_entities_scholix_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
.config(new SparkConf()
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"))
- .appName(SparkScholexplorerMergeEntitiesJob.class.getSimpleName())
+ .appName(SparkScholexplorerCreateRawGraphJob.class.getSimpleName())
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final String inputPath = parser.get("sourcePath");
final String targetPath = parser.get("targetPath");
final String entity = parser.get("entity");
-
-
FileSystem fs = FileSystem.get(sc.sc().hadoopConfiguration());
List subFolder = Arrays.stream(fs.listStatus(new Path(inputPath))).filter(FileStatus::isDirectory).map(FileStatus::getPath).collect(Collectors.toList());
List> inputRdd = new ArrayList<>();
@@ -113,7 +132,9 @@ public class SparkScholexplorerMergeEntitiesJob {
break;
case "relation":
- SparkScholexplorerGenerateSimRel.generateDataFrame(spark, sc, inputPath.replace("/relation",""),targetPath.replace("/relation","") );
+
+
+ SparkSXGeneratePidSimlarity.generateDataFrame(spark, sc, inputPath.replace("/relation",""),targetPath.replace("/relation","") );
RDD rdd = union.mapToPair((PairFunction) f -> {
final String source = getJPathString(SOURCEJSONPATH, f);
final String target = getJPathString(TARGETJSONPATH, f);
@@ -132,9 +153,13 @@ public class SparkScholexplorerMergeEntitiesJob {
System.out.println("LOADING PATH :"+targetPath.replace("/relation","")+"/pid_simRel");
Datasetsim_ds =spark.read().load(targetPath.replace("/relation","")+"/pid_simRel").as(Encoders.bean(Relation.class));
- TargetFunction tf = new TargetFunction();
-
- Dataset ids = sim_ds.map(tf, Encoders.bean(Relation.class));
+ Dataset ids = sim_ds.map((MapFunction) relation->
+ {
+ final String type = StringUtils.substringBefore(relation.getSource(), "|");
+ relation.setTarget(String.format("%s|%s", type, StringUtils.substringAfter(relation.getTarget(),"::")));
+ return relation;
+ }
+ , Encoders.bean(Relation.class));
final Dataset firstJoin = rel_ds
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporter.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporter.java
similarity index 83%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporter.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporter.java
index 6cbfab327..96e2c0826 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporter.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporter.java
@@ -1,9 +1,9 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.graph.scholexplorer.parser.DatasetScholexplorerParser;
-import eu.dnetlib.dhp.graph.scholexplorer.parser.PublicationScholexplorerParser;
+import eu.dnetlib.dhp.graph.sx.parser.DatasetScholexplorerParser;
+import eu.dnetlib.dhp.graph.sx.parser.PublicationScholexplorerParser;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.scholexplorer.relation.RelationMapper;
import org.apache.commons.io.IOUtils;
@@ -15,6 +15,12 @@ import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
+
+/**
+ * This Job reads a sequence file containing XML records stored in the aggregator
+ * and generates an RDD of heterogeneous entities like Dataset, Relation, Publication and Unknown
+ */
+
public class SparkScholexplorerGraphImporter {
public static void main(String[] args) throws Exception {
@@ -22,7 +28,7 @@ public class SparkScholexplorerGraphImporter {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkScholexplorerGraphImporter.class.getResourceAsStream(
- "/eu/dnetlib/dhp/graph/input_graph_scholix_parameters.json")));
+ "/eu/dnetlib/dhp/sx/graph/argumentparser/input_graph_scholix_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/AbstractScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/AbstractScholexplorerParser.java
similarity index 98%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/AbstractScholexplorerParser.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/AbstractScholexplorerParser.java
index 6f3aa68d2..f3f81013c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/AbstractScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/AbstractScholexplorerParser.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer.parser;
+package eu.dnetlib.dhp.graph.sx.parser;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/DatasetScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/DatasetScholexplorerParser.java
similarity index 99%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/DatasetScholexplorerParser.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/DatasetScholexplorerParser.java
index 21545092b..7e3f06e22 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/DatasetScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/DatasetScholexplorerParser.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer.parser;
+package eu.dnetlib.dhp.graph.sx.parser;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
@@ -13,7 +13,6 @@ import eu.dnetlib.dhp.parser.utility.VtdUtilityParser.Node;
import eu.dnetlib.scholexplorer.relation.RelInfo;
import eu.dnetlib.scholexplorer.relation.RelationMapper;
import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import java.util.ArrayList;
import java.util.Arrays;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/PublicationScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/PublicationScholexplorerParser.java
similarity index 99%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/PublicationScholexplorerParser.java
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/PublicationScholexplorerParser.java
index d5cf94a77..456b19064 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/scholexplorer/parser/PublicationScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/sx/parser/PublicationScholexplorerParser.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer.parser;
+package eu.dnetlib.dhp.graph.sx.parser;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/convertXmlToEntities/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/convertXmlToEntities/oozie_app/workflow.xml
deleted file mode 100644
index c7f628b6d..000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/convertXmlToEntities/oozie_app/workflow.xml
+++ /dev/null
@@ -1,90 +0,0 @@
-
-
-
-
- sourcePath
- the source path
-
-
- hive_db_name
- the target hive database name
-
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- mapreduce.job.queuename
- ${queueName}
-
-
- oozie.launcher.mapred.job.queue.name
- ${oozieLauncherQueueName}
-
-
-
-
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
- yarn
- cluster
- MapGraphAsHiveDB
- eu.dnetlib.dhp.oa.graph.SparkGraphImporterJob
- dhp-graph-mapper-${projectVersion}.jar
-
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
- --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-
- -mt yarn
- -s${sourcePath}
- -db${hive_db_name}
- -h${hive_metastore_uris}
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- hive.metastore.uris
- ${hive_metastore_uris}
-
-
- ${hive_jdbc_url}/${hive_db_name}
-
- hive_db_name=${hive_db_name}
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/config-default.xml
deleted file mode 100644
index 6fb2a1253..000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/config-default.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
- oozie.use.system.libpath
- true
-
-
- oozie.action.sharelib.for.spark
- spark2
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/generate_sim_rel_scholix_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/generate_sim_rel_scholix_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/generate_sim_rel_scholix_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/generate_sim_rel_scholix_parameters.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/import_from_mongo_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/import_from_mongo_parameters.json
similarity index 73%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/import_from_mongo_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/import_from_mongo_parameters.json
index 9032be287..ab8e760b2 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/import_from_mongo_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/import_from_mongo_parameters.json
@@ -1,12 +1,11 @@
[
{"paramName":"n", "paramLongName":"namenode", "paramDescription": "the name node", "paramRequired": true},
- {"paramName":"u", "paramLongName":"user", "paramDescription": "the name node", "paramRequired": true},
- {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the name node", "paramRequired": true},
+ {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path where to store the HDFS File", "paramRequired": true},
{"paramName":"h", "paramLongName":"dbhost", "paramDescription": "the mongo host", "paramRequired": true},
{"paramName":"p", "paramLongName":"dbport", "paramDescription": "the mongo port", "paramRequired": true},
{"paramName":"f", "paramLongName":"format", "paramDescription": "the metadata format to import", "paramRequired": true},
{"paramName":"l", "paramLongName":"layout", "paramDescription": "the metadata layout to import", "paramRequired": true},
{"paramName":"i", "paramLongName":"interpretation", "paramDescription": "the metadata interpretation to import", "paramRequired": true},
- {"paramName":"dn", "paramLongName":"dbName", "paramDescription": "the database Name", "paramRequired": true}
+ {"paramName":"dn", "paramLongName":"dbName", "paramDescription": "the mongo database Name", "paramRequired": true}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/input_extract_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/input_extract_entities_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/input_graph_scholix_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_graph_scholix_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/input_graph_scholix_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_graph_scholix_parameters.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/merge_entities_scholix_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/merge_entities_scholix_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/merge_entities_scholix_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/merge_entities_scholix_parameters.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/relations.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/relations.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/convertXmlToEntities/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/config-default.xml
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/convertXmlToEntities/oozie_app/config-default.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/config-default.xml
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/importMongoDbToHdfs/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
similarity index 91%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/importMongoDbToHdfs/oozie_app/workflow.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
index 35aa173c6..918cc652a 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/importMongoDbToHdfs/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
@@ -1,4 +1,4 @@
-
+
workingPath
@@ -55,7 +55,7 @@
${jobTracker}
${nameNode}
- eu.dnetlib.dhp.graph.scholexplorer.ImportDataFromMongo
+ eu.dnetlib.dhp.graph.sx.ImportDataFromMongo
-t${targetPath}
-n${nameNode}
-u${user}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/extractEntities/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/config-default.xml
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/extractEntities/oozie_app/config-default.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/config-default.xml
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/extractEntities/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
similarity index 93%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/extractEntities/oozie_app/workflow.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
index 6caa8b1c3..01fdec2ef 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/extractEntities/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
@@ -1,4 +1,4 @@
-
+
sourcePath
@@ -54,7 +54,7 @@
yarn-cluster
cluster
Extract ${entities}
- eu.dnetlib.dhp.graph.scholexplorer.SparkExtractEntitiesJob
+ eu.dnetlib.dhp.graph.sx.SparkExtractEntitiesJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/importMongoDbToHdfs/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/config-default.xml
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/importMongoDbToHdfs/oozie_app/config-default.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/config-default.xml
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml
similarity index 90%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/workflow.xml
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml
index 44c6004e2..cf66ab6e6 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/graph/scholexplorer/mergeEntities/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml
@@ -1,4 +1,4 @@
-
+
sourcePath
@@ -45,7 +45,7 @@
yarn-cluster
cluster
Merge ${entity}
- eu.dnetlib.dhp.graph.scholexplorer.SparkScholexplorerMergeEntitiesJob
+ eu.dnetlib.dhp.graph.sx.SparkScholexplorerCreateRawGraphJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/ScholexplorerParserTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/ScholexplorerParserTest.java
similarity index 89%
rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/ScholexplorerParserTest.java
rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/ScholexplorerParserTest.java
index 2185b7987..0717efe4a 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/ScholexplorerParserTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/ScholexplorerParserTest.java
@@ -1,9 +1,9 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
-import eu.dnetlib.dhp.graph.scholexplorer.parser.DatasetScholexplorerParser;
+import eu.dnetlib.dhp.graph.sx.parser.DatasetScholexplorerParser;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.scholexplorer.relation.RelationMapper;
import org.apache.commons.io.IOUtils;
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporterTest.java
similarity index 58%
rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporterTest.java
rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporterTest.java
index 505e7581a..f33340547 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerGraphImporterTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerGraphImporterTest.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerMergeEntitiesJobTest.java
similarity index 58%
rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJobTest.java
rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerMergeEntitiesJobTest.java
index 7a93c5834..623c38112 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/scholexplorer/SparkScholexplorerMergeEntitiesJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/sx/SparkScholexplorerMergeEntitiesJobTest.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.graph.scholexplorer;
+package eu.dnetlib.dhp.graph.sx;
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/scholexplorer/dmf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/sx/dmf.xml
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/scholexplorer/dmf.xml
rename to dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/sx/dmf.xml
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/scholexplorer/t.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/sx/t.xml
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/scholexplorer/t.xml
rename to dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/sx/t.xml