Moved OpenAIRE-specific implementations under the dedicated package eu.dnetlib.dhp.oa

Claudio Atzori 2020-03-27 10:42:17 +01:00
parent 098fabab3f
commit 673e744649
79 changed files with 294 additions and 254 deletions
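
Functionally the commit is a pure package relocation: classes keep their names and APIs and only move from eu.dnetlib.dhp.* into the eu.dnetlib.dhp.oa.* tree. A minimal sketch of what a consumer has to change (the consuming class is hypothetical; DedupUtility is one of the moved classes visible in the diff below):

// Hypothetical consumer of the dedup module, shown only to illustrate the move.
// Before this commit:
//     import eu.dnetlib.dhp.dedup.DedupUtility;
// After this commit, OpenAIRE-specific code lives under the oa sub-package:
import eu.dnetlib.dhp.oa.dedup.DedupUtility;

public class HypotheticalDedupConsumer {
    // Only the import changes; the class API is untouched by the relocation.
}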

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import eu.dnetlib.dhp.schema.oaf.Field;
import org.apache.commons.lang.StringUtils;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.*;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.google.common.collect.Sets;
import com.wcohen.ss.JaroWinkler;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
public enum OafEntityType {

View File

@ -1,9 +1,9 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.google.common.hash.Hashing;
import eu.dnetlib.dhp.dedup.graph.ConnectedComponent;
import eu.dnetlib.dhp.dedup.graph.GraphProcessor;
import eu.dnetlib.dhp.oa.dedup.graph.ConnectedComponent;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.graph.GraphProcessor;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.pace.config.DedupConfig;
@ -29,7 +29,9 @@ import java.util.List;
public class SparkCreateConnectedComponent {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/createCC_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")));
parser.parseArgument(args);
new SparkCreateConnectedComponent().run(parser);
@ -94,7 +96,6 @@ public class SparkCreateConnectedComponent {
.appName(SparkCreateSimRels.class.getSimpleName())
.master(parser.get("master"))
.config(conf)
.enableHiveSupport()
.getOrCreate();
}
}
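
The two hunks above capture a pattern repeated throughout this commit: the parameter JSON is resolved from the relocated /eu/dnetlib/dhp/oa/... classpath tree, and the SparkSession builder no longer calls .enableHiveSupport(). A minimal sketch of the resulting job skeleton, assuming the JSON resource is bundled on the classpath (error handling elided; the class name is hypothetical):

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class SparkJobSkeleton {

    public static void main(String[] args) throws Exception {
        // Job parameters are described by a JSON resource, now under the oa tree.
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
                IOUtils.toString(
                        SparkJobSkeleton.class.getResourceAsStream(
                                "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")));
        parser.parseArgument(args);

        // Hive support is no longer enabled: the builder stops at getOrCreate().
        final SparkSession spark = SparkSession
                .builder()
                .appName(SparkJobSkeleton.class.getSimpleName())
                .master(parser.get("master"))
                .config(new SparkConf())
                .getOrCreate();
    }
}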

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@ -15,7 +15,9 @@ import org.dom4j.DocumentException;
public class SparkCreateDedupRecord {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateDedupRecord.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/createDedupRecord_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkCreateDedupRecord.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json")));
parser.parseArgument(args);
new SparkCreateDedupRecord().run(parser);

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -13,8 +13,6 @@ import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
@ -32,7 +30,9 @@ public class SparkCreateSimRels implements Serializable {
private static final Log log = LogFactory.getLog(SparkCreateSimRels.class);
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/createSimRels_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")));
parser.parseArgument(args);
new SparkCreateSimRels().run(parser);

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -35,7 +35,9 @@ public class SparkPropagateRelation {
final static String TARGETJSONPATH = "$.target";
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkPropagateRelation.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/propagateRelation_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkPropagateRelation.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json")));
parser.parseArgument(args);
new SparkPropagateRelation().run(parser);

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import eu.dnetlib.pace.util.Reporter;
import org.apache.commons.logging.Log;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -28,7 +28,9 @@ public class SparkUpdateEntity implements Serializable {
final String IDJSONPATH = "$.id";
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkUpdateEntity.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/updateEntity_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkUpdateEntity.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json")));
parser.parseArgument(args);
new SparkUpdateEntity().run(parser);

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.dedup.graph;
package eu.dnetlib.dhp.oa.dedup.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.dedup.DedupUtility;
import eu.dnetlib.dhp.oa.dedup.DedupUtility;
import eu.dnetlib.pace.util.PaceException;
import org.apache.commons.lang.StringUtils;
import org.codehaus.jackson.annotate.JsonIgnore;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup.graph
package eu.dnetlib.dhp.oa.dedup.graph
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

View File

@ -55,7 +55,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Update Entity</name>
<class>eu.dnetlib.dhp.dedup.SparkUpdateEntity</class>
<class>eu.dnetlib.dhp.oa.dedup.SparkUpdateEntity</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
@ -82,7 +82,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Update Relations</name>
<class>eu.dnetlib.dhp.dedup.SparkPropagateRelation</class>
<class>eu.dnetlib.dhp.oa.dedup.SparkPropagateRelation</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}

View File

@ -59,7 +59,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Create Similarity Relations</name>
<class>eu.dnetlib.dhp.dedup.SparkCreateSimRels</class>
<class>eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
@ -86,7 +86,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Create Merge Relations</name>
<class>eu.dnetlib.dhp.dedup.SparkCreateConnectedComponent</class>
<class>eu.dnetlib.dhp.oa.dedup.SparkCreateConnectedComponent</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
@ -114,7 +114,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Create Dedup Record</name>
<class>eu.dnetlib.dhp.dedup.SparkCreateDedupRecord</class>
<class>eu.dnetlib.dhp.oa.dedup.SparkCreateDedupRecord</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}

View File

@ -1,10 +1,10 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup.dedup;
import eu.dnetlib.dhp.oa.dedup.DedupUtility;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.io.IOUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;

View File

@ -1,8 +1,11 @@
package eu.dnetlib.dhp.dedup;
package eu.dnetlib.dhp.oa.dedup.dedup;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.SparkCreateConnectedComponent;
import eu.dnetlib.dhp.oa.dedup.SparkCreateDedupRecord;
import eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.dedup.jpath;
package eu.dnetlib.dhp.oa.dedup.dedup.jpath;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;

View File

@ -0,0 +1,13 @@
----------------------------------------------------------------
Thu Mar 26 19:43:00 CET 2020:
Booting Derby version The Apache Software Foundation - Apache Derby - 10.12.1.1 - (1704137): instance a816c00e-0171-1827-9724-000012c70f40
on database directory /private/var/folders/xn/nr5vdk8n1572rvrnx5890_d80000gn/T/junit3871072562876431144/junit_metastore_db with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@4e6b5ed4
Loaded from file:/Users/claudio/.m2/repository/org/apache/derby/derby/10.12.1.1/derby-10.12.1.1.jar
java.vendor=Oracle Corporation
java.runtime.version=1.8.0_181-b13
user.dir=/Users/claudio/workspace/git/dnet-hadoop/dhp-workflows/dhp-graph-mapper
os.name=Mac OS X
os.arch=x86_64
os.version=10.15.3
derby.system.home=null
Database Class Loader started - derby.database.classpath=''

View File

@ -1,11 +1,12 @@
package eu.dnetlib.dhp.graph.scholexplorer;
import com.mongodb.*;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.QueryBuilder;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -18,7 +19,9 @@ import org.bson.conversions.Bson;
import java.io.IOException;
import java.net.URI;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
@ -27,7 +30,10 @@ public class ImportDataFromMongo {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/import_from_mongo_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
ImportDataFromMongo.class.getResourceAsStream(
"/eu/dnetlib/dhp/graph/import_from_mongo_parameters.json")));
parser.parseArgument(args);
final int port = Integer.parseInt(parser.get("dbport"));
final String host = parser.get("dbhost");
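
The wildcard com.mongodb.* import above is replaced by explicit imports; a sketch of how those classes cooperate when reading records out of MongoDB (connection details, database, collection, and field names are placeholders, not values from this repository):

import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.QueryBuilder;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import org.bson.Document;
import org.bson.conversions.Bson;

public class MongoReadSketch {

    public static void main(String[] args) {
        final MongoClient client = new MongoClient("localhost", 27017); // placeholder host/port
        try {
            final MongoDatabase db = client.getDatabase("mdstore");              // placeholder
            final MongoCollection<Document> coll = db.getCollection("records");  // placeholder

            // QueryBuilder produces a legacy DBObject; its BasicDBObject implementation
            // also implements Bson, so the filter can be passed to the find(Bson) API.
            final DBObject query = QueryBuilder.start("kind").is("metadata").get();
            final FindIterable<Document> cursor = coll.find((Bson) query);

            for (Document d : cursor) {
                System.out.println(d.toJson());
            }
        } finally {
            client.close();
        }
    }
}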

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.graph.scholexplorer;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob;
import eu.dnetlib.dhp.oa.graph.SparkGraphImporterJob;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.compress.GzipCodec;
@ -24,11 +24,14 @@ public class SparkExtractEntitiesJob {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkExtractEntitiesJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_extract_entities_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkExtractEntitiesJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/graph/input_extract_entities_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
.appName(SparkGraphImporterJob.class.getSimpleName())
.appName(SparkExtractEntitiesJob.class.getSimpleName())
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

View File

@ -2,7 +2,6 @@ package eu.dnetlib.dhp.graph.scholexplorer;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob;
import eu.dnetlib.dhp.graph.scholexplorer.parser.DatasetScholexplorerParser;
import eu.dnetlib.dhp.graph.scholexplorer.parser.PublicationScholexplorerParser;
import eu.dnetlib.dhp.schema.oaf.Oaf;
@ -20,11 +19,15 @@ public class SparkScholexplorerGraphImporter {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkScholexplorerGraphImporter.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_scholix_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkScholexplorerGraphImporter.class.getResourceAsStream(
"/eu/dnetlib/dhp/graph/input_graph_scholix_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
.appName(SparkGraphImporterJob.class.getSimpleName())
.appName(SparkScholexplorerGraphImporter.class.getSimpleName())
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

View File

@ -4,7 +4,6 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
@ -22,7 +21,10 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.util.ArrayList;
@ -40,13 +42,16 @@ public class SparkScholexplorerMergeEntitiesJob {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkScholexplorerMergeEntitiesJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/merge_entities_scholix_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkScholexplorerMergeEntitiesJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/graph/merge_entities_scholix_parameters.json")));
parser.parseArgument(args);
final SparkSession spark = SparkSession
.builder()
.config(new SparkConf()
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"))
.appName(SparkGraphImporterJob.class.getSimpleName())
.appName(SparkScholexplorerMergeEntitiesJob.class.getSimpleName())
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.openaire;
package eu.dnetlib.dhp.oa.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@ -15,7 +15,7 @@ public class SparkGraphImporterJob {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
"/eu/dnetlib/dhp/oa/graph/input_graph_parameters.json")));
parser.parseArgument(args);
new SparkGraphImporterJob().run(parser);

View File

@ -49,7 +49,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>MapGraphAsHiveDB</name>
<class>eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob</class>
<class>eu.dnetlib.dhp.oa.graph.SparkGraphImporterJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}

View File

@ -49,7 +49,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>MapGraphAsHiveDB</name>
<class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class>
<class>eu.dnetlib.dhp.oa.graph.SparkGraphImporterJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}

View File

@ -1,7 +1,5 @@
package eu.dnetlib.dhp.graph;
package eu.dnetlib.dhp.oa.graph;
import eu.dnetlib.dhp.graph.openaire.GraphMappingUtils;
import eu.dnetlib.dhp.graph.openaire.SparkGraphImporterJob;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Assertions;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph;
package eu.dnetlib.dhp.oa.provision;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -6,11 +6,11 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.graph.model.*;
import eu.dnetlib.dhp.graph.utils.ContextMapper;
import eu.dnetlib.dhp.graph.utils.GraphMappingUtils;
import eu.dnetlib.dhp.graph.utils.RelationPartitioner;
import eu.dnetlib.dhp.graph.utils.XmlRecordFactory;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils;
import eu.dnetlib.dhp.oa.provision.utils.RelationPartitioner;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.oa.provision.model.*;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
@ -28,7 +28,7 @@ import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.asRelatedEntity;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.asRelatedEntity;
/**
* Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects.
@ -202,7 +202,7 @@ public class GraphJoiner implements Serializable {
if (rel.hasRelatedEntity()) {
try {
links.add(
new eu.dnetlib.dhp.graph.model.Tuple2()
new eu.dnetlib.dhp.oa.provision.model.Tuple2()
.setRelation(mapper.readValue(rel.getRelation().getOaf(), Relation.class))
.setRelatedEntity(mapper.readValue(rel.getTarget().getOaf(), RelatedEntity.class)));
} catch (IOException e) {
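
Per the class comment above, GraphJoiner resolves links of distance = 1 and accumulates them as an adjacency list; the snippet this hunk patches builds one entry of that list. A compact sketch of that step under the relocated model package (the JSON payloads are assumed to be the serialized forms carried by the joined rows):

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.Tuple2;
import eu.dnetlib.dhp.schema.oaf.Relation;

import java.io.IOException;

public class AdjacencyEntrySketch {

    // One adjacency-list entry: the relation plus the entity it points to,
    // both deserialized from the JSON payloads of the joined rows.
    static Tuple2 toLink(ObjectMapper mapper, String relationJson, String relatedEntityJson)
            throws IOException {
        return new Tuple2()
                .setRelation(mapper.readValue(relationJson, Relation.class))
                .setRelatedEntity(mapper.readValue(relatedEntityJson, RelatedEntity.class));
    }
}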

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.graph;
package eu.dnetlib.dhp.oa.provision;
import com.lucidworks.spark.util.SolrSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
@ -39,7 +39,10 @@ public class SparkXmlIndexingJob {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkXmlIndexingJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_params_update_index.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkXmlIndexingJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/oa/provision/input_params_update_index.json")));
parser.parseArgument(args);
final String inputPath = parser.get("sourcePath");

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.graph;
package eu.dnetlib.dhp.oa.provision;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.graph.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -12,7 +12,9 @@ public class SparkXmlRecordBuilderJob {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkXmlRecordBuilderJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_params_build_adjacency_lists.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(
SparkXmlRecordBuilderJob.class.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_build_adjacency_lists.json")));
parser.parseArgument(args);
final String master = parser.get("master");

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import java.io.Serializable;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import eu.dnetlib.dhp.schema.oaf.OafEntity;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import java.util.ArrayList;

View File

@ -1,12 +1,10 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import eu.dnetlib.dhp.schema.oaf.Relation;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.model;
package eu.dnetlib.dhp.oa.provision.model;
import java.io.Serializable;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import java.io.Serializable;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import com.google.common.base.Joiner;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;

View File

@ -1,18 +1,16 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Predicate;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.graph.model.EntityRelEntity;
import eu.dnetlib.dhp.graph.model.RelatedEntity;
import eu.dnetlib.dhp.graph.model.TypedRow;
import eu.dnetlib.dhp.oa.provision.model.EntityRelEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.TypedRow;
import eu.dnetlib.dhp.schema.oaf.*;
import net.minidev.json.JSONArray;
import org.apache.commons.lang3.StringUtils;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import eu.dnetlib.dhp.schema.oaf.Qualifier;

View File

@ -1,6 +1,6 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import eu.dnetlib.dhp.graph.model.SortableRelationKey;
import eu.dnetlib.dhp.oa.provision.model.SortableRelationKey;
import org.apache.spark.Partitioner;
import org.apache.spark.util.Utils;
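
RelationPartitioner wires the relocated SortableRelationKey into Spark's Partitioner contract; a generic sketch of that contract (the hashing strategy here is illustrative, not necessarily the one used in this repository):

import org.apache.spark.Partitioner;

public class KeyHashPartitionerSketch extends Partitioner {

    private final int numPartitions;

    public KeyHashPartitionerSketch(int numPartitions) {
        this.numPartitions = numPartitions;
    }

    @Override
    public int numPartitions() {
        return numPartitions;
    }

    @Override
    public int getPartition(Object key) {
        // Mask the sign bit so the modulus is always a valid partition index;
        // records sharing a key always land in the same partition.
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}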

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import java.io.StringReader;
import java.io.StringWriter;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
@ -10,8 +10,8 @@ import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.graph.utils.XmlSerializationUtils.escapeXml;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml;
public class TemplateFactory {

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import com.google.common.io.Resources;

View File

@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
@ -7,9 +7,9 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.mycila.xmltool.XMLDoc;
import com.mycila.xmltool.XMLTag;
import eu.dnetlib.dhp.graph.model.JoinedEntity;
import eu.dnetlib.dhp.graph.model.RelatedEntity;
import eu.dnetlib.dhp.graph.model.Tuple2;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.Tuple2;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
@ -34,8 +34,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.*;
import static eu.dnetlib.dhp.graph.utils.XmlSerializationUtils.*;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.StringUtils.substringBefore;
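
With the XmlSerializationUtils static import dropped above, every helper call in the remainder of the file gains an explicit qualifier, which is what the long run of one-line changes below amounts to. A self-contained sketch of the rewritten call shape (method signatures inferred from the call sites in this diff):

import eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils;
import eu.dnetlib.dhp.schema.oaf.Qualifier;

import java.util.ArrayList;
import java.util.List;

public class QualifiedCallSketch {

    // Same calls as before the commit, spelled with an explicit qualifier
    // instead of relying on a static import of XmlSerializationUtils.*.
    static List<String> metadata(String publisher, Qualifier language) {
        final List<String> metadata = new ArrayList<>();
        metadata.add(XmlSerializationUtils.asXmlElement("publisher", publisher));
        metadata.add(XmlSerializationUtils.mapQualifier("language", language));
        return metadata;
    }
}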
@ -84,7 +83,7 @@ public class XmlRecordFactory implements Serializable {
final List<String> relations = listRelations(je, templateFactory, contexts);
metadata.addAll(buildContexts(getMainType(je.getType()), contexts));
metadata.add(parseDataInfo(entity.getDataInfo()));
metadata.add(XmlSerializationUtils.parseDataInfo(entity.getDataInfo()));
final String body = templateFactory.buildBody(
getMainType(je.getType()),
@ -121,19 +120,19 @@ public class XmlRecordFactory implements Serializable {
if (entity.getCollectedfrom() != null) {
metadata.addAll(entity.getCollectedfrom()
.stream()
.map(kv -> mapKeyValue("collectedfrom", kv))
.map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv))
.collect(Collectors.toList()));
}
if (entity.getOriginalId() != null) {
metadata.addAll(entity.getOriginalId()
.stream()
.map(s -> asXmlElement("originalId", s))
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
.collect(Collectors.toList()));
}
if (entity.getPid() != null) {
metadata.addAll(entity.getPid()
.stream()
.map(p -> mapStructuredProperty("pid", p))
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
.collect(Collectors.toList()));
}
@ -154,11 +153,11 @@ public class XmlRecordFactory implements Serializable {
if (r.getTitle() != null) {
metadata.addAll(r.getTitle()
.stream()
.map(t -> mapStructuredProperty("title", t))
.map(t -> XmlSerializationUtils.mapStructuredProperty("title", t))
.collect(Collectors.toList()));
}
if (r.getBestaccessright() != null) {
metadata.add(mapQualifier("bestaccessright", r.getBestaccessright()));
metadata.add(XmlSerializationUtils.mapQualifier("bestaccessright", r.getBestaccessright()));
}
if (r.getAuthor() != null) {
metadata.addAll(r.getAuthor()
@ -166,17 +165,17 @@ public class XmlRecordFactory implements Serializable {
.map(a -> {
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
if (isNotBlank(a.getName())) {
sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
sb.append(" name=\"" + XmlSerializationUtils.escapeXml(a.getName()) + "\"");
}
if (isNotBlank(a.getSurname())) {
sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
sb.append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\"");
}
if (a.getPid() != null) {
a.getPid().stream()
.filter(sp -> isNotBlank(sp.getQualifier().getClassid()) && isNotBlank(sp.getValue()))
.forEach(sp -> {
String pidType = escapeXml(sp.getQualifier().getClassid()).replaceAll("\\W", "");
String pidValue = escapeXml(sp.getValue());
String pidType = XmlSerializationUtils.escapeXml(sp.getQualifier().getClassid()).replaceAll("\\W", "");
String pidValue = XmlSerializationUtils.escapeXml(sp.getValue());
// ugly hack: some records provide swapped pidtype and pidvalue
if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
@ -191,78 +190,78 @@ public class XmlRecordFactory implements Serializable {
}
});
}
sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
sb.append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + "</creator>");
return sb.toString();
}).collect(Collectors.toList()));
}
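// Illustrative sketch only, not part of this commit: the swap implied by the
// "ugly hack" comment above is elided in this hunk. The guard treats a pid
// whose value matches a known author pid *type* (e.g. "orcid") as having the
// two fields inverted, and presumably exchanges them before serialization:
//
//     if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
//         final String tmp = pidType;
//         pidType = pidValue;
//         pidValue = tmp;
//     }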
if (r.getContributor() != null) {
metadata.addAll(r.getContributor()
.stream()
.map(c -> asXmlElement("contributor", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getCountry() != null) {
metadata.addAll(r.getCountry()
.stream()
.map(c -> mapQualifier("country", c))
.map(c -> XmlSerializationUtils.mapQualifier("country", c))
.collect(Collectors.toList()));
}
if (r.getCoverage() != null) {
metadata.addAll(r.getCoverage()
.stream()
.map(c -> asXmlElement("coverage", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getDateofacceptance() != null) {
metadata.add(asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
}
if (r.getDescription() != null) {
metadata.addAll(r.getDescription()
.stream()
.map(c -> asXmlElement("description", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getEmbargoenddate() != null) {
metadata.add(asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
}
if (r.getSubject() != null) {
metadata.addAll(r.getSubject()
.stream()
.map(s -> mapStructuredProperty("subject", s))
.map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s))
.collect(Collectors.toList()));
}
if (r.getLanguage() != null) {
metadata.add(mapQualifier("language", r.getLanguage()));
metadata.add(XmlSerializationUtils.mapQualifier("language", r.getLanguage()));
}
if (r.getRelevantdate() != null) {
metadata.addAll(r.getRelevantdate()
.stream()
.map(s -> mapStructuredProperty("relevantdate", s))
.map(s -> XmlSerializationUtils.mapStructuredProperty("relevantdate", s))
.collect(Collectors.toList()));
}
if (r.getPublisher() != null) {
metadata.add(asXmlElement("publisher", r.getPublisher().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("publisher", r.getPublisher().getValue()));
}
if (r.getSource() != null) {
metadata.addAll(r.getSource()
.stream()
.map(c -> asXmlElement("source", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getFormat() != null) {
metadata.addAll(r.getFormat()
.stream()
.map(c -> asXmlElement("format", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue()))
.collect(Collectors.toList()));
}
if (r.getResulttype() != null) {
metadata.add(mapQualifier("resulttype", r.getResulttype()));
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", r.getResulttype()));
}
if (r.getResourcetype() != null) {
metadata.add(mapQualifier("resourcetype", r.getResourcetype()));
metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype()));
}
metadata.add(mapQualifier("bestaccessright", getBestAccessright(r)));
metadata.add(XmlSerializationUtils.mapQualifier("bestaccessright", getBestAccessright(r)));
}
switch (EntityType.valueOf(type)) {
@ -271,29 +270,29 @@ public class XmlRecordFactory implements Serializable {
if (pub.getJournal() != null) {
final Journal j = pub.getJournal();
metadata.add(mapJournal(j));
metadata.add(XmlSerializationUtils.mapJournal(j));
}
break;
case dataset:
final Dataset d = (Dataset) entity;
if (d.getDevice() != null) {
metadata.add(asXmlElement("device", d.getDevice().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("device", d.getDevice().getValue()));
}
if (d.getLastmetadataupdate() != null) {
metadata.add(asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
}
if (d.getMetadataversionnumber() != null) {
metadata.add(asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
}
if (d.getSize() != null) {
metadata.add(asXmlElement("size", d.getSize().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue()));
}
if (d.getStoragedate() != null) {
metadata.add(asXmlElement("storagedate", d.getStoragedate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
}
if (d.getVersion() != null) {
metadata.add(asXmlElement("version", d.getVersion().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue()));
}
//TODO d.getGeolocation()
@ -304,20 +303,20 @@ public class XmlRecordFactory implements Serializable {
if (orp.getContactperson() != null) {
metadata.addAll(orp.getContactperson()
.stream()
.map(c -> asXmlElement("contactperson", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue()))
.collect(Collectors.toList()));
}
if (orp.getContactgroup() != null) {
metadata.addAll(orp.getContactgroup()
.stream()
.map(c -> asXmlElement("contactgroup", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue()))
.collect(Collectors.toList()));
}
if (orp.getTool() != null) {
metadata.addAll(orp.getTool()
.stream()
.map(c -> asXmlElement("tool", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue()))
.collect(Collectors.toList()));
}
break;
@ -327,20 +326,20 @@ public class XmlRecordFactory implements Serializable {
if (s.getDocumentationUrl() != null) {
metadata.addAll(s.getDocumentationUrl()
.stream()
.map(c -> asXmlElement("documentationUrl", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue()))
.collect(Collectors.toList()));
}
if (s.getLicense() != null) {
metadata.addAll(s.getLicense()
.stream()
.map(l -> mapStructuredProperty("license", l))
.map(l -> XmlSerializationUtils.mapStructuredProperty("license", l))
.collect(Collectors.toList()));
}
if (s.getCodeRepositoryUrl() != null) {
metadata.add(asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
}
if (s.getProgrammingLanguage() != null) {
metadata.add(mapQualifier("programmingLanguage", s.getProgrammingLanguage()));
metadata.add(XmlSerializationUtils.mapQualifier("programmingLanguage", s.getProgrammingLanguage()));
}
break;
case datasource:
@ -350,120 +349,120 @@ public class XmlRecordFactory implements Serializable {
mapDatasourceType(metadata, ds.getDatasourcetype());
}
if (ds.getOpenairecompatibility() != null) {
metadata.add(mapQualifier("openairecompatibility", ds.getOpenairecompatibility()));
metadata.add(XmlSerializationUtils.mapQualifier("openairecompatibility", ds.getOpenairecompatibility()));
}
if (ds.getOfficialname() != null) {
metadata.add(asXmlElement("officialname", ds.getOfficialname().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
}
if (ds.getEnglishname() != null) {
metadata.add(asXmlElement("englishname", ds.getEnglishname().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
}
if (ds.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
}
if (ds.getLogourl() != null) {
metadata.add(asXmlElement("logourl", ds.getLogourl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue()));
}
if (ds.getContactemail() != null) {
metadata.add(asXmlElement("contactemail", ds.getContactemail().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
}
if (ds.getNamespaceprefix() != null) {
metadata.add(asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
}
if (ds.getLatitude() != null) {
metadata.add(asXmlElement("latitude", ds.getLatitude().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue()));
}
if (ds.getLongitude() != null) {
metadata.add(asXmlElement("longitude", ds.getLongitude().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
}
if (ds.getDateofvalidation() != null) {
metadata.add(asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
}
if (ds.getDescription() != null) {
metadata.add(asXmlElement("description", ds.getDescription().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
}
if (ds.getOdnumberofitems() != null) {
metadata.add(asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
}
if (ds.getOdnumberofitemsdate() != null) {
metadata.add(asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
}
if (ds.getOdpolicies() != null) {
metadata.add(asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
}
if (ds.getOdlanguages() != null) {
metadata.addAll(ds.getOdlanguages()
.stream()
.map(c -> asXmlElement("odlanguages", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getOdcontenttypes() != null) {
metadata.addAll(ds.getOdcontenttypes()
.stream()
.map(c -> asXmlElement("odcontenttypes", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getAccessinfopackage() != null) {
metadata.addAll(ds.getAccessinfopackage()
.stream()
.map(c -> asXmlElement("accessinfopackage", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("accessinfopackage", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getReleaseenddate() != null) {
metadata.add(asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
}
if (ds.getReleaseenddate() != null) {
metadata.add(asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
}
if (ds.getMissionstatementurl() != null) {
metadata.add(asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
}
if (ds.getDataprovider() != null) {
metadata.add(asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
metadata.add(XmlSerializationUtils.asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
}
if (ds.getServiceprovider() != null) {
metadata.add(asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
metadata.add(XmlSerializationUtils.asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
}
if (ds.getDatabaseaccesstype() != null) {
metadata.add(asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
}
if (ds.getDatauploadtype() != null) {
metadata.add(asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
}
if (ds.getDatabaseaccessrestriction() != null) {
metadata.add(asXmlElement("databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("databaseaccessrestriction", ds.getDatabaseaccessrestriction().getValue()));
}
if (ds.getDatauploadrestriction() != null) {
metadata.add(asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
}
if (ds.getVersioning() != null) {
metadata.add(asXmlElement("versioning", ds.getVersioning().getValue().toString()));
metadata.add(XmlSerializationUtils.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
}
if (ds.getCitationguidelineurl() != null) {
metadata.add(asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
}
if (ds.getQualitymanagementkind() != null) {
metadata.add(asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
}
if (ds.getPidsystems() != null) {
metadata.add(asXmlElement("pidsystems", ds.getPidsystems().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
}
if (ds.getCertificates() != null) {
metadata.add(asXmlElement("certificates", ds.getCertificates().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
}
if (ds.getPolicies() != null) {
metadata.addAll(ds.getPolicies()
.stream()
.map(kv -> mapKeyValue("policies", kv))
.map(kv -> XmlSerializationUtils.mapKeyValue("policies", kv))
.collect(Collectors.toList()));
}
if (ds.getJournal() != null) {
metadata.add(mapJournal(ds.getJournal()));
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
}
if (ds.getSubjects() != null) {
metadata.addAll(ds.getSubjects()
.stream()
.map(sp -> mapStructuredProperty("subjects", sp))
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
.collect(Collectors.toList()));
}
@ -472,56 +471,56 @@ public class XmlRecordFactory implements Serializable {
final Organization o = (Organization) entity;
if (o.getLegalshortname() != null) {
metadata.add(asXmlElement("legalshortname", o.getLegalshortname().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("legalshortname", o.getLegalshortname().getValue()));
}
if (o.getLegalname() != null) {
metadata.add(asXmlElement("legalname", o.getLegalname().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
}
if (o.getAlternativeNames() != null) {
metadata.addAll(o.getAlternativeNames()
.stream()
.map(c -> asXmlElement("alternativeNames", c.getValue()))
.map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue()))
.collect(Collectors.toList()));
}
if (o.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
}
if (o.getLogourl() != null) {
metadata.add(asXmlElement("websiteurl", o.getLogourl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue()));
}
if (o.getEclegalbody() != null) {
metadata.add(asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
}
if (o.getEclegalperson() != null) {
metadata.add(asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
}
if (o.getEcnonprofit() != null) {
metadata.add(asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
}
if (o.getEcresearchorganization() != null) {
metadata.add(asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
}
if (o.getEchighereducation() != null) {
metadata.add(asXmlElement("echighereducation", o.getEchighereducation().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("echighereducation", o.getEchighereducation().getValue()));
}
if (o.getEcinternationalorganization() != null) {
metadata.add(asXmlElement("ecinternationalorganizationeurinterests", o.getEcinternationalorganization().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecinternationalorganizationeurinterests", o.getEcinternationalorganization().getValue()));
}
if (o.getEcinternationalorganization() != null) {
metadata.add(asXmlElement("ecinternationalorganization", o.getEcinternationalorganization().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecinternationalorganization", o.getEcinternationalorganization().getValue()));
}
if (o.getEcenterprise() != null) {
metadata.add(asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
}
if (o.getEcsmevalidated() != null) {
metadata.add(asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
}
if (o.getEcnutscode() != null) {
metadata.add(asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
}
if (o.getCountry() != null) {
metadata.add(mapQualifier("country", o.getCountry()));
metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
}
break;
@ -530,70 +529,70 @@ public class XmlRecordFactory implements Serializable {
final Project p = (Project) entity;
if (p.getWebsiteurl() != null) {
metadata.add(asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
}
if (p.getCode() != null) {
metadata.add(asXmlElement("code", p.getCode().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue()));
}
if (p.getAcronym() != null) {
metadata.add(asXmlElement("acronym", p.getAcronym().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("acronym", p.getAcronym().getValue()));
}
if (p.getTitle() != null) {
metadata.add(asXmlElement("title", p.getTitle().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("title", p.getTitle().getValue()));
}
if (p.getStartdate() != null) {
metadata.add(asXmlElement("startdate", p.getStartdate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
}
if (p.getEnddate() != null) {
metadata.add(asXmlElement("enddate", p.getEnddate().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue()));
}
if (p.getCallidentifier() != null) {
metadata.add(asXmlElement("callidentifier", p.getCallidentifier().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("callidentifier", p.getCallidentifier().getValue()));
}
if (p.getKeywords() != null) {
metadata.add(asXmlElement("keywords", p.getKeywords().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue()));
}
if (p.getDuration() != null) {
metadata.add(asXmlElement("duration", p.getDuration().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("duration", p.getDuration().getValue()));
}
if (p.getEcarticle29_3() != null) {
metadata.add(asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
}
if (p.getSubjects() != null) {
metadata.addAll(p.getSubjects()
.stream()
.map(sp -> mapStructuredProperty("subject", sp))
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subject", sp))
.collect(Collectors.toList()));
}
if (p.getContracttype() != null) {
metadata.add(mapQualifier("contracttype", p.getContracttype()));
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", p.getContracttype()));
}
if (p.getEcsc39() != null) {
metadata.add(asXmlElement("ecsc39", p.getEcsc39().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("ecsc39", p.getEcsc39().getValue()));
}
if (p.getContactfullname() != null) {
metadata.add(asXmlElement("contactfullname", p.getContactfullname().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("contactfullname", p.getContactfullname().getValue()));
}
if (p.getContactfax() != null) {
metadata.add(asXmlElement("contactfax", p.getContactfax().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("contactfax", p.getContactfax().getValue()));
}
if (p.getContactphone() != null) {
metadata.add(asXmlElement("contactphone", p.getContactphone().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("contactphone", p.getContactphone().getValue()));
}
if (p.getContactemail() != null) {
metadata.add(asXmlElement("contactemail", p.getContactemail().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("contactemail", p.getContactemail().getValue()));
}
if (p.getSummary() != null) {
metadata.add(asXmlElement("summary", p.getSummary().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("summary", p.getSummary().getValue()));
}
if (p.getCurrency() != null) {
metadata.add(asXmlElement("currency", p.getCurrency().getValue()));
metadata.add(XmlSerializationUtils.asXmlElement("currency", p.getCurrency().getValue()));
}
if (p.getTotalcost() != null) {
metadata.add(asXmlElement("totalcost", p.getTotalcost().toString()));
metadata.add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString()));
}
if (p.getFundedamount() != null) {
metadata.add(asXmlElement("fundedamount", p.getFundedamount().toString()));
metadata.add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString()));
}
if (p.getFundingtree() != null) {
metadata.addAll(p.getFundingtree()
@ -611,13 +610,13 @@ public class XmlRecordFactory implements Serializable {
}
private void mapDatasourceType(List<String> metadata, final Qualifier dsType) {
metadata.add(mapQualifier("datasourcetype", dsType));
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType));
if (specialDatasourceTypes.contains(dsType.getClassid())) {
dsType.setClassid("other");
dsType.setClassname("other");
}
metadata.add(mapQualifier("datasourcetypeui", dsType));
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType));
}
private Qualifier getBestAccessright(final Result r) {
@ -652,67 +651,67 @@ public class XmlRecordFactory implements Serializable {
case otherresearchproduct:
case software:
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
metadata.add(mapStructuredProperty("title", re.getTitle()));
metadata.add(XmlSerializationUtils.mapStructuredProperty("title", re.getTitle()));
}
if (isNotBlank(re.getDateofacceptance())) {
metadata.add(asXmlElement("dateofacceptance", re.getDateofacceptance()));
metadata.add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
}
if (isNotBlank(re.getPublisher())) {
metadata.add(asXmlElement("publisher", re.getPublisher()));
metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()));
}
if (isNotBlank(re.getCodeRepositoryUrl())) {
metadata.add(asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
metadata.add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null & !re.getResulttype().isBlank()) {
metadata.add(mapQualifier("resulttype", re.getResulttype()));
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) {
metadata.addAll(re.getCollectedfrom()
.stream()
.map(kv -> mapKeyValue("collectedfrom", kv))
.map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv))
.collect(Collectors.toList()));
}
if (re.getPid() != null) {
metadata.addAll(re.getPid()
.stream()
.map(p -> mapStructuredProperty("pid", p))
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
.collect(Collectors.toList()));
}
break;
case datasource:
if (isNotBlank(re.getOfficialname())) {
metadata.add(asXmlElement("officialname", re.getOfficialname()));
metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) {
mapDatasourceType(metadata, re.getDatasourcetype());
}
if (re.getOpenairecompatibility() != null & !re.getOpenairecompatibility().isBlank()) {
metadata.add(mapQualifier("openairecompatibility", re.getOpenairecompatibility()));
metadata.add(XmlSerializationUtils.mapQualifier("openairecompatibility", re.getOpenairecompatibility()));
}
break;
case organization:
if (isNotBlank(re.getLegalname())) {
metadata.add(asXmlElement("legalname", re.getLegalname()));
metadata.add(XmlSerializationUtils.asXmlElement("legalname", re.getLegalname()));
}
if (isNotBlank(re.getLegalshortname())) {
metadata.add(asXmlElement("legalshortname", re.getLegalshortname()));
metadata.add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null & !re.getCountry().isBlank()) {
metadata.add(mapQualifier("country", re.getCountry()));
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
}
break;
case project:
if (isNotBlank(re.getProjectTitle())) {
metadata.add(asXmlElement("title", re.getProjectTitle()));
metadata.add(XmlSerializationUtils.asXmlElement("title", re.getProjectTitle()));
}
if (isNotBlank(re.getCode())) {
metadata.add(asXmlElement("code", re.getCode()));
metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode()));
}
if (isNotBlank(re.getAcronym())) {
metadata.add(asXmlElement("acronym", re.getAcronym()));
metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
}
if (re.getContracttype() != null & !re.getContracttype().isBlank()) {
metadata.add(mapQualifier("contracttype", re.getContracttype()));
metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype()));
}
if (re.getFundingtree() != null) {
metadata.addAll(re.getFundingtree()
@@ -761,31 +760,31 @@ public class XmlRecordFactory implements Serializable {
final List<String> fields = Lists.newArrayList();
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
fields.add(mapQualifier("accessright", instance.getAccessright()));
fields.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
}
if (instance.getCollectedfrom() != null) {
fields.add(mapKeyValue("collectedfrom", instance.getCollectedfrom()));
fields.add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom()));
}
if (instance.getHostedby() != null) {
fields.add(mapKeyValue("hostedby", instance.getHostedby()));
fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby()));
}
if (instance.getDateofacceptance() != null && isNotBlank(instance.getDateofacceptance().getValue())) {
fields.add(asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue()));
fields.add(XmlSerializationUtils.asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue()));
}
if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) {
fields.add(mapQualifier("instancetype", instance.getInstancetype()));
fields.add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype()));
}
if (isNotBlank(instance.getDistributionlocation())) {
fields.add(asXmlElement("distributionlocation", instance.getDistributionlocation()));
fields.add(XmlSerializationUtils.asXmlElement("distributionlocation", instance.getDistributionlocation()));
}
if (instance.getRefereed() != null && isNotBlank(instance.getRefereed().getValue())) {
fields.add(asXmlElement("refereed", instance.getRefereed().getValue()));
fields.add(XmlSerializationUtils.asXmlElement("refereed", instance.getRefereed().getValue()));
}
if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) {
fields.add(asXmlElement("processingchargeamount", instance.getProcessingchargeamount().getValue()));
fields.add(XmlSerializationUtils.asXmlElement("processingchargeamount", instance.getProcessingchargeamount().getValue()));
}
if (instance.getProcessingchargecurrency() != null && isNotBlank(instance.getProcessingchargecurrency().getValue())) {
fields.add(asXmlElement("processingchargecurrency", instance.getProcessingchargecurrency().getValue()));
fields.add(XmlSerializationUtils.asXmlElement("processingchargecurrency", instance.getProcessingchargecurrency().getValue()));
}
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), fields, instance.getUrl()));
@@ -798,25 +797,25 @@ public class XmlRecordFactory implements Serializable {
final List<String> fields = Lists.newArrayList();
if (isNotBlank(er.getSitename())) {
fields.add(asXmlElement("sitename", er.getSitename()));
fields.add(XmlSerializationUtils.asXmlElement("sitename", er.getSitename()));
}
if (isNotBlank(er.getLabel())) {
fields.add(asXmlElement("label", er.getLabel()));
fields.add(XmlSerializationUtils.asXmlElement("label", er.getLabel()));
}
if (isNotBlank(er.getUrl())) {
fields.add(asXmlElement("url", er.getUrl()));
fields.add(XmlSerializationUtils.asXmlElement("url", er.getUrl()));
}
if (isNotBlank(er.getDescription())) {
fields.add(asXmlElement("description", er.getDescription()));
fields.add(XmlSerializationUtils.asXmlElement("description", er.getDescription()));
}
if (er.getQualifier() != null && !er.getQualifier().isBlank()) { // guard the qualifier being serialized, not the url
fields.add(mapQualifier("qualifier", er.getQualifier()));
fields.add(XmlSerializationUtils.mapQualifier("qualifier", er.getQualifier()));
}
if (isNotBlank(er.getRefidentifier())) {
fields.add(asXmlElement("refidentifier", er.getRefidentifier()));
fields.add(XmlSerializationUtils.asXmlElement("refidentifier", er.getRefidentifier()));
}
if (isNotBlank(er.getQuery())) {
fields.add(asXmlElement("query", er.getQuery()));
fields.add(XmlSerializationUtils.asXmlElement("query", er.getQuery()));
}
children.add(templateFactory.getChild("externalreference", null, fields));
@@ -831,7 +830,7 @@ public class XmlRecordFactory implements Serializable {
final List<ExtraInfo> extraInfo = je.getEntity().getExtraInfo();
return extraInfo != null ? extraInfo
.stream()
.map(e -> mapExtraInfo(e))
.map(e -> XmlSerializationUtils.mapExtraInfo(e))
.collect(Collectors.toList()) : Lists.newArrayList();
}
@@ -967,7 +966,7 @@ public class XmlRecordFactory implements Serializable {
for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
final Element e = (Element) o;
final String _id = e.valueOf("./id");
funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
funding += "<" + e.getName() + " name=\"" + XmlSerializationUtils.escapeXml(e.valueOf("./name")) + "\">" + XmlSerializationUtils.escapeXml(_id) + "</" + e.getName() + ">";
}
} catch (final DocumentException e) {
throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
@@ -983,8 +982,8 @@ public class XmlRecordFactory implements Serializable {
final String funderName = ftree.valueOf("//fundingtree/funder/name");
final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction");
return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
return "<funder id=\"" + XmlSerializationUtils.escapeXml(funderId) + "\" shortname=\"" + XmlSerializationUtils.escapeXml(funderShortName) + "\" name=\"" + XmlSerializationUtils.escapeXml(funderName)
+ "\" jurisdiction=\"" + XmlSerializationUtils.escapeXml(funderJurisdiction) + "\" />";
}
}
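
The pattern repeated throughout this class is the same: helpers previously reached via static imports (asXmlElement, mapQualifier, mapKeyValue, mapStructuredProperty, mapExtraInfo, escapeXml) are now called through the relocated XmlSerializationUtils class, making the origin of each helper explicit after the move under eu.dnetlib.dhp.oa.provision.utils. A minimal sketch of what two of those helpers plausibly look like — signatures inferred from the call sites above, bodies are assumptions rather than the actual implementation:

package eu.dnetlib.dhp.oa.provision.utils;

import org.apache.commons.lang3.StringEscapeUtils;

// Sketch only: signatures inferred from the XmlRecordFactory call sites;
// the real XmlSerializationUtils may differ in detail.
public class XmlSerializationUtils {

    // escape a value for safe embedding in XML text or attributes
    public static String escapeXml(final String value) {
        return value == null ? "" : StringEscapeUtils.escapeXml11(value);
    }

    // serialize a simple <name>value</name> element
    public static String asXmlElement(final String name, final String value) {
        return "<" + name + ">" + escapeXml(value) + "</" + name + ">";
    }
}

With the class-qualified form, a call site such as XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()) no longer depends on a static import that silently resolves against a different package.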

View File

@@ -1,8 +1,8 @@
package eu.dnetlib.dhp.graph.utils;
package eu.dnetlib.dhp.oa.provision.utils;
import eu.dnetlib.dhp.schema.oaf.*;
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.removePrefix;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
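
The static import now resolves against the relocated GraphMappingUtils under eu.dnetlib.dhp.oa.provision.utils. For orientation, a sketch of the behavior removePrefix is assumed to have — stripping the entity-type prefix that OpenAIRE identifiers carry — inferred from its name and usage, not from the actual code:

package eu.dnetlib.dhp.oa.provision.utils;

import org.apache.commons.lang3.StringUtils;

// Sketch only: assumes OpenAIRE ids look like "50|doi_________::...", with an
// entity-type prefix terminated by '|'; removePrefix is assumed to drop it.
public class GraphMappingUtilsSketch {

    public static String removePrefix(final String id) {
        return StringUtils.contains(id, "|") ? StringUtils.substringAfter(id, "|") : id;
    }
}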

View File

@@ -61,7 +61,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>build_adjacency_lists</name>
<class>eu.dnetlib.dhp.graph.SparkXmlRecordBuilderJob</class>
<class>eu.dnetlib.dhp.oa.provision.SparkXmlRecordBuilderJob</class>
<jar>dhp-graph-provision-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores ${sparkExecutorCoresForJoining}
@@ -88,7 +88,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>to_solr_index</name>
<class>eu.dnetlib.dhp.graph.SparkXmlIndexingJob</class>
<class>eu.dnetlib.dhp.oa.provision.SparkXmlIndexingJob</class>
<jar>dhp-graph-provision-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores ${sparkExecutorCoresForIndexing}
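
Both Spark actions now name main classes under the renamed eu.dnetlib.dhp.oa.provision package. A stale <class> coordinate only surfaces at submission time as a ClassNotFoundException, so a cheap guard is to resolve the workflow's class names on the classpath — a sketch, since the surrounding test wiring is an assumption:

package eu.dnetlib.dhp.oa.provision;

// Sketch only: fails fast if the class names wired into the oozie workflow
// drift out of sync with the Java packages after a rename.
public class WorkflowClassCoordinatesCheck {

    public static void main(final String[] args) throws ClassNotFoundException {
        // must match the <class> elements of the workflow actions above
        Class.forName("eu.dnetlib.dhp.oa.provision.SparkXmlRecordBuilderJob");
        Class.forName("eu.dnetlib.dhp.oa.provision.SparkXmlIndexingJob");
        System.out.println("workflow <class> coordinates resolve");
    }
}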

View File

@@ -1,4 +1,4 @@
package eu.dnetlib.dhp.graph;
package eu.dnetlib.dhp.oa.provision;
import org.junit.jupiter.api.BeforeEach;

View File

@@ -154,6 +154,7 @@
<plugin>
<groupId>eu.dnetlib.primer</groupId>
<artifactId>primer-maven-plugin</artifactId>
<version>1.2.0</version>
<executions>
<execution>
<id>priming</id>
@@ -233,6 +234,7 @@
<plugin>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<version>${project.version}</version>
<executions>
<execution>
<phase>validate</phase>