forked from D-Net/dnet-hadoop
bulktagging wfs moved into common dhp-enrichment module
This commit is contained in:
parent
c403971c2f
commit
6d0b11252e
|
@ -1,5 +1,5 @@
|
|||
<workflow-app name="blacklisting" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<parameters>
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the url of the postgress server to query</description>
|
||||
|
@ -16,16 +16,23 @@
|
|||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
</parameters>
|
||||
<start to="reset-outputpath"/>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the graph output path</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<kill name="Kill">
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
</kill>
|
||||
|
||||
<action name="reset-outputpath">
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path='${workingDir}/blacklist'/>
|
||||
<delete path="${workingDir}/blacklist"/>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="read_blacklist"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -87,12 +94,14 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
|
||||
<arg>--mergesPath</arg><arg>${workingDir}/mergesRelation</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -1,7 +0,0 @@
|
|||
#sandboxName when not provided explicitly will be generated
|
||||
sandboxName=${sandboxName}
|
||||
sandboxDir=/user/${dhp.hadoop.frontend.user.name}/${sandboxName}
|
||||
workingDir=${sandboxDir}/working_dir
|
||||
oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
|
||||
oozieTopWfApplicationPath = ${oozie.wf.application.path}
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>dhp-bulktag</artifactId>
|
||||
<artifactId>dhp-enrichment</artifactId>
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
|
@ -31,6 +31,12 @@
|
|||
<artifactId>dhp-schemas</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>dom4j</groupId>
|
||||
<artifactId>dom4j</artifactId>
|
||||
|
@ -43,23 +49,16 @@
|
|||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.reflections</groupId>
|
||||
<artifactId>reflections</artifactId>
|
||||
<version>0.9.11</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>23.3-jre</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.github.classgraph</groupId>
|
||||
<artifactId>classgraph</artifactId>
|
||||
<version>4.8.71</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
</project>
|
|
@ -1,10 +1,11 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.bulktag.community.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
|
@ -15,12 +16,9 @@ import org.apache.spark.sql.SparkSession;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.community.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
public class SparkBulkTagJob {
|
||||
|
|
@ -1,15 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
/** Created by miriam on 01/08/2018. */
|
||||
public class Community implements Serializable {
|
||||
|
||||
|
@ -17,7 +16,7 @@ public class Community implements Serializable {
|
|||
|
||||
private String id;
|
||||
private List<String> subjects = new ArrayList<>();
|
||||
private List<Datasource> datasources = new ArrayList<>();
|
||||
private List<Provider> providers = new ArrayList<>();
|
||||
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
||||
|
||||
public String toJson() {
|
||||
|
@ -27,7 +26,7 @@ public class Community implements Serializable {
|
|||
|
||||
public boolean isValid() {
|
||||
return !getSubjects().isEmpty()
|
||||
|| !getDatasources().isEmpty()
|
||||
|| !getProviders().isEmpty()
|
||||
|| !getZenodoCommunities().isEmpty();
|
||||
}
|
||||
|
||||
|
@ -47,12 +46,12 @@ public class Community implements Serializable {
|
|||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public List<Datasource> getDatasources() {
|
||||
return datasources;
|
||||
public List<Provider> getProviders() {
|
||||
return providers;
|
||||
}
|
||||
|
||||
public void setDatasources(List<Datasource> datasources) {
|
||||
this.datasources = datasources;
|
||||
public void setProviders(List<Provider> providers) {
|
||||
this.providers = providers;
|
||||
}
|
||||
|
||||
public List<ZenodoCommunity> getZenodoCommunities() {
|
|
@ -1,5 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
@ -8,17 +17,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
|
||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
||||
|
||||
/** Created by miriam on 02/08/2018. */
|
||||
public class CommunityConfiguration implements Serializable {
|
||||
|
||||
|
@ -84,7 +82,7 @@ public class CommunityConfiguration implements Serializable {
|
|||
add(sbj.toLowerCase().trim(), p, subjectMap);
|
||||
}
|
||||
// get datasources
|
||||
for (Datasource d : c.getDatasources()) {
|
||||
for (Provider d : c.getProviders()) {
|
||||
|
||||
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
|
||||
}
|
|
@ -1,11 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -14,15 +17,10 @@ import org.dom4j.DocumentException;
|
|||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
|
||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolverFactory;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/** Created by miriam on 03/08/2018. */
|
||||
public class CommunityConfigurationFactory {
|
||||
|
@ -77,7 +75,7 @@ public class CommunityConfigurationFactory {
|
|||
log.info(String.format("community id: %s", c.getId()));
|
||||
|
||||
c.setSubjects(parseSubjects(node));
|
||||
c.setDatasources(parseDatasources(node));
|
||||
c.setProviders(parseDatasources(node));
|
||||
c.setZenodoCommunities(parseZenodoCommunities(node));
|
||||
return c;
|
||||
}
|
||||
|
@ -96,17 +94,17 @@ public class CommunityConfigurationFactory {
|
|||
return subjects;
|
||||
}
|
||||
|
||||
private static List<Datasource> parseDatasources(final Node node) {
|
||||
private static List<Provider> parseDatasources(final Node node) {
|
||||
final List<Node> list = node.selectNodes("./datasources/datasource");
|
||||
final List<Datasource> datasourceList = new ArrayList<>();
|
||||
final List<Provider> providerList = new ArrayList<>();
|
||||
for (Node n : list) {
|
||||
Datasource d = new Datasource();
|
||||
Provider d = new Provider();
|
||||
d.setOpenaireId(n.selectSingleNode("./openaireId").getText());
|
||||
d.setSelCriteria(n.selectSingleNode("./selcriteria"), resolver);
|
||||
datasourceList.add(d);
|
||||
providerList.add(d);
|
||||
}
|
||||
log.info("size of the datasource list " + datasourceList.size());
|
||||
return datasourceList;
|
||||
log.info("size of the datasource list " + providerList.size());
|
||||
return providerList;
|
||||
}
|
||||
|
||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
|
@ -1,12 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
|
||||
public class Constraint implements Serializable {
|
||||
private String verb;
|
||||
private String field;
|
|
@ -1,5 +1,11 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
@ -8,14 +14,6 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
|
||||
/** Created by miriam on 02/08/2018. */
|
||||
public class Constraints implements Serializable {
|
||||
private static final Log log = LogFactory.getLog(Constraints.class);
|
|
@ -1,10 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/** Created by miriam on 03/08/2018. */
|
||||
public class Pair<A, B> implements Serializable {
|
||||
private A fst;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
|
@ -1,19 +1,17 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
import java.io.Serializable;
|
||||
|
||||
/** Created by miriam on 01/08/2018. */
|
||||
public class Datasource implements Serializable {
|
||||
private static final Log log = LogFactory.getLog(Datasource.class);
|
||||
public class Provider implements Serializable {
|
||||
private static final Log log = LogFactory.getLog(Provider.class);
|
||||
|
||||
private String openaireId;
|
||||
|
|
@ -1,15 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.dom4j.DocumentException;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import org.dom4j.DocumentException;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class QueryInformationSystem {
|
||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
@ -1,20 +1,19 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import static eu.dnetlib.dhp.community.TagginConstants.*;
|
||||
import com.google.gson.Gson;
|
||||
import com.jayway.jsonpath.DocumentContext;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.jayway.jsonpath.DocumentContext;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
/** Created by miriam on 02/08/2018. */
|
||||
public class ResultTagger implements Serializable {
|
||||
|
@ -239,8 +238,8 @@ public class ResultTagger implements Serializable {
|
|||
Qualifier pa = new Qualifier();
|
||||
pa.setClassid(inference_class_id);
|
||||
pa.setClassname(inference_class_name);
|
||||
pa.setSchemeid(DNET_SCHEMA_ID);
|
||||
pa.setSchemename(DNET_SCHEMA_NAME);
|
||||
pa.setSchemeid(DNET_PROVENANCE_ACTIONS);
|
||||
pa.setSchemename(DNET_PROVENANCE_ACTIONS);
|
||||
return pa;
|
||||
}
|
||||
}
|
|
@ -1,5 +1,9 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.Type;
|
||||
|
@ -7,11 +11,6 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
|
||||
public class SelectionConstraints implements Serializable {
|
||||
private List<Constraints> criteria;
|
||||
|
|
@ -1,20 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
public class TagginConstants {
|
||||
public class TaggingConstants {
|
||||
|
||||
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
|
||||
|
||||
public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions";
|
||||
public static final String DNET_SCHEMA_ID = "dnet:provenanceActions";
|
||||
|
||||
public static final String CLASS_ID_SUBJECT = "community:subject";
|
||||
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
||||
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
||||
|
||||
public static final String SCHEMA_ID = "dnet:provenanceActions";
|
||||
public static final String COUNTER_GROUP = "Bulk Tagging";
|
||||
|
||||
public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
|
||||
|
||||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
|
@ -1,11 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.dom4j.Node;
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/** Created by miriam on 01/08/2018. */
|
||||
public class ZenodoCommunity implements Serializable {
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
|
||||
import java.lang.reflect.Type;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import com.google.gson.*;
|
||||
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
|
||||
|
||||
private static final String CLASSNAME = "CLASSNAME";
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
public interface Selection {
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
|
@ -1,16 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import io.github.classgraph.ClassGraph;
|
||||
import io.github.classgraph.ClassInfo;
|
||||
import io.github.classgraph.ClassInfoList;
|
||||
import io.github.classgraph.ScanResult;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class VerbResolver implements Serializable {
|
||||
private Map<String, Class<Selection>> map = null; // = new HashMap<>();
|
||||
private final ClassGraph classgraph = new ClassGraph();
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.selectioncriteria;
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
public class VerbResolverFactory {
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp;
|
||||
|
||||
import static eu.dnetlib.dhp.community.TagginConstants.ZENODO_COMMUNITY_INDICATOR;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
@ -18,37 +18,44 @@ import org.junit.jupiter.api.AfterAll;
|
|||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.mortbay.util.IO;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
|
||||
|
||||
public class BulkTagJobTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = eu.dnetlib.dhp.BulkTagJobTest.class.getClassLoader();
|
||||
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
|
||||
|
||||
public static final String pathMap =
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}";
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.BulkTagJobTest.class);
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(BulkTagJobTest.class);
|
||||
|
||||
private static String taggingConf = "";
|
||||
|
||||
static {
|
||||
try {
|
||||
taggingConf = IO
|
||||
taggingConf = IOUtils
|
||||
.toString(
|
||||
BulkTagJobTest.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml"));
|
||||
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml"));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
@ -56,11 +63,11 @@ public class BulkTagJobTest {
|
|||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
||||
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
||||
conf.setAppName(BulkTagJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
|
@ -84,34 +91,21 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void noUpdatesTest() throws Exception {
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/sample/dataset/no_updates").getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
// "-preparedInfoPath",
|
||||
// getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath()
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
|
@ -134,34 +128,24 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
|
||||
.getPath();
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
|
@ -240,32 +224,22 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
|
||||
.getPath();
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
@ -332,34 +306,23 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void bulktagByDatasourceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/sample/publication/update_datasource")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Publication",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/publication",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
||||
"-outputPath", workingDir.toString() + "/publication",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Publication> tmp = sc
|
||||
.textFile(workingDir.toString() + "/publication")
|
||||
|
@ -415,35 +378,24 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void bulktagByZenodoCommunityTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/orp",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath", workingDir.toString() + "/orp",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<OtherResearchProduct> tmp = sc
|
||||
.textFile(workingDir.toString() + "/orp")
|
||||
|
@ -548,34 +500,23 @@ public class BulkTagJobTest {
|
|||
|
||||
@Test
|
||||
public void bulktagBySubjectDatasourceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/sample/dataset/update_subject_datasource")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
|
@ -691,29 +632,17 @@ public class BulkTagJobTest {
|
|||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/sample/software/").getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/software",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Software> tmp = sc
|
||||
.textFile(workingDir.toString() + "/software")
|
||||
|
@ -796,35 +725,24 @@ public class BulkTagJobTest {
|
|||
@Test
|
||||
public void bulktagDatasourcewithConstraintsTest() throws Exception {
|
||||
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints")
|
||||
.getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-pathMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
|
@ -1,23 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
|
||||
import eu.dnetlib.dhp.bulktag.community.Constraint;
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.community.CommunityConfiguration;
|
||||
import eu.dnetlib.dhp.community.CommunityConfigurationFactory;
|
||||
import eu.dnetlib.dhp.community.Constraint;
|
||||
import eu.dnetlib.dhp.community.SelectionConstraints;
|
||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
|
||||
/** Created by miriam on 03/08/2018. */
|
||||
public class CommunityConfigurationFactoryTest {
|
|
@ -5,12 +5,15 @@ import java.io.IOException;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
|
@ -26,12 +29,11 @@ import eu.dnetlib.dhp.schema.oaf.Software;
|
|||
import scala.Tuple2;
|
||||
|
||||
public class CountryPropagationJobTest {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = CountryPropagationJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
@ -101,8 +103,8 @@ public class CountryPropagationJobTest {
|
|||
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
||||
|
||||
Dataset<String> countryExploded = verificationDs
|
||||
.flatMap(row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
||||
.map(c -> c.getClassid(), Encoders.STRING());
|
||||
.flatMap((FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
||||
.map((MapFunction<Country, String>) c -> c.getClassid(), Encoders.STRING());
|
||||
|
||||
Assertions.assertEquals(9, countryExploded.count());
|
||||
|
||||
|
@ -115,8 +117,7 @@ public class CountryPropagationJobTest {
|
|||
Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());
|
||||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid = verificationDs
|
||||
.flatMap(
|
||||
row -> {
|
||||
.flatMap((FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList();
|
||||
List<Country> country_list = row.getCountry();
|
||||
country_list
|
||||
|
@ -127,8 +128,7 @@ public class CountryPropagationJobTest {
|
|||
new Tuple2<>(
|
||||
row.getId(), c.getClassid())));
|
||||
return prova.iterator();
|
||||
},
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||
}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||
|
||||
Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());
|
||||
|
||||
|
@ -178,7 +178,7 @@ public class CountryPropagationJobTest {
|
|||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
|
||||
.flatMap(
|
||||
row -> {
|
||||
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList();
|
||||
List<Country> country_list = row.getCountry();
|
||||
country_list
|
||||
|
@ -239,7 +239,7 @@ public class CountryPropagationJobTest {
|
|||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
|
||||
.flatMap(
|
||||
row -> {
|
||||
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList();
|
||||
List<Country> country_list = row.getCountry();
|
||||
country_list
|
|
@ -29,8 +29,6 @@ public class OrcidPropagationJobTest {
|
|||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = OrcidPropagationJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
|
@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
@ -29,8 +30,6 @@ public class ProjectPropagationJobTest {
|
|||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = ProjectPropagationJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
@ -72,34 +71,26 @@ public class ProjectPropagationJobTest {
|
|||
@Test
|
||||
public void NoUpdateTest() throws Exception {
|
||||
|
||||
SparkResultToProjectThroughSemRelJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
// "-sourcePath",
|
||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath",
|
||||
getClass()
|
||||
final String potentialUpdateDate = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
.getPath();
|
||||
SparkResultToProjectThroughSemRelJob.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-hive_metastore_uris", "",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath", potentialUpdateDate,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
||||
|
@ -115,34 +106,26 @@ public class ProjectPropagationJobTest {
|
|||
*/
|
||||
@Test
|
||||
public void UpdateTenTest() throws Exception {
|
||||
SparkResultToProjectThroughSemRelJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
// "-sourcePath",
|
||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath",
|
||||
getClass()
|
||||
final String potentialUpdatePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
.getPath();
|
||||
SparkResultToProjectThroughSemRelJob.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-hive_metastore_uris", "",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath", potentialUpdatePath,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
||||
|
@ -160,18 +143,18 @@ public class ProjectPropagationJobTest {
|
|||
.assertEquals(
|
||||
5,
|
||||
verificationDs
|
||||
.filter(
|
||||
r -> r.getSource().substring(0, 2).equals("50")
|
||||
&& r.getTarget().substring(0, 2).equals("40")
|
||||
.filter((FilterFunction<Relation>) r ->
|
||||
r.getSource().startsWith("50")
|
||||
&& r.getTarget().startsWith("40")
|
||||
&& r.getRelClass().equals("isProducedBy"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
5,
|
||||
verificationDs
|
||||
.filter(
|
||||
r -> r.getSource().substring(0, 2).equals("40")
|
||||
&& r.getTarget().substring(0, 2).equals("50")
|
||||
.filter((FilterFunction<Relation>) r ->
|
||||
r.getSource().startsWith("40")
|
||||
&& r.getTarget().startsWith("50")
|
||||
&& r.getRelClass().equals("produces"))
|
||||
.count());
|
||||
|
||||
|
@ -194,34 +177,26 @@ public class ProjectPropagationJobTest {
|
|||
*/
|
||||
@Test
|
||||
public void UpdateMixTest() throws Exception {
|
||||
SparkResultToProjectThroughSemRelJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
// "-sourcePath",
|
||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath",
|
||||
getClass()
|
||||
final String potentialUpdatepath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
.getPath();
|
||||
SparkResultToProjectThroughSemRelJob.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-hive_metastore_uris", "",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-potentialUpdatePath", potentialUpdatepath,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
||||
|
@ -242,18 +217,18 @@ public class ProjectPropagationJobTest {
|
|||
.assertEquals(
|
||||
4,
|
||||
verificationDs
|
||||
.filter(
|
||||
r -> r.getSource().substring(0, 2).equals("50")
|
||||
&& r.getTarget().substring(0, 2).equals("40")
|
||||
.filter((FilterFunction<Relation>) r ->
|
||||
r.getSource().startsWith("50")
|
||||
&& r.getTarget().startsWith("40")
|
||||
&& r.getRelClass().equals("isProducedBy"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
4,
|
||||
verificationDs
|
||||
.filter(
|
||||
r -> r.getSource().substring(0, 2).equals("40")
|
||||
&& r.getTarget().substring(0, 2).equals("50")
|
||||
.filter((FilterFunction<Relation>) r ->
|
||||
r.getSource().startsWith("40")
|
||||
&& r.getTarget().startsWith("50")
|
||||
&& r.getRelClass().equals("produces"))
|
||||
.count());
|
||||
|
|
@ -32,8 +32,6 @@ public class ResultToCommunityJobTest {
|
|||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = ResultToCommunityJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
@ -68,33 +66,24 @@ public class ResultToCommunityJobTest {
|
|||
|
||||
@Test
|
||||
public void testSparkResultToCommunityFromOrganizationJob() throws Exception {
|
||||
SparkResultToCommunityFromOrganizationJob
|
||||
.main(
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
||||
.getPath();
|
||||
SparkResultToCommunityFromOrganizationJob.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-preparedInfoPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
||||
.getPath()
|
||||
"-hive_metastore_uris", "",
|
||||
"-saveGraph", "true",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-preparedInfoPath", preparedInfoPath
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
|
@ -217,13 +206,6 @@ public class ResultToCommunityJobTest {
|
|||
.get(0)
|
||||
.getString(0));
|
||||
|
||||
/*
|
||||
* {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"}
|
||||
* "context" ["id": euromarine] updates = 1
|
||||
* {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"} context
|
||||
* = [ni, euromarine] updates = 1
|
||||
*/
|
||||
|
||||
query = "select id, MyT.id community "
|
||||
+ "from dataset "
|
||||
+ "lateral view explode(context) c as MyT "
|
|
@ -29,32 +29,21 @@ import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|||
|
||||
public class ResultToCommunityJobTest {
|
||||
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(
|
||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
||||
.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(
|
||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
||||
.getSimpleName());
|
||||
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf
|
||||
.setAppName(
|
||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
||||
.getSimpleName());
|
||||
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
|
@ -65,7 +54,7 @@ public class ResultToCommunityJobTest {
|
|||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(OrcidPropagationJobTest.class.getSimpleName())
|
||||
.appName(ResultToCommunityJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
@ -83,22 +72,18 @@ public class ResultToCommunityJobTest {
|
|||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample")
|
||||
"-sourcePath", getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris", "",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-preparedInfoPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo")
|
||||
"-preparedInfoPath", getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo")
|
||||
.getPath()
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
|
@ -23,23 +23,19 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class Result2OrganizationJobTest {
|
||||
public class ResultToOrganizationJobTest {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(Result2OrganizationJobTest.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(ResultToOrganizationJobTest.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = Result2OrganizationJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(
|
||||
SparkResultToOrganizationFromIstRepoJob.class.getSimpleName());
|
||||
workingDir = Files.createTempDirectory(SparkResultToOrganizationFromIstRepoJob.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
@ -72,40 +68,29 @@ public class Result2OrganizationJobTest {
|
|||
*/
|
||||
@Test
|
||||
public void NoUpdateTest() throws Exception {
|
||||
SparkResultToOrganizationFromIstRepoJob
|
||||
.main(
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||
.getPath();
|
||||
final String datasourceOrganizationPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
||||
.getPath();
|
||||
SparkResultToOrganizationFromIstRepoJob.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
||||
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-hive_metastore_uris", "",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
||||
|
@ -123,40 +108,29 @@ public class Result2OrganizationJobTest {
|
|||
*/
|
||||
@Test
|
||||
public void UpdateNoMixTest() throws Exception {
|
||||
SparkResultToOrganizationFromIstRepoJob
|
||||
.main(
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||
.getPath();
|
||||
final String datasourceOrganizationPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
||||
.getPath();
|
||||
SparkResultToOrganizationFromIstRepoJob.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
||||
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-hive_metastore_uris", "",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
||||
|
@ -197,40 +171,29 @@ public class Result2OrganizationJobTest {
|
|||
|
||||
@Test
|
||||
public void UpdateMixTest() throws Exception {
|
||||
SparkResultToOrganizationFromIstRepoJob
|
||||
.main(
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
||||
.getPath();
|
||||
final String datasourceOrganizationPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
||||
.getPath();
|
||||
final String alreadyLinkedPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
||||
.getPath();
|
||||
SparkResultToOrganizationFromIstRepoJob.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris",
|
||||
"",
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
||||
|
||||
"-saveGraph",
|
||||
"true",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
||||
.getPath(),
|
||||
"-alreadyLinkedPath",
|
||||
getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
||||
.getPath(),
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-hive_metastore_uris", "",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-saveGraph", "true",
|
||||
"-outputPath", workingDir.toString() + "/relation",
|
||||
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.textFile(workingDir.toString() + "/relation")
|
|
@ -2,17 +2,17 @@
|
|||
<community id="fet-fp7">
|
||||
<oacommunity/>
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="fet-h2020">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="oa-pg">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="ee">
|
||||
|
@ -35,7 +35,7 @@
|
|||
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
||||
<subject>SDG16 - Peace justice and strong institutions</subject>
|
||||
</subjects>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities>
|
||||
<zenodocommunity>
|
||||
<zenodoid>123</zenodoid>
|
||||
|
@ -45,12 +45,12 @@
|
|||
</community>
|
||||
<community id="dh-ch">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="fam">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="ni">
|
||||
|
@ -74,7 +74,7 @@
|
|||
<subject>brain magnetic resonance imaging</subject>
|
||||
<subject>brain abnormalities</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||
<selcriteria/>
|
||||
|
@ -95,7 +95,7 @@
|
|||
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="mes">
|
||||
|
@ -106,12 +106,12 @@
|
|||
<subject>aqua</subject>
|
||||
<subject>sea</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="aginfra">
|
||||
|
@ -134,7 +134,7 @@
|
|||
<subject>food distribution</subject>
|
||||
<subject>forestry</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||
<selcriteria/>
|
||||
|
@ -159,18 +159,18 @@
|
|||
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="clarin">
|
||||
<oacommunity>oac_clarin</oacommunity>
|
||||
<subjects/>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
</communities>
|
|
@ -2,17 +2,17 @@
|
|||
<community id="fet-fp7">
|
||||
<oacommunity/>
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="fet-h2020">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="oa-pg">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="ee">
|
||||
|
@ -35,7 +35,7 @@
|
|||
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
||||
<subject>SDG16 - Peace justice and strong institutions</subject>
|
||||
</subjects>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities>
|
||||
<zenodocommunity>
|
||||
<zenodoid>123</zenodoid>
|
||||
|
@ -45,12 +45,12 @@
|
|||
</community>
|
||||
<community id="dh-ch">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="fam">
|
||||
<subjects/>
|
||||
<datasources/>
|
||||
<providers/>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="ni">
|
||||
|
@ -74,7 +74,7 @@
|
|||
<subject>brain magnetic resonance imaging</subject>
|
||||
<subject>brain abnormalities</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||
<selcriteria/>
|
||||
|
@ -95,7 +95,7 @@
|
|||
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="mes">
|
||||
|
@ -106,12 +106,12 @@
|
|||
<subject>aqua</subject>
|
||||
<subject>sea</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="aginfra">
|
||||
|
@ -134,7 +134,7 @@
|
|||
<subject>food distribution</subject>
|
||||
<subject>forestry</subject>
|
||||
</subjects>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||
<selcriteria/>
|
||||
|
@ -159,30 +159,30 @@
|
|||
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="clarin">
|
||||
<oacommunity>oac_clarin</oacommunity>
|
||||
<subjects/>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="dariah">
|
||||
<oacommunity>oaa_dariah</oacommunity>
|
||||
<subjects/>
|
||||
<datasources>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>openaire____::1cfdb2e14977f31a98e0118283401f32</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
</datasources>
|
||||
</providers>
|
||||
<zenodocommunities>
|
||||
<zenodocommunity>
|
||||
<zenodoid>dimpo</zenodoid>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue