forked from antonis.lempesis/dnet-hadoop
bulktagging wfs moved into common dhp-enrichment module
This commit is contained in:
parent
c403971c2f
commit
6d0b11252e
|
@ -1,31 +1,38 @@
|
||||||
<workflow-app name="blacklisting" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="blacklisting" xmlns="uri:oozie:workflow:0.5">
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>postgresURL</name>
|
<name>postgresURL</name>
|
||||||
<description>the url of the postgress server to query</description>
|
<description>the url of the postgress server to query</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>postgresUser</name>
|
<name>postgresUser</name>
|
||||||
<description>the username to access the postgres db</description>
|
<description>the username to access the postgres db</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>postgresPassword</name>
|
<name>postgresPassword</name>
|
||||||
<description>the postgres password</description>
|
<description>the postgres password</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sourcePath</name>
|
<name>sourcePath</name>
|
||||||
<description>the source path</description>
|
<description>the source path</description>
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
<property>
|
||||||
<start to="reset-outputpath"/>
|
<name>outputPath</name>
|
||||||
|
<description>the graph output path</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
<kill name="Kill">
|
<start to="reset_outputpath"/>
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset-outputpath">
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="reset_outputpath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingDir}/blacklist'/>
|
<delete path="${workingDir}/blacklist"/>
|
||||||
|
<delete path="${outputPath}"/>
|
||||||
|
<mkdir path="${outputPath}"/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="read_blacklist"/>
|
<ok to="read_blacklist"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -87,12 +94,14 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/relation</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
|
<arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
|
||||||
<arg>--mergesPath</arg><arg>${workingDir}/mergesRelation</arg>
|
<arg>--mergesPath</arg><arg>${workingDir}/mergesRelation</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
<end name="End"/>
|
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -1,7 +0,0 @@
|
||||||
#sandboxName when not provided explicitly will be generated
|
|
||||||
sandboxName=${sandboxName}
|
|
||||||
sandboxDir=/user/${dhp.hadoop.frontend.user.name}/${sandboxName}
|
|
||||||
workingDir=${sandboxDir}/working_dir
|
|
||||||
oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
|
|
||||||
oozieTopWfApplicationPath = ${oozie.wf.application.path}
|
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<artifactId>dhp-bulktag</artifactId>
|
<artifactId>dhp-enrichment</artifactId>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -31,6 +31,12 @@
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-hive_2.11</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>dom4j</groupId>
|
<groupId>dom4j</groupId>
|
||||||
<artifactId>dom4j</artifactId>
|
<artifactId>dom4j</artifactId>
|
||||||
|
@ -43,23 +49,16 @@
|
||||||
<groupId>com.jayway.jsonpath</groupId>
|
<groupId>com.jayway.jsonpath</groupId>
|
||||||
<artifactId>json-path</artifactId>
|
<artifactId>json-path</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.reflections</groupId>
|
|
||||||
<artifactId>reflections</artifactId>
|
|
||||||
<version>0.9.11</version>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
<version>23.3-jre</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.github.classgraph</groupId>
|
<groupId>io.github.classgraph</groupId>
|
||||||
<artifactId>classgraph</artifactId>
|
<artifactId>classgraph</artifactId>
|
||||||
<version>4.8.71</version>
|
<version>4.8.71</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -1,10 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
import java.util.Optional;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -15,12 +16,9 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.util.Optional;
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import eu.dnetlib.dhp.community.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
|
|
||||||
public class SparkBulkTagJob {
|
public class SparkBulkTagJob {
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class Community implements Serializable {
|
public class Community implements Serializable {
|
||||||
|
|
||||||
|
@ -17,7 +16,7 @@ public class Community implements Serializable {
|
||||||
|
|
||||||
private String id;
|
private String id;
|
||||||
private List<String> subjects = new ArrayList<>();
|
private List<String> subjects = new ArrayList<>();
|
||||||
private List<Datasource> datasources = new ArrayList<>();
|
private List<Provider> providers = new ArrayList<>();
|
||||||
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
||||||
|
|
||||||
public String toJson() {
|
public String toJson() {
|
||||||
|
@ -27,7 +26,7 @@ public class Community implements Serializable {
|
||||||
|
|
||||||
public boolean isValid() {
|
public boolean isValid() {
|
||||||
return !getSubjects().isEmpty()
|
return !getSubjects().isEmpty()
|
||||||
|| !getDatasources().isEmpty()
|
|| !getProviders().isEmpty()
|
||||||
|| !getZenodoCommunities().isEmpty();
|
|| !getZenodoCommunities().isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,12 +46,12 @@ public class Community implements Serializable {
|
||||||
this.subjects = subjects;
|
this.subjects = subjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Datasource> getDatasources() {
|
public List<Provider> getProviders() {
|
||||||
return datasources;
|
return providers;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDatasources(List<Datasource> datasources) {
|
public void setProviders(List<Provider> providers) {
|
||||||
this.datasources = datasources;
|
this.providers = providers;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<ZenodoCommunity> getZenodoCommunities() {
|
public List<ZenodoCommunity> getZenodoCommunities() {
|
|
@ -1,5 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -8,17 +17,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.GsonBuilder;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class CommunityConfiguration implements Serializable {
|
public class CommunityConfiguration implements Serializable {
|
||||||
|
|
||||||
|
@ -84,7 +82,7 @@ public class CommunityConfiguration implements Serializable {
|
||||||
add(sbj.toLowerCase().trim(), p, subjectMap);
|
add(sbj.toLowerCase().trim(), p, subjectMap);
|
||||||
}
|
}
|
||||||
// get datasources
|
// get datasources
|
||||||
for (Datasource d : c.getDatasources()) {
|
for (Provider d : c.getProviders()) {
|
||||||
|
|
||||||
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
|
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
|
||||||
}
|
}
|
|
@ -1,11 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -14,15 +17,10 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import java.io.StringReader;
|
||||||
import com.google.common.collect.Maps;
|
import java.util.ArrayList;
|
||||||
import com.google.gson.Gson;
|
import java.util.List;
|
||||||
import com.google.gson.GsonBuilder;
|
import java.util.Map;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolverFactory;
|
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class CommunityConfigurationFactory {
|
public class CommunityConfigurationFactory {
|
||||||
|
@ -77,7 +75,7 @@ public class CommunityConfigurationFactory {
|
||||||
log.info(String.format("community id: %s", c.getId()));
|
log.info(String.format("community id: %s", c.getId()));
|
||||||
|
|
||||||
c.setSubjects(parseSubjects(node));
|
c.setSubjects(parseSubjects(node));
|
||||||
c.setDatasources(parseDatasources(node));
|
c.setProviders(parseDatasources(node));
|
||||||
c.setZenodoCommunities(parseZenodoCommunities(node));
|
c.setZenodoCommunities(parseZenodoCommunities(node));
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
@ -96,17 +94,17 @@ public class CommunityConfigurationFactory {
|
||||||
return subjects;
|
return subjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Datasource> parseDatasources(final Node node) {
|
private static List<Provider> parseDatasources(final Node node) {
|
||||||
final List<Node> list = node.selectNodes("./datasources/datasource");
|
final List<Node> list = node.selectNodes("./datasources/datasource");
|
||||||
final List<Datasource> datasourceList = new ArrayList<>();
|
final List<Provider> providerList = new ArrayList<>();
|
||||||
for (Node n : list) {
|
for (Node n : list) {
|
||||||
Datasource d = new Datasource();
|
Provider d = new Provider();
|
||||||
d.setOpenaireId(n.selectSingleNode("./openaireId").getText());
|
d.setOpenaireId(n.selectSingleNode("./openaireId").getText());
|
||||||
d.setSelCriteria(n.selectSingleNode("./selcriteria"), resolver);
|
d.setSelCriteria(n.selectSingleNode("./selcriteria"), resolver);
|
||||||
datasourceList.add(d);
|
providerList.add(d);
|
||||||
}
|
}
|
||||||
log.info("size of the datasource list " + datasourceList.size());
|
log.info("size of the datasource list " + providerList.size());
|
||||||
return datasourceList;
|
return providerList;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
|
@ -1,12 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.Selection;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
|
|
||||||
public class Constraint implements Serializable {
|
public class Constraint implements Serializable {
|
||||||
private String verb;
|
private String verb;
|
||||||
private String field;
|
private String field;
|
|
@ -1,5 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
@ -8,14 +14,6 @@ import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.reflect.TypeToken;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class Constraints implements Serializable {
|
public class Constraints implements Serializable {
|
||||||
private static final Log log = LogFactory.getLog(Constraints.class);
|
private static final Log log = LogFactory.getLog(Constraints.class);
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class Pair<A, B> implements Serializable {
|
public class Pair<A, B> implements Serializable {
|
||||||
private A fst;
|
private A fst;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
|
@ -1,19 +1,17 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class Datasource implements Serializable {
|
public class Provider implements Serializable {
|
||||||
private static final Log log = LogFactory.getLog(Datasource.class);
|
private static final Log log = LogFactory.getLog(Provider.class);
|
||||||
|
|
||||||
private String openaireId;
|
private String openaireId;
|
||||||
|
|
|
@ -1,15 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
public class QueryInformationSystem {
|
||||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
@ -1,20 +1,19 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.community.TagginConstants.*;
|
import com.google.gson.Gson;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
|
||||||
import com.jayway.jsonpath.JsonPath;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class ResultTagger implements Serializable {
|
public class ResultTagger implements Serializable {
|
||||||
|
@ -51,7 +50,7 @@ public class ResultTagger implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public <R extends Result> R enrichContextCriteria(
|
public <R extends Result> R enrichContextCriteria(
|
||||||
final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {
|
final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {
|
||||||
|
|
||||||
// }
|
// }
|
||||||
// public Result enrichContextCriteria(final Result result, final CommunityConfiguration
|
// public Result enrichContextCriteria(final Result result, final CommunityConfiguration
|
||||||
|
@ -239,8 +238,8 @@ public class ResultTagger implements Serializable {
|
||||||
Qualifier pa = new Qualifier();
|
Qualifier pa = new Qualifier();
|
||||||
pa.setClassid(inference_class_id);
|
pa.setClassid(inference_class_id);
|
||||||
pa.setClassname(inference_class_name);
|
pa.setClassname(inference_class_name);
|
||||||
pa.setSchemeid(DNET_SCHEMA_ID);
|
pa.setSchemeid(DNET_PROVENANCE_ACTIONS);
|
||||||
pa.setSchemename(DNET_SCHEMA_NAME);
|
pa.setSchemename(DNET_PROVENANCE_ACTIONS);
|
||||||
return pa;
|
return pa;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,5 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
|
@ -7,11 +11,6 @@ import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.reflect.TypeToken;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
|
|
||||||
public class SelectionConstraints implements Serializable {
|
public class SelectionConstraints implements Serializable {
|
||||||
private List<Constraints> criteria;
|
private List<Constraints> criteria;
|
||||||
|
|
|
@ -1,20 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
public class TagginConstants {
|
public class TaggingConstants {
|
||||||
|
|
||||||
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
|
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
|
||||||
|
|
||||||
public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions";
|
|
||||||
public static final String DNET_SCHEMA_ID = "dnet:provenanceActions";
|
|
||||||
|
|
||||||
public static final String CLASS_ID_SUBJECT = "community:subject";
|
public static final String CLASS_ID_SUBJECT = "community:subject";
|
||||||
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
||||||
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
||||||
|
|
||||||
public static final String SCHEMA_ID = "dnet:provenanceActions";
|
|
||||||
public static final String COUNTER_GROUP = "Bulk Tagging";
|
|
||||||
|
|
||||||
public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
|
public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
|
||||||
|
|
||||||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
|
@ -1,11 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
import org.dom4j.Node;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class ZenodoCommunity implements Serializable {
|
public class ZenodoCommunity implements Serializable {
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.lang.reflect.Type;
|
|
||||||
|
|
||||||
import com.google.gson.*;
|
import com.google.gson.*;
|
||||||
|
|
||||||
|
import java.lang.reflect.Type;
|
||||||
|
|
||||||
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
|
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
|
||||||
|
|
||||||
private static final String CLASSNAME = "CLASSNAME";
|
private static final String CLASSNAME = "CLASSNAME";
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
public interface Selection {
|
public interface Selection {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.lang.annotation.ElementType;
|
import java.lang.annotation.ElementType;
|
||||||
import java.lang.annotation.Retention;
|
import java.lang.annotation.Retention;
|
|
@ -1,16 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import io.github.classgraph.ClassGraph;
|
import io.github.classgraph.ClassGraph;
|
||||||
import io.github.classgraph.ClassInfo;
|
import io.github.classgraph.ClassInfo;
|
||||||
import io.github.classgraph.ClassInfoList;
|
import io.github.classgraph.ClassInfoList;
|
||||||
import io.github.classgraph.ScanResult;
|
import io.github.classgraph.ScanResult;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class VerbResolver implements Serializable {
|
public class VerbResolver implements Serializable {
|
||||||
private Map<String, Class<Selection>> map = null; // = new HashMap<>();
|
private Map<String, Class<Selection>> map = null; // = new HashMap<>();
|
||||||
private final ClassGraph classgraph = new ClassGraph();
|
private final ClassGraph classgraph = new ClassGraph();
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.selectioncriteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
public class VerbResolverFactory {
|
public class VerbResolverFactory {
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.community.TagginConstants.ZENODO_COMMUNITY_INDICATOR;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
@ -18,37 +18,44 @@ import org.junit.jupiter.api.AfterAll;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.mortbay.util.IO;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
|
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
||||||
|
|
||||||
public class BulkTagJobTest {
|
public class BulkTagJobTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = eu.dnetlib.dhp.BulkTagJobTest.class.getClassLoader();
|
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
|
||||||
|
|
||||||
|
public static final String pathMap =
|
||||||
|
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||||
|
+ " \"title\" : \"$['title'][*]['value']\","
|
||||||
|
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||||
|
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||||
|
+ " \"description\" : \"$['description'][*]['value']\"}";
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.BulkTagJobTest.class);
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(BulkTagJobTest.class);
|
||||||
|
|
||||||
private static String taggingConf = "";
|
private static String taggingConf = "";
|
||||||
|
|
||||||
static {
|
static {
|
||||||
try {
|
try {
|
||||||
taggingConf = IO
|
taggingConf = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
BulkTagJobTest.class
|
BulkTagJobTest.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml"));
|
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml"));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
@ -56,11 +63,11 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
workingDir = Files.createTempDirectory(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName());
|
||||||
log.info("using work dir {}", workingDir);
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
conf.setAppName(BulkTagJobTest.class.getSimpleName());
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
conf.setMaster("local[*]");
|
||||||
conf.set("spark.driver.host", "localhost");
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
@ -84,34 +91,21 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void noUpdatesTest() throws Exception {
|
public void noUpdatesTest() throws Exception {
|
||||||
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/sample/dataset/no_updates").getPath(),
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
"-taggingConf",
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
taggingConf,
|
"-pathMap", pathMap
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
// "-preparedInfoPath",
|
|
||||||
// getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath()
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
@ -134,34 +128,24 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
public void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
|
||||||
|
.getPath();
|
||||||
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
.getResource("/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext")
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
.getPath(),
|
"-pathMap", pathMap
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
@ -240,32 +224,22 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
|
||||||
|
.getPath();
|
||||||
|
final String pathMap = BulkTagJobTest.pathMap;
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
.getResource(
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
"/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance")
|
"-pathMap", pathMap
|
||||||
.getPath(),
|
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
@ -332,34 +306,23 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bulktagByDatasourceTest() throws Exception {
|
public void bulktagByDatasourceTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
|
||||||
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/publication",
|
||||||
.getResource("/eu/dnetlib/dhp/sample/publication/update_datasource")
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
.getPath(),
|
"-pathMap", pathMap
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Publication",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/publication",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Publication> tmp = sc
|
JavaRDD<Publication> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/publication")
|
.textFile(workingDir.toString() + "/publication")
|
||||||
|
@ -415,35 +378,24 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bulktagByZenodoCommunityTest() throws Exception {
|
public void bulktagByZenodoCommunityTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
|
||||||
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/orp",
|
||||||
.getResource(
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
"/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity")
|
"-pathMap", pathMap
|
||||||
.getPath(),
|
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/orp",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<OtherResearchProduct> tmp = sc
|
JavaRDD<OtherResearchProduct> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/orp")
|
.textFile(workingDir.toString() + "/orp")
|
||||||
|
@ -548,34 +500,23 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bulktagBySubjectDatasourceTest() throws Exception {
|
public void bulktagBySubjectDatasourceTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
|
||||||
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
.getResource("/eu/dnetlib/dhp/sample/dataset/update_subject_datasource")
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
.getPath(),
|
"-pathMap", pathMap
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
@ -691,29 +632,17 @@ public class BulkTagJobTest {
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/sample/software/").getPath(),
|
"-outputPath", workingDir.toString() + "/software",
|
||||||
"-taggingConf",
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
taggingConf,
|
"-pathMap", pathMap
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/software",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Software> tmp = sc
|
JavaRDD<Software> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/software")
|
.textFile(workingDir.toString() + "/software")
|
||||||
|
@ -796,35 +725,24 @@ public class BulkTagJobTest {
|
||||||
@Test
|
@Test
|
||||||
public void bulktagDatasourcewithConstraintsTest() throws Exception {
|
public void bulktagDatasourcewithConstraintsTest() throws Exception {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
||||||
|
.getPath();
|
||||||
SparkBulkTagJob
|
SparkBulkTagJob
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-taggingConf", taggingConf,
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
getClass()
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
.getResource(
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
"/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints")
|
"-pathMap", pathMap
|
||||||
.getPath(),
|
|
||||||
"-taggingConf",
|
|
||||||
taggingConf,
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-isLookUpUrl",
|
|
||||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
|
||||||
"-pathMap",
|
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
|
@ -1,23 +1,21 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.Constraint;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import eu.dnetlib.dhp.community.CommunityConfiguration;
|
import java.util.*;
|
||||||
import eu.dnetlib.dhp.community.CommunityConfigurationFactory;
|
|
||||||
import eu.dnetlib.dhp.community.Constraint;
|
|
||||||
import eu.dnetlib.dhp.community.SelectionConstraints;
|
|
||||||
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
|
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class CommunityConfigurationFactoryTest {
|
public class CommunityConfigurationFactoryTest {
|
|
@ -5,12 +5,15 @@ import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
import org.junit.jupiter.api.AfterAll;
|
import org.junit.jupiter.api.AfterAll;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
@ -26,12 +29,11 @@ import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class CountryPropagationJobTest {
|
public class CountryPropagationJobTest {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class);
|
private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = CountryPropagationJobTest.class.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
@ -101,8 +103,8 @@ public class CountryPropagationJobTest {
|
||||||
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
||||||
|
|
||||||
Dataset<String> countryExploded = verificationDs
|
Dataset<String> countryExploded = verificationDs
|
||||||
.flatMap(row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
.flatMap((FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
||||||
.map(c -> c.getClassid(), Encoders.STRING());
|
.map((MapFunction<Country, String>) c -> c.getClassid(), Encoders.STRING());
|
||||||
|
|
||||||
Assertions.assertEquals(9, countryExploded.count());
|
Assertions.assertEquals(9, countryExploded.count());
|
||||||
|
|
||||||
|
@ -115,20 +117,18 @@ public class CountryPropagationJobTest {
|
||||||
Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());
|
Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());
|
||||||
|
|
||||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid = verificationDs
|
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid = verificationDs
|
||||||
.flatMap(
|
.flatMap((FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||||
row -> {
|
List<Tuple2<String, String>> prova = new ArrayList();
|
||||||
List<Tuple2<String, String>> prova = new ArrayList();
|
List<Country> country_list = row.getCountry();
|
||||||
List<Country> country_list = row.getCountry();
|
country_list
|
||||||
country_list
|
.stream()
|
||||||
.stream()
|
.forEach(
|
||||||
.forEach(
|
|
||||||
c -> prova
|
c -> prova
|
||||||
.add(
|
.add(
|
||||||
new Tuple2<>(
|
new Tuple2<>(
|
||||||
row.getId(), c.getClassid())));
|
row.getId(), c.getClassid())));
|
||||||
return prova.iterator();
|
return prova.iterator();
|
||||||
},
|
}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
|
||||||
|
|
||||||
Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());
|
Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());
|
||||||
|
|
||||||
|
@ -178,20 +178,20 @@ public class CountryPropagationJobTest {
|
||||||
|
|
||||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
|
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
|
||||||
.flatMap(
|
.flatMap(
|
||||||
row -> {
|
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||||
List<Tuple2<String, String>> prova = new ArrayList();
|
List<Tuple2<String, String>> prova = new ArrayList();
|
||||||
List<Country> country_list = row.getCountry();
|
List<Country> country_list = row.getCountry();
|
||||||
country_list
|
country_list
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(
|
.forEach(
|
||||||
c -> prova
|
c -> prova
|
||||||
.add(
|
.add(
|
||||||
new Tuple2<>(
|
new Tuple2<>(
|
||||||
row.getId(),
|
row.getId(),
|
||||||
c.getClassname())));
|
c.getClassname())));
|
||||||
return prova.iterator();
|
return prova.iterator();
|
||||||
},
|
},
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||||
|
|
||||||
countryExplodedWithCountryclassname.show(false);
|
countryExplodedWithCountryclassname.show(false);
|
||||||
Assertions
|
Assertions
|
||||||
|
@ -239,22 +239,22 @@ public class CountryPropagationJobTest {
|
||||||
|
|
||||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
|
Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
|
||||||
.flatMap(
|
.flatMap(
|
||||||
row -> {
|
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||||
List<Tuple2<String, String>> prova = new ArrayList();
|
List<Tuple2<String, String>> prova = new ArrayList();
|
||||||
List<Country> country_list = row.getCountry();
|
List<Country> country_list = row.getCountry();
|
||||||
country_list
|
country_list
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(
|
.forEach(
|
||||||
c -> prova
|
c -> prova
|
||||||
.add(
|
.add(
|
||||||
new Tuple2<>(
|
new Tuple2<>(
|
||||||
row.getId(),
|
row.getId(),
|
||||||
c
|
c
|
||||||
.getDataInfo()
|
.getDataInfo()
|
||||||
.getInferenceprovenance())));
|
.getInferenceprovenance())));
|
||||||
return prova.iterator();
|
return prova.iterator();
|
||||||
},
|
},
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
|
@ -29,8 +29,6 @@ public class OrcidPropagationJobTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = OrcidPropagationJobTest.class.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
|
@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
@ -29,8 +30,6 @@ public class ProjectPropagationJobTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = ProjectPropagationJobTest.class.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
@ -72,34 +71,26 @@ public class ProjectPropagationJobTest {
|
||||||
@Test
|
@Test
|
||||||
public void NoUpdateTest() throws Exception {
|
public void NoUpdateTest() throws Exception {
|
||||||
|
|
||||||
SparkResultToProjectThroughSemRelJob
|
final String potentialUpdateDate = getClass()
|
||||||
.main(
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToProjectThroughSemRelJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-hive_metastore_uris", "",
|
||||||
Boolean.FALSE.toString(),
|
"-saveGraph", "true",
|
||||||
// "-sourcePath",
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
"-potentialUpdatePath", potentialUpdateDate,
|
||||||
"-hive_metastore_uris",
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"",
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-potentialUpdatePath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
||||||
|
@ -115,34 +106,26 @@ public class ProjectPropagationJobTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void UpdateTenTest() throws Exception {
|
public void UpdateTenTest() throws Exception {
|
||||||
SparkResultToProjectThroughSemRelJob
|
final String potentialUpdatePath = getClass()
|
||||||
.main(
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToProjectThroughSemRelJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-hive_metastore_uris", "",
|
||||||
Boolean.FALSE.toString(),
|
"-saveGraph", "true",
|
||||||
// "-sourcePath",
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
"-potentialUpdatePath", potentialUpdatePath,
|
||||||
"-hive_metastore_uris",
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"",
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-potentialUpdatePath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
||||||
|
@ -160,18 +143,18 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5,
|
5,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter(
|
.filter((FilterFunction<Relation>) r ->
|
||||||
r -> r.getSource().substring(0, 2).equals("50")
|
r.getSource().startsWith("50")
|
||||||
&& r.getTarget().substring(0, 2).equals("40")
|
&& r.getTarget().startsWith("40")
|
||||||
&& r.getRelClass().equals("isProducedBy"))
|
&& r.getRelClass().equals("isProducedBy"))
|
||||||
.count());
|
.count());
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5,
|
5,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter(
|
.filter((FilterFunction<Relation>) r ->
|
||||||
r -> r.getSource().substring(0, 2).equals("40")
|
r.getSource().startsWith("40")
|
||||||
&& r.getTarget().substring(0, 2).equals("50")
|
&& r.getTarget().startsWith("50")
|
||||||
&& r.getRelClass().equals("produces"))
|
&& r.getRelClass().equals("produces"))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
|
@ -194,34 +177,26 @@ public class ProjectPropagationJobTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void UpdateMixTest() throws Exception {
|
public void UpdateMixTest() throws Exception {
|
||||||
SparkResultToProjectThroughSemRelJob
|
final String potentialUpdatepath = getClass()
|
||||||
.main(
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToProjectThroughSemRelJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-hive_metastore_uris", "",
|
||||||
Boolean.FALSE.toString(),
|
"-saveGraph", "true",
|
||||||
// "-sourcePath",
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
// getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(),
|
"-potentialUpdatePath", potentialUpdatepath,
|
||||||
"-hive_metastore_uris",
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"",
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-potentialUpdatePath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
||||||
|
@ -242,18 +217,18 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
4,
|
4,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter(
|
.filter((FilterFunction<Relation>) r ->
|
||||||
r -> r.getSource().substring(0, 2).equals("50")
|
r.getSource().startsWith("50")
|
||||||
&& r.getTarget().substring(0, 2).equals("40")
|
&& r.getTarget().startsWith("40")
|
||||||
&& r.getRelClass().equals("isProducedBy"))
|
&& r.getRelClass().equals("isProducedBy"))
|
||||||
.count());
|
.count());
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
4,
|
4,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter(
|
.filter((FilterFunction<Relation>) r ->
|
||||||
r -> r.getSource().substring(0, 2).equals("40")
|
r.getSource().startsWith("40")
|
||||||
&& r.getTarget().substring(0, 2).equals("50")
|
&& r.getTarget().startsWith("50")
|
||||||
&& r.getRelClass().equals("produces"))
|
&& r.getRelClass().equals("produces"))
|
||||||
.count());
|
.count());
|
||||||
|
|
|
@ -32,8 +32,6 @@ public class ResultToCommunityJobTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = ResultToCommunityJobTest.class.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
@ -68,33 +66,24 @@ public class ResultToCommunityJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSparkResultToCommunityFromOrganizationJob() throws Exception {
|
public void testSparkResultToCommunityFromOrganizationJob() throws Exception {
|
||||||
SparkResultToCommunityFromOrganizationJob
|
final String preparedInfoPath = getClass()
|
||||||
.main(
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToCommunityFromOrganizationJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", getClass()
|
||||||
Boolean.FALSE.toString(),
|
|
||||||
"-sourcePath",
|
|
||||||
getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample")
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample")
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-hive_metastore_uris",
|
"-hive_metastore_uris", "",
|
||||||
"",
|
"-saveGraph", "true",
|
||||||
"-saveGraph",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
"true",
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
"-resultTableName",
|
"-preparedInfoPath", preparedInfoPath
|
||||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/dataset",
|
|
||||||
"-preparedInfoPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
|
||||||
.getPath()
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
||||||
|
@ -217,13 +206,6 @@ public class ResultToCommunityJobTest {
|
||||||
.get(0)
|
.get(0)
|
||||||
.getString(0));
|
.getString(0));
|
||||||
|
|
||||||
/*
|
|
||||||
* {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"}
|
|
||||||
* "context" ["id": euromarine] updates = 1
|
|
||||||
* {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"} context
|
|
||||||
* = [ni, euromarine] updates = 1
|
|
||||||
*/
|
|
||||||
|
|
||||||
query = "select id, MyT.id community "
|
query = "select id, MyT.id community "
|
||||||
+ "from dataset "
|
+ "from dataset "
|
||||||
+ "lateral view explode(context) c as MyT "
|
+ "lateral view explode(context) c as MyT "
|
|
@ -29,32 +29,21 @@ import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
|
||||||
public class ResultToCommunityJobTest {
|
public class ResultToCommunityJobTest {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory
|
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
|
||||||
.getLogger(
|
|
||||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class);
|
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
|
||||||
.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
workingDir = Files
|
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
|
||||||
.createTempDirectory(
|
|
||||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
|
||||||
.getSimpleName());
|
|
||||||
log.info("using work dir {}", workingDir);
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf
|
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
|
||||||
.setAppName(
|
|
||||||
eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class
|
|
||||||
.getSimpleName());
|
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
conf.setMaster("local[*]");
|
||||||
conf.set("spark.driver.host", "localhost");
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
@ -65,7 +54,7 @@ public class ResultToCommunityJobTest {
|
||||||
|
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(OrcidPropagationJobTest.class.getSimpleName())
|
.appName(ResultToCommunityJobTest.class.getSimpleName())
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
}
|
}
|
||||||
|
@ -83,22 +72,18 @@ public class ResultToCommunityJobTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath",
|
"-sourcePath", getClass()
|
||||||
getClass()
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample")
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample")
|
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-hive_metastore_uris", "",
|
"-hive_metastore_uris", "",
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
"-preparedInfoPath",
|
"-preparedInfoPath", getClass()
|
||||||
getClass()
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo")
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo")
|
|
||||||
.getPath()
|
.getPath()
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Dataset> tmp = sc
|
JavaRDD<Dataset> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/dataset")
|
.textFile(workingDir.toString() + "/dataset")
|
|
@ -23,23 +23,19 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
public class Result2OrganizationJobTest {
|
public class ResultToOrganizationJobTest {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(Result2OrganizationJobTest.class);
|
private static final Logger log = LoggerFactory.getLogger(ResultToOrganizationJobTest.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = Result2OrganizationJobTest.class.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
workingDir = Files
|
workingDir = Files.createTempDirectory(SparkResultToOrganizationFromIstRepoJob.class.getSimpleName());
|
||||||
.createTempDirectory(
|
|
||||||
SparkResultToOrganizationFromIstRepoJob.class.getSimpleName());
|
|
||||||
log.info("using work dir {}", workingDir);
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
@ -72,40 +68,29 @@ public class Result2OrganizationJobTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void NoUpdateTest() throws Exception {
|
public void NoUpdateTest() throws Exception {
|
||||||
SparkResultToOrganizationFromIstRepoJob
|
final String sourcePath = getClass()
|
||||||
.main(
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||||
|
.getPath();
|
||||||
|
final String datasourceOrganizationPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToOrganizationFromIstRepoJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-hive_metastore_uris", "",
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||||
getClass()
|
"-saveGraph", "true",
|
||||||
.getResource(
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||||
.getPath(),
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"-hive_metastore_uris",
|
|
||||||
"",
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-datasourceOrganizationPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
||||||
|
@ -123,40 +108,29 @@ public class Result2OrganizationJobTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void UpdateNoMixTest() throws Exception {
|
public void UpdateNoMixTest() throws Exception {
|
||||||
SparkResultToOrganizationFromIstRepoJob
|
final String sourcePath = getClass()
|
||||||
.main(
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||||
|
.getPath();
|
||||||
|
final String datasourceOrganizationPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToOrganizationFromIstRepoJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-hive_metastore_uris", "",
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||||
getClass()
|
"-saveGraph", "true",
|
||||||
.getResource(
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||||
.getPath(),
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"-hive_metastore_uris",
|
|
||||||
"",
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-datasourceOrganizationPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
||||||
|
@ -197,40 +171,29 @@ public class Result2OrganizationJobTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void UpdateMixTest() throws Exception {
|
public void UpdateMixTest() throws Exception {
|
||||||
SparkResultToOrganizationFromIstRepoJob
|
final String sourcePath = getClass()
|
||||||
.main(
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
||||||
|
.getPath();
|
||||||
|
final String datasourceOrganizationPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
||||||
|
.getPath();
|
||||||
|
final String alreadyLinkedPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
||||||
|
.getPath();
|
||||||
|
SparkResultToOrganizationFromIstRepoJob.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest",
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
Boolean.TRUE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-isSparkSessionManaged",
|
"-sourcePath", sourcePath,
|
||||||
Boolean.FALSE.toString(),
|
"-hive_metastore_uris", "",
|
||||||
"-sourcePath",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||||
getClass()
|
"-saveGraph", "true",
|
||||||
.getResource(
|
"-outputPath", workingDir.toString() + "/relation",
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
"-datasourceOrganizationPath", datasourceOrganizationPath,
|
||||||
.getPath(),
|
"-alreadyLinkedPath", alreadyLinkedPath,
|
||||||
"-hive_metastore_uris",
|
|
||||||
"",
|
|
||||||
"-resultTableName",
|
|
||||||
"eu.dnetlib.dhp.schema.oaf.Software",
|
|
||||||
|
|
||||||
"-saveGraph",
|
|
||||||
"true",
|
|
||||||
"-outputPath",
|
|
||||||
workingDir.toString() + "/relation",
|
|
||||||
"-datasourceOrganizationPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
|
||||||
.getPath(),
|
|
||||||
"-alreadyLinkedPath",
|
|
||||||
getClass()
|
|
||||||
.getResource(
|
|
||||||
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
|
||||||
.getPath(),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Relation> tmp = sc
|
JavaRDD<Relation> tmp = sc
|
||||||
.textFile(workingDir.toString() + "/relation")
|
.textFile(workingDir.toString() + "/relation")
|
|
@ -2,17 +2,17 @@
|
||||||
<community id="fet-fp7">
|
<community id="fet-fp7">
|
||||||
<oacommunity/>
|
<oacommunity/>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="fet-h2020">
|
<community id="fet-h2020">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="oa-pg">
|
<community id="oa-pg">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="ee">
|
<community id="ee">
|
||||||
|
@ -35,7 +35,7 @@
|
||||||
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
||||||
<subject>SDG16 - Peace justice and strong institutions</subject>
|
<subject>SDG16 - Peace justice and strong institutions</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities>
|
<zenodocommunities>
|
||||||
<zenodocommunity>
|
<zenodocommunity>
|
||||||
<zenodoid>123</zenodoid>
|
<zenodoid>123</zenodoid>
|
||||||
|
@ -45,12 +45,12 @@
|
||||||
</community>
|
</community>
|
||||||
<community id="dh-ch">
|
<community id="dh-ch">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="fam">
|
<community id="fam">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="ni">
|
<community id="ni">
|
||||||
|
@ -74,7 +74,7 @@
|
||||||
<subject>brain magnetic resonance imaging</subject>
|
<subject>brain magnetic resonance imaging</subject>
|
||||||
<subject>brain abnormalities</subject>
|
<subject>brain abnormalities</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
|
@ -95,7 +95,7 @@
|
||||||
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="mes">
|
<community id="mes">
|
||||||
|
@ -106,12 +106,12 @@
|
||||||
<subject>aqua</subject>
|
<subject>aqua</subject>
|
||||||
<subject>sea</subject>
|
<subject>sea</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="aginfra">
|
<community id="aginfra">
|
||||||
|
@ -134,7 +134,7 @@
|
||||||
<subject>food distribution</subject>
|
<subject>food distribution</subject>
|
||||||
<subject>forestry</subject>
|
<subject>forestry</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
|
@ -159,18 +159,18 @@
|
||||||
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="clarin">
|
<community id="clarin">
|
||||||
<oacommunity>oac_clarin</oacommunity>
|
<oacommunity>oac_clarin</oacommunity>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
</communities>
|
</communities>
|
|
@ -2,17 +2,17 @@
|
||||||
<community id="fet-fp7">
|
<community id="fet-fp7">
|
||||||
<oacommunity/>
|
<oacommunity/>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="fet-h2020">
|
<community id="fet-h2020">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="oa-pg">
|
<community id="oa-pg">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="ee">
|
<community id="ee">
|
||||||
|
@ -35,7 +35,7 @@
|
||||||
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
<subject>SDG9 - Industry innovation and infrastructure</subject>
|
||||||
<subject>SDG16 - Peace justice and strong institutions</subject>
|
<subject>SDG16 - Peace justice and strong institutions</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities>
|
<zenodocommunities>
|
||||||
<zenodocommunity>
|
<zenodocommunity>
|
||||||
<zenodoid>123</zenodoid>
|
<zenodoid>123</zenodoid>
|
||||||
|
@ -45,12 +45,12 @@
|
||||||
</community>
|
</community>
|
||||||
<community id="dh-ch">
|
<community id="dh-ch">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="fam">
|
<community id="fam">
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources/>
|
<providers/>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="ni">
|
<community id="ni">
|
||||||
|
@ -74,7 +74,7 @@
|
||||||
<subject>brain magnetic resonance imaging</subject>
|
<subject>brain magnetic resonance imaging</subject>
|
||||||
<subject>brain abnormalities</subject>
|
<subject>brain abnormalities</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
|
@ -95,7 +95,7 @@
|
||||||
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="mes">
|
<community id="mes">
|
||||||
|
@ -106,12 +106,12 @@
|
||||||
<subject>aqua</subject>
|
<subject>aqua</subject>
|
||||||
<subject>sea</subject>
|
<subject>sea</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="aginfra">
|
<community id="aginfra">
|
||||||
|
@ -134,7 +134,7 @@
|
||||||
<subject>food distribution</subject>
|
<subject>food distribution</subject>
|
||||||
<subject>forestry</subject>
|
<subject>forestry</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
|
@ -159,30 +159,30 @@
|
||||||
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="clarin">
|
<community id="clarin">
|
||||||
<oacommunity>oac_clarin</oacommunity>
|
<oacommunity>oac_clarin</oacommunity>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||||
<selcriteria/>
|
<selcriteria/>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="dariah">
|
<community id="dariah">
|
||||||
<oacommunity>oaa_dariah</oacommunity>
|
<oacommunity>oaa_dariah</oacommunity>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources>
|
<providers>
|
||||||
<datasource>
|
<datasource>
|
||||||
<openaireId>openaire____::1cfdb2e14977f31a98e0118283401f32</openaireId>
|
<openaireId>openaire____::1cfdb2e14977f31a98e0118283401f32</openaireId>
|
||||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}
|
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}
|
||||||
</selcriteria>
|
</selcriteria>
|
||||||
</datasource>
|
</datasource>
|
||||||
</datasources>
|
</providers>
|
||||||
<zenodocommunities>
|
<zenodocommunities>
|
||||||
<zenodocommunity>
|
<zenodocommunity>
|
||||||
<zenodoid>dimpo</zenodoid>
|
<zenodoid>dimpo</zenodoid>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue