forked from D-Net/dnet-hadoop
Compare commits
No commits in common. "3c3e3537e08ba2636ab6ff1ae8436f8e05ba0ece" and "04a0d1ba6e40d84234c1a8373fa0cb7f31ec529a" have entirely different histories.
3c3e3537e0 ... 04a0d1ba6e
@@ -90,13 +90,6 @@ public class MakeTarArchive implements Serializable {
 				String p_string = p.toString();
 				if (!p_string.endsWith("_SUCCESS")) {
 					String name = p_string.substring(p_string.lastIndexOf("/") + 1);
-					if (name.startsWith("part-") & name.length() > 10) {
-						String tmp = name.substring(0, 10);
-						if (name.contains(".")) {
-							tmp += name.substring(name.indexOf("."));
-						}
-						name = tmp;
-					}
 					TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
 					entry.setSize(fileStatus.getLen());
 					current_size += fileStatus.getLen();
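For context, a minimal self-contained sketch (not part of the repository) of what the block removed above used to do: it shortened Spark part-file names to their first ten characters plus the original extension before the file was added to the tar archive. The class name, method name and sample file name below are illustrative assumptions.

public class PartNameShortenSketch {

	// Mirrors the removed block: keep "part-NNNNN" and re-attach the original extension.
	static String shorten(String name) {
		if (name.startsWith("part-") && name.length() > 10) {
			String tmp = name.substring(0, 10);            // e.g. "part-00000"
			if (name.contains(".")) {
				tmp += name.substring(name.indexOf("."));  // e.g. ".json.gz"
			}
			return tmp;
		}
		return name;
	}

	public static void main(String[] args) {
		// Hypothetical Spark output file name
		System.out.println(shorten("part-00000-1a2b3c4d.json.gz")); // prints part-00000.json.gz
	}
}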
@@ -13,7 +13,6 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.PairFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -92,7 +91,6 @@ public class SparkUpdateEntity extends AbstractSparkAction {
 
 		final JavaPairRDD<String, String> mergedIds = rel
 			.where("relClass == 'merges'")
-			.where("source != target")
 			.select(rel.col("target"))
 			.distinct()
 			.toJavaRDD()
@@ -37,8 +37,7 @@ public class DumpProducts implements Serializable {
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath);
-				execDump(
-					spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
+				execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
 			});
 	}
 
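The isSparkSessionManaged / spark -> { ... } shape visible in this hunk wraps the job body in a helper that obtains the SparkSession and stops it only when the session is managed. Below is a minimal, self-contained sketch of that pattern; the runWithSparkSession helper is written out locally for illustration and is an assumption, not the project's actual support code.

import java.util.function.Consumer;

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class RunWithSparkSketch {

	// Illustrative helper: create the session, run the body, stop the session only when managed.
	static void runWithSparkSession(SparkConf conf, Boolean isSparkSessionManaged, Consumer<SparkSession> body) {
		SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
		try {
			body.accept(spark);
		} finally {
			if (Boolean.TRUE.equals(isSparkSessionManaged)) {
				spark.stop();
			}
		}
	}

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("dump-sketch").setMaster("local[*]");
		runWithSparkSession(conf, Boolean.TRUE, spark -> {
			// job body, e.g. clear the output directory and run the dump, as in the hunk above
			System.out.println("Spark version: " + spark.version());
		});
	}
}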
@@ -20,7 +20,7 @@ public class QueryInformationSystem {
 	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
 		+
 		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
-		" and ($x//context/param[./@name = 'status']/text() = 'all') "
+		" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
 		+
 		" return " +
 		"<community> " +
@@ -1,7 +1,6 @@
 
 package eu.dnetlib.dhp.oa.graph.dump;
 
-import java.io.IOException;
 import java.io.Serializable;
 import java.util.Optional;
 
@@ -10,7 +9,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;
-import org.jetbrains.annotations.NotNull;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
@@ -50,12 +48,15 @@ public class SendToZenodoHDFS implements Serializable {
 			.orElse(false);
 
 		final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
+		final String communityMapPath = parser.get("communityMapPath");
 
 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);
 
 		FileSystem fileSystem = FileSystem.get(conf);
 
+		CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
 
 		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
 			.listFiles(
 				new Path(hdfsPath), true);
@@ -86,6 +87,11 @@ public class SendToZenodoHDFS implements Serializable {
 			if (!p_string.endsWith("_SUCCESS")) {
 				// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
 				String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+				log.info("Sending information for community: " + name);
+				if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
+					name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
+				}
 
 				FSDataInputStream inputStream = fileSystem.open(p);
 				zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
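The lines added in the last hunk rename each split archive from its community identifier to the community label taken from the community map before the upload to Zenodo. A small self-contained sketch of that renaming follows; the map entry used in main is a hypothetical example, not taken from the actual community map.

import java.util.HashMap;
import java.util.Map;

public class CommunityTarRenameSketch {

	// Mirrors the added lines: strip the extension, look the id up in the community map,
	// and rebuild the upload name from the community label.
	static String uploadName(String name, Map<String, String> communityMap) {
		String id = name.substring(0, name.lastIndexOf("."));
		if (communityMap.containsKey(id)) {
			return communityMap.get(id).replace(" ", "_") + ".tar";
		}
		return name;
	}

	public static void main(String[] args) {
		// Hypothetical community map entry (community id -> label)
		Map<String, String> communityMap = new HashMap<>();
		communityMap.put("beopen", "Transport Research");

		System.out.println(uploadName("beopen.tar", communityMap)); // prints Transport_Research.tar
	}
}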
@@ -34,14 +34,12 @@ public class CommunitySplit implements Serializable {
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath);
-				CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
-
-				execSplit(spark, inputPath, outputPath, communityMap);
+				execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
 			});
 	}
 
 	private static void execSplit(SparkSession spark, String inputPath, String outputPath,
-		CommunityMap communities) {
+		Set<String> communities) {
 
 		Dataset<CommunityResult> result = Utils
 			.readPath(spark, inputPath + "/publication", CommunityResult.class)
@@ -50,9 +48,8 @@ public class CommunitySplit implements Serializable {
 			.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
 
 		communities
-			.keySet()
 			.stream()
-			.forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_")));
+			.forEach(c -> printResult(c, result, outputPath));
 
 	}
 
@@ -64,7 +61,7 @@ public class CommunitySplit implements Serializable {
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
-			.json(outputPath);
+			.json(outputPath + "/" + c);
 
 	}
 
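Taken together, these hunks make execSplit work on the plain set of community identifiers and let printResult append the community id to the output path. A rough, self-contained sketch of such a per-community split is shown below; the input path, the community ids and the filter column are assumptions, since the filtering part of printResult is not included in this diff.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class CommunitySplitSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("split-sketch").master("local[*]").getOrCreate();

		// Hypothetical input: each record carries the community it belongs to.
		Dataset<Row> result = spark.read().json("/tmp/dump/result");  // path is an assumption
		Set<String> communities = new HashSet<>(Arrays.asList("beopen", "dh-ch"));

		// As in the reworked execSplit: one gzip-compressed JSON directory per community id.
		communities.forEach(c -> result
			.filter(result.col("community").equalTo(c))               // filter column is an assumption
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json("/tmp/dump/split/" + c));

		spark.stop();
	}
}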
@@ -1,85 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.community;
-
-import java.io.*;
-import java.nio.charset.StandardCharsets;
-import java.util.Objects;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-
-public class RemoveCommunities implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(RemoveCommunities.class);
-	private final static ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private final Configuration conf;
-	private final BufferedWriter writer;
-	private final CommunityMap communityMap;
-
-	public RemoveCommunities(String path, String hdfsNameNode) throws IOException {
-		conf = new Configuration();
-		conf.set("fs.defaultFS", hdfsNameNode);
-		FileSystem fileSystem = FileSystem.get(conf);
-		Path hdfsPath = new Path(path);
-		// FSDataInputStream p = fileSystem.open(hdfsPath);
-		// ObjectMapper mapper = new ObjectMapper();
-		communityMap = OBJECT_MAPPER.readValue((InputStream) fileSystem.open(hdfsPath), CommunityMap.class);
-		FSDataOutputStream fsDataOutputStream = null;
-		if (fileSystem.exists(hdfsPath)) {
-			fileSystem.delete(hdfsPath);
-		}
-		fsDataOutputStream = fileSystem.create(hdfsPath);
-
-		writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
-
-	}
-
-	public static void main(String[] args) throws Exception {
-		String jsonConfiguration = IOUtils
-			.toString(
-				RemoveCommunities.class
-					.getResourceAsStream(
-						"/eu/dnetlib/dhp/oa/graph/dump/input_rc_parameters.json"));
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
-		parser.parseArgument(args);
-
-		final String nameNode = parser.get("nameNode");
-		log.info("nameNode: {}", nameNode);
-
-		final String outputPath = parser.get("path");
-		log.info("outputPath: {}", outputPath);
-
-		final String communityId = parser.get("communityId");
-
-		final RemoveCommunities scm = new RemoveCommunities(outputPath, nameNode);
-
-		scm.removeCommunities(communityId);
-
-	}
-
-	private void removeCommunities(String communityId) throws IOException {
-		Set<String> toRemove = communityMap.keySet().stream().map(key -> {
-			if (key.equals(communityId))
-				return null;
-			return key;
-		}).filter(Objects::nonNull).collect(Collectors.toSet());
-
-		toRemove.forEach(key -> communityMap.remove(key));
-		writer.write(OBJECT_MAPPER.writeValueAsString(communityMap));
-		writer.close();
-	}
-
-}
@@ -159,7 +159,7 @@ public class QueryInformationSystem {
 		if (funding == null) {
 			return null;
 		}
-		if (funding.toLowerCase().contains("h2020")) {
+		if (funding.toLowerCase().startsWith("h2020")) {
 			nsp = "corda__h2020::";
 		} else {
 			nsp = "corda_______::";
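The switch from contains to startsWith narrows which funding strings are mapped to the H2020 namespace prefix. A minimal self-contained sketch of the selection logic and of the strings it now distinguishes (both sample values appear elsewhere in this change set); the helper name is illustrative.

public class FundingPrefixSketch {

	// Mirrors the updated check: only funding streams that begin with "h2020"
	// get the corda__h2020 namespace prefix.
	static String namespacePrefix(String funding) {
		if (funding == null) {
			return null;
		}
		if (funding.toLowerCase().startsWith("h2020")) {
			return "corda__h2020::";
		}
		return "corda_______::";
	}

	public static void main(String[] args) {
		System.out.println(namespacePrefix("H2020-EINFRA-2015-1")); // corda__h2020::
		// With the previous contains("h2020") check this string would also have matched:
		System.out.println(namespacePrefix("EC | H2020 | RIA"));    // corda_______::
	}
}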
@@ -1,18 +1,6 @@
 <workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
 
     <parameters>
-        <property>
-            <name>singleDeposition</name>
-            <description>Indicates if each file in the directory should be uploaded in a own deposition</description>
-        </property>
-        <property>
-            <name>upload</name>
-            <description>true if the dump should be upload in zenodo</description>
-        </property>
-        <property>
-            <name>communityId</name>
-            <description>the id of the community to be dumped if a dump for a single community should be done</description>
-        </property>
         <property>
             <name>sourcePath</name>
             <description>the source path</description>
@@ -135,24 +123,6 @@
             <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
         </java>
-        <ok to="single_deposition"/>
-        <error to="Kill"/>
-    </action>
-
-    <decision name="single_deposition">
-        <switch>
-            <case to="remove_communities">${wf:conf('singleDeposition') eq true}</case>
-            <default to="fork_dump"/>
-        </switch>
-    </decision>
-
-    <action name="remove_communities">
-        <java>
-            <main-class>eu.dnetlib.dhp.oa.graph.dump.community.RemoveCommunities</main-class>
-            <arg>--path</arg><arg>${workingDir}/communityMap</arg>
-            <arg>--nameNode</arg><arg>${nameNode}</arg>
-            <arg>--communityId</arg><arg>${communityId}</arg>
-        </java>
         <ok to="fork_dump"/>
         <error to="Kill"/>
     </action>
@@ -435,16 +405,10 @@
             <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--sourcePath</arg><arg>${workingDir}/split</arg>
         </java>
-        <ok to="should_upload"/>
+        <ok to="send_zenodo"/>
         <error to="Kill"/>
     </action>
 
-    <decision name="should_upload">
-        <switch>
-            <case to="send_zenodo">${wf:conf('upload') eq true}</case>
-            <default to="End"/>
-        </switch>
-    </decision>
-
     <action name="send_zenodo">
         <java>
             <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
@@ -453,6 +417,7 @@
             <arg>--accessToken</arg><arg>${accessToken}</arg>
             <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
             <arg>--metadata</arg><arg>${metadata}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
             <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
             <arg>--depositionId</arg><arg>${depositionId}</arg>
             <arg>--depositionType</arg><arg>${depositionType}</arg>
@@ -596,6 +596,7 @@
             <arg>--accessToken</arg><arg>${accessToken}</arg>
             <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
             <arg>--metadata</arg><arg>${metadata}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
             <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
             <arg>--depositionType</arg><arg>${depositionType}</arg>
             <arg>--depositionId</arg><arg>${depositionId}</arg>
@@ -1,23 +1,6 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
   "definitions": {
-    "AccessRight":{
-      "type":"object",
-      "properties":{
-        "code": {
-          "type": "string",
-          "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
-        },
-        "label": {
-          "type": "string",
-          "description": "Label for the access mode"
-        },
-        "scheme": {
-          "type": "string",
-          "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
-        }
-      }
-    },
     "ControlledField": {
       "type": "object",
       "properties": {
@@ -283,57 +266,6 @@
           ]
         }
       },
-      "instance":{
-        "type":"array",
-        "items":{
-          "type":"object",
-          "properties":{
-            "accessright":{
-              "allOf":[
-                {
-                  "$ref":"#/definitions/AccessRight"
-                },
-                {
-                  "description":"The accessright of this materialization of the result"
-                }
-              ]
-            },
-            "articleprocessingcharge":{
-              "type":"object",
-              "properties":{
-                "amount":{
-                  "type":"string"
-                },
-                "currency":{
-                  "type":"string"
-                }
-              }
-            },
-            "license":{
-              "type":"string"
-            },
-            "publicationdate":{
-              "type":"string"
-            },
-            "refereed":{
-              "type":"string"
-            },
-            "type":{
-              "type":"string",
-              "description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
-            },
-            "url":{
-              "description":"Description of url",
-              "type":"array",
-              "items":{
-                "type":"string",
-                "description":"urls where it is possible to access the materialization of the result"
-              }
-            }
-          },
-          "description":"One of the materialization for this result"
-        }
-      },
       "programmingLanguage": {
         "type": "string",
         "description": "Only for results with type 'software': the programming language"
@@ -370,7 +302,7 @@
     "subject": {
       "allOf": [
         {"$ref": "#/definitions/ControlledField"},
-        {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
+        {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."},
       ]
     }
   }
@@ -548,6 +548,7 @@
             <arg>--accessToken</arg><arg>${accessToken}</arg>
             <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
             <arg>--metadata</arg><arg>${metadata}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
             <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
             <arg>--depositionType</arg><arg>${depositionType}</arg>
             <arg>--depositionId</arg><arg>${depositionId}</arg>
@@ -35,12 +35,7 @@
     "paramLongName":"dumpType",
     "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
     "paramRequired": false
-  }, {
-    "paramName":"cid",
-    "paramLongName":"communityId",
-    "paramDescription": "the id of the community to be dumped",
-    "paramRequired": false
-  }
+  }
 ]
 
 
@@ -1,25 +0,0 @@
-
-[
-
-  {
-    "paramName":"ci",
-    "paramLongName":"communityId",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  },
-  {
-    "paramName":"nn",
-    "paramLongName":"nameNode",
-    "paramDescription": "the name node",
-    "paramRequired": true
-  },
-  {
-    "paramName": "p",
-    "paramLongName": "path",
-    "paramDescription": "the path used to store temporary output files",
-    "paramRequired": true
-  }
-]
-
-
-
@@ -25,12 +25,6 @@
     "paramLongName": "isSparkSessionManaged",
     "paramDescription": "true if the spark session is managed, false otherwise",
     "paramRequired": false
-  },
-  {
-    "paramName": "cid",
-    "paramLongName": "communityId",
-    "paramDescription": "true if the spark session is managed, false otherwise",
-    "paramRequired": false
   }
 ]
 
 
@@ -12,6 +12,12 @@
     "paramDescription": "The id of the concept record for a new version",
     "paramRequired": false
   },
+  {
+    "paramName":"cmp",
+    "paramLongName":"communityMapPath",
+    "paramDescription": "the path to the serialization of the community map",
+    "paramRequired": false
+  },
   {
     "paramName":"di",
     "paramLongName":"depositionId",
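The new communityMapPath parameter declared above would typically be read through ArgumentApplicationParser like the other options. A minimal sketch follows, assuming a placeholder resource path for the parameter definition file; only calls already shown elsewhere in this change set are used.

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ReadCommunityMapPathSketch {

	public static void main(String[] args) throws Exception {
		// Load the parameter definitions (the JSON shown in the hunk above);
		// the resource path here is a placeholder, not necessarily the project's actual one.
		String jsonConfiguration = IOUtils
			.toString(
				ReadCommunityMapPathSketch.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json"));

		ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		// The optional parameter added by this change set
		String communityMapPath = parser.get("communityMapPath");
		System.out.println("communityMapPath: " + communityMapPath);
	}
}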
@@ -9,7 +9,7 @@ import com.github.victools.jsonschema.generator.*;
 
 import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
 
-//@Disabled
+@Disabled
 public class GenerateJsonSchema {
 
 	@Test
@@ -21,7 +21,7 @@ public class GenerateJsonSchema {
 		configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
 		SchemaGeneratorConfig config = configBuilder.build();
 		SchemaGenerator generator = new SchemaGenerator(config);
-		JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
+		JsonNode jsonSchema = generator.generateSchema(Relation.class);
 
 		System.out.println(jsonSchema.toString());
 	}
@@ -1,60 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.community;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.nio.file.Files;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.graph.dump.MakeTar;
-import eu.dnetlib.dhp.oa.graph.dump.MakeTarTest;
-
-public class RemoveCommunityTest {
-
-	private static String workingDir;
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(MakeTarTest.class.getSimpleName())
-			.toString();
-	}
-
-	@Test
-	public void testRemove() throws Exception {
-		LocalFileSystem fs = FileSystem.getLocal(new Configuration());
-
-		fs
-			.copyFromLocalFile(
-				false, new Path(getClass()
-					.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
-					.getPath()),
-				new Path(workingDir + "/communityMap"));
-
-		String path = workingDir + "/communityMap";
-
-		RemoveCommunities.main(new String[] {
-			"-nameNode", "local",
-			"-path", path,
-			"-communityId", "beopen"
-		}
-
-		);
-
-		CommunityMap cm = new ObjectMapper()
-			.readValue(new FileInputStream(workingDir + "/communityMap"), CommunityMap.class);
-
-		Assertions.assertEquals(1, cm.size());
-		Assertions.assertTrue(cm.containsKey("beopen"));
-
-	}
-}
@@ -380,7 +380,7 @@ public class CreateRelationTest {
 			"    <param name=\"rule\"/>\n" +
 			"    <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
 			"    <param name=\"url\"/>\n" +
-			"    <param name=\"funding\">EC | H2020 | RIA</param>\n" +
+			"    <param name=\"funding\">H2020-EINFRA-2015-1</param>\n" +
 			"    <param name=\"funder\">EC</param>\n" +
 			"    <param name=\"acronym\">West-Life</param>\n" +
 			"    </concept>\n" +
@@ -10,7 +10,6 @@ export SOURCE=$1
 export SHADOW=$2
 
 echo "Updating shadow database"
-impala-shell -q "invalidate metadata"
 impala-shell -d ${SOURCE} -q "invalidate metadata"
 impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
 impala-shell -q "create database if not exists ${SHADOW}"