forked from D-Net/dnet-hadoop
This commit is contained in:
parent
375ef07d7b
commit
7fe00cb4fb
|
@ -20,15 +20,14 @@ public class QueryInformationSystem {
|
||||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||||
+
|
+
|
||||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
||||||
" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') " +
|
" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
|
||||||
|
+
|
||||||
" return " +
|
" return " +
|
||||||
"<community> " +
|
"<community> " +
|
||||||
"{$x//CONFIGURATION/context/@id}" +
|
"{$x//CONFIGURATION/context/@id}" +
|
||||||
"{$x//CONFIGURATION/context/@label}" +
|
"{$x//CONFIGURATION/context/@label}" +
|
||||||
"</community>";
|
"</community>";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public CommunityMap getCommunityMap()
|
public CommunityMap getCommunityMap()
|
||||||
throws ISLookUpException {
|
throws ISLookUpException {
|
||||||
return getMap(isLookUp.quickSearchProfile(XQUERY));
|
return getMap(isLookUp.quickSearchProfile(XQUERY));
|
||||||
|
|
|
@ -102,7 +102,8 @@ public class SparkSplitForCommunity implements Serializable {
|
||||||
.filter(r -> containsCommunity(r, c));
|
.filter(r -> containsCommunity(r, c));
|
||||||
|
|
||||||
if (community_products.count() > 0) {
|
if (community_products.count() > 0) {
|
||||||
community_products.repartition(1)
|
community_products
|
||||||
|
.repartition(1)
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
|
|
@ -362,7 +362,7 @@
|
||||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="send_zenodo"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
|
@ -63,8 +63,6 @@ public class DumpJobTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
List<String> communityMap = Arrays
|
List<String> communityMap = Arrays
|
||||||
.asList(
|
.asList(
|
||||||
"<community id=\"egi\" label=\"EGI Federation\"/>",
|
"<community id=\"egi\" label=\"EGI Federation\"/>",
|
||||||
|
@ -122,8 +120,6 @@ public class DumpJobTest {
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@AfterAll
|
@AfterAll
|
||||||
public static void afterAll() throws IOException {
|
public static void afterAll() throws IOException {
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
@ -146,7 +142,6 @@ public class DumpJobTest {
|
||||||
"-communityMap", new Gson().toJson(map)
|
"-communityMap", new Gson().toJson(map)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
|
JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
|
||||||
|
@ -186,7 +181,6 @@ public class DumpJobTest {
|
||||||
|
|
||||||
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
|
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
|
||||||
|
|
||||||
|
|
||||||
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
|
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -275,7 +269,6 @@ public class DumpJobTest {
|
||||||
"-communityMap", new Gson().toJson(map)
|
"-communityMap", new Gson().toJson(map)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
|
JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
|
||||||
|
@ -294,7 +287,6 @@ public class DumpJobTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRecord() throws Exception {
|
public void testRecord() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
|
|
|
@ -37,7 +37,6 @@ public class ZenodoUploadTest {
|
||||||
.toString();
|
.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void HDFSConnection() throws IOException {
|
public void HDFSConnection() throws IOException {
|
||||||
CommunityMap communityMap = new CommunityMap();
|
CommunityMap communityMap = new CommunityMap();
|
||||||
|
@ -58,7 +57,6 @@ public class ZenodoUploadTest {
|
||||||
.getPath()),
|
.getPath()),
|
||||||
new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
|
new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
|
||||||
|
|
||||||
|
|
||||||
APIClient client = new APIClient(URL_STRING,
|
APIClient client = new APIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
client.connect();
|
client.connect();
|
||||||
|
@ -88,7 +86,6 @@ public class ZenodoUploadTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}";
|
String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}";
|
||||||
|
|
||||||
System.out.println(client.sendMretadata(metadata));
|
System.out.println(client.sendMretadata(metadata));
|
||||||
|
@ -97,5 +94,4 @@ public class ZenodoUploadTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue