[OpenCitation] changed the name of destination folders

This commit is contained in:
Miriam Baglioni 2022-02-14 15:49:44 +01:00
parent 1490867cc7
commit be64055cfe
13 changed files with 12 additions and 41 deletions

View File

@ -73,7 +73,7 @@ public class ReadCOCI implements Serializable {
String delimiter) throws IOException { String delimiter) throws IOException {
for(String inputFile : inputFiles){ for(String inputFile : inputFiles){
String p_string = workingPath + "/" + inputFile ; String p_string = workingPath + "/" + inputFile + ".gz";
Dataset<Row> cociData = spark Dataset<Row> cociData = spark
.read() .read()

View File

@ -37,5 +37,5 @@ public class COCI implements Serializable {
this.cited = cited; this.cited = cited;
} }
} }

View File

@ -78,37 +78,37 @@ public class ReadCOCITest {
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
ReadCOCI ReadCOCI
.main( .main(

View File

@ -1,8 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no
02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no
02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no
02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no
02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no
02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no
02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no

View File

@ -1,8 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no
02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no
02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no
02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no
02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no
02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no
02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no

View File

@ -1,9 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
0200100000236090708010101090307000202023727141528-020050302063600040000010307,10.1002/9781119370222.refs,10.5326/0400137,2020-06-22,P16Y3M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020000073700000301093733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2007.00319.x,2020-06-22,P12Y8M,no,no
0200100000236090708010101090307000202023727141528-0200101010136312830370102030509,10.1002/9781119370222.refs,10.1111/vsu.12359,2020-06-22,P4Y10M29D,no,no
0200100000236090708010101090307000202023727141528-020050302063600030900020904,10.1002/9781119370222.refs,10.5326/0390294,2020-06-22,P17Y1M,no,no
0200100000236090708010101090307000202023727141528-020050302063600040200030701,10.1002/9781119370222.refs,10.5326/0420371,2020-06-22,P13Y9M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020001033701020000003733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2013.12000.x,2020-06-22,P7Y2M,no,no
0200100000236090708010101090307000202023727141528-020010008003600000408000106093702000006370306070200,10.1002/9781119370222.refs,10.1080/00480169.2006.36720,2020-06-22,P13Y6M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701070501630008010337020000063700000003033733,10.1002/9781119370222.refs,10.1111/j.1751-0813.2006.00033.x,2020-06-22,P13Y8M,no,no

View File

@ -1,2 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
0200102000736280105030207060407191213036204630001-02001000107362800030005000000090000000006060903,"10.1207/s15327647jcd3,4-01",10.1017/s0305000900006693,2002-11-01,P17Y1M,no,no

View File

@ -1,2 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
02001000007362801000805046300010563030608046333-02001000007362801000805046300010563030608046333,10.1007/s10854-015-3684-x,10.1007/s10854-015-3684-x,2015-09-01,P7Y2M,no,no