1
0
Fork 0

[ImportOC] Fix: move the original input files to the backup path instead of the extracted ones

This commit is contained in:
Miriam Baglioni 2024-09-30 15:10:10 +02:00
parent 599e56dbc6
commit e430826e00
5 changed files with 14 additions and 26 deletions

View File

@ -46,6 +46,9 @@ public class GetOpenCitationsRefs implements Serializable {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath {}", outputPath); log.info("outputPath {}", outputPath);
final String backupPath = parser.get("backupPath");
log.info("backupPath {}", backupPath);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
@ -53,11 +56,11 @@ public class GetOpenCitationsRefs implements Serializable {
GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();
ocr.doExtract(inputPath, outputPath, fileSystem); ocr.doExtract(inputPath, outputPath, backupPath, fileSystem);
} }
private void doExtract(String inputPath, String outputPath, FileSystem fileSystem) private void doExtract(String inputPath, String outputPath, String backupPath, FileSystem fileSystem)
throws IOException { throws IOException {
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
@ -89,6 +92,7 @@ public class GetOpenCitationsRefs implements Serializable {
} }
} }
fileSystem.rename(fileStatus.getPath(), new Path(backupPath));
} }
} }

View File

@ -49,9 +49,6 @@ public class ReadCOCI implements Serializable {
final String workingPath = parser.get("inputPath"); final String workingPath = parser.get("inputPath");
log.info("workingPath {}", workingPath); log.info("workingPath {}", workingPath);
final String backupPath = parser.get("backupPath");
log.info("backupPath {}", backupPath);
SparkConf sconf = new SparkConf(); SparkConf sconf = new SparkConf();
Configuration conf = new Configuration(); Configuration conf = new Configuration();
@ -71,14 +68,12 @@ public class ReadCOCI implements Serializable {
workingPath, workingPath,
fileSystem, fileSystem,
outputPath, outputPath,
backupPath,
delimiter); delimiter);
}); });
} }
private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem, private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem,
String outputPath, String outputPath,
String backupPath,
String delimiter) throws IOException { String delimiter) throws IOException {
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles( .listFiles(
@ -113,7 +108,7 @@ public class ReadCOCI implements Serializable {
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath);
fileSystem.rename(fileStatus.getPath(), new Path(backupPath)); fileSystem.delete(fileStatus.getPath());
} }
} }

View File

@ -16,5 +16,11 @@
"paramLongName": "hdfsNameNode", "paramLongName": "hdfsNameNode",
"paramDescription": "the hdfs name node", "paramDescription": "the hdfs name node",
"paramRequired": true "paramRequired": true
},
{
"paramName": "bp",
"paramLongName": "backupPath",
"paramDescription": "the hdfs path to move the OC data after the extraction",
"paramRequired": true
} }
] ]

View File

@ -30,12 +30,6 @@
"paramLongName": "hdfsNameNode", "paramLongName": "hdfsNameNode",
"paramDescription": "the hdfs name node", "paramDescription": "the hdfs name node",
"paramRequired": true "paramRequired": true
},
{
"paramName": "bp",
"paramLongName": "backupPath",
"paramDescription": "the hdfs path to move the OC data after the extraction",
"paramRequired": true
} }
] ]

View File

@ -94,17 +94,7 @@
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--inputPath</arg><arg>${inputPath}/Original</arg> <arg>--inputPath</arg><arg>${inputPath}/Original</arg>
<arg>--outputPath</arg><arg>${inputPath}/Extracted</arg> <arg>--outputPath</arg><arg>${inputPath}/Extracted</arg>
</java> <arg>--backupPath</arg><arg>${inputPath}/backup</arg>
<ok to="read"/>
<error to="Kill"/>
</action>
<action name="extract_correspondence">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--inputPath</arg><arg>${inputPath}/correspondence</arg>
<arg>--outputPath</arg><arg>${inputPath}/correspondence_extracted</arg>
</java> </java>
<ok to="read"/> <ok to="read"/>
<error to="Kill"/> <error to="Kill"/>
@ -129,7 +119,6 @@
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${inputPath}/Extracted</arg> <arg>--inputPath</arg><arg>${inputPath}/Extracted</arg>
<arg>--outputPath</arg><arg>${inputPath}/JSON</arg> <arg>--outputPath</arg><arg>${inputPath}/JSON</arg>
<arg>--backupPath</arg><arg>${inputPath}/backup</arg>
<arg>--delimiter</arg><arg>${delimiter}</arg> <arg>--delimiter</arg><arg>${delimiter}</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
</spark> </spark>