forked from D-Net/dnet-hadoop
Updated the Datacite import workflow to read the request block size from a parameter
This commit is contained in:
parent
4f58418184
commit
bced804151
|
@ -56,6 +56,7 @@ object ImportDatacite {
|
||||||
val hdfsTargetPath = new Path(targetPath)
|
val hdfsTargetPath = new Path(targetPath)
|
||||||
log.info(s"hdfsTargetPath is $hdfsTargetPath")
|
log.info(s"hdfsTargetPath is $hdfsTargetPath")
|
||||||
|
|
||||||
|
val bs = if (parser.get("blocksize") == null) 100 else parser.get("blocksize").toInt
|
||||||
|
|
||||||
val spkipImport = parser.get("skipImport")
|
val spkipImport = parser.get("skipImport")
|
||||||
log.info(s"skipImport is $spkipImport")
|
log.info(s"skipImport is $spkipImport")
|
||||||
|
@ -110,7 +111,7 @@ object ImportDatacite {
|
||||||
|
|
||||||
println(s"last Timestamp is $ts")
|
println(s"last Timestamp is $ts")
|
||||||
|
|
||||||
val cnt = if ("true".equalsIgnoreCase(spkipImport)) 1 else writeSequenceFile(hdfsTargetPath, ts, conf)
|
val cnt = if ("true".equalsIgnoreCase(spkipImport)) 1 else writeSequenceFile(hdfsTargetPath, ts, conf, bs)
|
||||||
|
|
||||||
println(s"Imported from Datacite API $cnt documents")
|
println(s"Imported from Datacite API $cnt documents")
|
||||||
|
|
||||||
|
@ -137,7 +138,7 @@ object ImportDatacite {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration): Long = {
|
private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = {
|
||||||
var from:Long = timestamp * 1000
|
var from:Long = timestamp * 1000
|
||||||
val delta:Long = 50000000L
|
val delta:Long = 50000000L
|
||||||
var client: DataciteAPIImporter = null
|
var client: DataciteAPIImporter = null
|
||||||
|
@ -148,7 +149,7 @@ object ImportDatacite {
|
||||||
try {
|
try {
|
||||||
var start: Long = System.currentTimeMillis
|
var start: Long = System.currentTimeMillis
|
||||||
while (from < now) {
|
while (from < now) {
|
||||||
client = new DataciteAPIImporter(from, 100, from + delta)
|
client = new DataciteAPIImporter(from, bs, from + delta)
|
||||||
var end: Long = 0
|
var end: Long = 0
|
||||||
val key: IntWritable = new IntWritable(i)
|
val key: IntWritable = new IntWritable(i)
|
||||||
val value: Text = new Text
|
val value: Text = new Text
|
||||||
|
|
|
@ -18,6 +18,12 @@
|
||||||
"paramDescription": "avoid to downlaod new items but apply the previous update",
|
"paramDescription": "avoid to downlaod new items but apply the previous update",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "bs",
|
||||||
|
"paramLongName": "blocksize",
|
||||||
|
"paramDescription": "define the requests block size",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "n",
|
"paramName": "n",
|
||||||
"paramLongName": "namenode",
|
"paramLongName": "namenode",
|
||||||
|
|
Loading…
Reference in New Issue