Minor improvements in the aggregation of the PubMed baseline
This commit is contained in:
parent
b8bc237079
commit
c06ef1d68f
|
@ -60,8 +60,9 @@ object SparkCreateBaselineDataFrame {
|
||||||
.setSocketTimeout(timeout * 1000)
|
.setSocketTimeout(timeout * 1000)
|
||||||
.build()
|
.build()
|
||||||
val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
|
val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
|
||||||
|
println(s"Downloading ${url}")
|
||||||
val response = client.execute(r)
|
val response = client.execute(r)
|
||||||
println(s"get response with status${response.getStatusLine.getStatusCode}")
|
println(s"got response with status: ${response.getStatusLine.getStatusCode}")
|
||||||
response.getEntity.getContent
|
response.getEntity.getContent
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -125,7 +126,7 @@ object SparkCreateBaselineDataFrame {
|
||||||
val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true)
|
val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true)
|
||||||
val i = downloadBaselinePart(u._2)
|
val i = downloadBaselinePart(u._2)
|
||||||
IOUtils.copy(i, fsDataOutputStream)
|
IOUtils.copy(i, fsDataOutputStream)
|
||||||
println(s"Downloaded ${u._2} into $baselinePath/${u._1}")
|
println(s"Saved file ${u._2} in path $baselinePath/${u._1}")
|
||||||
fsDataOutputStream.close()
|
fsDataOutputStream.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,6 +218,8 @@ object SparkCreateBaselineDataFrame {
|
||||||
.save(s"$workingPath/baseline_dataset")
|
.save(s"$workingPath/baseline_dataset")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info(s"saved dataset: '$workingPath/baseline_dataset'")
|
||||||
|
|
||||||
val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle]
|
val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle]
|
||||||
CollectionUtils.saveDataset(
|
CollectionUtils.saveDataset(
|
||||||
exported_dataset
|
exported_dataset
|
||||||
|
|
|
@ -309,6 +309,13 @@ object PubMedToOaf {
|
||||||
} else
|
} else
|
||||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||||
|
|
||||||
|
if (article.getDoi != null) {
|
||||||
|
val normalizedPid = cleanDoi(article.getDoi)
|
||||||
|
if (normalizedPid != null) {
|
||||||
|
result.getOriginalId.add(IdentifierFactory.idFromPid("50", PidType.doi.toString, normalizedPid, true))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
result.setId(article.getPmid)
|
result.setId(article.getPmid)
|
||||||
|
|
||||||
// END RESULT MAPPING
|
// END RESULT MAPPING
|
||||||
|
|
Loading…
Reference in New Issue