Master branch updates from beta September 2023 #337
|
@ -13,6 +13,8 @@ import java.util.stream.Collectors;
|
|||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||
import com.google.common.collect.Lists;
|
||||
|
@ -23,8 +25,6 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import me.xuender.unidecode.Unidecode;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
||||
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||
|
||||
|
|
|
@ -27,7 +27,8 @@ object SparkCreateBaselineDataFrame {
|
|||
def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = {
|
||||
val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/")
|
||||
|
||||
val result = data.linesWithSeparators.map(l =>l.stripLineEnd)
|
||||
val result = data.linesWithSeparators
|
||||
.map(l => l.stripLineEnd)
|
||||
.filter(l => l.startsWith("<a href="))
|
||||
.map { l =>
|
||||
val end = l.lastIndexOf("\">")
|
||||
|
|
|
@ -63,7 +63,9 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
val records: String = Source
|
||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump"))
|
||||
.mkString
|
||||
val r: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList
|
||||
val r: List[Oaf] = records.linesWithSeparators
|
||||
.map(l => l.stripLineEnd)
|
||||
.toList
|
||||
.map(s => mapper.readValue(s, classOf[PMArticle]))
|
||||
.map(a => PubMedToOaf.convert(a, vocabularies))
|
||||
assertEquals(10, r.size)
|
||||
|
@ -175,7 +177,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
.mkString
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||
|
||||
val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o))
|
||||
val result: List[Oaf] =
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o))
|
||||
|
||||
assertTrue(result.nonEmpty)
|
||||
result.foreach(r => assertNotNull(r))
|
||||
|
@ -196,7 +199,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
.mkString
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||
|
||||
val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o))
|
||||
val result: List[Oaf] =
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o))
|
||||
|
||||
assertTrue(result.nonEmpty)
|
||||
result.foreach(r => assertNotNull(r))
|
||||
|
@ -241,7 +245,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
.mkString
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||
|
||||
val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList
|
||||
val result: List[Oaf] =
|
||||
records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList
|
||||
|
||||
assertNotNull(result)
|
||||
assertTrue(result.nonEmpty)
|
||||
|
@ -280,10 +285,13 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
val l: List[ScholixResolved] = records.linesWithSeparators.map(l =>l.stripLineEnd).map { input =>
|
||||
val l: List[ScholixResolved] = records.linesWithSeparators
|
||||
.map(l => l.stripLineEnd)
|
||||
.map { input =>
|
||||
lazy val json = parse(input)
|
||||
json.extract[ScholixResolved]
|
||||
}.toList
|
||||
}
|
||||
.toList
|
||||
|
||||
val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s))
|
||||
|
||||
|
|
Loading…
Reference in New Issue