master #59

Closed
claudio.atzori wants to merge 3221 commits from master into stable_ids
7 changed files with 78 additions and 32 deletions
Showing only changes of commit f86e19b282 - Show all commits

View File

@ -27,7 +27,8 @@ object SparkCreateBaselineDataFrame {
def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = {
val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/")
val result = data.linesWithSeparators.map(l => l.stripLineEnd) val result = data.linesWithSeparators
.map(l => l.stripLineEnd)
.filter(l => l.startsWith("<a href=")) .filter(l => l.startsWith("<a href="))
.map { l => .map { l =>
val end = l.lastIndexOf("\">") val end = l.lastIndexOf("\">")

View File

@ -63,7 +63,9 @@ class BioScholixTest extends AbstractVocabularyTest {
val records: String = Source val records: String = Source
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")) .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump"))
.mkString .mkString
val r: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList val r: List[Oaf] = records.linesWithSeparators
.map(l => l.stripLineEnd)
.toList
.map(s => mapper.readValue(s, classOf[PMArticle])) .map(s => mapper.readValue(s, classOf[PMArticle]))
.map(a => PubMedToOaf.convert(a, vocabularies)) .map(a => PubMedToOaf.convert(a, vocabularies))
assertEquals(10, r.size) assertEquals(10, r.size)
@ -175,7 +177,8 @@ class BioScholixTest extends AbstractVocabularyTest {
.mkString .mkString
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) val result: List[Oaf] =
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o))
assertTrue(result.nonEmpty) assertTrue(result.nonEmpty)
result.foreach(r => assertNotNull(r)) result.foreach(r => assertNotNull(r))
@ -196,7 +199,8 @@ class BioScholixTest extends AbstractVocabularyTest {
.mkString .mkString
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) val result: List[Oaf] =
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o))
assertTrue(result.nonEmpty) assertTrue(result.nonEmpty)
result.foreach(r => assertNotNull(r)) result.foreach(r => assertNotNull(r))
@ -241,7 +245,8 @@ class BioScholixTest extends AbstractVocabularyTest {
.mkString .mkString
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList val result: List[Oaf] =
records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList
assertNotNull(result) assertNotNull(result)
assertTrue(result.nonEmpty) assertTrue(result.nonEmpty)
@ -280,10 +285,13 @@ class BioScholixTest extends AbstractVocabularyTest {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
val l: List[ScholixResolved] = records.linesWithSeparators.map(l => l.stripLineEnd).map { input => val l: List[ScholixResolved] = records.linesWithSeparators
lazy val json = parse(input) .map(l => l.stripLineEnd)
json.extract[ScholixResolved] .map { input =>
}.toList lazy val json = parse(input)
json.extract[ScholixResolved]
}
.toList
val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s))

View File

@ -48,10 +48,13 @@ public class ConversionUtils {
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
private ConversionUtils() {} private ConversionUtils() {
}
public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) { public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) {
if (i == null) { return new ArrayList<>(); } if (i == null) {
return new ArrayList<>();
}
return mappedList(i.getUrl(), url -> { return mappedList(i.getUrl(), url -> {
final OaBrokerInstance res = new OaBrokerInstance(); final OaBrokerInstance res = new OaBrokerInstance();
@ -72,7 +75,9 @@ public class ConversionUtils {
} }
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
if (d == null) { return null; } if (d == null) {
return null;
}
final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset(); final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
res.setOpenaireId(cleanOpenaireId(d.getId())); res.setOpenaireId(cleanOpenaireId(d.getId()));
@ -85,7 +90,9 @@ public class ConversionUtils {
} }
public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) { public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) {
if (p == null) { return null; } if (p == null) {
return null;
}
final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication(); final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
res.setOpenaireId(cleanOpenaireId(p.getId())); res.setOpenaireId(cleanOpenaireId(p.getId()));
@ -99,7 +106,9 @@ public class ConversionUtils {
} }
public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) {
if (result == null) { return null; } if (result == null) {
return null;
}
final OaBrokerMainEntity res = new OaBrokerMainEntity(); final OaBrokerMainEntity res = new OaBrokerMainEntity();
@ -116,7 +125,8 @@ public class ConversionUtils {
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate())); res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
res.setContributor(fieldList(result.getContributor())); res.setContributor(fieldList(result.getContributor()));
res res
.setJournal(result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null); .setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
res.setPids(allResultPids(result)); res.setPids(allResultPids(result));
res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances)); res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances));
res res
@ -141,7 +151,9 @@ public class ConversionUtils {
} }
private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) { private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
if (author == null) { return null; } if (author == null) {
return null;
}
final String pids = author.getPid() != null ? author final String pids = author.getPid() != null ? author
.getPid() .getPid()
@ -165,7 +177,9 @@ public class ConversionUtils {
} }
private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) { private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) {
if (journal == null) { return null; } if (journal == null) {
return null;
}
final OaBrokerJournal res = new OaBrokerJournal(); final OaBrokerJournal res = new OaBrokerJournal();
res.setName(journal.getName()); res.setName(journal.getName());
@ -177,7 +191,9 @@ public class ConversionUtils {
} }
private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) { private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
if (ref == null) { return null; } if (ref == null) {
return null;
}
final OaBrokerExternalReference res = new OaBrokerExternalReference(); final OaBrokerExternalReference res = new OaBrokerExternalReference();
res.setRefidentifier(ref.getRefidentifier()); res.setRefidentifier(ref.getRefidentifier());
@ -188,7 +204,9 @@ public class ConversionUtils {
} }
public static OaBrokerProject oafProjectToBrokerProject(final Project p) { public static OaBrokerProject oafProjectToBrokerProject(final Project p) {
if (p == null) { return null; } if (p == null) {
return null;
}
final OaBrokerProject res = new OaBrokerProject(); final OaBrokerProject res = new OaBrokerProject();
res.setOpenaireId(cleanOpenaireId(p.getId())); res.setOpenaireId(cleanOpenaireId(p.getId()));
@ -212,7 +230,9 @@ public class ConversionUtils {
} }
public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) {
if (sw == null) { return null; } if (sw == null) {
return null;
}
final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware(); final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
res.setOpenaireId(cleanOpenaireId(sw.getId())); res.setOpenaireId(cleanOpenaireId(sw.getId()));
@ -225,7 +245,9 @@ public class ConversionUtils {
} }
public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) {
if (ds == null) { return null; } if (ds == null) {
return null;
}
final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource(); final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname()))); res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
@ -285,7 +307,9 @@ public class ConversionUtils {
} }
private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) { private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) {
if (list == null) { return new ArrayList<>(); } if (list == null) {
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -295,7 +319,9 @@ public class ConversionUtils {
} }
private static <F, T> List<T> mappedList(final Collection<F> list, final Function<F, T> func) { private static <F, T> List<T> mappedList(final Collection<F> list, final Function<F, T> func) {
if (list == null) { return new ArrayList<>(); } if (list == null) {
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -306,7 +332,9 @@ public class ConversionUtils {
} }
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) { private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
if (list == null) { return new ArrayList<>(); } if (list == null) {
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -318,7 +346,9 @@ public class ConversionUtils {
} }
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) { private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
if (list == null) { return null; } if (list == null) {
return null;
}
return list return list
.stream() .stream()

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
@ -17,7 +18,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
class ConversionUtilsTest { class ConversionUtilsTest {
@BeforeEach @BeforeEach
void setUp() throws Exception {} void setUp() throws Exception {
}
@Test @Test
void testAllResultPids() { void testAllResultPids() {

View File

@ -25,9 +25,11 @@ class MappingORCIDToOAFTest {
.mkString .mkString
assertNotNull(json) assertNotNull(json)
assertFalse(json.isEmpty) assertFalse(json.isEmpty)
json.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => { json.linesWithSeparators
assertNotNull(ORCIDToOAF.extractValueFromInputString(s)) .map(l => l.stripLineEnd)
}) .foreach(s => {
assertNotNull(ORCIDToOAF.extractValueFromInputString(s))
})
} }
@Test @Test

View File

@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable {
def generateUpdates(spark: SparkSession): Unit = { def generateUpdates(spark: SparkSession): Unit = {
val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString
val pids: List[String] = template.linesWithSeparators.map(l => l.stripLineEnd) val pids: List[String] = template.linesWithSeparators
.map(l => l.stripLineEnd)
.map { id => .map { id =>
val r = new Result val r = new Result
r.setId(id.toLowerCase.trim) r.setId(id.toLowerCase.trim)
@ -264,7 +265,8 @@ class ResolveEntitiesTest extends Serializable {
Source Source
.fromInputStream(this.getClass.getResourceAsStream(s"publication")) .fromInputStream(this.getClass.getResourceAsStream(s"publication"))
.mkString .mkString
.linesWithSeparators.map(l => l.stripLineEnd) .linesWithSeparators
.map(l => l.stripLineEnd)
.next(), .next(),
classOf[Publication] classOf[Publication]
) )

View File

@ -69,7 +69,8 @@ class ScholixGraphTest extends AbstractVocabularyTest {
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")
) )
.mkString .mkString
val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l => l.stripLineEnd) val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators
.map(l => l.stripLineEnd)
.sliding(2) .sliding(2)
.map(s => (s.head, s(1))) .map(s => (s.head, s(1)))
.map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))