forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
d40e20f437
commit
f86e19b282
|
@ -27,7 +27,8 @@ object SparkCreateBaselineDataFrame {
|
||||||
def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = {
|
def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = {
|
||||||
val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/")
|
val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/")
|
||||||
|
|
||||||
val result = data.linesWithSeparators.map(l => l.stripLineEnd)
|
val result = data.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
.filter(l => l.startsWith("<a href="))
|
.filter(l => l.startsWith("<a href="))
|
||||||
.map { l =>
|
.map { l =>
|
||||||
val end = l.lastIndexOf("\">")
|
val end = l.lastIndexOf("\">")
|
||||||
|
|
|
@ -63,7 +63,9 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
val records: String = Source
|
val records: String = Source
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump"))
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump"))
|
||||||
.mkString
|
.mkString
|
||||||
val r: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList
|
val r: List[Oaf] = records.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
|
.toList
|
||||||
.map(s => mapper.readValue(s, classOf[PMArticle]))
|
.map(s => mapper.readValue(s, classOf[PMArticle]))
|
||||||
.map(a => PubMedToOaf.convert(a, vocabularies))
|
.map(a => PubMedToOaf.convert(a, vocabularies))
|
||||||
assertEquals(10, r.size)
|
assertEquals(10, r.size)
|
||||||
|
@ -175,7 +177,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
.mkString
|
.mkString
|
||||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||||
|
|
||||||
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o))
|
val result: List[Oaf] =
|
||||||
|
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o))
|
||||||
|
|
||||||
assertTrue(result.nonEmpty)
|
assertTrue(result.nonEmpty)
|
||||||
result.foreach(r => assertNotNull(r))
|
result.foreach(r => assertNotNull(r))
|
||||||
|
@ -196,7 +199,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
.mkString
|
.mkString
|
||||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||||
|
|
||||||
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o))
|
val result: List[Oaf] =
|
||||||
|
records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o))
|
||||||
|
|
||||||
assertTrue(result.nonEmpty)
|
assertTrue(result.nonEmpty)
|
||||||
result.foreach(r => assertNotNull(r))
|
result.foreach(r => assertNotNull(r))
|
||||||
|
@ -241,7 +245,8 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
.mkString
|
.mkString
|
||||||
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty))
|
||||||
|
|
||||||
val result: List[Oaf] = records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList
|
val result: List[Oaf] =
|
||||||
|
records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList
|
||||||
|
|
||||||
assertNotNull(result)
|
assertNotNull(result)
|
||||||
assertTrue(result.nonEmpty)
|
assertTrue(result.nonEmpty)
|
||||||
|
@ -280,10 +285,13 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
val l: List[ScholixResolved] = records.linesWithSeparators.map(l => l.stripLineEnd).map { input =>
|
val l: List[ScholixResolved] = records.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
|
.map { input =>
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
json.extract[ScholixResolved]
|
json.extract[ScholixResolved]
|
||||||
}.toList
|
}
|
||||||
|
.toList
|
||||||
|
|
||||||
val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s))
|
val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s))
|
||||||
|
|
||||||
|
|
|
@ -48,10 +48,13 @@ public class ConversionUtils {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
|
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
|
||||||
|
|
||||||
private ConversionUtils() {}
|
private ConversionUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) {
|
public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) {
|
||||||
if (i == null) { return new ArrayList<>(); }
|
if (i == null) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
return mappedList(i.getUrl(), url -> {
|
return mappedList(i.getUrl(), url -> {
|
||||||
final OaBrokerInstance res = new OaBrokerInstance();
|
final OaBrokerInstance res = new OaBrokerInstance();
|
||||||
|
@ -72,7 +75,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
|
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||||
if (d == null) { return null; }
|
if (d == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
|
final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
|
||||||
res.setOpenaireId(cleanOpenaireId(d.getId()));
|
res.setOpenaireId(cleanOpenaireId(d.getId()));
|
||||||
|
@ -85,7 +90,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) {
|
public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) {
|
||||||
if (p == null) { return null; }
|
if (p == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
|
final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
|
||||||
res.setOpenaireId(cleanOpenaireId(p.getId()));
|
res.setOpenaireId(cleanOpenaireId(p.getId()));
|
||||||
|
@ -99,7 +106,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) {
|
public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) {
|
||||||
if (result == null) { return null; }
|
if (result == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerMainEntity res = new OaBrokerMainEntity();
|
final OaBrokerMainEntity res = new OaBrokerMainEntity();
|
||||||
|
|
||||||
|
@ -116,7 +125,8 @@ public class ConversionUtils {
|
||||||
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
|
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
|
||||||
res.setContributor(fieldList(result.getContributor()));
|
res.setContributor(fieldList(result.getContributor()));
|
||||||
res
|
res
|
||||||
.setJournal(result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
|
.setJournal(
|
||||||
|
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
|
||||||
res.setPids(allResultPids(result));
|
res.setPids(allResultPids(result));
|
||||||
res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances));
|
res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances));
|
||||||
res
|
res
|
||||||
|
@ -141,7 +151,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
|
private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
|
||||||
if (author == null) { return null; }
|
if (author == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final String pids = author.getPid() != null ? author
|
final String pids = author.getPid() != null ? author
|
||||||
.getPid()
|
.getPid()
|
||||||
|
@ -165,7 +177,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) {
|
private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) {
|
||||||
if (journal == null) { return null; }
|
if (journal == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerJournal res = new OaBrokerJournal();
|
final OaBrokerJournal res = new OaBrokerJournal();
|
||||||
res.setName(journal.getName());
|
res.setName(journal.getName());
|
||||||
|
@ -177,7 +191,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
|
private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
|
||||||
if (ref == null) { return null; }
|
if (ref == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerExternalReference res = new OaBrokerExternalReference();
|
final OaBrokerExternalReference res = new OaBrokerExternalReference();
|
||||||
res.setRefidentifier(ref.getRefidentifier());
|
res.setRefidentifier(ref.getRefidentifier());
|
||||||
|
@ -188,7 +204,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerProject oafProjectToBrokerProject(final Project p) {
|
public static OaBrokerProject oafProjectToBrokerProject(final Project p) {
|
||||||
if (p == null) { return null; }
|
if (p == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerProject res = new OaBrokerProject();
|
final OaBrokerProject res = new OaBrokerProject();
|
||||||
res.setOpenaireId(cleanOpenaireId(p.getId()));
|
res.setOpenaireId(cleanOpenaireId(p.getId()));
|
||||||
|
@ -212,7 +230,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) {
|
public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) {
|
||||||
if (sw == null) { return null; }
|
if (sw == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
|
final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
|
||||||
res.setOpenaireId(cleanOpenaireId(sw.getId()));
|
res.setOpenaireId(cleanOpenaireId(sw.getId()));
|
||||||
|
@ -225,7 +245,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) {
|
public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) {
|
||||||
if (ds == null) { return null; }
|
if (ds == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
|
final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
|
||||||
res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
|
res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
|
||||||
|
@ -285,7 +307,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) {
|
private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) {
|
||||||
if (list == null) { return new ArrayList<>(); }
|
if (list == null) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
return list
|
return list
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -295,7 +319,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <F, T> List<T> mappedList(final Collection<F> list, final Function<F, T> func) {
|
private static <F, T> List<T> mappedList(final Collection<F> list, final Function<F, T> func) {
|
||||||
if (list == null) { return new ArrayList<>(); }
|
if (list == null) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
return list
|
return list
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -306,7 +332,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
|
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
|
||||||
if (list == null) { return new ArrayList<>(); }
|
if (list == null) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
return list
|
return list
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -318,7 +346,9 @@ public class ConversionUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
|
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
|
||||||
if (list == null) { return null; }
|
if (list == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
return list
|
return list
|
||||||
.stream()
|
.stream()
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.broker.oa.util;
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
@ -17,7 +18,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
class ConversionUtilsTest {
|
class ConversionUtilsTest {
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
void setUp() throws Exception {}
|
void setUp() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testAllResultPids() {
|
void testAllResultPids() {
|
||||||
|
|
|
@ -25,7 +25,9 @@ class MappingORCIDToOAFTest {
|
||||||
.mkString
|
.mkString
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
json.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => {
|
json.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
|
.foreach(s => {
|
||||||
assertNotNull(ORCIDToOAF.extractValueFromInputString(s))
|
assertNotNull(ORCIDToOAF.extractValueFromInputString(s))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable {
|
||||||
def generateUpdates(spark: SparkSession): Unit = {
|
def generateUpdates(spark: SparkSession): Unit = {
|
||||||
val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString
|
val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString
|
||||||
|
|
||||||
val pids: List[String] = template.linesWithSeparators.map(l => l.stripLineEnd)
|
val pids: List[String] = template.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
.map { id =>
|
.map { id =>
|
||||||
val r = new Result
|
val r = new Result
|
||||||
r.setId(id.toLowerCase.trim)
|
r.setId(id.toLowerCase.trim)
|
||||||
|
@ -264,7 +265,8 @@ class ResolveEntitiesTest extends Serializable {
|
||||||
Source
|
Source
|
||||||
.fromInputStream(this.getClass.getResourceAsStream(s"publication"))
|
.fromInputStream(this.getClass.getResourceAsStream(s"publication"))
|
||||||
.mkString
|
.mkString
|
||||||
.linesWithSeparators.map(l => l.stripLineEnd)
|
.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
.next(),
|
.next(),
|
||||||
classOf[Publication]
|
classOf[Publication]
|
||||||
)
|
)
|
||||||
|
|
|
@ -69,7 +69,8 @@ class ScholixGraphTest extends AbstractVocabularyTest {
|
||||||
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")
|
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")
|
||||||
)
|
)
|
||||||
.mkString
|
.mkString
|
||||||
val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l => l.stripLineEnd)
|
val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators
|
||||||
|
.map(l => l.stripLineEnd)
|
||||||
.sliding(2)
|
.sliding(2)
|
||||||
.map(s => (s.head, s(1)))
|
.map(s => (s.head, s(1)))
|
||||||
.map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))
|
.map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))
|
||||||
|
|
Loading…
Reference in New Issue