forked from antonis.lempesis/dnet-hadoop
code formatting
This commit is contained in:
parent
0727f0ef48
commit
f62c4e05cd
|
@ -255,7 +255,6 @@ public class PMArticle implements Serializable {
|
||||||
return grants;
|
return grants;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String getPmcId() {
|
public String getPmcId() {
|
||||||
return pmcId;
|
return pmcId;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,13 +56,11 @@ object PubMedToOaf {
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def createOriginalOpenaireId(article: PMArticle): String = {
|
||||||
def createOriginalOpenaireId(article:PMArticle) :String = {
|
|
||||||
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
||||||
val md5 = DHPUtils.md5(s"$OAI_HEADER${article.getPmcId.replace("PMC","")}")
|
val md5 = DHPUtils.md5(s"$OAI_HEADER${article.getPmcId.replace("PMC", "")}")
|
||||||
s"$OLD_PMC_PREFIX$md5"
|
s"$OLD_PMC_PREFIX$md5"
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
null
|
null
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -142,26 +140,24 @@ object PubMedToOaf {
|
||||||
val pidList = ListBuffer[StructuredProperty]()
|
val pidList = ListBuffer[StructuredProperty]()
|
||||||
|
|
||||||
pidList += OafMapperUtils.structuredProperty(
|
pidList += OafMapperUtils.structuredProperty(
|
||||||
article.getPmid,
|
article.getPmid,
|
||||||
PidType.pmid.toString,
|
PidType.pmid.toString,
|
||||||
PidType.pmid.toString,
|
PidType.pmid.toString,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
dataInfo
|
||||||
|
)
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(article.getPmcId)) {
|
||||||
|
pidList += OafMapperUtils.structuredProperty(
|
||||||
|
article.getPmcId,
|
||||||
|
PidType.pmc.toString,
|
||||||
|
PidType.pmc.toString,
|
||||||
ModelConstants.DNET_PID_TYPES,
|
ModelConstants.DNET_PID_TYPES,
|
||||||
ModelConstants.DNET_PID_TYPES,
|
ModelConstants.DNET_PID_TYPES,
|
||||||
dataInfo
|
dataInfo
|
||||||
)
|
)
|
||||||
|
}
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(article.getPmcId))
|
|
||||||
{
|
|
||||||
pidList += OafMapperUtils.structuredProperty(
|
|
||||||
article.getPmcId,
|
|
||||||
PidType.pmc.toString,
|
|
||||||
PidType.pmc.toString,
|
|
||||||
ModelConstants.DNET_PID_TYPES,
|
|
||||||
ModelConstants.DNET_PID_TYPES,
|
|
||||||
dataInfo
|
|
||||||
)
|
|
||||||
}
|
|
||||||
if (pidList == null)
|
if (pidList == null)
|
||||||
return null
|
return null
|
||||||
|
|
||||||
|
@ -297,7 +293,7 @@ object PubMedToOaf {
|
||||||
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
||||||
val originalIDS = ListBuffer[String]()
|
val originalIDS = ListBuffer[String]()
|
||||||
originalIDS += createOriginalOpenaireId(article)
|
originalIDS += createOriginalOpenaireId(article)
|
||||||
pidList.map(s => s.getValue).foreach(p =>originalIDS += p)
|
pidList.map(s => s.getValue).foreach(p => originalIDS += p)
|
||||||
result.setOriginalId(originalIDS.asJava)
|
result.setOriginalId(originalIDS.asJava)
|
||||||
} else
|
} else
|
||||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||||
|
|
|
@ -48,9 +48,6 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testConvert(): Unit = {
|
def testConvert(): Unit = {
|
||||||
|
|
||||||
|
@ -76,11 +73,8 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
assertEquals(100, nativeSize)
|
assertEquals(100, nativeSize)
|
||||||
|
|
||||||
val result: Dataset[String] = spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING)
|
val result: Dataset[String] =
|
||||||
|
spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
result
|
result
|
||||||
.groupBy(col("value").alias("class"))
|
.groupBy(col("value").alias("class"))
|
||||||
|
|
|
@ -6,25 +6,24 @@ import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
object DataciteUtilityTest {
|
object DataciteUtilityTest {
|
||||||
|
|
||||||
def convertToOAF(input:String) : String = {
|
def convertToOAF(input: String): String = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
|
|
||||||
|
val isRelation: String = (json \\ "source").extractOrElse("NULL")
|
||||||
val isRelation:String = (json \\ "source").extractOrElse("NULL")
|
|
||||||
|
|
||||||
if (isRelation != "NULL") {
|
if (isRelation != "NULL") {
|
||||||
return "Relation"
|
return "Relation"
|
||||||
}
|
}
|
||||||
|
|
||||||
val iType: List[String] = for {
|
val iType: List[String] = for {
|
||||||
JObject(instance) <- json \\ "instance"
|
JObject(instance) <- json \\ "instance"
|
||||||
JField("instancetype", JObject(instancetype)) <- instance
|
JField("instancetype", JObject(instancetype)) <- instance
|
||||||
JField("classname", JString(classname)) <- instancetype
|
JField("classname", JString(classname)) <- instancetype
|
||||||
|
|
||||||
} yield classname
|
} yield classname
|
||||||
|
|
||||||
val l:String =iType.head.toLowerCase()
|
val l: String = iType.head.toLowerCase()
|
||||||
l
|
l
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -76,12 +76,11 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def checkPMArticle(article: PMArticle): Unit = {
|
||||||
private def checkPMArticle(article:PMArticle): Unit = {
|
|
||||||
assertNotNull(article.getPmid)
|
assertNotNull(article.getPmid)
|
||||||
assertNotNull(article.getTitle)
|
assertNotNull(article.getTitle)
|
||||||
assertNotNull(article.getAuthors)
|
assertNotNull(article.getAuthors)
|
||||||
article.getAuthors.asScala.foreach{a =>
|
article.getAuthors.asScala.foreach { a =>
|
||||||
assertNotNull(a)
|
assertNotNull(a)
|
||||||
assertNotNull(a.getFullName)
|
assertNotNull(a.getFullName)
|
||||||
}
|
}
|
||||||
|
@ -89,20 +88,21 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testParsingPubmedXML():Unit = {
|
def testParsingPubmedXML(): Unit = {
|
||||||
val xml = new XMLEventReader(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")))
|
val xml = new XMLEventReader(
|
||||||
|
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
||||||
|
)
|
||||||
val parser = new PMParser(xml)
|
val parser = new PMParser(xml)
|
||||||
parser.foreach(checkPMArticle)
|
parser.foreach(checkPMArticle)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def checkPubmedPublication(o: Oaf): Unit = {
|
||||||
private def checkPubmedPublication(o:Oaf): Unit = {
|
|
||||||
assertTrue(o.isInstanceOf[Publication])
|
assertTrue(o.isInstanceOf[Publication])
|
||||||
val p:Publication = o.asInstanceOf[Publication]
|
val p: Publication = o.asInstanceOf[Publication]
|
||||||
assertNotNull(p.getId)
|
assertNotNull(p.getId)
|
||||||
assertNotNull(p.getTitle)
|
assertNotNull(p.getTitle)
|
||||||
p.getTitle.asScala.foreach(t =>assertNotNull(t.getValue))
|
p.getTitle.asScala.foreach(t => assertNotNull(t.getValue))
|
||||||
p.getAuthor.asScala.foreach(a =>assertNotNull(a.getFullname))
|
p.getAuthor.asScala.foreach(a => assertNotNull(a.getFullname))
|
||||||
assertNotNull(p.getInstance())
|
assertNotNull(p.getInstance())
|
||||||
p.getInstance().asScala.foreach { i =>
|
p.getInstance().asScala.foreach { i =>
|
||||||
assertNotNull(i.getCollectedfrom)
|
assertNotNull(i.getCollectedfrom)
|
||||||
|
@ -112,28 +112,26 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
assertNotNull(p.getOriginalId)
|
assertNotNull(p.getOriginalId)
|
||||||
p.getOriginalId.asScala.foreach(oId => assertNotNull(oId))
|
p.getOriginalId.asScala.foreach(oId => assertNotNull(oId))
|
||||||
|
|
||||||
|
val hasPMC = p
|
||||||
val hasPMC = p.getInstance().asScala.exists(i => i.getPid.asScala.exists(pid => pid.getQualifier.getClassid.equalsIgnoreCase(PidType.pmc.toString)))
|
.getInstance()
|
||||||
|
.asScala
|
||||||
|
.exists(i => i.getPid.asScala.exists(pid => pid.getQualifier.getClassid.equalsIgnoreCase(PidType.pmc.toString)))
|
||||||
|
|
||||||
if (hasPMC) {
|
if (hasPMC) {
|
||||||
assertTrue(p.getOriginalId.asScala.exists(oId => oId.startsWith("od_______267::")))
|
assertTrue(p.getOriginalId.asScala.exists(oId => oId.startsWith("od_______267::")))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testPubmedOriginalID():Unit = {
|
def testPubmedOriginalID(): Unit = {
|
||||||
val article:PMArticle = new PMArticle
|
val article: PMArticle = new PMArticle
|
||||||
|
|
||||||
|
|
||||||
article.setPmid("1234")
|
article.setPmid("1234")
|
||||||
|
|
||||||
article.setTitle("a Title")
|
article.setTitle("a Title")
|
||||||
|
|
||||||
// VERIFY PUBLICATION IS NOT NULL
|
// VERIFY PUBLICATION IS NOT NULL
|
||||||
article.getPublicationTypes.add( new PMSubject("article",null, null))
|
article.getPublicationTypes.add(new PMSubject("article", null, null))
|
||||||
var publication = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
|
var publication = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
|
||||||
assertNotNull(publication)
|
assertNotNull(publication)
|
||||||
assertEquals("50|pmid________::81dc9bdb52d04dc20036dbd8313ed055", publication.getId)
|
assertEquals("50|pmid________::81dc9bdb52d04dc20036dbd8313ed055", publication.getId)
|
||||||
|
@ -146,30 +144,25 @@ class BioScholixTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
// VERIFY ORIGINAL ID GENERATE IN OLD WAY USING PMC IDENTIFIER EXISTS
|
// VERIFY ORIGINAL ID GENERATE IN OLD WAY USING PMC IDENTIFIER EXISTS
|
||||||
|
|
||||||
|
val oldOpenaireID = "od_______267::0000072375bc0e68fa09d4e6b7658248"
|
||||||
val oldOpenaireID ="od_______267::0000072375bc0e68fa09d4e6b7658248"
|
|
||||||
|
|
||||||
val hasOldOpenAIREID = publication.getOriginalId.asScala.exists(o => o.equalsIgnoreCase(oldOpenaireID))
|
val hasOldOpenAIREID = publication.getOriginalId.asScala.exists(o => o.equalsIgnoreCase(oldOpenaireID))
|
||||||
|
|
||||||
assertTrue(hasOldOpenAIREID)
|
assertTrue(hasOldOpenAIREID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testPubmedMapping() :Unit = {
|
def testPubmedMapping(): Unit = {
|
||||||
|
|
||||||
val xml = new XMLEventReader(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")))
|
val xml = new XMLEventReader(
|
||||||
|
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
|
||||||
|
)
|
||||||
val parser = new PMParser(xml)
|
val parser = new PMParser(xml)
|
||||||
val results = ListBuffer[Oaf]()
|
val results = ListBuffer[Oaf]()
|
||||||
parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))
|
parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
results.foreach(checkPubmedPublication)
|
results.foreach(checkPubmedPublication)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -162,7 +162,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
.stream()
|
.stream()
|
||||||
.map(Qualifier::getSchemeid)
|
.map(Qualifier::getSchemeid)
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
assertEquals(1,cpSchemeId.size());
|
assertEquals(1, cpSchemeId.size());
|
||||||
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
|
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
|
||||||
HashSet<String> cpSchemeName = ds
|
HashSet<String> cpSchemeName = ds
|
||||||
.getContentpolicies()
|
.getContentpolicies()
|
||||||
|
@ -289,16 +289,16 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
checkProperty(r1, "contribution", "436754.0");
|
checkProperty(r1, "contribution", "436754.0");
|
||||||
checkProperty(r2, "contribution", "436754.0");
|
checkProperty(r2, "contribution", "436754.0");
|
||||||
|
|
||||||
checkProperty(r1, "currency","EUR");
|
checkProperty(r1, "currency", "EUR");
|
||||||
checkProperty(r2, "currency", "EUR");
|
checkProperty(r2, "currency", "EUR");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkProperty(Relation r, String property, String value) {
|
private void checkProperty(Relation r, String property, String value) {
|
||||||
final List<KeyValue> p = r
|
final List<KeyValue> p = r
|
||||||
.getProperties()
|
.getProperties()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(kv -> kv.getKey().equals(property))
|
.filter(kv -> kv.getKey().equals(property))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertFalse(p.isEmpty());
|
assertFalse(p.isEmpty());
|
||||||
assertEquals(1, p.size());
|
assertEquals(1, p.size());
|
||||||
assertEquals(value, p.get(0).getValue());
|
assertEquals(value, p.get(0).getValue());
|
||||||
|
|
Loading…
Reference in New Issue