enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
8 changed files with 12 additions and 12 deletions
Showing only changes of commit 13815d5d13 - Show all commits

View File

@@ -122,8 +122,8 @@ object DoiBoostMappingUtil {
  hb.setValue(item.officialName)
  hb.setKey(generateDSId(item.id))
  if (item.openAccess)
- i.setAccessright(createQualifier("Open", "dnet:access_modes"))
+ i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
- publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
+ publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
  }
  else {
  hb.setValue("Unknown Repository")
@@ -134,8 +134,8 @@ object DoiBoostMappingUtil {
  val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
  if (ar.nonEmpty) {
- if(ar.contains("Open")){
+ if(ar.contains("OPEN")){
- publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
+ publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
  }
  else {
  publication.setBestaccessright(createQualifier(ar.head, "dnet:access_modes"))

View File

@@ -168,9 +168,10 @@ case object Crossref2Oaf {
  instance.setRefereed(asField("peerReviewed"))
- instance.setAccessright(createQualifier("Restricted", "dnet:access_modes"))
+ instance.setAccessright(createQualifier("RESTRICTED", "dnet:access_modes"))
  result.setInstance(List(instance).asJava)
  instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
+ result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource"))
  instance.setCollectedfrom(createCrossrefCollectedFrom())
  if (StringUtils.isNotBlank(issuedDate)) {
@@ -199,7 +200,6 @@ case object Crossref2Oaf {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: json4s.JValue = parse(input)
  var resultList: List[Oaf] = List()
@@ -344,7 +344,7 @@ case object Crossref2Oaf {
  }
  def convertDataset(dataset: Dataset): Unit = {
- //TODO probably we need to add relation and other stuff here
+ // TODO check if there are other info to map into the Dataset
  }

View File

@@ -46,7 +46,7 @@ object UnpayWallToOAF {
  val i :Instance= new Instance()
  i.setCollectedfrom(createUnpayWallCollectedFrom())
- i.setAccessright(createQualifier("Open", "dnet:access_modes"))
+ i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
  i.setUrl(List(oaLocation.url.get).asJava)
  if (oaLocation.license.isDefined)

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +1,4 @@
- <workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
+ <workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
  <parameters>
  <property>
  <name>crossrefPublicationPath</name>

View File

@@ -22,7 +22,7 @@
  </property>
  </parameters>
- <start to="PreprocessMag"/>
+ <start to="ResetWorkingPath"/>
  <kill name="Kill">

View File

@@ -43,7 +43,7 @@
  --conf spark.sql.shuffle.partitions=3840
  ${sparkExtraOPT}
  </spark-opts>
- <arg>--sourcePath</arg><arg>${sourcePath}/json.gz</arg>
+ <arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
  <arg>--targetPath</arg><arg>${targetPath}</arg>
  <arg>--master</arg><arg>yarn-cluster</arg>
  </spark>

View File

@@ -193,6 +193,7 @@
  <groupId>net.sf.saxon</groupId>
  <artifactId>Saxon-HE</artifactId>
  <version>9.9.1-6</version>
+ <scope>provided</scope>
  </dependency>
  <dependency>