forked from D-Net/dnet-hadoop
[HostedByMap] changed the model to also include the oaStart date and the review process, which could possibly be used in the future
This commit is contained in:
parent
b7c2340952
commit
5d608d6291
|
@ -2,6 +2,7 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.hostedbymap.model;
|
package eu.dnetlib.dhp.oa.graph.hostedbymap.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.opencsv.bean.CsvBindByName;
|
import com.opencsv.bean.CsvBindByName;
|
||||||
|
|
||||||
|
@ -17,7 +18,17 @@ public class DOAJModel implements Serializable {
|
||||||
private String eissn;
|
private String eissn;
|
||||||
|
|
||||||
@CsvBindByName(column = "Review process")
|
@CsvBindByName(column = "Review process")
|
||||||
private String reviewProcess;
|
private List<String> reviewProcess;
|
||||||
|
|
||||||
|
private Integer oaStart;
|
||||||
|
|
||||||
|
public Integer getOaStart() {
|
||||||
|
return oaStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOaStart(Integer oaStart) {
|
||||||
|
this.oaStart = oaStart;
|
||||||
|
}
|
||||||
|
|
||||||
public String getJournalTitle() {
|
public String getJournalTitle() {
|
||||||
return journalTitle;
|
return journalTitle;
|
||||||
|
@ -43,11 +54,11 @@ public class DOAJModel implements Serializable {
|
||||||
this.eissn = eissn;
|
this.eissn = eissn;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getReviewProcess() {
|
public List<String> getReviewProcess() {
|
||||||
return reviewProcess;
|
return reviewProcess;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setReviewProcess(String reviewProcess) {
|
public void setReviewProcess(List<String> reviewProcess) {
|
||||||
this.reviewProcess = reviewProcess;
|
this.reviewProcess = reviewProcess;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,9 +2,10 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport
|
||||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel}
|
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel}
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource
|
import eu.dnetlib.dhp.schema.oaf.Datasource
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.{FileUtils, IOUtils}
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||||
import org.apache.hadoop.io.compress.GzipCodec
|
import org.apache.hadoop.io.compress.GzipCodec
|
||||||
|
@ -13,7 +14,8 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
import java.io.PrintWriter
|
import java.io.{File, PrintWriter}
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
object SparkProduceHostedByMap {
|
object SparkProduceHostedByMap {
|
||||||
|
|
||||||
|
@ -34,7 +36,9 @@ object SparkProduceHostedByMap {
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
isOpenAccess
|
isOpenAccess,
|
||||||
|
-1,
|
||||||
|
List[String]()
|
||||||
)
|
)
|
||||||
case Constants.EISSN =>
|
case Constants.EISSN =>
|
||||||
HostedByItemType(
|
HostedByItemType(
|
||||||
|
@ -43,7 +47,9 @@ object SparkProduceHostedByMap {
|
||||||
"",
|
"",
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
"",
|
"",
|
||||||
isOpenAccess
|
isOpenAccess,
|
||||||
|
-1,
|
||||||
|
List[String]()
|
||||||
)
|
)
|
||||||
case Constants.ISSNL =>
|
case Constants.ISSNL =>
|
||||||
HostedByItemType(
|
HostedByItemType(
|
||||||
|
@ -52,7 +58,9 @@ object SparkProduceHostedByMap {
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
isOpenAccess
|
isOpenAccess,
|
||||||
|
-1,
|
||||||
|
List[String]()
|
||||||
)
|
)
|
||||||
|
|
||||||
// catch the default with a variable so you can print it
|
// catch the default with a variable so you can print it
|
||||||
|
@ -77,34 +85,36 @@ object SparkProduceHostedByMap {
|
||||||
issn: String,
|
issn: String,
|
||||||
eissn: String,
|
eissn: String,
|
||||||
issnl: String,
|
issnl: String,
|
||||||
oa: Boolean
|
oa: Boolean,
|
||||||
|
oaDate: Int,
|
||||||
|
reviewProcess: List[String]
|
||||||
): HostedByItemType = {
|
): HostedByItemType = {
|
||||||
if (issn != null) {
|
if (issn != null) {
|
||||||
if (eissn != null) {
|
if (eissn != null) {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, issn, eissn, issnl, oa)
|
HostedByItemType(id, officialname, issn, eissn, issnl, oa, oaDate, reviewProcess)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, issn, eissn, "", oa)
|
HostedByItemType(id, officialname, issn, eissn, "", oa, oaDate, reviewProcess)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, issn, "", issnl, oa)
|
HostedByItemType(id, officialname, issn, "", issnl, oa, oaDate, reviewProcess)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, issn, "", "", oa)
|
HostedByItemType(id, officialname, issn, "", "", oa, oaDate, reviewProcess)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (eissn != null) {
|
if (eissn != null) {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, "", eissn, issnl, oa)
|
HostedByItemType(id, officialname, "", eissn, issnl, oa, oaDate, reviewProcess)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, "", eissn, "", oa)
|
HostedByItemType(id, officialname, "", eissn, "", oa, oaDate, reviewProcess)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, "", "", issnl, oa)
|
HostedByItemType(id, officialname, "", "", issnl, oa, oaDate, reviewProcess)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType("", "", "", "", "", oa)
|
HostedByItemType("", "", "", "", "", oa, oaDate, reviewProcess)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -119,10 +129,12 @@ object SparkProduceHostedByMap {
|
||||||
dats.getJournal.getIssnPrinted,
|
dats.getJournal.getIssnPrinted,
|
||||||
dats.getJournal.getIssnOnline,
|
dats.getJournal.getIssnOnline,
|
||||||
dats.getJournal.getIssnLinking,
|
dats.getJournal.getIssnLinking,
|
||||||
false
|
false,
|
||||||
|
-1,
|
||||||
|
List[String]()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
HostedByItemType("", "", "", "", "", false)
|
HostedByItemType("", "", "", "", "", false, -1, List[String]())
|
||||||
}
|
}
|
||||||
|
|
||||||
def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
|
def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
|
||||||
|
@ -148,7 +160,9 @@ object SparkProduceHostedByMap {
|
||||||
gold.getIssn,
|
gold.getIssn,
|
||||||
"",
|
"",
|
||||||
gold.getIssnL,
|
gold.getIssnL,
|
||||||
true
|
true,
|
||||||
|
-1,
|
||||||
|
List[String]()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,14 +185,27 @@ object SparkProduceHostedByMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = {
|
def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = {
|
||||||
|
if (doaj.getOaStart == null) {
|
||||||
|
return getHostedByItemType(
|
||||||
|
Constants.DOAJ,
|
||||||
|
doaj.getJournalTitle,
|
||||||
|
doaj.getIssn,
|
||||||
|
doaj.getEissn,
|
||||||
|
"",
|
||||||
|
true,
|
||||||
|
-1,
|
||||||
|
doaj.getReviewProcess.asScala.toList
|
||||||
|
)
|
||||||
|
}
|
||||||
return getHostedByItemType(
|
return getHostedByItemType(
|
||||||
Constants.DOAJ,
|
Constants.DOAJ,
|
||||||
doaj.getJournalTitle,
|
doaj.getJournalTitle,
|
||||||
doaj.getIssn,
|
doaj.getIssn,
|
||||||
doaj.getEissn,
|
doaj.getEissn,
|
||||||
"",
|
"",
|
||||||
true
|
true,
|
||||||
|
doaj.getOaStart,
|
||||||
|
doaj.getReviewProcess.asScala.toList
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,6 +283,8 @@ object SparkProduceHostedByMap {
|
||||||
|
|
||||||
logger.info("Getting the Datasources")
|
logger.info("Getting the Datasources")
|
||||||
|
|
||||||
|
HdfsSupport.remove(outputPath, spark.sparkContext.hadoopConfiguration)
|
||||||
|
|
||||||
Aggregators
|
Aggregators
|
||||||
.explodeHostedByItemType(
|
.explodeHostedByItemType(
|
||||||
oaHostedByDataset(spark, datasourcePath)
|
oaHostedByDataset(spark, datasourcePath)
|
||||||
|
|
Loading…
Reference in New Issue