reports for types
This commit is contained in:
parent
71204a8056
commit
32f4d6f691
|
@ -42,6 +42,7 @@ import org.slf4j.LoggerFactory;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.DbClient;
|
import eu.dnetlib.dhp.common.DbClient;
|
||||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class BaseAnalyzerJob {
|
public class BaseAnalyzerJob {
|
||||||
|
|
||||||
|
@ -77,6 +78,9 @@ public class BaseAnalyzerJob {
|
||||||
final String opendoarPath = parser.get("opendoarPath");
|
final String opendoarPath = parser.get("opendoarPath");
|
||||||
log.info("opendoarPath {}: ", opendoarPath);
|
log.info("opendoarPath {}: ", opendoarPath);
|
||||||
|
|
||||||
|
final String typesReportPath = parser.get("typesReportPath");
|
||||||
|
log.info("typesReportPath {}: ", typesReportPath);
|
||||||
|
|
||||||
final int fromStep = Integer.parseInt(parser.get("fromStep"));
|
final int fromStep = Integer.parseInt(parser.get("fromStep"));
|
||||||
log.info("fromStep {}: ", fromStep);
|
log.info("fromStep {}: ", fromStep);
|
||||||
|
|
||||||
|
@ -115,10 +119,48 @@ public class BaseAnalyzerJob {
|
||||||
log
|
log
|
||||||
.info("\n**************************************\n* EXECUTING STEP 2: DONE\n**************************************");
|
.info("\n**************************************\n* EXECUTING STEP 2: DONE\n**************************************");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fromStep <= 3) {
|
||||||
|
log
|
||||||
|
.info("\n**************************************\n* EXECUTING STEP 3: Type Vocabulary Report\n**************************************");
|
||||||
|
generateVocTypeReport(spark, outputPath, typesReportPath);
|
||||||
|
log
|
||||||
|
.info("\n**************************************\n* EXECUTING STEP 3: DONE\n**************************************");
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void generateVocTypeReport(final SparkSession spark,
|
||||||
|
final String reportPath,
|
||||||
|
final String typesReportPath) {
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.parquet(reportPath)
|
||||||
|
.as(Encoders.bean(BaseRecordInfo.class))
|
||||||
|
.flatMap(rec -> {
|
||||||
|
final List<Tuple2<String, String>> list = new ArrayList<>();
|
||||||
|
for (final String t1 : rec.getTypes()) {
|
||||||
|
if (t1.startsWith("TYPE_NORM:")) {
|
||||||
|
for (final String t2 : rec.getTypes()) {
|
||||||
|
if (t2.startsWith("TYPE:")) {
|
||||||
|
list
|
||||||
|
.add(new Tuple2<>(StringUtils.substringAfter(t1, "TYPE_NORM:").trim(),
|
||||||
|
StringUtils.substringAfter(t2, "TYPE:").trim()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list.iterator();
|
||||||
|
}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
|
||||||
|
.distinct()
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.format("parquet")
|
||||||
|
.save(typesReportPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private static void generateOpenDoarReport(final SparkSession spark,
|
private static void generateOpenDoarReport(final SparkSession spark,
|
||||||
final String reportPath,
|
final String reportPath,
|
||||||
final String opendoarPath,
|
final String opendoarPath,
|
||||||
|
|
|
@ -23,6 +23,12 @@
|
||||||
"paramDescription": "the path of the generated the OpenDOAR report",
|
"paramDescription": "the path of the generated the OpenDOAR report",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "t",
|
||||||
|
"paramLongName": "typesReportPath",
|
||||||
|
"paramDescription": "the path of the generated types report",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "f",
|
"paramName": "f",
|
||||||
"paramLongName": "fromStep",
|
"paramLongName": "fromStep",
|
||||||
|
|
|
@ -16,6 +16,10 @@
|
||||||
<name>baseOpenDoarReportsPath</name>
|
<name>baseOpenDoarReportsPath</name>
|
||||||
<description>path where to store the OpenDOAR reports</description>
|
<description>path where to store the OpenDOAR reports</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>baseTypesReportPath</name>
|
||||||
|
<description>path of the generated types report</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>postgresURL</name>
|
<name>postgresURL</name>
|
||||||
<description>the postgres URL to access to the database</description>
|
<description>the postgres URL to access to the database</description>
|
||||||
|
@ -61,6 +65,7 @@
|
||||||
<arg>--dataPath</arg><arg>${baseDataPath}</arg>
|
<arg>--dataPath</arg><arg>${baseDataPath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${baseReportsPath}</arg>
|
<arg>--outputPath</arg><arg>${baseReportsPath}</arg>
|
||||||
<arg>--opendoarPath</arg><arg>${baseOpenDoarReportsPath}</arg>
|
<arg>--opendoarPath</arg><arg>${baseOpenDoarReportsPath}</arg>
|
||||||
|
<arg>--typesReportPath</arg><arg>${baseTypesReportPath}</arg>
|
||||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||||
|
|
Loading…
Reference in New Issue