forked from D-Net/dnet-hadoop
removed unnecessary accumulators
This commit is contained in:
parent
657b0208a2
commit
64311b8be4
|
@ -80,17 +80,10 @@ public class SparkDownloadOrcidWorks {
|
|||
LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works");
|
||||
LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works");
|
||||
LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found");
|
||||
LongAccumulator errorLoadingJsonFoundAcc = spark
|
||||
.sparkContext()
|
||||
.longAccumulator("error_loading_json_found");
|
||||
LongAccumulator errorLoadingXMLFoundAcc = spark
|
||||
.sparkContext()
|
||||
.longAccumulator("error_loading_xml_found");
|
||||
LongAccumulator errorParsingXMLFoundAcc = spark
|
||||
.sparkContext()
|
||||
.longAccumulator("error_parsing_xml_found");
|
||||
LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records");
|
||||
LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors");
|
||||
|
||||
JavaPairRDD<Text, Text> updatedAuthorsRDD = sc
|
||||
.sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class);
|
||||
|
@ -107,11 +100,10 @@ public class SparkDownloadOrcidWorks {
|
|||
if (statusCode.equals("200")) {
|
||||
String compressedData = getJsonValue(jElement, "compressedData");
|
||||
if (StringUtils.isEmpty(compressedData)) {
|
||||
errorLoadingJsonFoundAcc.add(1);
|
||||
|
||||
} else {
|
||||
String authorSummary = ArgumentApplicationParser.decompressValue(compressedData);
|
||||
if (StringUtils.isEmpty(authorSummary)) {
|
||||
errorLoadingXMLFoundAcc.add(1);
|
||||
} else {
|
||||
try {
|
||||
workIdLastModifiedDate = XMLRecordParser
|
||||
|
@ -184,7 +176,6 @@ public class SparkDownloadOrcidWorks {
|
|||
} else {
|
||||
downloaded.setStatusCode(-4);
|
||||
}
|
||||
errorsAcc.add(1);
|
||||
}
|
||||
long endReq = System.currentTimeMillis();
|
||||
long reqTime = endReq - startReq;
|
||||
|
@ -193,7 +184,6 @@ public class SparkDownloadOrcidWorks {
|
|||
}
|
||||
if (downloadCompleted) {
|
||||
downloaded.setStatusCode(200);
|
||||
downloadedRecordsAcc.add(1);
|
||||
downloaded
|
||||
.setCompressedData(
|
||||
ArgumentApplicationParser
|
||||
|
@ -214,9 +204,20 @@ public class SparkDownloadOrcidWorks {
|
|||
String works = ArgumentApplicationParser.decompressValue(compressedData);
|
||||
|
||||
// split a single xml containing multiple works into multiple xml (a single work for each xml)
|
||||
List<String> splittedWorks = XMLRecordParser
|
||||
List<String> splittedWorks = null;
|
||||
try {
|
||||
splittedWorks = XMLRecordParser
|
||||
.splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8));
|
||||
|
||||
} catch (Throwable t) {
|
||||
final DownloadedRecordData errDownloaded = new DownloadedRecordData();
|
||||
errDownloaded.setOrcidId(orcidId);
|
||||
errDownloaded.setLastModifiedDate(lastModifiedDate);
|
||||
errDownloaded.setStatusCode(-10);
|
||||
errDownloaded.setErrorMessage(t.getMessage());
|
||||
splittedDownloadedWorks.add(errDownloaded.toTuple2());
|
||||
errorParsingXMLFoundAcc.add(1);
|
||||
return splittedDownloadedWorks.iterator();
|
||||
}
|
||||
splittedWorks.forEach(w -> {
|
||||
final DownloadedRecordData downloaded = new DownloadedRecordData();
|
||||
downloaded.setOrcidId(orcidId);
|
||||
|
@ -228,10 +229,12 @@ public class SparkDownloadOrcidWorks {
|
|||
.setCompressedData(
|
||||
ArgumentApplicationParser
|
||||
.compressArgument(w));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (Throwable t) {
|
||||
downloaded.setStatusCode(-11);
|
||||
downloaded.setErrorMessage(t.getMessage());
|
||||
}
|
||||
splittedDownloadedWorks.add(downloaded.toTuple2());
|
||||
downloadedRecordsAcc.add(1);
|
||||
});
|
||||
|
||||
return splittedDownloadedWorks.iterator();
|
||||
|
@ -250,11 +253,8 @@ public class SparkDownloadOrcidWorks {
|
|||
logger.info("parsedWorksAcc: {}", parsedWorksAcc.value());
|
||||
logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value());
|
||||
logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value());
|
||||
logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value());
|
||||
logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value());
|
||||
logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value());
|
||||
logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value());
|
||||
logger.info("errorsAcc: {}", errorsAcc.value());
|
||||
});
|
||||
|
||||
}
|
||||
|
|
|
@ -78,7 +78,7 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="DownloadOrcidWorks"/>
|
||||
<start to="ResetLambda"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
|
@ -190,7 +190,7 @@
|
|||
<arg>-o</arg><arg>downloads/updated_works</arg>
|
||||
<arg>-t</arg><arg>${token}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<ok to="ResetNewAuthors"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
|
|
@ -7,5 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
|||
# A1 uses PatternLayout.
|
||||
log4j.logger.org = ERROR
|
||||
log4j.logger.eu.dnetlib = DEBUG
|
||||
log4j.logger.eu.dnetlib.doiboost.orcid = INFO
|
||||
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
Loading…
Reference in New Issue