2017-09-05 17:13:50 +02:00
package org.gcube.accounting.aggregator.aggregation ;
import java.io.File ;
2021-05-06 13:13:30 +02:00
import java.sql.ResultSet ;
2021-11-08 14:57:35 +01:00
import java.time.OffsetDateTime ;
2017-09-05 17:13:50 +02:00
import java.util.Calendar ;
import java.util.List ;
2021-11-08 14:57:35 +01:00
import java.util.Set ;
2017-09-05 17:13:50 +02:00
import java.util.UUID ;
2020-03-16 16:43:37 +01:00
import java.util.concurrent.TimeUnit ;
2017-09-05 17:13:50 +02:00
2024-02-06 12:31:38 +01:00
import org.gcube.accounting.aggregator.persistence.AggregatorPersistenceFactory ;
2024-02-06 12:38:15 +01:00
import org.gcube.accounting.aggregator.persistence.AggregatorPersistenceSrc ;
2017-09-05 17:13:50 +02:00
import org.gcube.accounting.aggregator.status.AggregationState ;
import org.gcube.accounting.aggregator.status.AggregationStatus ;
import org.gcube.accounting.aggregator.utility.Constant ;
import org.gcube.accounting.aggregator.utility.Utility ;
import org.gcube.accounting.datamodel.AggregatedUsageRecord ;
2017-09-15 10:40:03 +02:00
import org.gcube.accounting.datamodel.aggregation.AggregatedServiceUsageRecord ;
2017-09-21 09:53:53 +02:00
import org.gcube.accounting.datamodel.basetypes.AbstractServiceUsageRecord ;
2017-09-15 10:40:03 +02:00
import org.gcube.accounting.datamodel.usagerecords.ServiceUsageRecord ;
2021-11-08 14:57:35 +01:00
import org.gcube.accounting.utility.postgresql.RecordToDBFields ;
import org.gcube.accounting.utility.postgresql.RecordToDBMapping ;
2021-05-06 13:13:30 +02:00
import org.gcube.com.fasterxml.jackson.databind.ObjectMapper ;
import org.gcube.com.fasterxml.jackson.databind.node.ObjectNode ;
2017-09-18 15:07:34 +02:00
import org.gcube.documentstore.exception.InvalidValueException ;
2017-09-05 17:13:50 +02:00
import org.gcube.documentstore.records.AggregatedRecord ;
import org.gcube.documentstore.records.DSMapper ;
2017-09-18 15:07:34 +02:00
import org.gcube.documentstore.records.Record ;
2017-09-05 17:13:50 +02:00
import org.gcube.documentstore.records.RecordUtility ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
/ * *
* @author Luca Frosini ( ISTI - CNR )
* /
public class Aggregator {
private static Logger logger = LoggerFactory . getLogger ( Aggregator . class ) ;
private static final String TMP_SUFFIX = " .tmp " ;
protected final AggregationStatus aggregationStatus ;
protected final File originalRecordsbackupFile ;
protected final File aggregateRecordsBackupFile ;
2017-09-18 15:07:34 +02:00
protected final File malformedRecordsFile ;
protected int malformedRecordNumber ;
2017-09-05 17:13:50 +02:00
2021-05-06 13:13:30 +02:00
protected ObjectMapper objectMapper ;
2017-09-05 17:13:50 +02:00
protected Calendar startTime ;
2021-05-06 13:13:30 +02:00
public Aggregator ( AggregationStatus aggregationStatus , File originalRecordsbackupFile , File aggregateRecordsBackupFile ) {
2017-09-05 17:13:50 +02:00
this . aggregationStatus = aggregationStatus ;
this . originalRecordsbackupFile = originalRecordsbackupFile ;
this . aggregateRecordsBackupFile = aggregateRecordsBackupFile ;
2017-09-18 15:07:34 +02:00
this . malformedRecordsFile = Utility . getMalformatedFile ( aggregateRecordsBackupFile ) ;
2021-05-06 13:13:30 +02:00
this . objectMapper = new ObjectMapper ( ) ;
2017-09-05 17:13:50 +02:00
}
public void aggregate ( ) throws Exception {
if ( AggregationState . canContinue ( aggregationStatus . getAggregationState ( ) , AggregationState . STARTED ) ) {
startTime = Utility . getUTCCalendarInstance ( ) ;
2021-05-06 13:13:30 +02:00
2024-02-06 12:38:15 +01:00
AggregatorPersistenceSrc aggregatorPersistenceSrc = AggregatorPersistenceFactory . getAggregatorPersistenceSrc ( ) ;
ResultSet resultSet = aggregatorPersistenceSrc . getResultSetOfRecordToBeAggregated ( aggregationStatus ) ;
2021-11-05 14:25:45 +01:00
2021-05-06 13:13:30 +02:00
retrieveAndAggregate ( resultSet ) ;
2017-09-05 17:13:50 +02:00
}
}
2017-09-21 09:53:53 +02:00
private static final String USAGE_RECORD_TYPE = " usageRecordType " ;
private static final String SINGLE = " Single " ;
private static final String SIMPLE = " Simple " ;
2017-09-18 15:07:34 +02:00
2017-09-26 15:33:39 +02:00
2021-05-06 13:13:30 +02:00
protected int elaborateRow ( ObjectNode content , AggregatorBuffer aggregatorBuffer , int originalRecordsCounter ) throws Exception {
2017-09-26 15:33:39 +02:00
try {
2021-05-06 13:13:30 +02:00
if ( content . has ( USAGE_RECORD_TYPE ) ) {
String recordType = content . get ( USAGE_RECORD_TYPE ) . asText ( ) ;
content . remove ( USAGE_RECORD_TYPE ) ;
2017-09-26 15:33:39 +02:00
content . put ( Record . RECORD_TYPE , recordType ) ;
}
Boolean aggregated = false ;
2018-01-26 15:00:16 +01:00
2021-05-06 13:13:30 +02:00
if ( content . has ( AggregatedRecord . CREATION_TIME ) ) {
2018-01-26 15:00:16 +01:00
Object object = content . get ( AggregatedRecord . CREATION_TIME ) ;
if ( object instanceof Double ) {
Double d = ( ( Double ) object ) ;
content . put ( AggregatedRecord . CREATION_TIME , d . longValue ( ) ) ;
}
}
2021-05-06 13:13:30 +02:00
if ( content . has ( AggregatedRecord . START_TIME ) ) {
2018-01-26 16:02:43 +01:00
aggregated = true ;
2018-01-26 15:00:16 +01:00
Object object = content . get ( AggregatedRecord . START_TIME ) ;
if ( object instanceof Double ) {
Double d = ( ( Double ) object ) ;
content . put ( AggregatedRecord . START_TIME , d . longValue ( ) ) ;
}
}
2021-05-06 13:13:30 +02:00
if ( content . has ( AggregatedRecord . END_TIME ) ) {
2018-01-26 16:02:43 +01:00
aggregated = true ;
2018-01-26 15:00:16 +01:00
Object object = content . get ( AggregatedRecord . END_TIME ) ;
if ( object instanceof Double ) {
Double d = ( ( Double ) object ) ;
content . put ( AggregatedRecord . END_TIME , d . longValue ( ) ) ;
}
}
2021-05-06 13:13:30 +02:00
if ( content . has ( AggregatedRecord . OPERATION_COUNT ) ) {
2018-01-26 15:00:16 +01:00
Object object = content . get ( AggregatedRecord . OPERATION_COUNT ) ;
if ( object instanceof Double ) {
Double d = ( ( Double ) object ) ;
content . put ( AggregatedRecord . OPERATION_COUNT , d . intValue ( ) ) ;
}
2018-01-26 16:02:43 +01:00
2021-05-06 13:13:30 +02:00
if ( content . get ( AggregatedRecord . OPERATION_COUNT ) . asInt ( ) > 1 ) {
2018-01-26 16:02:43 +01:00
aggregated = true ;
}
}
if ( aggregated ) {
content . put ( AggregatedRecord . AGGREGATED , true ) ;
}
2021-05-06 13:13:30 +02:00
String recordType = content . get ( Record . RECORD_TYPE ) . asText ( ) ;
2018-01-26 16:02:43 +01:00
if ( ! aggregated ) {
if ( recordType . startsWith ( SIMPLE ) ) {
recordType = recordType . replace ( SIMPLE , SINGLE ) ;
content . put ( Record . RECORD_TYPE , recordType ) ;
}
if ( ! recordType . startsWith ( SINGLE ) ) {
recordType = SINGLE + recordType ;
content . put ( Record . RECORD_TYPE , recordType ) ;
}
} else {
if ( recordType . startsWith ( SIMPLE ) ) {
recordType = recordType . replace ( SIMPLE , " " ) ;
content . put ( Record . RECORD_TYPE , recordType ) ;
}
if ( recordType . startsWith ( SINGLE ) ) {
recordType = recordType . replace ( SINGLE , " " ) ;
content . put ( Record . RECORD_TYPE , recordType ) ;
}
2017-09-26 15:33:39 +02:00
}
String record = content . toString ( ) ;
// Aggregate the Record
aggregateRow ( aggregatorBuffer , record ) ;
+ + originalRecordsCounter ;
if ( originalRecordsCounter % 1000 = = 0 ) {
int aggregatedRecordsNumber = aggregatorBuffer . getAggregatedRecords ( ) . size ( ) ;
int diff = originalRecordsCounter - aggregatedRecordsNumber ;
float percentage = ( 100 * diff ) / originalRecordsCounter ;
logger . info ( " {} At the moment, the elaborated original records are {}. The Aggregated records are {}. Difference {}. We are recovering {}% of Documents " ,
aggregationStatus . getAggregationInfo ( ) , originalRecordsCounter , aggregatedRecordsNumber , diff , percentage ) ;
}
2017-10-10 12:25:13 +02:00
Utility . printLine ( originalRecordsbackupFile , record ) ;
2017-09-26 15:33:39 +02:00
return originalRecordsCounter ;
} catch ( Exception e ) {
throw e ;
}
}
private static final int MAX_RETRY = 3 ;
2021-11-08 14:57:35 +01:00
protected void addProperty ( ObjectNode objectNode , String key , Object value ) {
if ( value instanceof Number ) {
if ( value instanceof Integer ) {
objectNode . put ( key , ( int ) value ) ;
return ;
}
Long longValue = Long . valueOf ( value . toString ( ) ) ;
objectNode . put ( key , longValue ) ;
return ;
}
objectNode . put ( key , ( String ) value . toString ( ) ) ;
}
protected Calendar getCalendar ( OffsetDateTime offsetDateTime ) {
Calendar calendar = Calendar . getInstance ( ) ;
long epochMillis = offsetDateTime . toInstant ( ) . toEpochMilli ( ) ;
calendar . setTimeInMillis ( epochMillis ) ;
return calendar ;
}
2021-05-06 13:13:30 +02:00
protected void retrieveAndAggregate ( ResultSet resultSet ) throws Exception {
2017-09-05 17:13:50 +02:00
AggregatorBuffer aggregatorBuffer = new AggregatorBuffer ( ) ;
Calendar start = Utility . getUTCCalendarInstance ( ) ;
logger . debug ( " Elaboration of Records started at {} " , Constant . DEFAULT_DATE_FORMAT . format ( start . getTime ( ) ) ) ;
originalRecordsbackupFile . delete ( ) ;
aggregateRecordsBackupFile . delete ( ) ;
2017-09-18 15:07:34 +02:00
malformedRecordsFile . delete ( ) ;
2017-09-05 17:13:50 +02:00
2021-11-08 14:57:35 +01:00
AggregationInfo aggregationInfo = aggregationStatus . getAggregationInfo ( ) ;
String type = aggregationInfo . getRecordType ( ) ;
Class < ? extends AggregatedRecord < ? , ? > > clz = RecordUtility . getAggregatedRecordClass ( type ) ;
RecordToDBFields recordToDBFields = RecordToDBMapping . getRecordToDBFields ( clz ) ;
Set < String > requiredFields = clz . newInstance ( ) . getRequiredFields ( ) ;
2017-09-18 15:07:34 +02:00
malformedRecordNumber = 0 ;
2017-09-05 17:13:50 +02:00
int originalRecordsCounter = 0 ;
2021-05-06 13:13:30 +02:00
while ( resultSet . next ( ) ) {
2017-09-26 15:33:39 +02:00
for ( int i = 1 ; i < = MAX_RETRY ; i + + ) {
try {
2021-11-08 14:57:35 +01:00
ObjectNode objectNode = objectMapper . createObjectNode ( ) ;
addProperty ( objectNode , Record . RECORD_TYPE , type ) ;
2021-05-06 13:13:30 +02:00
2021-11-08 14:57:35 +01:00
for ( String recordField : requiredFields ) {
String tableField = recordToDBFields . getTableField ( recordField ) ;
Object obj = null ;
switch ( recordField ) {
case AggregatedRecord . START_TIME : case AggregatedRecord . END_TIME : case AggregatedRecord . CREATION_TIME :
OffsetDateTime offsetDateTime = resultSet . getObject ( tableField , OffsetDateTime . class ) ;
Calendar calendar = getCalendar ( offsetDateTime ) ;
obj = calendar . getTimeInMillis ( ) ;
break ;
default :
obj = resultSet . getObject ( tableField ) ;
break ;
}
addProperty ( objectNode , recordField , obj ) ;
}
originalRecordsCounter = elaborateRow ( objectNode , aggregatorBuffer , originalRecordsCounter ) ;
2020-03-16 16:43:37 +01:00
TimeUnit . MILLISECONDS . sleep ( 3 ) ;
2017-09-26 15:33:39 +02:00
break ;
2017-09-26 16:55:56 +02:00
} catch ( RuntimeException e ) {
2017-09-26 15:33:39 +02:00
if ( i = = 2 ) {
2021-05-06 13:13:30 +02:00
logger . error ( " Unable to elaborate row {}. Tryed {} times. " , i , e ) ;
2017-09-26 15:33:39 +02:00
}
2017-09-13 16:27:59 +02:00
}
2017-09-05 17:13:50 +02:00
}
}
Calendar end = Utility . getUTCCalendarInstance ( ) ;
long duration = end . getTimeInMillis ( ) - start . getTimeInMillis ( ) ;
String durationForHuman = Utility . getHumanReadableDuration ( duration ) ;
logger . debug ( " {} Elaboration of Records terminated at {}. Duration {} " ,
aggregationStatus . getAggregationInfo ( ) , Constant . DEFAULT_DATE_FORMAT . format ( end . getTime ( ) ) , durationForHuman ) ;
File aggregateRecordsBackupFileTmp = new File ( aggregateRecordsBackupFile . getParent ( ) ,
aggregateRecordsBackupFile . getName ( ) + TMP_SUFFIX ) ;
aggregateRecordsBackupFileTmp . delete ( ) ;
// Saving Aggregated Record on local file
logger . debug ( " Going to save {} to file {} " , AggregatedUsageRecord . class . getSimpleName ( ) ,
aggregateRecordsBackupFile ) ;
2017-09-18 15:07:34 +02:00
2017-09-15 10:40:03 +02:00
2017-09-05 17:13:50 +02:00
List < AggregatedRecord < ? , ? > > aggregatedRecords = aggregatorBuffer . getAggregatedRecords ( ) ;
for ( AggregatedRecord < ? , ? > aggregatedRecord : aggregatedRecords ) {
2017-09-18 15:07:34 +02:00
String marshalled = DSMapper . marshal ( aggregatedRecord ) ;
2021-05-06 13:13:30 +02:00
Utility . printLine ( aggregateRecordsBackupFileTmp , marshalled ) ;
2017-09-05 17:13:50 +02:00
}
aggregateRecordsBackupFileTmp . renameTo ( aggregateRecordsBackupFile ) ;
2017-09-15 11:08:52 +02:00
aggregationStatus . setRecordNumbers ( originalRecordsCounter , aggregatedRecords . size ( ) , malformedRecordNumber ) ;
2018-02-22 14:49:54 +01:00
aggregationStatus . setAggregationState ( AggregationState . AGGREGATED , startTime , true ) ;
2017-09-05 17:13:50 +02:00
}
2017-09-18 15:07:34 +02:00
2017-09-05 17:13:50 +02:00
protected void aggregateRow ( AggregatorBuffer aggregatorBuffer , String json ) throws Exception {
2017-09-18 15:07:34 +02:00
Record record = RecordUtility . getRecord ( json ) ;
try {
record . validate ( ) ;
} catch ( InvalidValueException e ) {
+ + malformedRecordNumber ;
Utility . printLine ( malformedRecordsFile , json ) ;
if ( record instanceof AggregatedServiceUsageRecord ) {
2021-04-12 16:50:41 +02:00
try {
if ( record . getResourceProperty ( AggregatedServiceUsageRecord . MIN_INVOCATION_TIME ) = = null ) {
record . setResourceProperty ( AggregatedServiceUsageRecord . MIN_INVOCATION_TIME , record . getResourceProperty ( ServiceUsageRecord . DURATION ) ) ;
}
if ( record . getResourceProperty ( AggregatedServiceUsageRecord . MAX_INVOCATION_TIME ) = = null ) {
record . setResourceProperty ( AggregatedServiceUsageRecord . MAX_INVOCATION_TIME , record . getResourceProperty ( ServiceUsageRecord . DURATION ) ) ;
}
if ( record . getResourceProperty ( AggregatedServiceUsageRecord . CALLER_QUALIFIER ) = = null ) {
record . setResourceProperty ( AggregatedServiceUsageRecord . CALLER_QUALIFIER , AbstractServiceUsageRecord . UNKNOWN ) ;
}
record . validate ( ) ;
} catch ( Exception ex ) {
return ;
2017-09-21 09:53:53 +02:00
}
2021-04-12 16:50:41 +02:00
} else {
2021-04-08 11:30:13 +02:00
return ;
2017-09-18 15:07:34 +02:00
}
2021-04-08 11:30:13 +02:00
2017-09-18 15:07:34 +02:00
}
record . setId ( UUID . randomUUID ( ) . toString ( ) ) ;
2017-09-05 17:13:50 +02:00
@SuppressWarnings ( " rawtypes " )
2017-09-18 15:07:34 +02:00
AggregatedRecord aggregatedRecord = AggregatorBuffer . getAggregatedRecord ( record ) ;
aggregatorBuffer . aggregate ( aggregatedRecord ) ;
2017-09-05 17:13:50 +02:00
}
}