2020-05-14 09:40:08 +02:00
package org.gcube.dataharvest.harvester ;
import static org.gcube.resources.discovery.icclient.ICFactory.clientFor ;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor ;
import java.io.IOException ;
import java.io.StringReader ;
import java.security.GeneralSecurityException ;
import java.time.Instant ;
import java.time.LocalDate ;
import java.time.ZoneId ;
import java.time.format.DateTimeFormatter ;
import java.util.ArrayList ;
import java.util.Arrays ;
import java.util.Date ;
import java.util.HashMap ;
import java.util.List ;
import javax.xml.parsers.DocumentBuilder ;
import javax.xml.parsers.DocumentBuilderFactory ;
import org.gcube.accounting.accounting.summary.access.model.ScopeDescriptor ;
import org.gcube.accounting.accounting.summary.access.model.update.AccountingRecord ;
import org.gcube.common.authorization.client.exceptions.ObjectNotFound ;
import org.gcube.common.encryption.encrypter.StringEncrypter ;
import org.gcube.common.resources.gcore.GenericResource ;
import org.gcube.common.resources.gcore.ServiceEndpoint ;
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint ;
import org.gcube.common.resources.gcore.ServiceEndpoint.Property ;
import org.gcube.common.resources.gcore.utils.Group ;
import org.gcube.common.resources.gcore.utils.XPathHelper ;
import org.gcube.common.scope.api.ScopeProvider ;
import org.gcube.common.scope.impl.ScopeBean ;
import org.gcube.dataharvest.datamodel.AnalyticsReportCredentials ;
import org.gcube.dataharvest.datamodel.CoreServiceAccessesReportRow ;
import org.gcube.dataharvest.datamodel.HarvestedDataKey ;
import org.gcube.resources.discovery.client.api.DiscoveryClient ;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.w3c.dom.Node ;
import org.xml.sax.InputSource ;
2023-07-03 17:06:02 +02:00
import com.google.analytics.data.v1beta.BetaAnalyticsDataClient ;
import com.google.analytics.data.v1beta.BetaAnalyticsDataSettings ;
import com.google.analytics.data.v1beta.DateRange ;
import com.google.analytics.data.v1beta.DateRange.Builder ;
import com.google.analytics.data.v1beta.Dimension ;
import com.google.analytics.data.v1beta.Metric ;
import com.google.analytics.data.v1beta.Row ;
import com.google.analytics.data.v1beta.RunReportRequest ;
import com.google.analytics.data.v1beta.RunReportResponse ;
import com.google.api.gax.core.FixedCredentialsProvider ;
import com.google.auth.oauth2.ServiceAccountCredentials ;
2020-05-14 09:40:08 +02:00
2020-05-21 15:32:02 +02:00
/**
 * @author Massimiliano Assante (ISTI - CNR)
 */
2020-05-14 09:40:08 +02:00
public class CoreServicesAccessesHarvester extends BasicHarvester {
private static Logger logger = LoggerFactory . getLogger ( CoreServicesAccessesHarvester . class ) ;
private static final String MAPPING_RESOURCE_CATEGORY = " BigGAnalyticsMapping " ;
private static final String SERVICE_ENDPOINT_CATEGORY = " OnlineService " ;
2023-07-03 17:06:02 +02:00
private static final String SERVICE_ENDPOINT_NAME = " GA4AnalyticsDataService " ;
2020-05-14 09:40:08 +02:00
private static final String AP_VIEWS_PROPERTY = " views " ;
2023-07-03 17:06:02 +02:00
private static final String AP_CLIENT_ID = " client_id " ;
private static final String AP_PRIVATEKEY_ID_PROPERTY = " private_key_id " ;
2020-05-14 09:40:08 +02:00
private static final String PAGE_WORKSPACE_ACCESSES = " /workspace " ;
private static final String PAGE_MESSAGES_ACCESSES = " /messages " ;
private static final String PAGE_PROFILE_ACCESSES = " /profile " ;
private static final String PAGE_NOTIFICATION_ACCESSES = " /notifications " ;
private HashMap < String , List < CoreServiceAccessesReportRow > > coreServicesAccesses ;
/**
 * Creates the harvester and eagerly loads, through {@code getAllAccesses}, the access rows
 * of the given time range.
 *
 * @param start beginning of the harvested time range
 * @param end end of the harvested time range
 * @throws Exception if the superclass constructor or the retrieval of the accesses fails
 */
public CoreServicesAccessesHarvester(Date start, Date end) throws Exception {
    super(start, end);
    this.coreServicesAccesses = getAllAccesses(start, end);
}
@Override
public List < AccountingRecord > getAccountingRecords ( ) throws Exception {
try {
ArrayList < AccountingRecord > accountingRecords = new ArrayList < AccountingRecord > ( ) ;
for ( String dashboardContext : coreServicesAccesses . keySet ( ) ) {
int workspaceAccesses = 0 ;
int messagesAccesses = 0 ;
int notificationsAccesses = 0 ;
int profileAccesses = 0 ;
2020-05-14 15:33:46 +02:00
logger . debug ( " {}; " , dashboardContext ) ;
2020-05-14 09:40:08 +02:00
for ( CoreServiceAccessesReportRow row : coreServicesAccesses . get ( dashboardContext ) ) {
// String pagePath = row.getPagePath();
switch ( row . getKey ( ) ) {
case WORKSPACE_ACCESSES :
workspaceAccesses + = row . getVisitNumber ( ) ;
break ;
case MESSAGES_ACCESSES :
messagesAccesses + = row . getVisitNumber ( ) ;
break ;
case NOTIFICATIONS_ACCESSES :
notificationsAccesses + = row . getVisitNumber ( ) ;
break ;
case PROFILE_ACCESSES :
profileAccesses + = row . getVisitNumber ( ) ;
break ;
default :
break ;
}
}
ScopeDescriptor scopeDescriptor = new ScopeDescriptor ( ) ;
ScopeBean scopeBean = new ScopeBean ( dashboardContext ) ;
scopeDescriptor . setId ( dashboardContext ) ;
scopeDescriptor . setName ( scopeBean . name ( ) ) ;
AccountingRecord ar1 = new AccountingRecord ( scopeDescriptor , instant , getDimension ( HarvestedDataKey . WORKSPACE_ACCESSES ) , ( long ) workspaceAccesses ) ;
AccountingRecord ar2 = new AccountingRecord ( scopeDescriptor , instant , getDimension ( HarvestedDataKey . MESSAGES_ACCESSES ) , ( long ) messagesAccesses ) ;
AccountingRecord ar3 = new AccountingRecord ( scopeDescriptor , instant , getDimension ( HarvestedDataKey . NOTIFICATIONS_ACCESSES ) , ( long ) notificationsAccesses ) ;
AccountingRecord ar4 = new AccountingRecord ( scopeDescriptor , instant , getDimension ( HarvestedDataKey . PROFILE_ACCESSES ) , ( long ) profileAccesses ) ;
2020-05-14 15:33:46 +02:00
logger . debug ( " {};{} " , ar1 . getDimension ( ) . getId ( ) , ar1 . getMeasure ( ) ) ;
2020-05-14 09:40:08 +02:00
accountingRecords . add ( ar1 ) ;
2020-05-14 15:33:46 +02:00
logger . debug ( " {};{} " , ar2 . getDimension ( ) . getId ( ) , ar2 . getMeasure ( ) ) ;
2020-05-14 09:40:08 +02:00
accountingRecords . add ( ar2 ) ;
2020-05-14 15:33:46 +02:00
logger . debug ( " {};{} " , ar3 . getDimension ( ) . getId ( ) , ar3 . getMeasure ( ) ) ;
2020-05-14 09:40:08 +02:00
accountingRecords . add ( ar3 ) ;
2020-05-14 15:33:46 +02:00
logger . debug ( " {};{} " , ar4 . getDimension ( ) . getId ( ) , ar4 . getMeasure ( ) ) ;
2020-05-14 09:40:08 +02:00
accountingRecords . add ( ar4 ) ;
}
logger . debug ( " Returning {} accountingRecords " , accountingRecords . size ( ) ) ;
return accountingRecords ;
} catch ( Exception e ) {
throw e ;
}
}
/ * *
*
* /
private static HashMap < String , List < CoreServiceAccessesReportRow > > getAllAccesses ( Date start , Date end ) throws Exception {
2023-07-03 17:06:02 +02:00
Builder dateRangeBuilder = getDateRangeBuilderForAnalytics ( start , end ) ;
logger . trace ( " Getting core services accesses in this time range {} " , dateRangeBuilder . toString ( ) ) ;
2020-05-14 09:40:08 +02:00
AnalyticsReportCredentials credentialsFromD4S = getAuthorisedApplicationInfoFromIs ( ) ;
logger . trace ( " gotten credentialsFromD4S id = {} " , credentialsFromD4S . getClientId ( ) ) ;
2023-07-03 17:06:02 +02:00
BetaAnalyticsDataSettings serviceSettings = initializeAnalyticsReporting ( credentialsFromD4S ) ;
2020-05-14 09:40:08 +02:00
logger . trace ( " gotten credentialsFromD4S viewIds= {} " , credentialsFromD4S . getViewIds ( ) . toString ( ) ) ;
2023-07-03 17:06:02 +02:00
HashMap < String , List < RunReportResponse > > responses = getReportResponses ( serviceSettings , credentialsFromD4S . getViewIds ( ) , dateRangeBuilder ) ;
2020-05-14 09:40:08 +02:00
HashMap < String , List < CoreServiceAccessesReportRow > > toReturn = new HashMap < > ( ) ;
2020-05-14 15:33:46 +02:00
int i = 1 ;
2020-05-14 09:40:08 +02:00
for ( String view : responses . keySet ( ) ) {
String dashboardContext = getAccountingDashboardContextGivenGAViewID ( view ) ;
if ( dashboardContext ! = null ) {
2020-05-14 15:33:46 +02:00
logger . trace ( " \ n ({}) *** Parsing responses for this Gateway view, which corresponds to Dashboard Context: {} \ n " , i , dashboardContext ) ;
2020-05-14 09:40:08 +02:00
List < CoreServiceAccessesReportRow > viewReport = parseResponse ( view , responses . get ( view ) , dashboardContext ) ;
logger . trace ( " Got {} entries from view id={} " , viewReport . size ( ) , view ) ;
toReturn . put ( dashboardContext , viewReport ) ;
} else {
logger . warn ( " Got entries from view id={} but cannot find Dashboard Context correspondance, I think you need to update the Generic Resource of the Mappings " , view ) ;
}
2020-05-14 15:33:46 +02:00
i + + ;
2020-05-14 09:40:08 +02:00
}
return toReturn ;
}
/ * *
2023-07-03 17:06:02 +02:00
* Initializes an Google Analytics Data API service object .
2020-05-14 09:40:08 +02:00
*
2023-07-03 17:06:02 +02:00
* @return An authorized Google Analytics Data API
2020-05-14 09:40:08 +02:00
* @throws IOException
* @throws GeneralSecurityException
* /
2023-07-03 17:06:02 +02:00
private static BetaAnalyticsDataSettings initializeAnalyticsReporting ( AnalyticsReportCredentials cred ) throws IOException {
return BetaAnalyticsDataSettings . newBuilder ( )
. setCredentialsProvider ( FixedCredentialsProvider . create (
ServiceAccountCredentials . fromPkcs8 ( cred . getClientId ( ) , cred . getClientEmail ( ) , cred . getPrivateKeyPem ( ) , cred . getPrivateKeyId ( ) , null ) ) )
. build ( ) ;
2020-05-14 09:40:08 +02:00
}
/ * *
2023-07-03 17:06:02 +02:00
* Queries Analytics Data API service
2020-05-14 09:40:08 +02:00
*
2023-07-03 17:06:02 +02:00
* @param service Analytics Data API service service settings .
* @return Row Analytics Data API service
2020-05-14 09:40:08 +02:00
* @throws IOException
* /
2023-07-03 17:06:02 +02:00
private static HashMap < String , List < RunReportResponse > > getReportResponses ( BetaAnalyticsDataSettings betaAnalyticsDataSettings ,
List < String > viewIDs , Builder dateRangeBuilder ) throws IOException {
HashMap < String , List < RunReportResponse > > reports = new HashMap < > ( ) ;
try ( BetaAnalyticsDataClient analyticsData = BetaAnalyticsDataClient . create ( betaAnalyticsDataSettings ) ) {
for ( String propertyId : viewIDs ) {
List < RunReportResponse > gReportResponses = new ArrayList < > ( ) ;
logger . debug ( " Getting data from Analytics Data API for propertyId: " + propertyId ) ;
RunReportRequest request =
RunReportRequest . newBuilder ( )
. setProperty ( " properties/ " + propertyId )
. addDimensions ( Dimension . newBuilder ( ) . setName ( " pagePath " ) )
. addMetrics ( Metric . newBuilder ( ) . setName ( " screenPageViews " ) )
. addDateRanges ( dateRangeBuilder )
. build ( ) ;
// Make the request.
RunReportResponse response = analyticsData . runReport ( request ) ;
2020-05-14 09:40:08 +02:00
gReportResponses . add ( response ) ;
2023-07-03 17:06:02 +02:00
reports . put ( propertyId , gReportResponses ) ;
2020-05-14 09:40:08 +02:00
}
}
return reports ;
}
2023-07-03 17:06:02 +02:00
2020-05-14 09:40:08 +02:00
/ * *
2023-07-03 17:06:02 +02:00
* Parses and prints the Analytics Data API service respose
2020-05-14 09:40:08 +02:00
*
2023-07-03 17:06:02 +02:00
* @param response An Analytics Data API service response .
2020-05-14 09:40:08 +02:00
* /
2023-07-03 17:06:02 +02:00
private static List < CoreServiceAccessesReportRow > parseResponse ( String viewId , List < RunReportResponse > responses , String dashboardContext ) {
logger . debug ( " parsing Response for propertyID= " + viewId ) ;
2020-05-14 09:40:08 +02:00
List < CoreServiceAccessesReportRow > toReturn = new ArrayList < > ( ) ;
2023-07-03 17:06:02 +02:00
for ( RunReportResponse response : responses ) {
for ( Row row : response . getRowsList ( ) ) {
String dimension = row . getDimensionValues ( 0 ) . getValue ( ) ;
String metric = row . getMetricValues ( 0 ) . getValue ( ) ;
CoreServiceAccessesReportRow var = new CoreServiceAccessesReportRow ( ) ;
boolean validEntry = false ;
String pagePath = dimension ;
logger . trace ( " parsing pagepath {}: value: {} " , pagePath , Integer . parseInt ( metric ) ) ;
if ( ! pagePath . contains ( " _redirect=/group " ) ) {
if ( pagePath . contains ( PAGE_WORKSPACE_ACCESSES ) ) {
var . setKey ( HarvestedDataKey . WORKSPACE_ACCESSES ) ;
logger . trace ( " **matched " + pagePath ) ;
validEntry = true ;
}
else if ( pagePath . contains ( PAGE_MESSAGES_ACCESSES ) ) {
var . setKey ( HarvestedDataKey . MESSAGES_ACCESSES ) ;
logger . trace ( " **matched " + pagePath ) ;
validEntry = true ;
}
else if ( pagePath . contains ( PAGE_PROFILE_ACCESSES ) ) {
var . setKey ( HarvestedDataKey . PROFILE_ACCESSES ) ;
logger . trace ( " **matched " + pagePath ) ;
validEntry = true ;
}
else if ( pagePath . contains ( PAGE_NOTIFICATION_ACCESSES ) ) {
var . setKey ( HarvestedDataKey . NOTIFICATIONS_ACCESSES ) ;
logger . trace ( " **matched " + pagePath ) ;
validEntry = true ;
2020-05-14 09:40:08 +02:00
}
}
2023-07-03 17:06:02 +02:00
if ( validEntry ) {
var . setDashboardContext ( dashboardContext ) ;
var . setPagePath ( dimension ) ;
var . setVisitNumber ( Integer . parseInt ( metric ) ) ;
toReturn . add ( var ) ;
}
2020-05-14 09:40:08 +02:00
}
}
return toReturn ;
}
2023-07-03 17:06:02 +02:00
2020-05-14 09:40:08 +02:00
/**
 * Queries the IS, in the given scope, for the Service Endpoints having the category and
 * name of the Analytics configuration. The scope set on the {@code ScopeProvider} before
 * the call is restored afterwards, also when the query fails.
 *
 * @param infrastructureScope the scope the query is run in
 * @return the matching Service Endpoints, as returned by the discovery client
 * @throws Exception if the query fails
 */
private static List<ServiceEndpoint> getAnalyticsReportingConfigurationFromIS(String infrastructureScope)
        throws Exception {
    String currScope = ScopeProvider.instance.get();
    ScopeProvider.instance.set(infrastructureScope);
    try {
        SimpleQuery query = queryFor(ServiceEndpoint.class);
        query.addCondition(" $resource/Profile/Category/text() eq ' " + SERVICE_ENDPOINT_CATEGORY + " ' ");
        query.addCondition(" $resource/Profile/Name/text() eq ' " + SERVICE_ENDPOINT_NAME + " ' ");
        DiscoveryClient<ServiceEndpoint> client = clientFor(ServiceEndpoint.class);
        return client.submit(query);
    } finally {
        // Restore the caller's scope even if the discovery query throws.
        ScopeProvider.instance.set(currScope);
    }
}
/ * *
* This method look up in the IS the Gateway which corresponds to a given Google Analytics viewId
* @param viewID
* @return the gateway name , e . g . " Blue-Cloud Gateway " or null if no correspondance was found
* @throws Exception
* @throws ObjectNotFound
* /
private static String getAccountingDashboardContextGivenGAViewID ( String viewID ) throws ObjectNotFound , Exception {
String toReturn = null ;
String context = org . gcube . dataharvest . utils . Utils . getCurrentContext ( ) ;
String currScope = ScopeProvider . instance . get ( ) ;
ScopeProvider . instance . set ( context ) ;
SimpleQuery query = queryFor ( GenericResource . class ) ;
query . addCondition ( " $resource/Profile/SecondaryType/text() eq ' " + MAPPING_RESOURCE_CATEGORY + " ' " ) ;
query . addCondition ( " $resource/Profile/Body/Property/viewID/text() eq ' " + viewID + " ' " ) ;
DiscoveryClient < GenericResource > client = clientFor ( GenericResource . class ) ;
List < GenericResource > list = client . submit ( query ) ;
if ( list . size ( ) > 1 ) {
logger . error ( " Too many Generic Resources having GA viewID " + viewID
+ " in this scope having SecondaryType " + MAPPING_RESOURCE_CATEGORY ) ;
} else if ( list . size ( ) = = 0 ) {
logger . warn ( " There is no Generic Resources having GA viewID " + viewID + " and SecondaryType "
+ MAPPING_RESOURCE_CATEGORY + " in this context: " + context ) ;
} else {
GenericResource found = list . get ( 0 ) ;
String elem = new StringBuilder ( " <body> " ) . append ( found . profile ( ) . bodyAsString ( ) ) . append ( " </body> " ) . toString ( ) ;
DocumentBuilder docBuilder = DocumentBuilderFactory . newInstance ( ) . newDocumentBuilder ( ) ;
Node node = docBuilder . parse ( new InputSource ( new StringReader ( elem ) ) ) . getDocumentElement ( ) ;
XPathHelper helper = new XPathHelper ( node ) ;
List < String > currValue = helper . evaluate ( " //Property/viewID/text() " ) ;
if ( currValue ! = null & & currValue . size ( ) > 0 ) {
List < String > contexts = currValue ;
for ( int i = 0 ; i < contexts . size ( ) ; i + + ) {
if ( currValue . get ( i ) . trim ( ) . compareTo ( viewID ) = = 0 ) {
toReturn = helper . evaluate ( " //Property/DashboardContext/text() " ) . get ( i ) ;
break ;
}
}
}
logger . debug ( " Found DashboardContext for viewId {} : {} " , viewID , toReturn ) ;
}
ScopeProvider . instance . set ( currScope ) ;
return toReturn ;
}
/ * *
* l
* @throws Exception
* /
private static AnalyticsReportCredentials getAuthorisedApplicationInfoFromIs ( ) throws Exception {
AnalyticsReportCredentials reportCredentials = new AnalyticsReportCredentials ( ) ;
String context = org . gcube . dataharvest . utils . Utils . getCurrentContext ( ) ;
try {
List < ServiceEndpoint > list = getAnalyticsReportingConfigurationFromIS ( context ) ;
if ( list . size ( ) > 1 ) {
logger . error ( " Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME
+ " in this scope having Category " + SERVICE_ENDPOINT_CATEGORY ) ;
} else if ( list . size ( ) = = 0 ) {
logger . warn ( " There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME + " and Category "
+ SERVICE_ENDPOINT_CATEGORY + " in this context: " + context ) ;
} else {
for ( ServiceEndpoint res : list ) {
Group < AccessPoint > apGroup = res . profile ( ) . accessPoints ( ) ;
AccessPoint [ ] accessPoints = ( AccessPoint [ ] ) apGroup . toArray ( new AccessPoint [ apGroup . size ( ) ] ) ;
AccessPoint found = accessPoints [ 0 ] ;
2023-07-03 17:06:02 +02:00
reportCredentials . setClientEmail ( found . username ( ) ) ;
String decryptedPrivateKey = StringEncrypter . getEncrypter ( ) . decrypt ( found . password ( ) ) ;
reportCredentials . setPrivateKeyPem ( decryptedPrivateKey . trim ( ) ) ;
2020-05-14 09:40:08 +02:00
for ( Property prop : found . properties ( ) ) {
if ( prop . name ( ) . compareTo ( AP_VIEWS_PROPERTY ) = = 0 ) {
String decryptedValue = StringEncrypter . getEncrypter ( ) . decrypt ( prop . value ( ) ) ;
String [ ] views = decryptedValue . split ( " ; " ) ;
reportCredentials . setViewIds ( Arrays . asList ( views ) ) ;
}
2023-07-03 17:06:02 +02:00
if ( prop . name ( ) . compareTo ( AP_CLIENT_ID ) = = 0 ) {
2020-05-14 09:40:08 +02:00
String decryptedValue = StringEncrypter . getEncrypter ( ) . decrypt ( prop . value ( ) ) ;
reportCredentials . setClientId ( decryptedValue ) ;
}
2023-07-03 17:06:02 +02:00
if ( prop . name ( ) . compareTo ( AP_PRIVATEKEY_ID_PROPERTY ) = = 0 ) {
2020-05-14 09:40:08 +02:00
String decryptedValue = StringEncrypter . getEncrypter ( ) . decrypt ( prop . value ( ) ) ;
reportCredentials . setPrivateKeyId ( decryptedValue ) ;
}
}
}
}
} catch ( Exception e ) {
e . printStackTrace ( ) ;
return null ;
}
return reportCredentials ;
}
2023-07-03 17:06:02 +02:00
2020-05-14 09:40:08 +02:00
/**
 * Converts a {@link Date} to the calendar date it falls on in the system default time zone.
 *
 * @param date the date to convert
 * @return the corresponding local date in the system default zone
 */
private static LocalDate asLocalDate(Date date) {
    Instant moment = Instant.ofEpochMilli(date.getTime());
    ZoneId systemZone = ZoneId.systemDefault();
    return moment.atZone(systemZone).toLocalDate();
}
2023-07-03 17:06:02 +02:00
private static Builder getDateRangeBuilderForAnalytics ( Date start , Date end ) {
2020-05-14 09:40:08 +02:00
DateTimeFormatter formatter = DateTimeFormatter . ofPattern ( " yyyy-MM-dd " ) ; //required by Analytics
String startDate = asLocalDate ( start ) . format ( formatter ) ;
String endDate = asLocalDate ( end ) . format ( formatter ) ;
2023-07-03 17:06:02 +02:00
Builder dateRangeBuilder = DateRange . newBuilder ( ) . setStartDate ( startDate ) . setEndDate ( endDate ) ;
return dateRangeBuilder ;
2020-05-14 09:40:08 +02:00
}
}