package org.gcube.dataharvest.harvester;

import static org.gcube.resources.discovery.icclient.ICFactory.clientFor;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;

import java.io.IOException;
import java.io.StringReader;
import java.security.GeneralSecurityException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.gcube.accounting.accounting.summary.access.model.ScopeDescriptor;
import org.gcube.accounting.accounting.summary.access.model.update.AccountingRecord;
import org.gcube.common.authorization.client.exceptions.ObjectNotFound;
import org.gcube.common.encryption.encrypter.StringEncrypter;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
import org.gcube.common.resources.gcore.utils.Group;
import org.gcube.common.resources.gcore.utils.XPathHelper;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.common.scope.impl.ScopeBean;
import org.gcube.dataharvest.datamodel.AnalyticsReportCredentials;
import org.gcube.dataharvest.datamodel.CatalogueAccessesReportRow;
import org.gcube.dataharvest.datamodel.HarvestedDataKey;
import org.gcube.resources.discovery.client.api.DiscoveryClient;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

import com.google.analytics.data.v1beta.BetaAnalyticsDataClient;
import com.google.analytics.data.v1beta.BetaAnalyticsDataSettings;
import com.google.analytics.data.v1beta.DateRange;
import com.google.analytics.data.v1beta.DateRange.Builder;
import com.google.analytics.data.v1beta.Dimension;
import com.google.analytics.data.v1beta.Metric;
import com.google.analytics.data.v1beta.Row;
import com.google.analytics.data.v1beta.RunReportRequest;
import com.google.analytics.data.v1beta.RunReportResponse;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.auth.oauth2.ServiceAccountCredentials;

/**
 * Harvests catalogue page views from the Google Analytics Data API and turns
 * them into {@link AccountingRecord}s, one set of four records (catalogue,
 * dataset list, dataset, resource accesses) per dashboard context.
 *
 * <p>All the remote work (Information System look-ups and Analytics queries)
 * is done once, in the constructor; {@link #getAccountingRecords()} only
 * aggregates the rows collected at construction time.
 */
public class CatalogueAccessesHarvester extends BasicHarvester {

	private static final Logger logger = LoggerFactory.getLogger(CatalogueAccessesHarvester.class);

	private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();

	private static final String MAPPING_RESOURCE_CATEGORY = "BigGAnalyticsMapping";
	private static final String SERVICE_ENDPOINT_CATEGORY = "OnlineService";
	private static final String SERVICE_ENDPOINT_NAME = "GA4AnalyticsDataService";
	private static final String AP_CATALOGUE_PAGEVIEWS_PROPERTY = "catalogue-pageviews";
	private static final String AP_CLIENT_PROPERTY = "client_id";
	private static final String AP_PRIVATEKEY_PROPERTY = "private_key_id";

	private static final String REGEX_CATALOGUE_ACCESSES = "^\\/$";
	private static final String REGEX_CATALOGUE_DATASET_LIST_ACCESSES = "^\\/dataset(\\?([a-zA-Z0-9_.-]*.+))*";
	private static final String REGEX_CATALOGUE_DATASET_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+$";
	private static final String REGEX_CATALOGUE_RESOURCE_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+\\/resource\\/[a-zA-Z0-9_.-]+$";

	// Compiled once: the patterns are applied to every row of every report.
	private static final Pattern PATTERN_CATALOGUE_ACCESSES = Pattern.compile(REGEX_CATALOGUE_ACCESSES);
	private static final Pattern PATTERN_CATALOGUE_DATASET_LIST_ACCESSES = Pattern.compile(REGEX_CATALOGUE_DATASET_LIST_ACCESSES);
	private static final Pattern PATTERN_CATALOGUE_DATASET_ACCESSES = Pattern.compile(REGEX_CATALOGUE_DATASET_ACCESSES);
	private static final Pattern PATTERN_CATALOGUE_RESOURCE_ACCESSES = Pattern.compile(REGEX_CATALOGUE_RESOURCE_ACCESSES);

	/** Date format used for the start/end dates of the Analytics date range. */
	private static final DateTimeFormatter ANALYTICS_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");

	/** Rows requested per Analytics call; further rows are fetched with an offset. */
	private static final long REPORT_PAGE_SIZE = 100_000L;

	/** Keys for which an accounting record is produced, in emission order. */
	private static final List<HarvestedDataKey> REPORTED_KEYS = Collections.unmodifiableList(Arrays.asList(
			HarvestedDataKey.CATALOGUE_ACCESSES,
			HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES,
			HarvestedDataKey.CATALOGUE_DATASET_ACCESSES,
			HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES));

	/** Report rows collected at construction time, keyed by dashboard context. */
	private final Map<String, List<CatalogueAccessesReportRow>> catalogueAccesses;

	/**
	 * Creates the harvester and immediately collects the catalogue accesses
	 * for the given time range.
	 *
	 * @param start start of the harvesting interval
	 * @param end end of the harvesting interval
	 * @throws Exception if the credentials cannot be read from the Information
	 *         System or the Analytics Data API cannot be queried
	 */
	public CatalogueAccessesHarvester(Date start, Date end) throws Exception {
		super(start, end);
		catalogueAccesses = getAllAccesses(start, end);
	}

	/**
	 * Sums the collected page views per dashboard context and access kind.
	 *
	 * @return four accounting records (one per entry of {@code REPORTED_KEYS})
	 *         for every dashboard context that could be resolved
	 * @throws Exception if a dashboard context is not a well-formed scope or a
	 *         record cannot be built
	 */
	@Override
	public List<AccountingRecord> getAccountingRecords() throws Exception {
		List<AccountingRecord> accountingRecords = new ArrayList<>();
		for (Map.Entry<String, List<CatalogueAccessesReportRow>> entry : catalogueAccesses.entrySet()) {
			String dashboardContext = entry.getKey();
			logger.debug("Catalogue accesses for {} ", dashboardContext);

			// long accumulators: the measure of an accounting record is a long,
			// so there is no reason to risk an int overflow while summing.
			Map<HarvestedDataKey, Long> totals = new EnumMap<>(HarvestedDataKey.class);
			for (HarvestedDataKey key : REPORTED_KEYS) {
				totals.put(key, 0L);
			}
			for (CatalogueAccessesReportRow row : entry.getValue()) {
				Long current = totals.get(row.getKey());
				if (current != null) { // rows with any other key are ignored
					totals.put(row.getKey(), current + row.getVisitNumber());
				}
			}

			try {
				ScopeBean scopeBean = new ScopeBean(dashboardContext);
				ScopeDescriptor scopeDescriptor = new ScopeDescriptor();
				scopeDescriptor.setId(dashboardContext);
				scopeDescriptor.setName(scopeBean.name());

				// Build the whole set first so that a failure on one record
				// never leaves a partial set for this context in the result.
				List<AccountingRecord> contextRecords = new ArrayList<>(REPORTED_KEYS.size());
				for (HarvestedDataKey key : REPORTED_KEYS) {
					long total = totals.get(key);
					AccountingRecord record = new AccountingRecord(scopeDescriptor, instant, getDimension(key), total);
					logger.debug("{} : {}", record.getDimension().getId(), record.getMeasure());
					contextRecords.add(record);
				}
				accountingRecords.addAll(contextRecords);
			} catch (NullPointerException e) {
				// Best-effort fallback kept from the original implementation: a
				// context that cannot be turned into records is reported and skipped.
				logger.warn("I found no correspondance in the Genereric Resource for a PropertyId you should check this, type: BigGAnalyticsMapping name: AccountingDashboardMapping", e);
			}
		}
		logger.debug("Returning {} accountingRecords ", accountingRecords.size());
		return accountingRecords;
	}

	/**
	 * Reads the Analytics credentials from the Information System, queries the
	 * Analytics Data API for every configured property and parses the answers.
	 *
	 * @param start start of the harvesting interval
	 * @param end end of the harvesting interval
	 * @return the valid report rows, keyed by dashboard context; properties
	 *         without a dashboard context mapping are skipped
	 * @throws Exception if the credentials or the reports cannot be obtained
	 */
	private static Map<String, List<CatalogueAccessesReportRow>> getAllAccesses(Date start, Date end) throws Exception {
		Builder dateRangeBuilder = getDateRangeBuilderForAnalytics(start, end);
		logger.debug("Getting Catalogue accesses in this time range {}", dateRangeBuilder.toString());

		AnalyticsReportCredentials credentialsFromD4S = getAuthorisedApplicationInfoFromIs();
		logger.debug("Getting credentials credentialsFromD4S");
		BetaAnalyticsDataSettings serviceSettings = initializeAnalyticsReporting(credentialsFromD4S);
		logger.debug("initializeAnalyticsReporting service settings");

		Map<String, List<RunReportResponse>> responses =
				getReportResponses(serviceSettings, credentialsFromD4S.getViewIds(), dateRangeBuilder);

		Map<String, List<CatalogueAccessesReportRow>> toReturn = new HashMap<>();
		for (Map.Entry<String, List<RunReportResponse>> entry : responses.entrySet()) {
			String view = entry.getKey();
			String dashboardContext = getAccountingDashboardContextGivenGAViewID(view);
			if (dashboardContext == null) {
				// Without a mapping no record can be produced; storing the rows
				// under a null key would also make unmapped views overwrite each other.
				logger.warn("No DashboardContext mapped to GA viewID {} (SecondaryType {}): skipping its report",
						view, MAPPING_RESOURCE_CATEGORY);
				continue;
			}
			logger.info("\n\n**************** Parsing responses for this catalogue view, which corresponds to Dashboard Context: {}", dashboardContext);
			List<CatalogueAccessesReportRow> viewReport = parseResponse(view, entry.getValue(), dashboardContext);
			logger.trace("Got {} entries from view id={}", viewReport.size(), view);
			toReturn.put(dashboardContext, viewReport);
		}
		return toReturn;
	}

	/**
	 * Builds the Google Analytics Data API settings authorised with the given
	 * service account credentials.
	 *
	 * @param cred the service account credentials
	 * @return the settings to create an Analytics Data API client with
	 * @throws IOException if the private key cannot be read or the settings cannot be built
	 */
	private static BetaAnalyticsDataSettings initializeAnalyticsReporting(AnalyticsReportCredentials cred) throws IOException {
		ServiceAccountCredentials serviceAccount = ServiceAccountCredentials.fromPkcs8(
				cred.getClientId(), cred.getClientEmail(), cred.getPrivateKeyPem(), cred.getPrivateKeyId(), null);
		return BetaAnalyticsDataSettings.newBuilder()
				.setCredentialsProvider(FixedCredentialsProvider.create(serviceAccount))
				.build();
	}

	/**
	 * Queries the Analytics Data API for the page views ({@code screenPageViews}
	 * by {@code pagePath}) of every given property.
	 *
	 * <p>The report is fetched page by page ({@code limit}/{@code offset}) until
	 * the row count announced by the API has been read, so large catalogues
	 * are not truncated to the rows returned by a single request.
	 *
	 * @param betaAnalyticsDataSettings the authorised client settings
	 * @param viewIDs the Analytics property ids to query
	 * @param dateRangeBuilder the date range of the report
	 * @return the response pages, keyed by property id
	 * @throws IOException if the client cannot be created
	 */
	private static Map<String, List<RunReportResponse>> getReportResponses(BetaAnalyticsDataSettings betaAnalyticsDataSettings,
			List<String> viewIDs, Builder dateRangeBuilder) throws IOException {
		Map<String, List<RunReportResponse>> reports = new HashMap<>();
		try (BetaAnalyticsDataClient analyticsData = BetaAnalyticsDataClient.create(betaAnalyticsDataSettings)) {
			for (String propertyId : viewIDs) {
				logger.debug("Getting data from Analytics Data API for propertyId: {}", propertyId);
				RunReportRequest.Builder requestBuilder = RunReportRequest.newBuilder()
						.setProperty("properties/" + propertyId)
						.addDimensions(Dimension.newBuilder().setName("pagePath"))
						.addMetrics(Metric.newBuilder().setName("screenPageViews"))
						.addDateRanges(dateRangeBuilder)
						.setLimit(REPORT_PAGE_SIZE);

				List<RunReportResponse> pages = new ArrayList<>();
				long offset = 0;
				RunReportResponse page;
				do {
					page = analyticsData.runReport(requestBuilder.setOffset(offset).build());
					pages.add(page);
					// Advance by the rows actually received, whatever cap the server applied.
					offset += page.getRowsCount();
				} while (page.getRowsCount() > 0 && offset < page.getRowCount());
				reports.put(propertyId, pages);
			}
		}
		return reports;
	}

	/**
	 * Converts the rows of the given responses into report rows, keeping only
	 * the page paths that match one of the catalogue access patterns.
	 *
	 * @param viewId the Analytics property id the responses belong to (logging only)
	 * @param responses the response pages to parse
	 * @param dashboardContext the dashboard context to set on every row
	 * @return the rows whose page path was recognised
	 * @throws NumberFormatException if a metric value is not an integer
	 */
	private static List<CatalogueAccessesReportRow> parseResponse(String viewId, List<RunReportResponse> responses, String dashboardContext) {
		logger.debug("parsing Response for {}", viewId);
		List<CatalogueAccessesReportRow> toReturn = new ArrayList<>();
		for (RunReportResponse response : responses) {
			for (Row row : response.getRowsList()) {
				String pagePath = row.getDimensionValues(0).getValue();
				int visits = Integer.parseInt(row.getMetricValues(0).getValue());
				logger.trace("parsing pagepath {}: value: {}", pagePath, visits);

				HarvestedDataKey key = classify(pagePath);
				if (key == null) {
					continue; // not a catalogue page we account for
				}
				CatalogueAccessesReportRow reportRow = new CatalogueAccessesReportRow();
				reportRow.setKey(key);
				reportRow.setDashboardContext(dashboardContext);
				reportRow.setPagePath(pagePath);
				reportRow.setVisitNumber(visits);
				toReturn.add(reportRow);
			}
		}
		return toReturn;
	}

	/**
	 * Maps a page path to the kind of catalogue access it represents.
	 * The most specific pattern is tried first.
	 *
	 * @return the matching key, or {@code null} if the path matches no pattern
	 */
	private static HarvestedDataKey classify(String pagePath) {
		if (PATTERN_CATALOGUE_RESOURCE_ACCESSES.matcher(pagePath).matches()) {
			return HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES;
		}
		if (PATTERN_CATALOGUE_DATASET_ACCESSES.matcher(pagePath).matches()) {
			return HarvestedDataKey.CATALOGUE_DATASET_ACCESSES;
		}
		if (PATTERN_CATALOGUE_DATASET_LIST_ACCESSES.matcher(pagePath).matches()) {
			return HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES;
		}
		if (PATTERN_CATALOGUE_ACCESSES.matcher(pagePath).matches()) {
			return HarvestedDataKey.CATALOGUE_ACCESSES;
		}
		return null;
	}

	/**
	 * Looks up, in the given scope, the Service Endpoints holding the Analytics
	 * configuration. The scope set on the {@link ScopeProvider} before the call
	 * is restored afterwards, also when the query fails.
	 *
	 * @param infrastructureScope the scope to query
	 * @return the matching Service Endpoints
	 * @throws Exception if the query fails
	 */
	private static List<ServiceEndpoint> getAnalyticsReportingConfigurationFromIS(String infrastructureScope) throws Exception {
		String currScope = ScopeProvider.instance.get();
		ScopeProvider.instance.set(infrastructureScope);
		try {
			SimpleQuery query = queryFor(ServiceEndpoint.class);
			query.addCondition("$resource/Profile/Category/text() eq '" + SERVICE_ENDPOINT_CATEGORY + "'");
			query.addCondition("$resource/Profile/Name/text() eq '" + SERVICE_ENDPOINT_NAME + "'");
			DiscoveryClient<ServiceEndpoint> client = clientFor(ServiceEndpoint.class);
			return client.submit(query);
		} finally {
			ScopeProvider.instance.set(currScope);
		}
	}

	/**
	 * Looks up in the IS the dashboard context mapped to a given Google
	 * Analytics viewId. The scope set on the {@link ScopeProvider} before the
	 * call is restored afterwards, also when the look-up fails.
	 *
	 * @param viewID the Google Analytics view (property) id
	 * @return the dashboard context, or {@code null} if no correspondence was
	 *         found (no resource, more than one resource, or no matching entry)
	 * @throws Exception if the query or the parsing of the resource body fails
	 * @throws ObjectNotFound declared for callers; not thrown directly here
	 */
	private static String getAccountingDashboardContextGivenGAViewID(String viewID) throws ObjectNotFound, Exception {
		String toReturn = null;
		String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
		String currScope = ScopeProvider.instance.get();
		ScopeProvider.instance.set(context);
		try {
			SimpleQuery query = queryFor(GenericResource.class);
			query.addCondition("$resource/Profile/SecondaryType/text() eq '" + MAPPING_RESOURCE_CATEGORY + "'");
			query.addCondition("$resource/Profile/Body/Property/viewID/text() eq '" + viewID + "'");
			DiscoveryClient<GenericResource> client = clientFor(GenericResource.class);
			List<GenericResource> list = client.submit(query);
			if (list.size() > 1) {
				logger.error("Too many Generic Resources having GA viewID {} in this scope having SecondaryType {}",
						viewID, MAPPING_RESOURCE_CATEGORY);
			} else if (list.isEmpty()) {
				logger.warn("There is no Generic Resources having GA viewID {} and SecondaryType {} in this context: {}",
						viewID, MAPPING_RESOURCE_CATEGORY, context);
			} else {
				GenericResource found = list.get(0);
				// The loop below expects possibly several Property entries, so the
				// body is wrapped in a synthetic root to be parseable as one XML
				// document; the XPath expressions do not depend on the root name.
				String elem = new StringBuilder("<body>").append(found.profile().bodyAsString()).append("</body>").toString();
				// NOTE(review): the parser runs with default settings; if resource
				// bodies can come from untrusted publishers, disable DTDs/external entities.
				DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
				Node node = docBuilder.parse(new InputSource(new StringReader(elem))).getDocumentElement();
				XPathHelper helper = new XPathHelper(node);
				List<String> viewIds = helper.evaluate("//Property/viewID/text()");
				if (viewIds != null && !viewIds.isEmpty()) {
					// Evaluated once; the i-th context is taken to belong to the i-th viewID.
					List<String> dashboardContexts = helper.evaluate("//Property/DashboardContext/text()");
					for (int i = 0; i < viewIds.size(); i++) {
						if (viewIds.get(i).trim().equals(viewID)) {
							if (dashboardContexts != null && i < dashboardContexts.size()) {
								toReturn = dashboardContexts.get(i);
							} else {
								logger.warn("The Property having GA viewID {} has no DashboardContext", viewID);
							}
							break;
						}
					}
				}
				logger.debug("Found DashboardContext for viewId {} : {} ", viewID, toReturn);
			}
		} finally {
			ScopeProvider.instance.set(currScope);
		}
		return toReturn;
	}

	/**
	 * Reads the Analytics service account credentials and the list of property
	 * ids from the single Service Endpoint expected in the current context.
	 *
	 * @return the credentials, never {@code null}
	 * @throws IllegalStateException if there is not exactly one Service
	 *         Endpoint, it has no access point, or it declares no view ids
	 * @throws Exception if the look-up or the decryption fails
	 */
	private static AnalyticsReportCredentials getAuthorisedApplicationInfoFromIs() throws Exception {
		String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
		List<ServiceEndpoint> list = getAnalyticsReportingConfigurationFromIS(context);

		// Fail fast with a descriptive error: going on with missing or empty
		// credentials only ends in an obscure NullPointerException later.
		if (list.size() > 1) {
			String message = "Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME
					+ " in this scope having Category " + SERVICE_ENDPOINT_CATEGORY;
			logger.error(message);
			throw new IllegalStateException(message);
		}
		if (list.isEmpty()) {
			String message = "There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME
					+ " and Category " + SERVICE_ENDPOINT_CATEGORY + " in this context: " + context;
			logger.warn(message);
			throw new IllegalStateException(message);
		}

		Group<AccessPoint> apGroup = list.get(0).profile().accessPoints();
		AccessPoint[] accessPoints = apGroup.toArray(new AccessPoint[apGroup.size()]);
		if (accessPoints.length == 0) {
			throw new IllegalStateException("The Service Endpoint " + SERVICE_ENDPOINT_NAME
					+ " has no Access Point in this context: " + context);
		}
		AccessPoint found = accessPoints[0];

		AnalyticsReportCredentials reportCredentials = new AnalyticsReportCredentials();
		reportCredentials.setClientEmail(found.username());
		String decryptedPrivateKey = StringEncrypter.getEncrypter().decrypt(found.password());
		reportCredentials.setPrivateKeyPem(decryptedPrivateKey.trim());

		for (Property prop : found.properties()) {
			String name = prop.name();
			if (AP_CATALOGUE_PAGEVIEWS_PROPERTY.equals(name)) {
				String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value());
				reportCredentials.setViewIds(Arrays.asList(decryptedValue.split(";")));
			} else if (AP_CLIENT_PROPERTY.equals(name)) {
				reportCredentials.setClientId(StringEncrypter.getEncrypter().decrypt(prop.value()));
			} else if (AP_PRIVATEKEY_PROPERTY.equals(name)) {
				reportCredentials.setPrivateKeyId(StringEncrypter.getEncrypter().decrypt(prop.value()));
			}
		}

		if (reportCredentials.getViewIds() == null) {
			throw new IllegalStateException("The Service Endpoint " + SERVICE_ENDPOINT_NAME
					+ " has no property " + AP_CATALOGUE_PAGEVIEWS_PROPERTY + " in this context: " + context);
		}
		return reportCredentials;
	}

	/** Converts a {@link Date} to the calendar day it falls on in the system default time zone. */
	private static LocalDate asLocalDate(Date date) {
		return Instant.ofEpochMilli(date.getTime()).atZone(ZoneId.systemDefault()).toLocalDate();
	}

	/**
	 * Builds the Analytics date range going from the day of {@code start} to
	 * the day of {@code end}, both formatted as {@code yyyy-MM-dd}.
	 */
	private static Builder getDateRangeBuilderForAnalytics(Date start, Date end) {
		String startDate = asLocalDate(start).format(ANALYTICS_DATE_FORMAT);
		String endDate = asLocalDate(end).format(ANALYTICS_DATE_FORMAT);
		return DateRange.newBuilder().setStartDate(startDate).setEndDate(endDate);
	}
}