accounting-dashboard-harves.../src/main/java/org/gcube/dataharvest/harvester/CatalogueAccessesHarvester....

396 lines
17 KiB
Java

package org.gcube.dataharvest.harvester;
import static org.gcube.resources.discovery.icclient.ICFactory.clientFor;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;
import java.io.IOException;
import java.io.StringReader;
import java.security.GeneralSecurityException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.gcube.accounting.accounting.summary.access.model.ScopeDescriptor;
import org.gcube.accounting.accounting.summary.access.model.update.AccountingRecord;
import org.gcube.common.authorization.client.exceptions.ObjectNotFound;
import org.gcube.common.encryption.encrypter.StringEncrypter;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
import org.gcube.common.resources.gcore.utils.Group;
import org.gcube.common.resources.gcore.utils.XPathHelper;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.common.scope.impl.ScopeBean;
import org.gcube.dataharvest.datamodel.AnalyticsReportCredentials;
import org.gcube.dataharvest.datamodel.CatalogueAccessesReportRow;
import org.gcube.dataharvest.datamodel.HarvestedDataKey;
import org.gcube.resources.discovery.client.api.DiscoveryClient;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import com.google.analytics.data.v1beta.BetaAnalyticsDataClient;
import com.google.analytics.data.v1beta.BetaAnalyticsDataSettings;
import com.google.analytics.data.v1beta.DateRange;
import com.google.analytics.data.v1beta.DateRange.Builder;
import com.google.analytics.data.v1beta.Dimension;
import com.google.analytics.data.v1beta.Metric;
import com.google.analytics.data.v1beta.Row;
import com.google.analytics.data.v1beta.RunReportRequest;
import com.google.analytics.data.v1beta.RunReportResponse;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.auth.oauth2.ServiceAccountCredentials;
public class CatalogueAccessesHarvester extends BasicHarvester {
/** SLF4J logger shared by every method of this class. */
private static Logger logger = LoggerFactory.getLogger(CatalogueAccessesHarvester.class);
// NOTE(review): JSON_FACTORY is not referenced by any method visible in this class - confirm before removing.
private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();
/** SecondaryType of the Generic Resource queried to map a Google Analytics view id to a dashboard context. */
private static final String MAPPING_RESOURCE_CATEGORY = "BigGAnalyticsMapping";
/** Profile/Category of the Service Endpoint that holds the Analytics credentials. */
private static final String SERVICE_ENDPOINT_CATEGORY = "OnlineService";
/** Profile/Name of the Service Endpoint that holds the Analytics credentials. */
private static final String SERVICE_ENDPOINT_NAME = "GA4AnalyticsDataService";
/** Access point property whose decrypted value is the ';'-separated list of view ids to query. */
private static final String AP_CATALOGUE_PAGEVIEWS_PROPERTY = "catalogue-pageviews";
/** Access point property whose decrypted value is stored as the credentials' client id. */
private static final String AP_CLIENT_PROPERTY = "client_id";
/** Access point property whose decrypted value is stored as the credentials' private key id. */
private static final String AP_PRIVATEKEY_PROPERTY = "private_key_id";
// The four expressions below are applied to the whole pagePath (String.matches) in parseResponse.
/** Matches only the root path "/". */
private static final String REGEX_CATALOGUE_ACCESSES = "^\\/$";
/** Matches "/dataset", optionally followed by a "?..." query string. */
private static final String REGEX_CATALOGUE_DATASET_LIST_ACCESSES = "^\\/dataset(\\?([a-zA-Z0-9_.-]*.+))*";
/** Matches "/dataset/<id>", where <id> is made of letters, digits, '_', '.' and '-'. */
private static final String REGEX_CATALOGUE_DATASET_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+$";
/** Matches "/dataset/<id>/resource/<id>", with the same <id> alphabet as above. */
private static final String REGEX_CATALOGUE_RESOURCE_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+\\/resource\\/[a-zA-Z0-9_.-]+$";
/** Harvested rows keyed by dashboard context; filled once by the constructor through getAllAccesses. */
private HashMap<String, List<CatalogueAccessesReportRow>> catalogueAccesses;
public CatalogueAccessesHarvester(Date start, Date end) throws Exception {
super(start, end);
catalogueAccesses = getAllAccesses(start, end);
}
/**
 * Turns the harvested catalogue rows into accounting records.
 * <p>
 * For every dashboard context the visit numbers of its rows are summed per key and four
 * records are produced (catalogue, dataset list, dataset and resource accesses).
 * Contexts for which no record can be built (e.g. a view id without a mapping, stored
 * under a {@code null} context) are logged and skipped.
 *
 * @return the accounting records of all the contexts that could be resolved
 * @throws Exception if the records cannot be created
 */
@Override
public List<AccountingRecord> getAccountingRecords() throws Exception {
    final String missingMappingWarning = "I found no correspondance in the Genereric Resource for a PropertyId you should check this, type: BigGAnalyticsMapping name: AccountingDashboardMapping";
    ArrayList<AccountingRecord> accountingRecords = new ArrayList<AccountingRecord>();
    for (Map.Entry<String, List<CatalogueAccessesReportRow>> entry : catalogueAccesses.entrySet()) {
        String dashboardContext = entry.getKey();
        logger.debug("Catalogue accesses for {} ", dashboardContext);
        if (dashboardContext == null) {
            // No mapping was found for the view: check explicitly instead of relying on an NPE.
            logger.warn(missingMappingWarning);
            continue;
        }
        // long accumulators: the measures are stored as long, so sum in that width from the start.
        long catalogueTotalAccesses = 0;
        long catalogueDatasetListAccesses = 0;
        long catalogueDatasetAccesses = 0;
        long catalogueResourceAccesses = 0;
        for (CatalogueAccessesReportRow row : entry.getValue()) {
            switch (row.getKey()) {
                case CATALOGUE_ACCESSES:
                    catalogueTotalAccesses += row.getVisitNumber();
                    break;
                case CATALOGUE_DATASET_LIST_ACCESSES:
                    catalogueDatasetListAccesses += row.getVisitNumber();
                    break;
                case CATALOGUE_DATASET_ACCESSES:
                    catalogueDatasetAccesses += row.getVisitNumber();
                    break;
                case CATALOGUE_RESOURCE_ACCESSES:
                    catalogueResourceAccesses += row.getVisitNumber();
                    break;
                default:
                    break;
            }
        }
        ScopeDescriptor scopeDescriptor = new ScopeDescriptor();
        try {
            ScopeBean scopeBean = new ScopeBean(dashboardContext);
            scopeDescriptor.setId(dashboardContext);
            scopeDescriptor.setName(scopeBean.name());
            AccountingRecord ar1 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_ACCESSES), catalogueTotalAccesses);
            AccountingRecord ar2 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES), catalogueDatasetListAccesses);
            AccountingRecord ar3 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_DATASET_ACCESSES), catalogueDatasetAccesses);
            AccountingRecord ar4 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES), catalogueResourceAccesses);
            logger.debug("{} : {}", ar1.getDimension().getId(), ar1.getMeasure());
            accountingRecords.add(ar1);
            logger.debug("{} : {}", ar2.getDimension().getId(), ar2.getMeasure());
            accountingRecords.add(ar2);
            logger.debug("{} : {}", ar3.getDimension().getId(), ar3.getMeasure());
            accountingRecords.add(ar3);
            logger.debug("{} : {}", ar4.getDimension().getId(), ar4.getMeasure());
            accountingRecords.add(ar4);
        } catch (NullPointerException e) {
            // Kept as a best-effort skip of the context, but reported through the logger with its stack trace.
            logger.warn(missingMappingWarning, e);
        }
    }
    logger.debug("Returning {} accountingRecords ", accountingRecords.size());
    return accountingRecords;
}
/**
 * Collects the catalogue accesses of the given period for every configured view id.
 * <p>
 * The credentials and the view ids are read from the IS, one report per view id is
 * requested to the Analytics Data API, and the rows of each report are parsed and
 * stored under the dashboard context mapped to that view id. View ids without a
 * mapped dashboard context are logged and skipped.
 *
 * @param start beginning of the period
 * @param end end of the period
 * @return the parsed rows keyed by dashboard context
 * @throws IllegalStateException if no credentials could be read from the IS
 * @throws Exception if the IS lookup or the Analytics requests fail
 */
private static HashMap<String, List<CatalogueAccessesReportRow>> getAllAccesses(Date start, Date end) throws Exception {
    Builder dateRangeBuilder = getDateRangeBuilderForAnalytics(start, end);
    logger.debug("Getting Catalogue accesses in this time range {}", dateRangeBuilder.toString());
    AnalyticsReportCredentials credentialsFromD4S = getAuthorisedApplicationInfoFromIs();
    if (credentialsFromD4S == null) {
        // Fail with an explicit message instead of a NullPointerException a few lines below.
        throw new IllegalStateException("Unable to read the Analytics credentials from the Service Endpoint having name "
                + SERVICE_ENDPOINT_NAME + " and Category " + SERVICE_ENDPOINT_CATEGORY);
    }
    logger.debug("Getting credentials credentialsFromD4S");
    BetaAnalyticsDataSettings serviceSettings = initializeAnalyticsReporting(credentialsFromD4S);
    logger.debug("initializeAnalyticsReporting service settings");
    HashMap<String,List<RunReportResponse>> responses = getReportResponses(serviceSettings, credentialsFromD4S.getViewIds(), dateRangeBuilder);
    HashMap<String, List<CatalogueAccessesReportRow>> toReturn = new HashMap<>();
    for (String view : responses.keySet()) {
        String dashboardContext = getAccountingDashboardContextGivenGAViewID(view);
        if (dashboardContext == null) {
            // Without a context the rows cannot be attributed to anything: skip them rather than storing a null key.
            logger.warn("No Dashboard Context found for view id={}, its responses are skipped", view);
            continue;
        }
        logger.info("\n\n**************** Parsing responses for this catalogue view, which corresponds to Dashboard Context: " + dashboardContext);
        List<CatalogueAccessesReportRow> viewReport = parseResponse(view, responses.get(view), dashboardContext);
        logger.trace("Got {} entries from view id={}", viewReport.size(), view);
        List<CatalogueAccessesReportRow> replaced = toReturn.put(dashboardContext, viewReport);
        if (replaced != null) {
            // Same replacement as before, but no longer silent.
            logger.warn("More than one view id is mapped to Dashboard Context {}: the {} entries of view id={} replaced the {} previously collected",
                    dashboardContext, viewReport.size(), view, replaced.size());
        }
    }
    return toReturn;
}
/**
 * Builds the settings used to create the Google Analytics Data API client.
 * <p>
 * The settings carry a fixed credentials provider backed by a service account
 * created from the client id, client email, PKCS#8 private key and private key id
 * found in the given credentials; no scopes are passed.
 *
 * @param cred the credentials read from the IS
 * @return the settings to hand over to the Analytics Data API client
 * @throws IOException if the service account credentials or the settings cannot be built
 */
private static BetaAnalyticsDataSettings initializeAnalyticsReporting(AnalyticsReportCredentials cred) throws IOException {
    BetaAnalyticsDataSettings.Builder settingsBuilder = BetaAnalyticsDataSettings.newBuilder();
    ServiceAccountCredentials serviceAccount = ServiceAccountCredentials.fromPkcs8(
            cred.getClientId(),
            cred.getClientEmail(),
            cred.getPrivateKeyPem(),
            cred.getPrivateKeyId(),
            null);
    FixedCredentialsProvider credentialsProvider = FixedCredentialsProvider.create(serviceAccount);
    return settingsBuilder.setCredentialsProvider(credentialsProvider).build();
}
/**
 * Queries the Analytics Data API for the page views ("screenPageViews" by "pagePath")
 * of every given property id in the given date range.
 * <p>
 * Each report is fetched page by page through the request's limit/offset, so that
 * reports with more rows than a single response can carry are not truncated. At least
 * one response is always stored for each property id.
 *
 * @param betaAnalyticsDataSettings the settings used to create the Analytics Data API client
 * @param viewIDs the property ids to query
 * @param dateRangeBuilder the date range of the report
 * @return for each property id, the responses (one per page) returned by the API
 * @throws IOException if the client cannot be created
 */
private static HashMap<String,List<RunReportResponse>> getReportResponses(BetaAnalyticsDataSettings betaAnalyticsDataSettings,
        List<String> viewIDs, Builder dateRangeBuilder) throws IOException {
    // Rows requested per call; made explicit so that the paging below does not depend on the API default.
    final long pageSize = 10000L;
    HashMap<String,List<RunReportResponse>> reports = new HashMap<>();
    try (BetaAnalyticsDataClient analyticsData = BetaAnalyticsDataClient.create(betaAnalyticsDataSettings)) {
        for (String propertyId : viewIDs) {
            List<RunReportResponse> gReportResponses = new ArrayList<>();
            logger.debug("Getting data from Analytics Data API for propertyId: {}", propertyId);
            long offset = 0L;
            RunReportResponse response;
            do {
                RunReportRequest request =
                        RunReportRequest.newBuilder()
                        .setProperty("properties/" + propertyId)
                        .addDimensions(Dimension.newBuilder().setName("pagePath"))
                        .addMetrics(Metric.newBuilder().setName("screenPageViews"))
                        .addDateRanges(dateRangeBuilder)
                        .setLimit(pageSize)
                        .setOffset(offset)
                        .build();
                response = analyticsData.runReport(request);
                gReportResponses.add(response);
                offset += response.getRowsCount();
                // getRowCount() is the total number of rows of the report, independent of the page returned;
                // an empty page also stops the loop so that it can never spin.
            } while (response.getRowsCount() > 0 && offset < response.getRowCount());
            reports.put(propertyId, gReportResponses);
        }
    }
    return reports;
}
/**
 * Converts the rows of the Analytics Data API responses of one view into report rows.
 * <p>
 * The first dimension value of a row is read as the page path and the first metric
 * value as the number of visits. The page path is classified, in this order, as
 * resource, dataset, dataset list or catalogue access; rows matching none of them
 * are discarded.
 *
 * @param viewId the view id the responses belong to (used for logging only)
 * @param responses the responses to parse
 * @param dashboardContext the dashboard context set on every returned row
 * @return the rows whose page path matched one of the catalogue expressions
 * @throws NumberFormatException if a metric value is not an integer
 */
private static List<CatalogueAccessesReportRow> parseResponse(String viewId, List<RunReportResponse> responses, String dashboardContext) {
    logger.debug("parsing Response for " + viewId);
    // Compiled once per call instead of four times per row (String.matches recompiles on every call).
    final Pattern resourcePattern = Pattern.compile(REGEX_CATALOGUE_RESOURCE_ACCESSES);
    final Pattern datasetPattern = Pattern.compile(REGEX_CATALOGUE_DATASET_ACCESSES);
    final Pattern datasetListPattern = Pattern.compile(REGEX_CATALOGUE_DATASET_LIST_ACCESSES);
    final Pattern cataloguePattern = Pattern.compile(REGEX_CATALOGUE_ACCESSES);
    List<CatalogueAccessesReportRow> toReturn = new ArrayList<>();
    for (RunReportResponse response : responses) {
        for (Row row : response.getRowsList()) {
            String pagePath = row.getDimensionValues(0).getValue();
            int visitNumber = Integer.parseInt(row.getMetricValues(0).getValue());
            logger.trace("parsing pagepath {}: value: {}", pagePath, visitNumber);
            // Order matters: the most specific expression is tried first.
            HarvestedDataKey key;
            if (resourcePattern.matcher(pagePath).matches()) {
                key = HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES;
            } else if (datasetPattern.matcher(pagePath).matches()) {
                key = HarvestedDataKey.CATALOGUE_DATASET_ACCESSES;
            } else if (datasetListPattern.matcher(pagePath).matches()) {
                key = HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES;
            } else if (cataloguePattern.matcher(pagePath).matches()) {
                key = HarvestedDataKey.CATALOGUE_ACCESSES;
            } else {
                // Not a catalogue page: nothing to account.
                continue;
            }
            CatalogueAccessesReportRow reportRow = new CatalogueAccessesReportRow();
            reportRow.setKey(key);
            reportRow.setDashboardContext(dashboardContext);
            reportRow.setPagePath(pagePath);
            reportRow.setVisitNumber(visitNumber);
            toReturn.add(reportRow);
        }
    }
    return toReturn;
}
/**
 * Looks up in the IS, within the given scope, the Service Endpoints having the
 * category and name of the Analytics data service.
 * <p>
 * The scope of the ScopeProvider is switched to the given one for the duration of the
 * query and is always set back to its previous value, also when the query fails.
 *
 * @param infrastructureScope the scope in which the query is submitted
 * @return the Service Endpoints found
 * @throws Exception if the query fails
 */
private static List<ServiceEndpoint> getAnalyticsReportingConfigurationFromIS(String infrastructureScope)
        throws Exception {
    String currScope = ScopeProvider.instance.get();
    ScopeProvider.instance.set(infrastructureScope);
    try {
        SimpleQuery query = queryFor(ServiceEndpoint.class);
        query.addCondition("$resource/Profile/Category/text() eq '" + SERVICE_ENDPOINT_CATEGORY + "'");
        query.addCondition("$resource/Profile/Name/text() eq '" + SERVICE_ENDPOINT_NAME + "'");
        DiscoveryClient<ServiceEndpoint> client = clientFor(ServiceEndpoint.class);
        return client.submit(query);
    } finally {
        // Restore the caller's scope even if the lookup throws.
        ScopeProvider.instance.set(currScope);
    }
}
/**
 * Looks up in the IS the dashboard context mapped to the given Google Analytics view id.
 * <p>
 * The mapping is read from the single Generic Resource of the mapping SecondaryType whose
 * body contains the view id: the position of the matching {@code Property/viewID} is used
 * to pick the {@code Property/DashboardContext} at the same position. The scope of the
 * ScopeProvider is switched to the current context for the lookup and is always set back
 * to its previous value, also when the lookup fails.
 *
 * @param viewID the Google Analytics view id
 * @return the dashboard context, or null if no (or more than one) resource was found or
 *         the resource holds no context for the view id
 * @throws ObjectNotFound declared by the signature; not thrown directly by this method
 * @throws Exception if the IS query or the parsing of the resource body fails
 */
private static String getAccountingDashboardContextGivenGAViewID(String viewID) throws ObjectNotFound, Exception {
    String toReturn = null;
    String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
    String currScope = ScopeProvider.instance.get();
    ScopeProvider.instance.set(context);
    try {
        SimpleQuery query = queryFor(GenericResource.class);
        query.addCondition("$resource/Profile/SecondaryType/text() eq '" + MAPPING_RESOURCE_CATEGORY + "'");
        query.addCondition("$resource/Profile/Body/Property/viewID/text() eq '" + viewID + "'");
        DiscoveryClient<GenericResource> client = clientFor(GenericResource.class);
        List<GenericResource> list = client.submit(query);
        if (list.size() > 1) {
            logger.error("Too many Generic Resources having GA viewID " + viewID
                    + " in this scope having SecondaryType " + MAPPING_RESOURCE_CATEGORY);
        } else if (list.isEmpty()) {
            logger.warn("There is no Generic Resources having GA viewID " + viewID + " and SecondaryType "
                    + MAPPING_RESOURCE_CATEGORY + " in this context: " + context);
        } else {
            GenericResource found = list.get(0);
            // The body is wrapped in a single root element so that it can be parsed as a document.
            String elem = new StringBuilder("<body>").append(found.profile().bodyAsString()).append("</body>").toString();
            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Node node = docBuilder.parse(new InputSource(new StringReader(elem))).getDocumentElement();
            XPathHelper helper = new XPathHelper(node);
            List<String> viewIds = helper.evaluate("//Property/viewID/text()");
            if (viewIds != null) {
                for (int i = 0; i < viewIds.size(); i++) {
                    if (viewIds.get(i).trim().equals(viewID)) {
                        List<String> dashboardContexts = helper.evaluate("//Property/DashboardContext/text()");
                        if (dashboardContexts != null && i < dashboardContexts.size()) {
                            toReturn = dashboardContexts.get(i);
                        } else {
                            // Fewer contexts than view ids: report it instead of failing with an IndexOutOfBoundsException.
                            logger.warn("The Generic Resource having GA viewID {} holds no DashboardContext at position {}", viewID, i);
                        }
                        break;
                    }
                }
            }
            logger.debug("Found DashboardContext for viewId {} : {} ", viewID, toReturn);
        }
    } finally {
        // Restore the caller's scope even if the lookup throws.
        ScopeProvider.instance.set(currScope);
    }
    return toReturn;
}
/**
 * Reads from the IS the credentials used to query the Analytics Data API.
 * <p>
 * The Service Endpoint is looked up in the current context. When exactly one is found,
 * its first access point provides the client email (username) and the encrypted private
 * key (password), while its properties provide the encrypted view ids (';'-separated),
 * client id and private key id. When none or more than one endpoint is found the
 * problem is logged and the returned credentials are left unfilled.
 * <p>
 * Failures are propagated to the caller with their cause instead of being printed and
 * turned into a null result.
 *
 * @return the credentials, unfilled if the endpoint could not be identified
 * @throws IllegalStateException if the endpoint has no access point
 * @throws Exception if the IS lookup or the decryption fails
 */
private static AnalyticsReportCredentials getAuthorisedApplicationInfoFromIs() throws Exception {
    AnalyticsReportCredentials reportCredentials = new AnalyticsReportCredentials();
    String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
    List<ServiceEndpoint> list = getAnalyticsReportingConfigurationFromIS(context);
    if (list.size() > 1) {
        logger.error("Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME
                + " in this scope having Category " + SERVICE_ENDPOINT_CATEGORY);
        return reportCredentials;
    }
    if (list.isEmpty()) {
        logger.warn("There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME + " and Category "
                + SERVICE_ENDPOINT_CATEGORY + " in this context: " + context);
        return reportCredentials;
    }
    ServiceEndpoint res = list.get(0);
    Group<AccessPoint> apGroup = res.profile().accessPoints();
    if (apGroup.size() == 0) {
        throw new IllegalStateException("The Service Endpoint having name " + SERVICE_ENDPOINT_NAME
                + " and Category " + SERVICE_ENDPOINT_CATEGORY + " has no access point in this context: " + context);
    }
    AccessPoint[] accessPoints = (AccessPoint[]) apGroup.toArray(new AccessPoint[apGroup.size()]);
    AccessPoint found = accessPoints[0];
    reportCredentials.setClientEmail(found.username());
    String decryptedPrivateKey = StringEncrypter.getEncrypter().decrypt(found.password());
    reportCredentials.setPrivateKeyPem(decryptedPrivateKey.trim());
    for (Property prop : found.properties()) {
        String name = prop.name();
        if (AP_CATALOGUE_PAGEVIEWS_PROPERTY.equals(name)) {
            String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value());
            reportCredentials.setViewIds(Arrays.asList(decryptedValue.split(";")));
        } else if (AP_CLIENT_PROPERTY.equals(name)) {
            reportCredentials.setClientId(StringEncrypter.getEncrypter().decrypt(prop.value()));
        } else if (AP_PRIVATEKEY_PROPERTY.equals(name)) {
            reportCredentials.setPrivateKeyId(StringEncrypter.getEncrypter().decrypt(prop.value()));
        }
    }
    return reportCredentials;
}
/**
 * Converts a date to the calendar day it falls on in the system default time zone.
 *
 * @param date the date to convert
 * @return the corresponding local date
 */
private static LocalDate asLocalDate(Date date) {
    long epochMillis = date.getTime();
    ZoneId systemZone = ZoneId.systemDefault();
    Instant moment = Instant.ofEpochMilli(epochMillis);
    return moment.atZone(systemZone).toLocalDate();
}
/**
 * Creates the date range builder for an Analytics report covering the given period.
 * Both dates are converted to local dates and formatted as yyyy-MM-dd.
 *
 * @param start beginning of the period
 * @param end end of the period
 * @return a date range builder with start and end date set
 */
private static Builder getDateRangeBuilderForAnalytics(Date start, Date end) {
    // yyyy-MM-dd is the format required by Analytics
    DateTimeFormatter analyticsDateFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    String from = analyticsDateFormat.format(asLocalDate(start));
    String to = analyticsDateFormat.format(asLocalDate(end));
    return DateRange.newBuilder().setStartDate(from).setEndDate(to);
}
}