From 13c74dc4f913196084b07064c3f443cf8b68fa3e Mon Sep 17 00:00:00 2001 From: "luca.frosini" Date: Thu, 14 Jun 2018 14:19:25 +0000 Subject: [PATCH] Added logic to exclude harvesting in some cases git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/accounting/accounting-dashboard-harvester-se-plugin@169212 82a268e6-3cf1-43bd-a215-b396298e98cf --- .../AccountingDataHarvesterPlugin.java | 15 +- .../harvester/VREAccessesHarvester.java | 227 +++++++++--------- .../AccountingDataHarvesterPluginTest.java | 14 +- 3 files changed, 132 insertions(+), 124 deletions(-) diff --git a/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java b/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java index 948504f..42b98d2 100644 --- a/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java +++ b/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java @@ -3,6 +3,7 @@ package org.gcube.dataharvest; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Map; @@ -45,6 +46,7 @@ public class AccountingDataHarvesterPlugin extends Plugin harvested = vreAccessesHarvester.getData(); - data.addAll(harvested); + if(context.startsWith(SO_BIG_DATA_VO) && start.before(DateUtils.getStartCalendar(2018, Calendar.APRIL, 1).getTime())) { + logger.info("Not Harvesting VREs Accesses for {} from {} to {}", context, DateUtils.format(start), DateUtils.format(end)); + } else { + // Collecting Google Analytics Data for VREs Accesses + List harvested = vreAccessesHarvester.getData(); + data.addAll(harvested); + } } catch(Exception e) { logger.error("Error harvesting Social Interactions for {}", context, e); } @@ -205,7 +211,8 @@ public class AccountingDataHarvesterPlugin extends Plugin harvested = vreUsersHarvester.getData(); data.addAll(harvested); diff --git a/src/main/java/org/gcube/dataharvest/harvester/VREAccessesHarvester.java b/src/main/java/org/gcube/dataharvest/harvester/VREAccessesHarvester.java index e6ec6d4..1043542 100644 --- a/src/main/java/org/gcube/dataharvest/harvester/VREAccessesHarvester.java +++ b/src/main/java/org/gcube/dataharvest/harvester/VREAccessesHarvester.java @@ -69,21 +69,21 @@ public class VREAccessesHarvester extends BasicHarvester { private static Logger logger = LoggerFactory.getLogger(VREAccessesHarvester.class); private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance(); - + private static final String SERVICE_ENDPOINT_CATEGORY = "OnlineService"; private static final String SERVICE_ENDPOINT_NAME = "BigGAnalyticsReportService"; private static final String AP_VIEWS_PROPERTY = "views"; private static final String AP_CLIENT_PROPERTY = "clientId"; private static final String AP_PRIVATEKEY_PROPERTY = "privateKeyId"; private static final String APPLICATION_NAME = "Analytics Reporting"; - + private List vreAccesses; - + public VREAccessesHarvester(Date start, Date end) throws Exception { super(start, end); vreAccesses = getAllAccesses(start, end); } - + @Override public List getData() throws Exception { try { @@ -93,22 +93,21 @@ public class VREAccessesHarvester extends BasicHarvester { ScopeBean scopeBean = new ScopeBean(context); String lowerCasedContext = scopeBean.name().toLowerCase(); - String case1=lowerCasedContext+"/"; - String case2=lowerCasedContext+"?"; - for (VREAccessesReportRow row : vreAccesses) { + String case1 = lowerCasedContext + "/"; + String case2 = lowerCasedContext + "?"; + for(VREAccessesReportRow row : vreAccesses) { String pagePath = row.getPagePath(); - if (!pagePath.contains("_redirect=/group")) { - if (pagePath.endsWith(lowerCasedContext)) { - System.out.println("match end->"+pagePath); + if(!pagePath.contains("_redirect=/group")) { + if(pagePath.endsWith(lowerCasedContext)) { + logger.trace("Matched endsWith({}) : {}", lowerCasedContext, pagePath); measure++; - } - else if (pagePath.contains(case1) || pagePath.contains(case2) ) { - System.out.println("match compare->"+pagePath); + } else if(pagePath.contains(case1) || pagePath.contains(case2)) { + logger.trace("Matched contains({}) || contains({}) : {}", case1, case2, pagePath); measure++; } } } - + HarvestedData harvest = new HarvestedData(HarvestedDataKey.ACCESSES, context, measure); logger.debug(harvest.toString()); data.add(harvest); @@ -117,7 +116,7 @@ public class VREAccessesHarvester extends BasicHarvester { throw e; } } - + /** * * @return a list of {@link VREAccessesReportRow} objects containing the pagePath and the visit number e.g. @@ -127,23 +126,23 @@ public class VREAccessesHarvester extends BasicHarvester { */ private static List getAllAccesses(Date start, Date end) throws Exception { DateRange dateRange = getDateRangeForAnalytics(start, end); - System.out.println("getting accesses in this time range: " + dateRange.toPrettyString()); - + logger.trace("Getting accesses in this time range {}", dateRange.toPrettyString()); + AnalyticsReportCredentials credentialsFromD4S = getAuthorisedApplicationInfoFromIs(); AnalyticsReporting service = initializeAnalyticsReporting(credentialsFromD4S); - HashMap responses = getReportResponses(service, credentialsFromD4S.getViewIds(), dateRange); + HashMap responses = getReportResponses(service, credentialsFromD4S.getViewIds(), + dateRange); List totalAccesses = new ArrayList<>(); - - for (String view : responses.keySet()) { + + for(String view : responses.keySet()) { List viewReport = parseResponse(view, responses.get(view)); - System.out.println("got " + viewReport.size() + " entries from view id= "+view); + logger.trace("Got {} entries from view id={}", viewReport.size(), view); totalAccesses.addAll(viewReport); } - System.out.println("Merged in " + totalAccesses.size() + " toal entries from all views"); + logger.trace("Merged in {} total entries from all views", totalAccesses.size()); return totalAccesses; } - - + /** * Initializes an Analytics Reporting API V4 service object. * @@ -151,15 +150,16 @@ public class VREAccessesHarvester extends BasicHarvester { * @throws IOException * @throws GeneralSecurityException */ - private static AnalyticsReporting initializeAnalyticsReporting(AnalyticsReportCredentials cred) throws GeneralSecurityException, IOException { + private static AnalyticsReporting initializeAnalyticsReporting(AnalyticsReportCredentials cred) + throws GeneralSecurityException, IOException { HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport(); GoogleCredential credential = fromD4SServiceEndpoint(cred).createScoped(AnalyticsReportingScopes.all()); - + // Construct the Analytics Reporting service object. return new AnalyticsReporting.Builder(httpTransport, JSON_FACTORY, credential) .setApplicationName(APPLICATION_NAME).build(); - } - + } + /** * Queries the Analytics Reporting API V4. * @@ -167,32 +167,27 @@ public class VREAccessesHarvester extends BasicHarvester { * @return GetReportResponse The Analytics Reporting API V4 response. * @throws IOException */ - private static HashMap getReportResponses(AnalyticsReporting service, List viewIDs, DateRange dateRange) throws IOException { - - HashMap reports = new HashMap<>(); - + private static HashMap getReportResponses(AnalyticsReporting service, + List viewIDs, DateRange dateRange) throws IOException { + + HashMap reports = new HashMap<>(); + // Create the Metrics object. - Metric sessions = new Metric() - .setExpression("ga:pageviews") - .setAlias("pages"); + Metric sessions = new Metric().setExpression("ga:pageviews").setAlias("pages"); Dimension pageTitle = new Dimension().setName("ga:pagePath"); - - for (String view : viewIDs) { - logger.info("Getting data from Google Analytics for viewid: "+ view); + + for(String view : viewIDs) { + logger.info("Getting data from Google Analytics for viewid: " + view); // Create the ReportRequest object. - ReportRequest request = new ReportRequest() - .setViewId(view) - .setDateRanges(Arrays.asList(dateRange)) - .setMetrics(Arrays.asList(sessions)) - .setDimensions(Arrays.asList(pageTitle)); - + ReportRequest request = new ReportRequest().setViewId(view).setDateRanges(Arrays.asList(dateRange)) + .setMetrics(Arrays.asList(sessions)).setDimensions(Arrays.asList(pageTitle)); + ArrayList requests = new ArrayList(); requests.add(request); - + // Create the GetReportsRequest object. - GetReportsRequest getReport = new GetReportsRequest() - .setReportRequests(requests); - + GetReportsRequest getReport = new GetReportsRequest().setReportRequests(requests); + // Call the batchGet method. GetReportsResponse response = service.reports().batchGet(getReport).execute(); reports.put(view, response); @@ -200,48 +195,47 @@ public class VREAccessesHarvester extends BasicHarvester { // Return the response. return reports; } - + /** * Parses and prints the Analytics Reporting API V4 response. * * @param response An Analytics Reporting API V4 response. */ private static List parseResponse(String viewId, GetReportsResponse response) { - System.out.println("\n*** parsing Response for " + viewId); - + logger.trace("\n*** parsing Response for {}", viewId); + List toReturn = new ArrayList<>(); - - for (Report report: response.getReports()) { + + for(Report report : response.getReports()) { ColumnHeader header = report.getColumnHeader(); List dimensionHeaders = header.getDimensions(); List metricHeaders = header.getMetricHeader().getMetricHeaderEntries(); List rows = report.getData().getRows(); - - if (rows == null) { + + if(rows == null) { logger.warn("No data found for " + viewId); - } - else - for (ReportRow row: rows) { + } else + for(ReportRow row : rows) { List dimensions = row.getDimensions(); List metrics = row.getMetrics(); - + VREAccessesReportRow var = new VREAccessesReportRow(); boolean validEntry = false; - for (int i = 0; i < dimensionHeaders.size() && i < dimensions.size(); i++) { - //System.out.println(dimensionHeaders.get(i) + ": " + dimensions.get(i)); + for(int i = 0; i < dimensionHeaders.size() && i < dimensions.size(); i++) { + //logger.trace("{} : {}", dimensionHeaders.get(i), dimensions.get(i)); String pagePath = dimensions.get(i); - if (pagePath.startsWith("/group") || pagePath.startsWith("/web")) { + if(pagePath.startsWith("/group") || pagePath.startsWith("/web")) { var.setPagePath(dimensions.get(i)); validEntry = true; } } - if (validEntry) { - for (int j = 0; j < metrics.size(); j++) { + if(validEntry) { + for(int j = 0; j < metrics.size(); j++) { DateRangeValues values = metrics.get(j); - for (int k = 0; k < values.getValues().size() && k < metricHeaders.size(); k++) { + for(int k = 0; k < values.getValues().size() && k < metricHeaders.size(); k++) { var.setVisitNumber(Integer.parseInt(values.getValues().get(k))); - } - } + } + } toReturn.add(var); logger.debug(var.toString()); } @@ -249,49 +243,46 @@ public class VREAccessesHarvester extends BasicHarvester { } return toReturn; } + private static GoogleCredential fromD4SServiceEndpoint(AnalyticsReportCredentials cred) throws IOException { - + String clientId = cred.getClientId(); String clientEmail = cred.getClientEmail(); String privateKeyPem = cred.getPrivateKeyPem(); String privateKeyId = cred.getPrivateKeyId(); String tokenUri = cred.getTokenUri(); String projectId = cred.getProjectId(); - - if (clientId == null || clientEmail == null || privateKeyPem == null - || privateKeyId == null) { + + if(clientId == null || clientEmail == null || privateKeyPem == null || privateKeyId == null) { throw new IOException("Error reading service account credential from stream, " + "expecting 'client_id', 'client_email', 'private_key' and 'private_key_id'."); } - + PrivateKey privateKey = privateKeyFromPkcs8(privateKeyPem); - + Collection emptyScopes = Collections.emptyList(); - - Builder credentialBuilder = new GoogleCredential.Builder() - .setTransport( Utils.getDefaultTransport()) - .setJsonFactory(Utils.getDefaultJsonFactory()) - .setServiceAccountId(clientEmail) - .setServiceAccountScopes(emptyScopes) - .setServiceAccountPrivateKey(privateKey) + + Builder credentialBuilder = new GoogleCredential.Builder().setTransport(Utils.getDefaultTransport()) + .setJsonFactory(Utils.getDefaultJsonFactory()).setServiceAccountId(clientEmail) + .setServiceAccountScopes(emptyScopes).setServiceAccountPrivateKey(privateKey) .setServiceAccountPrivateKeyId(privateKeyId); - - if (tokenUri != null) { + + if(tokenUri != null) { credentialBuilder.setTokenServerEncodedUrl(tokenUri); } - - if (projectId != null) { + + if(projectId != null) { credentialBuilder.setServiceAccountProjectId(projectId); } - + // Don't do a refresh at this point, as it will always fail before the scopes are added. return credentialBuilder.build(); } - + private static PrivateKey privateKeyFromPkcs8(String privateKeyPem) throws IOException { Reader reader = new StringReader(privateKeyPem); Section section = PemReader.readFirstSectionAndClose(reader, "PRIVATE KEY"); - if (section == null) { + if(section == null) { throw new IOException("Invalid PKCS8 data."); } byte[] bytes = section.getBase64DecodedBytes(); @@ -301,78 +292,82 @@ public class VREAccessesHarvester extends BasicHarvester { KeyFactory keyFactory = SecurityUtils.getRsaKeyFactory(); PrivateKey privateKey = keyFactory.generatePrivate(keySpec); return privateKey; - } catch (NoSuchAlgorithmException exception) { + } catch(NoSuchAlgorithmException exception) { unexpectedException = exception; - } catch (InvalidKeySpecException exception) { + } catch(InvalidKeySpecException exception) { unexpectedException = exception; } throw new IOException("Unexpected exception reading PKCS data", unexpectedException); } - - private static List getAnalyticsReportingConfigurationFromIS(String infrastructureScope) throws Exception { + + private static List getAnalyticsReportingConfigurationFromIS(String infrastructureScope) + throws Exception { String scope = infrastructureScope; - String currScope = ScopeProvider.instance.get(); + String currScope = ScopeProvider.instance.get(); ScopeProvider.instance.set(scope); SimpleQuery query = queryFor(ServiceEndpoint.class); - query.addCondition("$resource/Profile/Category/text() eq '"+ SERVICE_ENDPOINT_CATEGORY +"'"); - query.addCondition("$resource/Profile/Name/text() eq '"+ SERVICE_ENDPOINT_NAME +"'"); + query.addCondition("$resource/Profile/Category/text() eq '" + SERVICE_ENDPOINT_CATEGORY + "'"); + query.addCondition("$resource/Profile/Name/text() eq '" + SERVICE_ENDPOINT_NAME + "'"); DiscoveryClient client = clientFor(ServiceEndpoint.class); List toReturn = client.submit(query); ScopeProvider.instance.set(currScope); return toReturn; - } + } + /** * l * @throws Exception */ private static AnalyticsReportCredentials getAuthorisedApplicationInfoFromIs() throws Exception { AnalyticsReportCredentials reportCredentials = new AnalyticsReportCredentials(); - + String context = org.gcube.dataharvest.utils.Utils.getCurrentContext(); try { List list = getAnalyticsReportingConfigurationFromIS(context); - if (list.size() > 1) { - logger.error("Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME +" in this scope having Category " + SERVICE_ENDPOINT_CATEGORY); - } - else if (list.size() == 0){ - logger.warn("There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME +" and Category " + SERVICE_ENDPOINT_CATEGORY + " in this context: " + context); - } - else { - - for (ServiceEndpoint res : list) { + if(list.size() > 1) { + logger.error("Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME + + " in this scope having Category " + SERVICE_ENDPOINT_CATEGORY); + } else if(list.size() == 0) { + logger.warn("There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME + " and Category " + + SERVICE_ENDPOINT_CATEGORY + " in this context: " + context); + } else { + + for(ServiceEndpoint res : list) { reportCredentials.setTokenUri(res.profile().runtime().hostedOn()); - Group apGroup = res.profile().accessPoints(); + Group apGroup = res.profile().accessPoints(); AccessPoint[] accessPoints = (AccessPoint[]) apGroup.toArray(new AccessPoint[apGroup.size()]); AccessPoint found = accessPoints[0]; reportCredentials.setClientEmail(found.address()); reportCredentials.setProjectId(found.username()); reportCredentials.setPrivateKeyPem(StringEncrypter.getEncrypter().decrypt(found.password())); - for (Property prop : found.properties()) { - if (prop.name().compareTo(AP_VIEWS_PROPERTY) == 0) { + for(Property prop : found.properties()) { + if(prop.name().compareTo(AP_VIEWS_PROPERTY) == 0) { String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value()); String[] views = decryptedValue.split(";"); reportCredentials.setViewIds(Arrays.asList(views)); - } - if (prop.name().compareTo(AP_CLIENT_PROPERTY) == 0) { + } + if(prop.name().compareTo(AP_CLIENT_PROPERTY) == 0) { String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value()); reportCredentials.setClientId(decryptedValue); - } - if (prop.name().compareTo(AP_PRIVATEKEY_PROPERTY) == 0) { + } + if(prop.name().compareTo(AP_PRIVATEKEY_PROPERTY) == 0) { String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value()); reportCredentials.setPrivateKeyId(decryptedValue); - } - } + } + } } } - } catch (Exception e) { + } catch(Exception e) { e.printStackTrace(); return null; } return reportCredentials; } + private static LocalDate asLocalDate(Date date) { return Instant.ofEpochMilli(date.getTime()).atZone(ZoneId.systemDefault()).toLocalDate(); } + private static DateRange getDateRangeForAnalytics(Date start, Date end) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); //required by Analytics String startDate = asLocalDate(start).format(formatter); @@ -382,5 +377,5 @@ public class VREAccessesHarvester extends BasicHarvester { dateRange.setEndDate(endDate); return dateRange; } - + } diff --git a/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java b/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java index 246042e..0890577 100644 --- a/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java +++ b/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java @@ -1,6 +1,7 @@ package org.gcube.dataharvest; import java.util.ArrayList; +import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; @@ -85,10 +86,11 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { @Test public void testScopeBean() throws Exception { + org.gcube.dataharvest.utils.Utils.setContext(ROOT); SortedSet contexts = getContexts(); for(String context : contexts) { ScopeBean scopeBean = new ScopeBean(context); - logger.debug("Name {}, FullName {}", scopeBean.name(), scopeBean.toString()); + logger.debug("FullName {} - Name {}", scopeBean.toString(), scopeBean.name()); } } @@ -147,9 +149,13 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { } try { - // Collecting Google Analytics Data for VREs Accesses - List harvested = vreAccessesHarvester.getData(); - data.addAll(harvested); + if(context.startsWith(AccountingDataHarvesterPlugin.SO_BIG_DATA_VO) && start.before(DateUtils.getStartCalendar(2018, Calendar.APRIL, 1).getTime())) { + logger.info("Not Harvesting VREs Accesses for {} from {} to {}", context, DateUtils.format(start), DateUtils.format(end)); + } else { + // Collecting Google Analytics Data for VREs Accesses + List harvested = vreAccessesHarvester.getData(); + data.addAll(harvested); + } } catch(Exception e) { logger.error("Error harvesting Social Interactions for {}", context, e); }