package org.gcube.portlets.user.topics.server;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.codec.binary.Base64;
import org.gcube.common.portal.PortalContext;
import org.gcube.social_networking.social_networking_client_library.LibClient;
import org.gcube.social_networking.socialnetworking.model.client.GCubeSocialNetworking;
import org.gcube.social_networking.socialnetworking.model.shared.Post;
import org.gcube.portlets.user.topics.client.TopicService;
import org.gcube.portlets.user.topics.shared.HashTagOccAndWeight;
import org.gcube.portlets.user.topics.shared.HashtagsWrapper;
import org.gcube.vomanagement.usermanagement.GroupManager;
import org.gcube.vomanagement.usermanagement.UserManager;
import org.gcube.vomanagement.usermanagement.impl.LiferayGroupManager;
import org.gcube.vomanagement.usermanagement.impl.LiferayUserManager;
import org.gcube.vomanagement.usermanagement.model.GCubeGroup;
import org.gcube.vomanagement.usermanagement.model.GCubeUser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gwt.user.server.rpc.RemoteServiceServlet;

/**
 * TopicServiceImpl: server-side implementation of the TopicService RPC interface for the top-topics portlet.
 *
 * @author Massimiliano Assante, ISTI-CNR
 * @author Costantino Perciante, ISTI-CNR
 */
@SuppressWarnings("serial")
public class TopicServiceImpl extends RemoteServiceServlet implements TopicService {

	private static final Logger logger = LoggerFactory.getLogger(TopicServiceImpl.class);

	private static final int WINDOW_SIZE_IN_MONTHS = 6; // it must not exceed 12
	private static final double FRESHNESS_FACTOR = 0.4;
	private static final double NORMALIZED_SCORE_FACTOR = 0.6;

	private LibClient libClient;
	private GroupManager gm;
	private UserManager um;

	/**
	 * Initialise the social-networking library client and the Liferay user/group managers at startup.
	 */
	public void init() {
		try {
			libClient = new LibClient();
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
		gm = new LiferayGroupManager();
		um = new LiferayUserManager();
	}

	/**
	 * close connection to cassandra at shutdown
	 */
	/*public void destroy() {
		store.closeConnection();
	}*/

	/**
	 * Returns the trending hashtags for the current scope (or for all the user's VREs when the scope is the infrastructure).
	 */
	@Override
	public HashtagsWrapper getHashtags() {

		String userName = PortalContext.getConfiguration().getCurrentUser(getThreadLocalRequest()).getUsername();
		String currentScope = PortalContext.getConfiguration().getCurrentScope(getThreadLocalRequest());
		boolean isInfrastructure = isInfrastructureScope(currentScope);

		// get the reference time
		Calendar referenceTime = Calendar.getInstance();
		int currentMonth = referenceTime.get(Calendar.MONTH); // jan = 0, ..., dec = 11
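		// Example: with the default lenient Calendar, if the current month is March (MONTH = 2) and
		// WINDOW_SIZE_IN_MONTHS is 6, the call below sets MONTH to -4, which is normalised to
		// September of the previous year; only posts newer than that date are considered.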
		referenceTime.set(Calendar.MONTH, currentMonth - WINDOW_SIZE_IN_MONTHS); // the year is automatically decreased if needed

		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
		logger.debug("Reference time for trending topics is " + format.format(referenceTime.getTime()));

		ArrayList<HashTagOccAndWeight> toSort = new ArrayList<HashTagOccAndWeight>();
		ArrayList<String> hashtagsChart = new ArrayList<String>();
		Map<String, List<String>> hashtagsInVres = null;

		try {
			Map<String, Integer> hashtagsAndOccurrences = new HashMap<String, Integer>();

			if (isInfrastructure) {
				logger.debug("****** retrieving hashtags for user VREs and site");
				GCubeUser user = um.getUserByUsername(userName);
				Set<GCubeGroup> vresInPortal = gm.listGroupsByUserAndSite(user.getUserId(), getThreadLocalRequest().getServerName());
				logger.debug("Contexts in this site per user are " + vresInPortal);

				// get the scopes associated with such groups
				List<String> contexts = new ArrayList<String>();
				for (GCubeGroup gCubeGroup : vresInPortal) {
					contexts.add(gm.getInfrastructureScope(gCubeGroup.getGroupId()));
				}

				hashtagsInVres = new HashMap<String, List<String>>();
				for (String context : contexts) {
					Map<String, Integer> hashtagsAndOccurrenceInScope =
							libClient.getVREHashtagsWithOccurrenceFilteredByTimeLib(context, referenceTime.getTimeInMillis());

					// merge the values if needed
					for (String hashtag : hashtagsAndOccurrenceInScope.keySet()) {
						int newValue;
						List<String> vres = new ArrayList<String>();
						if (hashtagsAndOccurrences.containsKey(hashtag)) {
							newValue = hashtagsAndOccurrences.get(hashtag) + hashtagsAndOccurrenceInScope.get(hashtag);
							vres = hashtagsInVres.get(hashtag);
						} else {
							newValue = hashtagsAndOccurrenceInScope.get(hashtag);
						}
						hashtagsAndOccurrences.put(hashtag, newValue);
						vres.add(context);
						hashtagsInVres.put(hashtag, vres);
					}
				}
			} else {
				logger.debug("****** retrieving hashtags for scope " + currentScope);
				hashtagsAndOccurrences = libClient.getVREHashtagsWithOccurrenceFilteredByTimeLib(currentScope, referenceTime.getTimeInMillis());
			}

			// now we need to evaluate the score for each element
			Map<String, Double> weights = evaluateWeights(hashtagsAndOccurrences, WINDOW_SIZE_IN_MONTHS, currentMonth, referenceTime, currentScope, hashtagsInVres);

			// at the end build the list
			for (String hashtag : hashtagsAndOccurrences.keySet()) {
				toSort.add(new HashTagOccAndWeight(hashtag, hashtagsAndOccurrences.get(hashtag), weights.get(hashtag)));
			}

			// sort by weight
			Collections.sort(toSort);

			// build the list of hashtags to display: each entry is an anchor whose href carries the
			// Base64-encoded hashtag as a query parameter
			for (HashTagOccAndWeight wrapper : toSort) {
				logger.debug("Entry is " + wrapper.toString() + " with weight " + wrapper.getWeight());
				String hashtag = wrapper.getHashtag();
				String href = "\"?" + new String(Base64.encodeBase64(GCubeSocialNetworking.HASHTAG_OID.getBytes())) + "="
						+ new String(Base64.encodeBase64(hashtag.getBytes())) + "\"";
				String hashtagLink = "<a href=" + href + ">" + hashtag + "</a>";
				hashtagsChart.add(hashtagLink);
			}

			return new HashtagsWrapper(isInfrastructure, hashtagsChart);
		} catch (Exception e) {
			logger.error("Error while retrieving hashtags ", e);
			return null;
		}
	}
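
	/*
	 * Worked example of the weighting scheme implemented by evaluateWeights() below (illustrative
	 * figures, not real data). Suppose the 6-month window contains two hashtags, #a seen 10 times
	 * (last post this month) and #b seen 5 times (last post 2 months ago):
	 *
	 *   normalized score:  s(#a) = 10/10 = 1.0       s(#b) = 5/10 = 0.5
	 *   freshness:         f(#a) = 1 - 0/6 = 1.0     f(#b) = 1 - 2/6 ≈ 0.67
	 *   weight:            w(#a) = 0.6*1.0 + 0.4*1.0 = 1.0
	 *                      w(#b) = 0.6*0.5 + 0.4*0.67 ≈ 0.57
	 *
	 * so #a receives a higher weight than #b and is ranked accordingly in the returned chart.
	 */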

	/**
	 * Evaluates the weight of each hashtag as w = 0.6 * s + 0.4 * f
	 * (i.e. NORMALIZED_SCORE_FACTOR * s + FRESHNESS_FACTOR * f),
	 * where s is the score, a normalized value given by counter_i / counter_max, and
	 * f is the freshness, evaluated from the most recent post containing that hashtag within the window (that is, the period taken into account).
	 *
	 * @param hashtags the hashtags with their occurrence counters
	 * @param windowSize the window size in months
	 * @param currentMonth the current month (0-based)
	 * @param referenceTime the start of the window
	 * @param vreId the scope to query (used when hashtagsInVres is null)
	 * @param hashtagsInVres the VREs in which each hashtag appears (used when not null)
	 * @return a Map with the weight of each hashtag
	 */
	private Map<String, Double> evaluateWeights(
			Map<String, Integer> hashtags,
			int windowSize,
			int currentMonth,
			Calendar referenceTime,
			String vreId,
			Map<String, List<String>> hashtagsInVres) {

		Map<String, Double> weights = new HashMap<String, Double>();

		// find the max score inside the list (counter)
		int max = 0;
		for (Entry<String, Integer> entry : hashtags.entrySet()) {
			max = max < entry.getValue() ? entry.getValue() : max;
		}

		// normalize
		Map<String, Double> normalized = new HashMap<String, Double>();
		for (Entry<String, Integer> entry : hashtags.entrySet()) {
			normalized.put(entry.getKey(), (double) entry.getValue() / (double) max);
		}

		// create the weight for each entry as:
		// w = NORMALIZED_SCORE_FACTOR * normalized_score + FRESHNESS_FACTOR * freshness
		// where freshness is evaluated as (window_size - months_since_latest_post_for_hashtag_in_window) / window_size
		for (Entry<String, Integer> entry : hashtags.entrySet()) {

			// first part of the weight
			double weight = NORMALIZED_SCORE_FACTOR * normalized.get(entry.getKey());

			List<Post> mostRecentPostForHashtag = null;

			if (hashtagsInVres == null) {
				// simplest case: the hashtag belongs to (or the request comes from) a single VRE
				try {
					mostRecentPostForHashtag = libClient.getVREPostsByHashtagLib(vreId, entry.getKey());
				} catch (Exception e) {
					logger.error("Unable to retrieve the most recent posts for hashtag " + entry.getKey() + " in " + vreId, e);
					// put a weight of zero for this hashtag
					weights.put(entry.getKey(), 0.0);
					continue;
				}
			} else {
				// the hashtag may appear in several VREs: collect the posts from each of them
				List<String> vres = hashtagsInVres.get(entry.getKey());
				mostRecentPostForHashtag = new ArrayList<Post>();
				List<Post> postsForVre;
				for (String vre : vres) {
					try {
						postsForVre = libClient.getVREPostsByHashtagLib(vre, entry.getKey());
					} catch (Exception e) {
						logger.error("Unable to retrieve the most recent posts for hashtag " + entry.getKey() + " in " + vre, e);
						continue;
					}
					// add to the list
					mostRecentPostForHashtag.addAll(postsForVre);
				}
			}

			// check that there is at least one post
			if (mostRecentPostForHashtag == null || mostRecentPostForHashtag.isEmpty()) {
				// put a weight of zero for this hashtag
				weights.put(entry.getKey(), 0.0);
				continue;
			}

			// retrieve the most recent one among these posts
			Collections.sort(mostRecentPostForHashtag, Collections.reverseOrder());

			// get the month of the most recent post for this hashtag
			Calendar mostRecentPostForHashtagTime = Calendar.getInstance();
			mostRecentPostForHashtagTime.setTimeInMillis(mostRecentPostForHashtag.get(0).getTime().getTime());
			int sub = currentMonth - mostRecentPostForHashtagTime.get(Calendar.MONTH);
			int value = sub >= 0 ? sub : 12 - Math.abs(sub);
			double freshness = 1.0 - (double) value / (double) windowSize;
			logger.debug("freshness is " + freshness + " for hashtag " + entry.getKey()
					+ " because the last post has month " + mostRecentPostForHashtagTime.get(Calendar.MONTH));

			// update the weight
			weight += FRESHNESS_FACTOR * freshness;

			// put it into the map
			weights.put(entry.getKey(), weight);
		}

		return weights;
	}

	/**
	 * Indicates whether the scope is the whole infrastructure.
	 * @param currentScope the scope to check
	 * @return true if it is, false otherwise.
	 */
	private boolean isInfrastructureScope(String currentScope) {
		try {
			long groupId = gm.getGroupIdFromInfrastructureScope(currentScope);
			return !gm.isVRE(groupId);
		} catch (Exception e) {
			logger.error("Exception in isInfrastructureScope, returning false", e);
			return false;
		}
	}
}