package org.gcube.portlets.user.topics.server;

import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.codec.binary.Base64;
import org.gcube.application.framework.core.session.ASLSession;
import org.gcube.application.framework.core.session.SessionManager;
import org.gcube.portal.custom.scopemanager.scopehelper.ScopeHelper;
import org.gcube.portal.databook.client.GCubeSocialNetworking;
import org.gcube.portal.databook.server.DBCassandraAstyanaxImpl;
import org.gcube.portal.databook.server.DatabookStore;
import org.gcube.portal.databook.shared.Feed;
import org.gcube.portlets.user.topics.client.TopicService;
import org.gcube.portlets.user.topics.shared.HashTagAndOccurrence;
import org.gcube.portlets.user.topics.shared.HashtagsWrapper;
import org.gcube.vomanagement.usermanagement.GroupManager;
import org.gcube.vomanagement.usermanagement.UserManager;
import org.gcube.vomanagement.usermanagement.impl.LiferayGroupManager;
import org.gcube.vomanagement.usermanagement.impl.LiferayUserManager;
import org.gcube.vomanagement.usermanagement.model.GCubeGroup;
import org.gcube.vomanagement.usermanagement.model.GCubeUser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gwt.user.server.rpc.RemoteServiceServlet;

/**
 * GWT RPC servlet that computes "trending topics" (hashtags) for the current
 * user, either across all of the user's VREs (infrastructure scope) or inside
 * a single VRE scope. Hashtag counters are read from the Cassandra-backed
 * {@link DatabookStore} and ranked by a weight combining occurrence count and
 * freshness of the most recent feed.
 *
 * @author Massimiliano Assante, ISTI-CNR
 */
@SuppressWarnings("serial")
public class TopicServiceImpl extends RemoteServiceServlet implements TopicService {

	private static final Logger _log = LoggerFactory.getLogger(TopicServiceImpl.class);

	public static final String TEST_USER = "test.user";

	/** Size of the trending window, in months; it must not exceed 12. */
	private static final int WINDOW_SIZE_IN_MONTHS = 6;

	/**
	 * The Cassandra store interface.
	 */
	private DatabookStore store;

	/**
	 * Connect to Cassandra at startup.
	 */
	public void init() {
		store = new DBCassandraAstyanaxImpl();
	}

	/**
	 * Close the connection to Cassandra at shutdown.
	 */
	public void destroy() {
		store.closeConnection();
	}

	/**
	 * Returns the current ASLSession; when no portal user is in the HTTP
	 * session (i.e. running outside the portal) it falls back to the
	 * development user on a hard-coded dev scope.
	 *
	 * @return the session
	 */
	private ASLSession getASLSession() {
		String sessionID = this.getThreadLocalRequest().getSession().getId();
		String user = (String) this.getThreadLocalRequest().getSession()
				.getAttribute(ScopeHelper.USERNAME_ATTRIBUTE);
		if (user == null) {
			_log.warn("USER IS NULL setting test.user and Running OUTSIDE PORTAL");
			user = getDevelopmentUser();
			SessionManager.getInstance().getASLSession(sessionID, user).setScope("/gcube/devsec/devVRE");
		}
		return SessionManager.getInstance().getASLSession(sessionID, user);
	}

	/**
	 * When packaging, tests will fail if the user is not set to test.user.
	 *
	 * @return the development username
	 */
	public String getDevelopmentUser() {
		String user = TEST_USER;
		// user = "massimiliano.assante";
		return user;
	}

	/**
	 * Returns the trending hashtags for the current scope, rendered as HTML
	 * anchor snippets ordered by descending weight.
	 *
	 * @return the wrapper with the ranked hashtag links, or {@code null} when
	 *         the session user is the test user or an error occurs
	 */
	@Override
	public HashtagsWrapper getHashtags() {
		ArrayList<String> hashtagsChart = new ArrayList<String>();
		ASLSession session = getASLSession();
		String userName = session.getUsername();
		boolean isInfrastructure = isInfrastructureScope();
		long timestampStart = System.currentTimeMillis();

		// get the reference time: jan = 0, ..., dec = 11; the year is
		// automatically decreased by Calendar if the subtraction goes negative
		Calendar referenceTime = Calendar.getInstance();
		int currentMonth = referenceTime.get(Calendar.MONTH);
		referenceTime.set(Calendar.MONTH, currentMonth - WINDOW_SIZE_IN_MONTHS);

		// SimpleDateFormat is not thread-safe but this instance is method-local
		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
		_log.debug("Reference time for trending topics is " + format.format(referenceTime.getTime()));

		try {
			// in case the portal is restarted and the social home is open it will
			// get test.user (no callback to set session info): return nothing
			if (TEST_USER.equals(userName)) {
				_log.debug("Found " + userName + " returning nothing");
				return null;
			}
			ArrayList<HashTagAndOccurrence> toSort = new ArrayList<HashTagAndOccurrence>();
			if (isInfrastructure) {
				_log.debug("****** retrieving hashtags for user VREs");
				// different VREs could share a hashtag, so counters are merged;
				// the hashtag -> VREs map is needed later to locate the most
				// recent feed containing the hashtag itself
				Map<String, Integer> hashtags = new HashMap<String, Integer>();
				Map<String, List<String>> hashtagsInVres = new HashMap<String, List<String>>();
				GroupManager gm = new LiferayGroupManager();
				UserManager um = new LiferayUserManager();
				GCubeUser user = um.getUserByUsername(userName);
				List<GCubeGroup> groups = gm.listGroupsByUser(user.getUserId());
				for (GCubeGroup group : groups) {
					if (!gm.isVRE(group.getGroupId()))
						continue;
					String vreid = gm.getInfrastructureScope(group.getGroupId()); // get the scope
					Map<String, Integer> map = store.getVREHashtagsWithOccurrenceFilteredByTime(
							vreid, referenceTime.getTimeInMillis());
					// merge the counters and record this VRE for each hashtag
					for (Entry<String, Integer> occurrence : map.entrySet()) {
						String hashtag = occurrence.getKey();
						if (hashtags.containsKey(hashtag)) {
							hashtags.put(hashtag, hashtags.get(hashtag) + occurrence.getValue());
							hashtagsInVres.get(hashtag).add(vreid);
						} else {
							hashtags.put(hashtag, occurrence.getValue());
							List<String> vres = new ArrayList<String>();
							vres.add(vreid);
							hashtagsInVres.put(hashtag, vres);
						}
					}
				}
				// now we need to evaluate the score for each element
				Map<String, Double> weights = evaluateWeight(hashtags, WINDOW_SIZE_IN_MONTHS,
						currentMonth, referenceTime, null, hashtagsInVres);
				// at the end build the list
				for (String hashtag : hashtags.keySet()) {
					toSort.add(new HashTagAndOccurrence(hashtag, hashtags.get(hashtag), weights.get(hashtag)));
				}
			} else { // else must be in a VRE scope
				String scope = session.getScope();
				_log.debug("****** retrieving hashtags for scope " + scope);
				Map<String, Integer> hashtags = store.getVREHashtagsWithOccurrenceFilteredByTime(
						scope, referenceTime.getTimeInMillis());
				// now we need to evaluate the weight for each element
				Map<String, Double> weights = evaluateWeight(hashtags, WINDOW_SIZE_IN_MONTHS,
						currentMonth, referenceTime, scope, null);
				for (String hashtag : hashtags.keySet()) {
					toSort.add(new HashTagAndOccurrence(hashtag, hashtags.get(hashtag), weights.get(hashtag)));
				}
			}
			_log.debug("Number of topics retrieved is " + toSort.size());
			Collections.sort(toSort); // sort for weight
			for (HashTagAndOccurrence wrapper : toSort) {
				_log.debug("Entry is " + wrapper.toString() + " with weight " + wrapper.getWeight());
				String hashtag = wrapper.getHashtag();
				String href = "\"?"
						+ new String(Base64.encodeBase64(
								GCubeSocialNetworking.HASHTAG_OID.getBytes(StandardCharsets.UTF_8)),
								StandardCharsets.UTF_8)
						+ "="
						+ new String(Base64.encodeBase64(hashtag.getBytes(StandardCharsets.UTF_8)),
								StandardCharsets.UTF_8)
						+ "\"";
				// NOTE(review): the anchor markup of this literal was lost in the
				// extracted source; reconstructed as a minimal <a> tag — confirm
				// against the original portlet markup (css class, target, etc.)
				String hashtagLink = "<a href=" + href + ">" + hashtag + "</a>";
				hashtagsChart.add(hashtagLink);
			}
		} catch (Exception e) {
			_log.error("Error while retrieving trending hashtags", e);
			return null;
		}
		long elapsed = System.currentTimeMillis() - timestampStart;
		_log.debug("Overall time to retrieve hastags is " + elapsed + "ms");
		return new HashtagsWrapper(isInfrastructure, hashtagsChart);
	}

	/**
	 * Evaluates the weight for each hashtag as {@code w = 0.6 * s + 0.4 * f},
	 * where {@code s} is the score (the counter normalized by the maximum
	 * counter) and {@code f} is the freshness, evaluated from the most recent
	 * feed containing that hashtag inside the time window.
	 *
	 * @param hashtags the hashtag -> occurrence-count map
	 * @param windowSize the window size in months
	 * @param currentMonth the current month (jan = 0 ... dec = 11)
	 * @param referenceTime the start of the window
	 * @param vreId the single VRE scope (present iff hashtagsInVres is null)
	 * @param hashtagsInVres hashtag -> VREs containing it (present iff vreId is null)
	 * @return a map of weight for each hashtag
	 */
	private Map<String, Double> evaluateWeight(Map<String, Integer> hashtags, int windowSize,
			int currentMonth, Calendar referenceTime, String vreId,
			Map<String, List<String>> hashtagsInVres) {
		Map<String, Double> weights = new HashMap<String, Double>();

		// find the max score inside the list (counter)
		int max = 0;
		for (Entry<String, Integer> entry : hashtags.entrySet()) {
			max = Math.max(max, entry.getValue());
		}

		// normalize; guard against division by zero when every counter is zero
		Map<String, Double> normalized = new HashMap<String, Double>();
		for (Entry<String, Integer> entry : hashtags.entrySet()) {
			normalized.put(entry.getKey(), max == 0 ? 0.0 : (double) entry.getValue() / (double) max);
		}

		// create the weight for each entry as:
		// w = 0.6 * normalized_score + 0.4 * freshness
		// freshness = (window_size - months_since_latest_feed_for_hashtag) / window_size
		for (Entry<String, Integer> entry : hashtags.entrySet()) {
			String hashtag = entry.getKey();
			double weight = 0.6 * normalized.get(hashtag);
			List<Feed> mostRecentFeedForHashtag;
			if (vreId != null) {
				// simplest case: the hashtag belongs to (or the request comes from) a single VRE
				try {
					mostRecentFeedForHashtag = store.getVREFeedsByHashtag(vreId, hashtag);
				} catch (Exception e) {
					_log.error("Unable to retrieve the most recent feeds for hashtag "
							+ hashtag + " in " + vreId, e);
					weights.put(hashtag, 0.0); // put a weight of zero for this hashtag
					continue;
				}
			} else {
				// collect the feeds from every VRE in which the hashtag appears
				List<String> vres = hashtagsInVres.get(hashtag);
				mostRecentFeedForHashtag = new ArrayList<Feed>();
				for (String vre : vres) {
					try {
						// best-effort: a failing VRE is skipped, not fatal
						mostRecentFeedForHashtag.addAll(store.getVREFeedsByHashtag(vre, hashtag));
					} catch (Exception e) {
						_log.error("Unable to retrieve the most recent feeds for hashtag "
								+ hashtag + " in " + vre, e);
					}
				}
			}
			// no feed in the window (checked on BOTH paths: the original only
			// guarded the multi-VRE branch and could hit get(0) on an empty list)
			if (mostRecentFeedForHashtag.isEmpty()) {
				weights.put(hashtag, 0.0);
				continue;
			}
			// retrieve the most recent one among these feeds
			// (assumes Feed's natural ordering is chronological — TODO confirm)
			Collections.sort(mostRecentFeedForHashtag, Collections.reverseOrder());
			// get the month of the most recent feed for this hashtag
			Calendar mostRecentFeedTime = Calendar.getInstance();
			mostRecentFeedTime.setTimeInMillis(mostRecentFeedForHashtag.get(0).getTime().getTime());
			int sub = currentMonth - mostRecentFeedTime.get(Calendar.MONTH);
			int value = sub >= 0 ? sub : 12 - Math.abs(sub); // wrap across the year boundary
			double freshness = 1.0 - (double) value / (double) windowSize;
			_log.debug("freshness is " + freshness + " for hashtag " + hashtag
					+ " because the last feed has month " + mostRecentFeedTime.get(Calendar.MONTH));
			weight += 0.4 * freshness;
			weights.put(hashtag, weight);
		}

		// print sorted, for debugging only
		Map<String, Double> scoredListSorted = sortByWeight(weights);
		for (Entry<String, Double> entry : scoredListSorted.entrySet()) {
			_log.debug("[hashtag=" + entry.getKey() + " , weight=" + entry.getValue() + "]");
		}
		return weights;
	}

	/**
	 * Sorts a map by its values, in descending order.
	 *
	 * @param map the map to sort
	 * @return a new insertion-ordered map with the entries sorted by value
	 */
	private static <K, V extends Comparable<? super V>> Map<K, V> sortByWeight(Map<K, V> map) {
		List<Map.Entry<K, V>> list = new LinkedList<Map.Entry<K, V>>(map.entrySet());
		Collections.sort(list, new Comparator<Map.Entry<K, V>>() {
			public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
				return o2.getValue().compareTo(o1.getValue()); // descending
			}
		});
		Map<K, V> result = new LinkedHashMap<K, V>();
		for (Map.Entry<K, V> entry : list) {
			result.put(entry.getKey(), entry.getValue());
		}
		return result;
	}

	/**
	 * Indicates whether the current scope is the whole infrastructure.
	 *
	 * @return true if it is, false otherwise (including on any error)
	 */
	private boolean isInfrastructureScope() {
		try {
			GroupManager manager = new LiferayGroupManager();
			long groupId = manager.getGroupIdFromInfrastructureScope(getASLSession().getScope());
			return !manager.isVRE(groupId);
		} catch (Exception e) {
			_log.error("Exception in isInfrastructureScope, returning false", e);
			return false;
		}
	}
}