Ranking algorithm now considers time. Button to show all hashtags when there are more than ten added. Portlet moved to Liferay 6.2 and version number to 2.0.0

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/portlets/user/top-topics@128534 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2016-05-10 10:03:57 +00:00
parent f7948fafce
commit f08a74a8a7
7 changed files with 307 additions and 30 deletions

14
pom.xml
View File

@ -25,7 +25,7 @@
</scm>
<properties>
<!-- Convenience property to set the GWT version -->
<gwtVersion>2.5.1</gwtVersion>
<gwtVersion>2.7.0</gwtVersion>
<distroDirectory>distro</distroDirectory>
<liferayVersion>6.2.5</liferayVersion>
<!-- GWT needs at least java 1.6 -->
@ -52,10 +52,18 @@
<dependency>
<groupId>com.google.gwt</groupId>
<artifactId>gwt-user</artifactId>
<version>${gwtVersion}</version>
</dependency>
<dependency>
<groupId>com.google.gwt</groupId>
<artifactId>gwt-servlet</artifactId>
<version>${gwtVersion}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.gwt</groupId>
<artifactId>gwt-dev</artifactId>
<version>${gwtVersion}</version>
<scope>provided</scope>
</dependency>
<dependency>
@ -106,6 +114,10 @@
<artifactId>portlet-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.github.gwtbootstrap</groupId>
<artifactId>gwt-bootstrap</artifactId>
</dependency>
</dependencies>
<build>

View File

@ -6,7 +6,10 @@ import org.gcube.portlets.user.topics.client.TopicService;
import org.gcube.portlets.user.topics.client.TopicServiceAsync;
import org.gcube.portlets.user.topics.shared.HashtagsWrapper;
import com.github.gwtbootstrap.client.ui.Button;
import com.google.gwt.core.client.GWT;
import com.google.gwt.event.dom.client.ClickEvent;
import com.google.gwt.event.dom.client.ClickHandler;
import com.google.gwt.user.client.rpc.AsyncCallback;
import com.google.gwt.user.client.ui.Composite;
import com.google.gwt.user.client.ui.HTML;
@ -27,6 +30,7 @@ public class TopicsPanel extends Composite {
public static final String loading = GWT.getModuleBaseURL() + "../images/topics-loader.gif";
public static final String DISPLAY_NAME = "Top Topics";
public static final int THRESHOLD_SHOW_HASHTAGS = 10; // show the first X ones
private Image loadingImage;
@ -50,13 +54,42 @@ public class TopicsPanel extends Composite {
showServError();
}
else {
int counter = 0;
if (hashtags != null) {
for (String hashtag : hashtags) {
counter ++;
HTML toAdd = new HTML(hashtag);
toAdd.addStyleName("hashtag-label");
mainPanel.add(toAdd);
if(counter > THRESHOLD_SHOW_HASHTAGS) // 11, 12...
toAdd.setVisible(false);
}
}
// add a show all button if needed
if(counter > THRESHOLD_SHOW_HASHTAGS){
final Button showAllHashtags = new Button("Show All");
showAllHashtags.addClickHandler(new ClickHandler() {
@Override
public void onClick(ClickEvent event) {
int numberChildren = mainPanel.getWidgetCount();
for (int i = THRESHOLD_SHOW_HASHTAGS; i < numberChildren; i++) {
mainPanel.getWidget(i).setVisible(true);
}
// hide the button
showAllHashtags.setVisible(false);
}
});
mainPanel.add(showAllHashtags);
}
}
}

View File

@ -1,9 +1,16 @@
package org.gcube.portlets.user.topics.server;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.codec.binary.Base64;
import org.gcube.application.framework.core.session.ASLSession;
@ -12,6 +19,7 @@ import org.gcube.portal.custom.scopemanager.scopehelper.ScopeHelper;
import org.gcube.portal.databook.client.GCubeSocialNetworking;
import org.gcube.portal.databook.server.DBCassandraAstyanaxImpl;
import org.gcube.portal.databook.server.DatabookStore;
import org.gcube.portal.databook.shared.Feed;
import org.gcube.portlets.user.topics.client.TopicService;
import org.gcube.portlets.user.topics.shared.HashTagAndOccurrence;
import org.gcube.portlets.user.topics.shared.HashtagsWrapper;
@ -34,6 +42,8 @@ public class TopicServiceImpl extends RemoteServiceServlet implements TopicServi
private static final Logger _log = LoggerFactory.getLogger(TopicServiceImpl.class);
public static final String TEST_USER = "test.user";
private static final int WINDOW_SIZE_IN_MONTHS = 6; // it must not exceed 12
/**
* The Cassandra store interface
*/
@ -68,20 +78,32 @@ public class TopicServiceImpl extends RemoteServiceServlet implements TopicServi
*/
public String getDevelopmentUser() {
// Yields the fallback user name: always TEST_USER unless the override below is uncommented.
String user = TEST_USER;
// Commented-out override that would replace the test user with a specific account name.
//user = "massimiliano.assante";
// user = "massimiliano.assante";
return user;
}
/**
* return the top 10 hashtag with max occurrence
* return trending hashtags
*/
@Override
public HashtagsWrapper getHashtags() {
ArrayList<String> hashtagsChart = new ArrayList<>();
ASLSession session = getASLSession();
long timestampStart = System.currentTimeMillis();
// get the reference time
Calendar referenceTime = Calendar.getInstance();
int currentMonth = referenceTime.get(Calendar.MONTH); // jan = 0, ..... dec = 11
referenceTime.set(Calendar.MONTH, currentMonth - WINDOW_SIZE_IN_MONTHS); // the year is automatically decreased if needed
// print it
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
_log.debug("Reference time for trending topics is " + format.format(referenceTime.getTime()));
String userName = session.getUsername();
boolean isInfrastructure = isInfrastructureScope();
try {
//in case the portal is restarted and you have the social home open it will get test.user (no callback to set session info)
//this check just return nothing if that happens
@ -90,41 +112,87 @@ public class TopicServiceImpl extends RemoteServiceServlet implements TopicServi
return null;
}
ArrayList<HashTagAndOccurrence> toSort = new ArrayList<HashTagAndOccurrence>();
/**
* this handles the case where the portlet is deployed outside of VREs (regular)
*/
if (isInfrastructure) {
_log.debug("****** retrieving hashtags for user VREs");
// different vres could have a same hashtag, we need to merge them
Map<String, Integer> hashtags = new HashMap<String, Integer>();
// we need a map for the couple <hashtag, vre in which it is present>
// it is needed because later we need to retrieve the most recent feed among the ones
// containing the hashtag itself
Map<String, List<String>> hashtagsInVres = new HashMap<String, List<String>>();
GroupManager gm = new LiferayGroupManager();
UserManager um = new LiferayUserManager();
GCubeUser user = um.getUserByUsername(userName);
List<GCubeGroup> groups = gm.listGroupsByUser(user.getUserId());
for (GCubeGroup group : groups) {
if (gm.isVRE(group.getGroupId())) {
_log.debug("Retrieving hashtags from VRE " + group.getGroupName());
String vreid = gm.getInfrastructureScope(group.getGroupId()); //get the scope
Map<String, Integer> map = store.getVREHashtagsWithOccurrence(vreid);
Map<String, Integer> map = store.getVREHashtagsWithOccurrenceFilteredByTime(vreid, referenceTime.getTimeInMillis());
// merge the values if needed
for (String hashtag : map.keySet()) {
toSort.add(new HashTagAndOccurrence(hashtag, map.get(hashtag)));
if(hashtags.containsKey(hashtag)){
int currentValue = hashtags.get(hashtag);
int newValue = currentValue + map.get(hashtag);
// remove and re-add
hashtags.remove(hashtag);
hashtags.put(hashtag, newValue);
// get the current list of vres in which the hashtag is present and add this new one
List<String> vres = hashtagsInVres.get(hashtag);
vres.add(vreid);
hashtagsInVres.remove(hashtag);
hashtagsInVres.put(hashtag, vres);
}else{
hashtags.put(hashtag, map.get(hashtag));
// put in the hashmap hashtagsInVres too
List<String> vres = new ArrayList<String>();
vres.add(vreid);
hashtagsInVres.put(hashtag, vres);
}
}
}
}
}
// now we need to evaluate score for each element
Map<String, Double> weights = evaluateWeight(hashtags, WINDOW_SIZE_IN_MONTHS, currentMonth, referenceTime, null, hashtagsInVres);
// at the end build the list
for (String hashtag : hashtags.keySet()) {
toSort.add(new HashTagAndOccurrence(hashtag, hashtags.get(hashtag), weights.get(hashtag)));
}
}
//else must be in a VRE scope
else {
String scope = session.getScope();
_log.debug("****** retrieving hashtags for scope " + scope);
Map<String, Integer> map = store.getVREHashtagsWithOccurrence(scope);
for (String hashtag : map.keySet()) {
toSort.add(new HashTagAndOccurrence(hashtag, map.get(hashtag)));
Map<String, Integer> hashtags = store.getVREHashtagsWithOccurrenceFilteredByTime(scope, referenceTime.getTimeInMillis());
// now we need to evaluate the weight for each element
Map<String, Double> weights = evaluateWeight(hashtags, WINDOW_SIZE_IN_MONTHS, currentMonth, referenceTime, scope, null);
for (String hashtag : hashtags.keySet()) {
toSort.add(new HashTagAndOccurrence(hashtag, hashtags.get(hashtag), weights.get(hashtag)));
}
}
_log.debug("Number of topics retrieved is " + toSort.size());
Collections.sort(toSort, Collections.reverseOrder());
int i = 0;
Collections.sort(toSort); // sort for weight
for (HashTagAndOccurrence wrapper : toSort) {
_log.debug("Entry is " + wrapper.toString() + " with weight " + wrapper.getWeight());
String hashtag = wrapper.getHashtag();
String href="\"?"+
@ -132,19 +200,165 @@ public class TopicServiceImpl extends RemoteServiceServlet implements TopicServi
new String(Base64.encodeBase64(hashtag.getBytes()))+"\"";
String hashtagLink = "<a class=\"topiclink\" href=" + href + ">"+hashtag+"</a>";
hashtagsChart.add(hashtagLink);
i++;
if (i >= 10)
break;
}
}
catch (Exception e) {
e.printStackTrace();
return null;
}
long timestampEnd = System.currentTimeMillis() - timestampStart;
_log.debug("Overall time to retrieve hastags is " + timestampEnd);
return new HashtagsWrapper(isInfrastructure, hashtagsChart);
}
/**
 * Evaluates a ranking weight for each hashtag as <code>w = 0.6 * s + 0.4 * f</code>, where
 * <ul>
 * <li>s is the score: the hashtag's occurrences divided by the highest occurrence count in <code>hashtags</code>
 * (zero when no hashtag has a positive count);</li>
 * <li>f is the freshness: <code>1 - monthsSinceLatestFeed / windowSize</code>, computed from the calendar month of
 * the most recent feed containing the hashtag.</li>
 * </ul>
 * A hashtag for which no feed can be retrieved (store failure or no feed returned) gets a weight of zero, so a single
 * problematic hashtag never aborts the whole ranking.
 * @param hashtags the hashtags to weigh, each mapped to its number of occurrences
 * @param windowSize the size, in months, of the period taken into account
 * @param currentMonth the current month as a {@link Calendar#MONTH} value (jan = 0, ..., dec = 11)
 * @param referenceTime the start of the window (currently not used by the computation)
 * @param vreId the single vre to query; when it is <code>null</code>, <code>hashtagsInVres</code> is used instead
 * @param hashtagsInVres for each hashtag, the vres in which it is present (used only if <code>vreId</code> is <code>null</code>)
 * @return a Map holding the weight of each hashtag
 */
private Map<String, Double> evaluateWeight(Map<String, Integer> hashtags, int windowSize, int currentMonth, Calendar referenceTime, String vreId, Map<String, List<String>> hashtagsInVres) {

    Map<String, Double> weights = new HashMap<String, Double>();

    // find the highest counter: it is the normalization factor for the score
    int max = 0;
    for (Integer occurrences : hashtags.values()) {
        max = Math.max(max, occurrences);
    }

    for (Entry<String, Integer> entry : hashtags.entrySet()) {

        String hashtag = entry.getKey();

        // normalized score; guard against 0/0, which would otherwise yield NaN weights
        double score = max > 0 ? (double) entry.getValue() / (double) max : 0.0;

        // the vres to ask for feeds: the given one, or every vre in which the hashtag is known to be present
        List<String> vresToQuery;
        if (vreId != null) {
            vresToQuery = Collections.singletonList(vreId);
        } else {
            List<String> knownVres = hashtagsInVres == null ? null : hashtagsInVres.get(hashtag);
            vresToQuery = knownVres == null ? Collections.<String>emptyList() : knownVres;
        }

        // collect the feeds containing the hashtag; a failing vre is logged and skipped
        List<Feed> feeds = new ArrayList<Feed>();
        for (String vre : vresToQuery) {
            try {
                List<Feed> feedsForVre = store.getVREFeedsByHashtag(vre, hashtag);
                if (feedsForVre != null) {
                    feeds.addAll(feedsForVre);
                }
            } catch (Exception e) {
                // report the vre that actually failed, and keep the cause in the log
                _log.error("Unable to retrieve the most recent feeds for hashtag " + hashtag + " in " + vre, e);
            }
        }

        // without feeds there is nothing to evaluate the freshness on: weight of zero for this hashtag
        if (feeds.isEmpty()) {
            weights.put(hashtag, 0.0);
            continue;
        }

        // pick the feed that comes first in reverse natural order (the same element a reverse sort would
        // put at index 0), without sorting the whole list; relies on Feed's natural ordering
        Feed mostRecentFeed = Collections.min(feeds, Collections.<Feed>reverseOrder());

        // get the month of the most recent feed for this hashtag
        Calendar mostRecentFeedTime = Calendar.getInstance();
        mostRecentFeedTime.setTimeInMillis(mostRecentFeed.getTime().getTime());
        int mostRecentFeedMonth = mostRecentFeedTime.get(Calendar.MONTH);

        // months elapsed since that feed; a negative difference means the feed belongs to the previous year
        int monthsAgo = currentMonth - mostRecentFeedMonth;
        if (monthsAgo < 0) {
            monthsAgo += 12;
        }

        double freshness = 1.0 - (double) monthsAgo / (double) windowSize;
        _log.debug("freshness is " + freshness + " for hashtag " + hashtag +
                " because the last feed has month " + mostRecentFeedMonth);

        weights.put(hashtag, 0.6 * score + 0.4 * freshness);
    }

    // trace the ranking through the logger (only when debug is enabled, since it requires a sort)
    if (_log.isDebugEnabled()) {
        for (Entry<String, Double> entry : sortByWeight(weights).entrySet()) {
            _log.debug("[hashtag=" + entry.getKey() + " , weight=" + entry.getValue() + "]");
        }
    }

    return weights;
}
/**
 * Builds a copy of the given map whose iteration order follows the values in descending order.
 * Entries with equal values keep the relative order in which the source map iterates them.
 * @param map the map to order; it is not modified
 * @return a new insertion-ordered map holding the same entries, largest value first
 */
private static <K, V extends Comparable<? super V>> Map<K, V> sortByWeight(Map<K, V> map) {

    // compares two entries by value, larger values first
    Comparator<Entry<K, V>> byValueDescending = new Comparator<Entry<K, V>>() {
        @Override
        public int compare(Entry<K, V> left, Entry<K, V> right) {
            return right.getValue().compareTo(left.getValue());
        }
    };

    List<Entry<K, V>> entries = new ArrayList<Entry<K, V>>(map.entrySet());
    Collections.sort(entries, byValueDescending);

    // a LinkedHashMap preserves the sorted order on iteration
    Map<K, V> ordered = new LinkedHashMap<K, V>();
    for (Entry<K, V> entry : entries) {
        ordered.put(entry.getKey(), entry.getValue());
    }
    return ordered;
}
/**
* Indicates whether the scope is the whole infrastructure.
* @return <code>true</code> if it is, <code>false</code> otherwise.

View File

@ -3,11 +3,18 @@ package org.gcube.portlets.user.topics.shared;
public class HashTagAndOccurrence implements Comparable<HashTagAndOccurrence>{
private String hashtag;
private Integer occurrence;
private double weight;
public HashTagAndOccurrence(String hashtag, Integer occurrence) {
super();
this.hashtag = hashtag;
this.occurrence = occurrence;
}
public HashTagAndOccurrence(String hashtag, Integer occurrence, double weight) {
super();
this.hashtag = hashtag;
this.occurrence = occurrence;
this.weight = weight;
}
public String getHashtag() {
return hashtag;
}
@ -20,16 +27,20 @@ public class HashTagAndOccurrence implements Comparable<HashTagAndOccurrence>{
public void setOccurrence(Integer occurrence) {
this.occurrence = occurrence;
}
public double getWeight() {
return weight;
}
public void setWeight(double weight) {
this.weight = weight;
}
@Override
public String toString() {
return "HashTagAndOccurrence [hashtag=" + hashtag + ", occurrence="
+ occurrence + "]";
+ occurrence + ", weight=" + weight + "]";
}
@Override
public int compareTo(HashTagAndOccurrence o) {
if (this.occurrence == o.getOccurrence()) return 0;
return (this.occurrence > o.getOccurrence()) ? 1 : -1;
return Double.compare(o.getWeight(), this.weight);
}
}

View File

@ -2,8 +2,14 @@ package org.gcube.portlets.user.topics.shared;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author Massimiliano Assante at ISTI-CNR
* (massimiliano.assante@isti.cnr.it)
* @author Costantino Perciante at ISTI-CNR
* (costantino.perciante@isti.cnr.it)
*/
public class HashtagsWrapper implements Serializable {
private static final long serialVersionUID = -532083077958376460L;
private boolean isInfrastructure;
private ArrayList<String> hashtags;
public HashtagsWrapper(boolean isInfrastructure, ArrayList<String> hashtags) {

View File

@ -3,13 +3,15 @@
<!-- Inherit the core Web Toolkit stuff. -->
<inherits name='com.google.gwt.user.User' />
<!-- To Comment out -->
<!-- <set-property name="user.agent" value="safari,gecko1_8" /> -->
<!-- <set-property name="user.agent" value="safari,gecko1_8" /> -->
<!-- Bootstrap import -->
<inherits name="com.github.gwtbootstrap.Bootstrap" />
<!-- Other module inherits -->
<inherits name='org.gcube.portlets.user.gcubewidgets.WidgetFactory' />
<inherits name='org.gcube.portal.databook.GCubeSocialNetworking' />
<entry-point
class='org.gcube.portlets.user.topics.client.TopTopics' />
<entry-point class='org.gcube.portlets.user.topics.client.TopTopics' />
<!-- Specify the paths for translatable code -->
<source path='client' />

View File

@ -6,7 +6,6 @@ a.topiclink, a.topiclink:active, a.topiclink:visited {
cursor: hand;
text-decoration: none;
color: #0078b2 !important;
font-weight: bold !important;
}
a.topiclink:hover {