In: Computer Science
Java code
Pick 5 articles/documents and write a program that finds the top 20 bigrams in the articles, together with the total number of times they appear in each article. Then present the results in descending order. Also make sure that ./<> and other symbols are not included in the results.
Below is a Java program that finds the top 20 bigrams. You can replace the hard-coded strings with text read from files as well.
package demo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import javax.sound.sampled.ReverbType;
public class abcd {
public static void main(String[] args)
{
String article1 = "article
in.<> the document and bigrams";
String article2 = "documents and.
have a program that find the top 20 bigrams that are in
articles";
String article3 = "this is a
.random article or document or bigram";
String article4 = "and bigrams /and
bigrams";
String article5 = "that<> are
and bigrams that find top 20 bigrams";
String[] articles = {article1,
article2,article3,article4,article5};
Bigram b = new
Bigram(articles);
HashMap<String, Integer>
bigrams = b.controlled_func();
HashMap<String, Integer>
sorted_bigrams = sort_bigrams(bigrams);
ArrayList keyList = new
ArrayList(sorted_bigrams.keySet());
System.out.println("Printing top 20
bigrams");
for(int i=0; i< 20; i++)
{
System.out.println(keyList.get(i));
}
}
public static HashMap<String, Integer>
sort_bigrams(HashMap<String, Integer> bigrams)
{
List<Map.Entry<String, Integer> > my_list =
new LinkedList<Map.Entry<String, Integer>
>(bigrams.entrySet());
Collections.sort(my_list, new Comparator<Map.Entry<String,
Integer> >() {
public int compare(Map.Entry<String, Integer> e1,
Map.Entry<String, Integer> e2)
{
return (e1.getValue()).compareTo(e2.getValue());
}
});
HashMap<String,
Integer> temp_list = new LinkedHashMap<String,
Integer>();
for
(Map.Entry<String, Integer> aa : my_list) {
temp_list.put(aa.getKey(), aa.getValue());
}
ArrayList keyList = new
ArrayList(temp_list.keySet());
HashMap<String,
Integer> return_temp_list = new LinkedHashMap<String,
Integer>();
for(int
i=temp_list.size()-1; i>=0;i--){
return_temp_list.put(keyList.get(i).toString(),
temp_list.get(keyList.get(i)));
}
return
return_temp_list;
}
}
/**
 * Extracts word bigrams (pairs of adjacent words) from articles and counts
 * how often each bigram occurs across all of them.
 *
 * <p>The article array is held in a static field, so it is shared by every
 * instance: constructing a new {@code Bigram} or calling
 * {@link #setArticles(String[])} replaces the articles seen by all callers.
 */
class Bigram {

    /** Articles to analyse; shared across all instances. */
    static String[] articles;

    /**
     * Returns the articles currently registered for analysis.
     *
     * @return the shared article array, possibly {@code null}
     */
    public String[] getArticles() {
        return articles;
    }

    /**
     * Replaces the shared article array.
     *
     * @param articles articles to analyse
     */
    public void setArticles(String[] articles) {
        Bigram.articles = articles;
    }

    /**
     * Registers the articles to analyse.
     *
     * @param articles articles to analyse
     */
    public Bigram(String[] articles) {
        Bigram.articles = articles;
    }

    /**
     * Splits an article into bigrams of the form {@code "first second"}.
     *
     * <p>Every character that is not a letter, a digit or whitespace is removed
     * before splitting, so symbols such as {@code . / < > , !} never appear in
     * the result. Words are separated by any run of whitespace.
     *
     * @param article text to process; {@code null} is treated as empty
     * @return the bigrams in order of appearance; empty when the article has
     *         fewer than two words
     */
    public static List<String> getbigrams(String article) {
        List<String> bigrams = new ArrayList<String>();
        if (article == null) {
            return bigrams;
        }

        // Symbols are deleted rather than replaced by a space, so "in.<>" stays one word "in".
        String cleaned = article.replaceAll("[^\\p{L}\\p{N}\\s]", "").trim();
        if (cleaned.isEmpty()) {
            return bigrams;
        }

        // Split on whitespace runs so repeated spaces or line breaks yield no empty words.
        String[] words = cleaned.split("\\s+");
        for (int i = 0; i < words.length - 1; i++) {
            bigrams.add(words[i] + " " + words[i + 1]);
        }
        return bigrams;
    }

    /**
     * Counts the occurrences of every bigram across all registered articles.
     *
     * @return a map from bigram to its total number of occurrences; empty when
     *         no articles are registered
     */
    public static HashMap<String, Integer> controlled_func() {
        HashMap<String, Integer> bigramcount = new HashMap<String, Integer>();
        if (articles == null) {
            return bigramcount;
        }

        for (String article : articles) {
            for (String bigram : getbigrams(article)) {
                Integer seen = bigramcount.get(bigram);
                bigramcount.put(bigram, seen == null ? 1 : seen + 1);
            }
        }
        return bigramcount;
    }
}