import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Counts how often each word occurs in a text file and prints the words
 * ordered by descending count, one {@code word:count} pair per line.
 *
 * <p>A word is a maximal run of characters accepted by
 * {@link Character#isLetter(char)}. Every line is lower-cased before it is
 * split, so counting is case-insensitive.
 */
public class HashMapEx {
    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/Users/andrian/input.txt";

    /**
     * Reads the input file line by line, counts its words and prints the result.
     *
     * <p>If the file cannot be read, the stack trace is printed and the JVM
     * exits with status 1.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, {@link #DEFAULT_INPUT_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        // keys are words, values are the number of times the word was seen
        Map<String, Integer> wordcount = new HashMap<String, Integer>();
        try {
            BufferedReader in = new BufferedReader(new FileReader(path));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    countWords(line, wordcount);
                }
            } finally {
                // close the reader even when readLine() fails part-way through
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }

        printByCountDescending(wordcount);
    }

    /** Adds every word found in {@code line} (lower-cased) to {@code counts}. */
    private static void countWords(String line, Map<String, Integer> counts) {
        String lower = line.toLowerCase();
        int length = lower.length();
        int wordStart = -1; // index of the first letter of the word in progress, -1 if none

        // Run one step past the last character so that a word touching the end
        // of the line (including a single-letter one) is flushed like any other.
        for (int i = 0; i <= length; i++) {
            boolean isLetter = i < length && Character.isLetter(lower.charAt(i));
            if (isLetter) {
                if (wordStart < 0) {
                    wordStart = i;
                }
            } else if (wordStart >= 0) {
                increment(counts, lower.substring(wordStart, i));
                wordStart = -1;
            }
        }
    }

    /** Raises the count stored for {@code word} by one, starting at 1 for a new word. */
    private static void increment(Map<String, Integer> counts, String word) {
        Integer seen = counts.get(word);
        counts.put(word, seen == null ? 1 : seen + 1);
    }

    /** Prints each {@code word:count} pair on its own line, highest count first. */
    private static void printByCountDescending(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());

        // Collections.sort is stable, so words with equal counts stay in the
        // map's own iteration order.
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                // compareTo compares the numeric values; == on boxed Integers
                // would compare object identity instead.
                return b.getValue().compareTo(a.getValue());
            }
        });

        for (Map.Entry<String, Integer> entry : entries) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }
}
// Monday, 16 May 2011
// Java code for calculating the number of occurrences of each Latin-letter word
// Labels: Java
// Subscribe to: Post Comments (Atom)
// No comments: Post a Comment