update
This commit is contained in:
68
java/wordStat/WordStat.java
Normal file
68
java/wordStat/WordStat.java
Normal file
@@ -0,0 +1,68 @@
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author Nikita Doschennikov (me@fymio.us)
|
||||
*/
|
||||
public class WordStat {
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat <inputFile> <outputFile>");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(
|
||||
new FileReader(inputFileName)
|
||||
);
|
||||
|
||||
LinkedHashMap<String, Integer> wordCount = new LinkedHashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (
|
||||
Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) ||
|
||||
c == '\''
|
||||
) {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (!sb.isEmpty()) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
wordCount.put(
|
||||
word,
|
||||
wordCount.getOrDefault(word, 0) + 1
|
||||
);
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
PrintWriter writer = new PrintWriter(
|
||||
outputFileName,
|
||||
StandardCharsets.UTF_8
|
||||
);
|
||||
|
||||
for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
int value = entry.getValue();
|
||||
writer.println(key + " " + value);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user