81 lines
2.7 KiB
Java
81 lines
2.7 KiB
Java
package wspp;
|
|
|
|
import java.io.*;
|
|
import java.util.*;
|
|
|
|
/**
|
|
* @author Nikita Doschennikov (me@fymio.us)
|
|
*/
|
|
public class Wspp {
|
|
|
|
public static void main(String[] args) {
|
|
if (args.length != 2) {
|
|
System.err.println(
|
|
"usage: java Wspp <inputFilePath> <outputFilePath>"
|
|
);
|
|
}
|
|
|
|
final String inputFileName = args[0];
|
|
final String outputFileName = args[1];
|
|
|
|
Map<String, WordInfo> words = new LinkedHashMap<>();
|
|
|
|
try (
|
|
BufferedReader br = new BufferedReader(
|
|
new FileReader(inputFileName)
|
|
);
|
|
FileWriter fw = new FileWriter(outputFileName)
|
|
) {
|
|
String line;
|
|
int wordPos = 1;
|
|
while ((line = br.readLine()) != null) {
|
|
line = line.toLowerCase();
|
|
StringBuilder word = new StringBuilder();
|
|
|
|
for (char c : line.toCharArray()) {
|
|
if (
|
|
Character.isLetter(c) ||
|
|
c == '\'' ||
|
|
Character.getType(c) == Character.DASH_PUNCTUATION
|
|
) {
|
|
word.append(c);
|
|
} else {
|
|
if (!word.isEmpty()) {
|
|
if (words.containsKey(word.toString())) {
|
|
words
|
|
.get(word.toString())
|
|
.occurrences.put(wordPos++);
|
|
} else {
|
|
WordInfo info = new WordInfo(word.toString());
|
|
info.occurrences.put(wordPos++);
|
|
words.put(word.toString(), info);
|
|
}
|
|
}
|
|
|
|
word = new StringBuilder();
|
|
}
|
|
}
|
|
|
|
if (!word.isEmpty()) {
|
|
if (words.containsKey(word.toString())) {
|
|
words.get(word.toString()).occurrences.put(wordPos++);
|
|
} else {
|
|
WordInfo info = new WordInfo(word.toString());
|
|
info.occurrences.put(wordPos++);
|
|
words.put(word.toString(), info);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (String word : words.keySet()) {
|
|
WordInfo info = words.get(word);
|
|
int count = info.occurrences.getLength();
|
|
String occurencies = info.occurrences.toString();
|
|
fw.write(word + " " + count + " " + occurencies);
|
|
}
|
|
} catch (IOException e) {
|
|
System.out.println("Error reading file.");
|
|
}
|
|
}
|
|
}
|