import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * Command-line tool that counts how many times each word occurs in a text file.
 *
 * <p>A word is a maximal run of characters that are letters, dash punctuation
 * (Unicode category {@code Pd}) or the apostrophe {@code '}. Words are compared
 * case-insensitively (they are lower-cased before counting). The result is
 * written to the output file as one {@code "word count"} line per distinct
 * word, in order of first appearance in the input.
 *
 * <p>Both the input and the output file are treated as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStat {

    /**
     * Program entry point.
     *
     * <p>On a wrong number of arguments a usage message is printed to standard
     * error and the method returns without touching any file. I/O failures are
     * reported on standard error as well; they are not propagated to the caller.
     *
     * @param args exactly two elements: the input file name and the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println("usage: java WordStat <input file> <output file>");
            // Without this return the code below would index past the end of args.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> wordCount;
            // The input is read completely before the output is opened, so a
            // missing or unreadable input never creates or truncates the output file.
            try (BufferedReader reader = new BufferedReader(
                    new FileReader(inputFileName, StandardCharsets.UTF_8))) {
                wordCount = countWords(reader);
            }

            try (PrintWriter writer = new PrintWriter(outputFileName, StandardCharsets.UTF_8)) {
                for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
                    writer.println(entry.getKey() + " " + entry.getValue());
                }
            }
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads all characters from {@code reader} and counts the words in them.
     *
     * @param reader source of the text; it is read until end of stream but not closed here
     * @return lower-cased words mapped to their number of occurrences, iterated
     *         in order of first appearance
     * @throws IOException if reading from {@code reader} fails
     */
    private static Map<String, Integer> countWords(Reader reader) throws IOException {
        Map<String, Integer> wordCount = new LinkedHashMap<>();
        StringBuilder current = new StringBuilder();

        int data;
        while ((data = reader.read()) != -1) {
            char c = (char) data;
            if (isWordChar(c)) {
                current.append(c);
            } else {
                flushWord(current, wordCount);
            }
        }
        // The input may end in the middle of a word (no trailing separator);
        // that last word still has to be counted.
        flushWord(current, wordCount);

        return wordCount;
    }

    /** Tells whether {@code c} is part of a word: a letter, dash punctuation or an apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
                || Character.getType(c) == Character.DASH_PUNCTUATION
                || c == '\'';
    }

    /** Counts the word accumulated in {@code current}, if any, and resets the buffer. */
    private static void flushWord(StringBuilder current, Map<String, Integer> wordCount) {
        if (current.length() == 0) {
            return;
        }
        wordCount.merge(current.toString().toLowerCase(), 1, Integer::sum);
        current.setLength(0);
    }
}