From 8f41006963ffd6eb4ef0061123ee6c725a6e22a8 Mon Sep 17 00:00:00 2001
From: codejava
Date: Mon, 13 Apr 2026 10:41:44 +0300
Subject: [PATCH] Upload files to "java/wordStat"

---
 java/wordStat/WordInfo.java        |  21 ++++
 java/wordStat/WordStat.java        |  96 +++++++++++++++
 java/wordStat/WordStatChecker.java | 183 +++++++++++++++++++++++++++++
 java/wordStat/WordStatLength.java  | 117 ++++++++++++++++++
 java/wordStat/package-info.java    |   7 ++
 5 files changed, 424 insertions(+)
 create mode 100644 java/wordStat/WordInfo.java
 create mode 100644 java/wordStat/WordStat.java
 create mode 100644 java/wordStat/WordStatChecker.java
 create mode 100644 java/wordStat/WordStatLength.java
 create mode 100644 java/wordStat/package-info.java

diff --git a/java/wordStat/WordInfo.java b/java/wordStat/WordInfo.java
new file mode 100644
index 0000000..b2a726a
--- /dev/null
+++ b/java/wordStat/WordInfo.java
@@ -0,0 +1,21 @@
+/**
+ * Occurrence statistics for a single word, as collected by
+ * {@code WordStatLength}.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WordInfo {
+
+    /** The word itself, as stored by the caller. */
+    final String word;
+    /** Number of occurrences seen so far; incremented by the caller. */
+    int count;
+    /** Ordinal of the word's first occurrence among distinct words. */
+    final int firstIndex;
+
+    WordInfo(String word, int count, int firstIndex) {
+        this.word = word;
+        this.count = count;
+        this.firstIndex = firstIndex;
+    }
+}
diff --git a/java/wordStat/WordStat.java b/java/wordStat/WordStat.java
new file mode 100644
index 0000000..536a1fe
--- /dev/null
+++ b/java/wordStat/WordStat.java
@@ -0,0 +1,96 @@
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Counts how often each word occurs in a text file and writes the
+ * statistics in order of first occurrence.
+ *
+ * <p>A word is a maximal run of letters, apostrophes and characters of the
+ * Unicode dash-punctuation category; words are compared case-insensitively.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WordStat {
+
+    /**
+     * Reads the file named by {@code args[0]} and writes one
+     * {@code "word count"} line per distinct word to the file named by
+     * {@code args[1]}. Both files are treated as UTF-8.
+     *
+     * @param args input file name followed by output file name
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("incorrect input!");
+            System.err.println("usage: java WordStat <input> <output>");
+            // Without this return the code below would index past the
+            // end of args and die with ArrayIndexOutOfBoundsException.
+            return;
+        }
+
+        String inputFileName = args[0];
+        String outputFileName = args[1];
+        try {
+            Map<String, Integer> wordCount = new LinkedHashMap<>();
+
+            // try-with-resources closes the reader even if reading fails.
+            try (
+                BufferedReader r = new BufferedReader(
+                    new FileReader(inputFileName, StandardCharsets.UTF_8)
+                )
+            ) {
+                StringBuilder sb = new StringBuilder();
+
+                int data = r.read();
+                while (data != -1) {
+                    char c = (char) data;
+
+                    if (isWordChar(c)) {
+                        sb.append(c);
+                    } else {
+                        flushWord(sb, wordCount);
+                    }
+
+                    data = r.read();
+                }
+
+                // The input may end in the middle of a word.
+                flushWord(sb, wordCount);
+            }
+
+            try (
+                PrintWriter writer = new PrintWriter(
+                    outputFileName,
+                    StandardCharsets.UTF_8
+                )
+            ) {
+                for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
+                    writer.println(entry.getKey() + " " + entry.getValue());
+                }
+            }
+        } catch (IOException ex) {
+            System.err.println("An error occured: " + ex.getMessage());
+        }
+    }
+
+    /** Tells whether {@code c} may be part of a word. */
+    private static boolean isWordChar(char c) {
+        return (
+            Character.getType(c) == Character.DASH_PUNCTUATION ||
+            Character.isLetter(c) ||
+            c == '\''
+        );
+    }
+
+    /** Counts the word accumulated in {@code sb}, if any, and clears it. */
+    private static void flushWord(
+        StringBuilder sb,
+        Map<String, Integer> wordCount
+    ) {
+        if (sb.length() > 0) {
+            wordCount.merge(sb.toString().toLowerCase(), 1, Integer::sum);
+            sb.setLength(0);
+        }
+    }
+}
diff --git a/java/wordStat/WordStatChecker.java b/java/wordStat/WordStatChecker.java
new file mode 100644
index 0000000..85181e1
--- /dev/null
+++ b/java/wordStat/WordStatChecker.java
@@ -0,0 +1,183 @@
+package wordStat;
+
+import base.*;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
+ */
+public final class WordStatChecker extends BaseChecker {
+
+    public static final String DASH = "-֊־‒–—―⸗⸚⸺〰゠︱︲﹘﹣-'";
+    public static final String SIMPLE_DELIMITERS = " \t";
+    public static final String ADVANCED_DELIMITERS =
+        " \t!\"#%&()*+,./:;<=>?@[\\]^`{|}~ ¡¦§¨©«¬\u00AD®¯°±²³´¶·¸¹»¼½¾¿×÷˂˃˄˅˒˓˔˕˖˗˘˙˚˛˜˝";
+    public static final String ALL =
+        ExtendedRandom.RUSSIAN +
+        ExtendedRandom.ENGLISH +
+        ExtendedRandom.GREEK +
+        DASH;
+    private static final Pattern PATTERN = Pattern.compile(
+        "[^\\p{IsLetter}'\\p{Pd}]+"
+    );
+    public static final Runner.Packages RUNNER = Runner.packages(
+        "",
+        "wordstat",
+        "wspp"
+    );
+
+    private final Function<
+        String[][],
+        ? extends List<? extends Pair<?, ?>>
+    > processor;
+
+    private final MainChecker main;
+
+    private WordStatChecker(
+        final String className,
+        final Function<
+            String[][],
+            ? extends List<? extends Pair<?, ?>>
+        > processor,
+        final TestCounter counter
+    ) {
+        super(counter);
+        main = new MainChecker(RUNNER.files(className));
+        this.processor = processor;
+    }
+
+    public static void test(
+        final TestCounter counter,
+        final String className,
+        final Function<
+            String[][],
+            ? extends List<? extends Pair<?, ?>>
+        > processor,
+        final Consumer<WordStatChecker> tests
+    ) {
+        tests.accept(new WordStatChecker(className, processor, counter));
+    }
+
+    public void test(final String... lines) {
+        test(PATTERN, lines);
+    }
+
+    public void test(final Pattern pattern, final String... lines) {
+        final String[][] data = Arrays.stream(lines)
+            .map(line ->
+                Arrays.stream(pattern.split(line))
+                    .filter(Predicate.not(String::isEmpty))
+                    .toArray(String[]::new)
+            )
+            .toArray(String[][]::new);
+        test(lines, processor.apply(data));
+    }
+
+    private void randomTest(
+        final int wordLength,
+        final int totalWords,
+        final int wordsPerLine,
+        final int lines,
+        final String chars,
+        final String delimiters,
+        final Function<String[][], List<? extends Pair<?, ?>>> processor
+    ) {
+        final String[] words = generateWords(wordLength, totalWords, chars);
+        final String[][] text = generateTest(lines, words, wordsPerLine);
+        test(input(text, delimiters), processor.apply(text));
+    }
+
+    public void randomTest(
+        final int wordLength,
+        final int totalWords,
+        final int wordsPerLine,
+        final int lines,
+        final String chars,
+        final String delimiters
+    ) {
+        randomTest(
+            wordLength,
+            totalWords,
+            wordsPerLine,
+            lines,
+            chars,
+            delimiters,
+            processor::apply
+        );
+    }
+
+    private void test(
+        final String[] text,
+        final List<? extends Pair<?, ?>> expected
+    ) {
+        final List<String> expectedList = expected
+            .stream()
+            .map(p -> p.first() + " " + p.second())
+            .collect(Collectors.toList());
+        main.testEquals(counter, Arrays.asList(text), expectedList);
+    }
+
+    public void test(
+        final String[][] text,
+        final String delimiters,
+        final List<? extends Pair<?, ?>> answer
+    ) {
+        test(input(text, delimiters), answer);
+    }
+
+    private String[] generateWords(
+        final int wordLength,
+        final int totalWords,
+        final String chars
+    ) {
+        final String allChars = chars.chars().anyMatch(Character::isUpperCase)
+            ? chars
+            : chars + chars.toUpperCase(Locale.ROOT);
+        return IntStream.range(0, totalWords)
+            .mapToObj(i ->
+                random().randomString(allChars, wordLength / 2, wordLength)
+            )
+            .toArray(String[]::new);
+    }
+
+    private String[][] generateTest(
+        final int lines,
+        final String[] words,
+        final int wordsPerLine
+    ) {
+        final String[][] text = new String[lines][];
+        for (int i = 0; i < text.length; i++) {
+            text[i] = new String[random().nextInt(
+                wordsPerLine / 2,
+                wordsPerLine
+            )];
+            for (int j = 0; j < text[i].length; j++) {
+                text[i][j] = random().randomItem(words);
+            }
+        }
+        return text;
+    }
+
+    private String[] input(final String[][] text, final String delimiters) {
+        final String[] input = new String[text.length];
+        for (int i = 0; i < text.length; i++) {
+            final String[] line = text[i];
+            final StringBuilder sb = new StringBuilder(
+                random().randomString(delimiters)
+            );
+            for (final String word : line) {
+                sb.append(word).append(random().randomString(delimiters));
+            }
+            input[i] = sb.toString();
+        }
+        return input;
+    }
+}
diff --git a/java/wordStat/WordStatLength.java b/java/wordStat/WordStatLength.java
new file mode 100644
index 0000000..23fdc55
--- /dev/null
+++ b/java/wordStat/WordStatLength.java
@@ -0,0 +1,117 @@
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Counts how often each word occurs in a text file and writes the
+ * statistics ordered by word length, shortest first; words of equal
+ * length keep their order of first occurrence.
+ *
+ * <p>A word is a maximal run of letters, apostrophes and characters of the
+ * Unicode dash-punctuation category; words are compared case-insensitively.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WordStatLength {
+
+    /**
+     * Reads the file named by {@code args[0]} and writes one
+     * {@code "word count"} line per distinct word to the file named by
+     * {@code args[1]}. Both files are treated as UTF-8.
+     *
+     * @param args input file name followed by output file name
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("incorrect input!");
+            System.err.println(
+                "usage: java WordStatLength <input> <output>"
+            );
+            // Without this return the code below would index past the
+            // end of args and die with ArrayIndexOutOfBoundsException.
+            return;
+        }
+
+        String inputFileName = args[0];
+        String outputFileName = args[1];
+        try {
+            Map<String, WordInfo> wordMap = new HashMap<>();
+
+            // try-with-resources closes the reader even if reading fails.
+            try (
+                BufferedReader r = new BufferedReader(
+                    new FileReader(inputFileName, StandardCharsets.UTF_8)
+                )
+            ) {
+                StringBuilder sb = new StringBuilder();
+
+                int data = r.read();
+                while (data != -1) {
+                    char c = (char) data;
+
+                    if (isWordChar(c)) {
+                        sb.append(c);
+                    } else {
+                        flushWord(sb, wordMap);
+                    }
+
+                    data = r.read();
+                }
+
+                // The input may end in the middle of a word.
+                flushWord(sb, wordMap);
+            }
+
+            List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
+            sortedWords.sort(
+                Comparator.comparingInt((WordInfo w) ->
+                    w.word.length()
+                ).thenComparingInt(w -> w.firstIndex)
+            );
+
+            try (
+                PrintWriter writer = new PrintWriter(
+                    outputFileName,
+                    StandardCharsets.UTF_8
+                )
+            ) {
+                for (WordInfo info : sortedWords) {
+                    writer.println(info.word + " " + info.count);
+                }
+            }
+        } catch (IOException ex) {
+            System.err.println("An error occured: " + ex.getMessage());
+        }
+    }
+
+    /** Tells whether {@code c} may be part of a word. */
+    private static boolean isWordChar(char c) {
+        return (
+            Character.getType(c) == Character.DASH_PUNCTUATION ||
+            Character.isLetter(c) ||
+            c == '\''
+        );
+    }
+
+    /**
+     * Records the word accumulated in {@code sb}, if any, and clears it.
+     * A word seen for the first time gets the next free first-occurrence
+     * index, which equals the number of distinct words recorded so far.
+     */
+    private static void flushWord(
+        StringBuilder sb,
+        Map<String, WordInfo> wordMap
+    ) {
+        if (sb.length() == 0) {
+            return;
+        }
+        String word = sb.toString().toLowerCase();
+        WordInfo info = wordMap.get(word);
+        if (info == null) {
+            wordMap.put(word, new WordInfo(word, 1, wordMap.size()));
+        } else {
+            info.count++;
+        }
+        sb.setLength(0);
+    }
+}
diff --git a/java/wordStat/package-info.java b/java/wordStat/package-info.java
new file mode 100644
index 0000000..412dee7
--- /dev/null
+++ b/java/wordStat/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * Tests for Word Statistics homework
+ * of Introduction to Programming course.
+ *
+ * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
+ */
+package wordStat;