From 84c5957a919e62a7d01d1c751c59f6fe23aa6f9b Mon Sep 17 00:00:00 2001
From: codejava
Date: Mon, 13 Apr 2026 10:43:29 +0300
Subject: [PATCH] Upload files to "java/wspp"

---
 java/wspp/WsppMiddle.java   | 138 +++++++++++++++++
 java/wspp/WsppPos.java      | 141 ++++++++++++++++++
 java/wspp/WsppPosition.java | 147 ++++++++++++++++++
 java/wspp/WsppTest.java     |  66 ++++++++
 java/wspp/WsppTester.java   | 202 +++++++++++++++++++++++++
 5 files changed, 694 insertions(+)
 create mode 100644 java/wspp/WsppMiddle.java
 create mode 100644 java/wspp/WsppPos.java
 create mode 100644 java/wspp/WsppPosition.java
 create mode 100644 java/wspp/WsppTest.java
 create mode 100644 java/wspp/WsppTester.java

diff --git a/java/wspp/WsppMiddle.java b/java/wspp/WsppMiddle.java
new file mode 100644
index 0000000..a0f1f1b
--- /dev/null
+++ b/java/wspp/WsppMiddle.java
@@ -0,0 +1,138 @@
+package wspp;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Word statistics, "Middle" variant. For every word of the input file prints the word, its total
+ * number of occurrences and, for each line containing it, the 1-based whole-file index of the
+ * middle occurrence of the word in that line. Words are ordered by length; words of equal length
+ * keep the order of their first occurrence.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WsppMiddle {
+
+    /**
+     * Reads the input file, collects the statistics and writes them to the output file.
+     *
+     * @param args input file name followed by output file name
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("usage: java WsppMiddle <input file> <output file>");
+            // Stop here: continuing would fail on args[0] / args[1] below.
+            return;
+        }
+
+        final String inputFileName = args[0];
+        final String outputFileName = args[1];
+
+        // Insertion order is the order of first occurrence.
+        final Map<String, WordInfo> words = new LinkedHashMap<>();
+
+        try (
+            BufferedReader reader = new BufferedReader(
+                new FileReader(inputFileName, StandardCharsets.UTF_8)
+            );
+            BufferedWriter writer = new BufferedWriter(
+                new FileWriter(outputFileName, StandardCharsets.UTF_8)
+            )
+        ) {
+            int wordPos = 1;
+            int lineNumber = 1;
+            String line;
+            while ((line = reader.readLine()) != null) {
+                wordPos = recordLine(words, line, lineNumber, wordPos);
+                lineNumber++;
+            }
+
+            final List<WordInfo> sortedWords = new ArrayList<>(words.values());
+            sortedWords.sort(
+                Comparator.comparingInt((WordInfo w) -> w.word.length())
+                    .thenComparingInt(w -> w.firstOccurrence)
+            );
+
+            for (WordInfo info : sortedWords) {
+                int total = 0;
+                final StringBuilder middles = new StringBuilder();
+                for (var occurrences : info.lineOccurrences.values()) {
+                    final int count = occurrences.getLength();
+                    total += count;
+                    middles.append(' ').append(occurrences.get(count / 2));
+                }
+                writer.write(info.word + " " + total + middles + "\n");
+            }
+        } catch (IOException e) {
+            System.err.println("I/O error: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Splits one line into words and records every word.
+     *
+     * @param words statistics collected so far, updated in place
+     * @param line line text without its terminator
+     * @param lineNumber 1-based number of the line
+     * @param wordPos 1-based whole-file index of the next word
+     * @return whole-file index of the first word after this line
+     */
+    private static int recordLine(
+        Map<String, WordInfo> words,
+        String line,
+        int lineNumber,
+        int wordPos
+    ) {
+        final StringBuilder word = new StringBuilder();
+        // Iterate by code point so letters outside the BMP are not seen as lone surrogates.
+        for (int i = 0; i < line.length(); ) {
+            final int codePoint = line.codePointAt(i);
+            i += Character.charCount(codePoint);
+            if (isWordPart(codePoint)) {
+                word.appendCodePoint(codePoint);
+            } else if (word.length() > 0) {
+                recordWord(words, word.toString(), lineNumber, wordPos++);
+                word.setLength(0);
+            }
+        }
+        if (word.length() > 0) {
+            recordWord(words, word.toString(), lineNumber, wordPos++);
+        }
+        return wordPos;
+    }
+
+    /** Tells whether a code point is part of a word: letter, digit, dash, apostrophe, '$', '_'. */
+    private static boolean isWordPart(int codePoint) {
+        return (
+            Character.isLetter(codePoint) ||
+            Character.isDigit(codePoint) ||
+            Character.getType(codePoint) == Character.DASH_PUNCTUATION ||
+            codePoint == '\'' ||
+            codePoint == '$' ||
+            codePoint == '_'
+        );
+    }
+
+    /** Records one occurrence of a word on the given line at the given whole-file position. */
+    private static void recordWord(
+        Map<String, WordInfo> words,
+        String rawWord,
+        int lineNumber,
+        int wordPos
+    ) {
+        // Lower-case the extracted word itself, independently of the default locale.
+        final String word = rawWord.toLowerCase(Locale.ROOT);
+        WordInfo info = words.get(word);
+        if (info == null) {
+            info = new WordInfo(word, wordPos);
+            words.put(word, info);
+        }
+        var occurrences = info.lineOccurrences.get(lineNumber);
+        if (occurrences == null) {
+            occurrences = new IntList();
+            info.lineOccurrences.put(lineNumber, occurrences);
+        }
+        occurrences.put(wordPos);
+    }
+}
diff --git a/java/wspp/WsppPos.java b/java/wspp/WsppPos.java
new file mode 100644
index 0000000..1293a81
--- /dev/null
+++ b/java/wspp/WsppPos.java
@@ -0,0 +1,141 @@
+package wspp;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Word statistics, "Pos" variant. For every word of the input file, in order of first occurrence,
+ * prints the word, its total number of occurrences and every occurrence as
+ * {@code line:index}: the 1-based line number and the 1-based index of the word counted from the
+ * end of the file.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WsppPos {
+
+    /**
+     * Reads the input file, collects the statistics and writes them to the output file.
+     *
+     * @param args input file name followed by output file name
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("usage: java WsppPos <input file> <output file>");
+            // Stop here: continuing would fail on args[0] / args[1] below.
+            return;
+        }
+
+        final String inputFileName = args[0];
+        final String outputFileName = args[1];
+
+        // Insertion order is the order of first occurrence, which is also the output order.
+        final Map<String, WordInfo> words = new LinkedHashMap<>();
+
+        try (
+            BufferedReader reader = new BufferedReader(
+                new FileReader(inputFileName, StandardCharsets.UTF_8)
+            );
+            BufferedWriter writer = new BufferedWriter(
+                new FileWriter(outputFileName, StandardCharsets.UTF_8)
+            )
+        ) {
+            int wordPos = 1;
+            int lineNumber = 1;
+            String line;
+            while ((line = reader.readLine()) != null) {
+                wordPos = recordLine(words, line, lineNumber, wordPos);
+                lineNumber++;
+            }
+            // One past the last word; endPos - position is the index counted from the end.
+            final int endPos = wordPos;
+
+            for (Map.Entry<String, WordInfo> wordEntry : words.entrySet()) {
+                int total = 0;
+                final StringBuilder positions = new StringBuilder();
+                for (var lineEntry : wordEntry.getValue().lineOccurrences.entrySet()) {
+                    final var occurrences = lineEntry.getValue();
+                    final int count = occurrences.getLength();
+                    total += count;
+                    for (int i = 0; i < count; i++) {
+                        positions
+                            .append(' ')
+                            .append(lineEntry.getKey())
+                            .append(':')
+                            .append(endPos - occurrences.get(i));
+                    }
+                }
+                writer.write(wordEntry.getKey() + " " + total + positions + "\n");
+            }
+        } catch (IOException e) {
+            System.err.println("I/O error: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Splits one line into words and records every word.
+     *
+     * @param words statistics collected so far, updated in place
+     * @param line line text without its terminator
+     * @param lineNumber 1-based number of the line
+     * @param wordPos 1-based whole-file index of the next word
+     * @return whole-file index of the first word after this line
+     */
+    private static int recordLine(
+        Map<String, WordInfo> words,
+        String line,
+        int lineNumber,
+        int wordPos
+    ) {
+        final StringBuilder word = new StringBuilder();
+        // Iterate by code point so letters outside the BMP are not seen as lone surrogates.
+        for (int i = 0; i < line.length(); ) {
+            final int codePoint = line.codePointAt(i);
+            i += Character.charCount(codePoint);
+            if (isWordPart(codePoint)) {
+                word.appendCodePoint(codePoint);
+            } else if (word.length() > 0) {
+                recordWord(words, word.toString(), lineNumber, wordPos++);
+                word.setLength(0);
+            }
+        }
+        if (word.length() > 0) {
+            recordWord(words, word.toString(), lineNumber, wordPos++);
+        }
+        return wordPos;
+    }
+
+    /** Tells whether a code point is part of a word: letter, digit, dash, apostrophe, '$', '_'. */
+    private static boolean isWordPart(int codePoint) {
+        return (
+            Character.isLetter(codePoint) ||
+            Character.isDigit(codePoint) ||
+            Character.getType(codePoint) == Character.DASH_PUNCTUATION ||
+            codePoint == '\'' ||
+            codePoint == '$' ||
+            codePoint == '_'
+        );
+    }
+
+    /** Records one occurrence of a word on the given line at the given whole-file position. */
+    private static void recordWord(
+        Map<String, WordInfo> words,
+        String rawWord,
+        int lineNumber,
+        int wordPos
+    ) {
+        // Lower-case the extracted word itself, independently of the default locale.
+        final String word = rawWord.toLowerCase(Locale.ROOT);
+        WordInfo info = words.get(word);
+        if (info == null) {
+            info = new WordInfo(word);
+            words.put(word, info);
+        }
+        var occurrences = info.lineOccurrences.get(lineNumber);
+        if (occurrences == null) {
+            occurrences = new IntList();
+            info.lineOccurrences.put(lineNumber, occurrences);
+        }
+        occurrences.put(wordPos);
+    }
+}
diff --git a/java/wspp/WsppPosition.java b/java/wspp/WsppPosition.java
new file mode 100644
index 0000000..2febbbb
--- /dev/null
+++ b/java/wspp/WsppPosition.java
@@ -0,0 +1,147 @@
+package wspp;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Word statistics, "Position" variant. For every word of the input file prints the word, its
+ * total number of occurrences and every occurrence as {@code line:index}: the 1-based line number
+ * and the 1-based index of the word counted from the end of the file. Words are ordered by
+ * length; words of equal length keep the order of their first occurrence.
+ *
+ * @author Nikita Doschennikov (me@fymio.us)
+ */
+public class WsppPosition {
+
+    /**
+     * Reads the input file, collects the statistics and writes them to the output file.
+     *
+     * @param args input file name followed by output file name
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("usage: java WsppPosition <input file> <output file>");
+            // Stop here: continuing would fail on args[0] / args[1] below.
+            return;
+        }
+
+        final String inputFileName = args[0];
+        final String outputFileName = args[1];
+
+        // Insertion order is the order of first occurrence.
+        final Map<String, WordInfo> words = new LinkedHashMap<>();
+
+        try (
+            BufferedReader reader = new BufferedReader(
+                new FileReader(inputFileName, StandardCharsets.UTF_8)
+            );
+            BufferedWriter writer = new BufferedWriter(
+                new FileWriter(outputFileName, StandardCharsets.UTF_8)
+            )
+        ) {
+            int wordPos = 1;
+            int lineNumber = 1;
+            String line;
+            while ((line = reader.readLine()) != null) {
+                wordPos = recordLine(words, line, lineNumber, wordPos);
+                lineNumber++;
+            }
+            // One past the last word; endPos - position is the index counted from the end.
+            final int endPos = wordPos;
+
+            final List<WordInfo> sortedWords = new ArrayList<>(words.values());
+            sortedWords.sort(
+                Comparator.comparingInt((WordInfo w) -> w.word.length())
+                    .thenComparingInt(w -> w.firstOccurrence)
+            );
+
+            for (WordInfo info : sortedWords) {
+                int total = 0;
+                final StringBuilder positions = new StringBuilder();
+                for (var lineEntry : info.lineOccurrences.entrySet()) {
+                    final var occurrences = lineEntry.getValue();
+                    final int count = occurrences.getLength();
+                    total += count;
+                    for (int i = 0; i < count; i++) {
+                        positions
+                            .append(' ')
+                            .append(lineEntry.getKey())
+                            .append(':')
+                            .append(endPos - occurrences.get(i));
+                    }
+                }
+                writer.write(info.word + " " + total + positions + "\n");
+            }
+        } catch (IOException e) {
+            System.err.println("I/O error: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Splits one line into words and records every word.
+     *
+     * @param words statistics collected so far, updated in place
+     * @param line line text without its terminator
+     * @param lineNumber 1-based number of the line
+     * @param wordPos 1-based whole-file index of the next word
+     * @return whole-file index of the first word after this line
+     */
+    private static int recordLine(
+        Map<String, WordInfo> words,
+        String line,
+        int lineNumber,
+        int wordPos
+    ) {
+        final StringBuilder word = new StringBuilder();
+        // Iterate by code point so letters outside the BMP are not seen as lone surrogates.
+        for (int i = 0; i < line.length(); ) {
+            final int codePoint = line.codePointAt(i);
+            i += Character.charCount(codePoint);
+            if (isWordPart(codePoint)) {
+                word.appendCodePoint(codePoint);
+            } else if (word.length() > 0) {
+                recordWord(words, word.toString(), lineNumber, wordPos++);
+                word.setLength(0);
+            }
+        }
+        if (word.length() > 0) {
+            recordWord(words, word.toString(), lineNumber, wordPos++);
+        }
+        return wordPos;
+    }
+
+    /** Tells whether a code point is part of a word: letter, digit, dash, apostrophe, '$', '_'. */
+    private static boolean isWordPart(int codePoint) {
+        return (
+            Character.isLetter(codePoint) ||
+            Character.isDigit(codePoint) ||
+            Character.getType(codePoint) == Character.DASH_PUNCTUATION ||
+            codePoint == '\'' ||
+            codePoint == '$' ||
+            codePoint == '_'
+        );
+    }
+
+    /** Records one occurrence of a word on the given line at the given whole-file position. */
+    private static void recordWord(
+        Map<String, WordInfo> words,
+        String rawWord,
+        int lineNumber,
+        int wordPos
+    ) {
+        // Lower-case the extracted word itself, independently of the default locale.
+        final String word = rawWord.toLowerCase(Locale.ROOT);
+        WordInfo info = words.get(word);
+        if (info == null) {
+            info = new WordInfo(word, wordPos);
+            words.put(word, info);
+        }
+        var occurrences = info.lineOccurrences.get(lineNumber);
+        if (occurrences == null) {
+            occurrences = new IntList();
+            info.lineOccurrences.put(lineNumber, occurrences);
+        }
+        occurrences.put(wordPos);
+    }
+}
diff --git a/java/wspp/WsppTest.java b/java/wspp/WsppTest.java
new file mode 100644
index 0000000..399a043
--- /dev/null
+++ b/java/wspp/WsppTest.java
@@ -0,0 +1,66 @@
+package wspp;
+
+import base.Named;
+import base.Selector;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.function.IntFunction;
+import java.util.stream.IntStream;
+
+/**
+ * Declares the Wspp test variants: each combines a word order, a per-line occurrence selection,
+ * an occurrence format and a set of extra word characters.
+ *
+ * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
+ */
+public final class WsppTest {
+
+    // === Base
+    /** Every pair of entries compares equal, so a stable sort with it keeps the input order. */
+    private static final Named<Comparator<Map.Entry<String, Integer>>> INPUT =
+        Named.of("", Comparator.comparingInt(e -> 0));
+    /** Selects every occurrence of a word in a line. */
+    private static final Named<IntFunction<IntStream>> ALL =
+        Named.of("", size -> IntStream.range(0, size));
+    /** Reports an occurrence as the 1-based index of the word in the whole text. */
+    private static final Named<WsppTester.Extractor<Object>> WSPP =
+        Named.of("", (r, l, L, g, G) -> g);
+    /** No extra word characters. */
+    private static final Named<String> NONE = Named.of("", "");
+
+    // === 3637
+    /** Orders words by length. */
+    private static final Named<Comparator<Map.Entry<String, Integer>>> LENGTH =
+        Named.of("", Map.Entry.comparingByKey(Comparator.comparingInt(String::length)));
+    /** Selects the last occurrence of a word in a line. */
+    private static final Named<IntFunction<IntStream>> LAST =
+        Named.of("Last", size -> IntStream.of(size - 1));
+    /** Base64-encoded extra word characters, appended to the pattern built by the tester. */
+    private static final Named<String> JAVA = Named.of("", "XHB7TmR9JF8=");
+
+    // === 3839
+    /** Selects the occurrence at index {@code size / 2} of a word in a line. */
+    private static final Named<IntFunction<IntStream>> MIDDLE =
+        Named.of("Middle", size -> IntStream.of(size / 2));
+
+    // === 3435
+    /** Reports an occurrence as {@code line:index}, the index counted from the end of the text. */
+    public static final WsppTester.Extractor<Object> POSITION =
+        (r, l, L, g, G) -> r + ":" + (G - g + 1);
+
+    // === Common
+    public static final Selector SELECTOR = new Selector(WsppTester.class)
+        .variant("Base", WsppTester.variant(INPUT, ALL, WSPP, NONE))
+        .variant("3637", WsppTester.variant(LENGTH, LAST, WSPP, JAVA))
+        .variant("3839", WsppTester.variant(LENGTH, MIDDLE, WSPP, JAVA))
+        .variant("3435", WsppTester.variant(LENGTH, ALL, Named.of("Position", POSITION), JAVA))
+        .variant("3233", WsppTester.variant(INPUT, ALL, Named.of("Pos", POSITION), JAVA))
+        .variant("4142", WsppTester.variant(LENGTH, LAST, WSPP, JAVA))
+        .variant("4749", WsppTester.variant(LENGTH, ALL, Named.of("Position", POSITION), JAVA));
+
+    private WsppTest() {}
+
+    public static void main(final String... args) {
+        SELECTOR.main(args);
+    }
+}
diff --git a/java/wspp/WsppTester.java b/java/wspp/WsppTester.java
new file mode 100644
index 0000000..5d176c4
--- /dev/null
+++ b/java/wspp/WsppTester.java
@@ -0,0 +1,202 @@
+package wspp;
+
+import base.ExtendedRandom;
+import base.Named;
+import base.Pair;
+import base.TestCounter;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.IntFunction;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import wordStat.WordStatChecker;
+import wordStat.WordStatTester;
+
+/**
+ * Expected-output computation and test inputs shared by all Wspp variants.
+ *
+ * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
+ */
+public final class WsppTester {
+
+    private WsppTester() {}
+
+    /**
+     * Creates the test for one variant. The name handed to the checker is {@code "Wspp"} followed
+     * by the names of the four arguments.
+     *
+     * @param comparator order of the words in the expected output
+     * @param selector maps the number of occurrences of a word in a line to the reported indices
+     * @param extractor formats a single occurrence
+     * @param extra Base64-encoded tail of the word-delimiter character class
+     * @return action that runs the fixed and random tests with the given counter
+     */
+    public static Consumer<TestCounter> variant(
+        final Named<Comparator<Map.Entry<String, Integer>>> comparator,
+        final Named<IntFunction<IntStream>> selector,
+        final Named<Extractor<Object>> extractor,
+        final Named<String> extra
+    ) {
+        // Stream "magic" code. You do not expect to understand it
+        return counter ->
+            WordStatChecker.test(
+                counter,
+                "Wspp" + comparator.name() + selector.name() + extractor.name() + extra.name(),
+                text -> {
+                    // Total number of occurrences per lower-cased word, in order of first occurrence.
+                    final Map<String, Integer> totals = Arrays.stream(text)
+                        .flatMap(Arrays::stream)
+                        .map(word -> word.toLowerCase(Locale.ROOT))
+                        .collect(
+                            Collectors.toMap(
+                                Function.identity(),
+                                k -> 1,
+                                Integer::sum,
+                                LinkedHashMap::new
+                            )
+                        );
+                    final int[] lengths = Arrays.stream(text).mapToInt(a -> a.length).toArray();
+                    // sizes[i] is the number of words before line i; the last element is the total.
+                    final int[] sizes = new int[lengths.length + 1];
+                    int start = 0;
+                    for (int i = 0; i < lengths.length; i++) {
+                        sizes[i] = start;
+                        start += lengths[i];
+                    }
+                    sizes[lengths.length] = start;
+
+                    // Per word: the selected occurrences of every line, each prefixed by a space.
+                    final Map<String, String> selected = IntStream.range(0, text.length)
+                        .boxed()
+                        .flatMap(r -> {
+                            final String[] line = text[r];
+                            return IntStream.range(0, line.length)
+                                .boxed()
+                                .collect(
+                                    Collectors.groupingBy(
+                                        w -> line[w].toLowerCase(Locale.ROOT),
+                                        Collectors.collectingAndThen(
+                                            Collectors.mapping(
+                                                w ->
+                                                    extractor
+                                                        .value()
+                                                        .select(
+                                                            r + 1,
+                                                            w + 1,
+                                                            line.length,
+                                                            sizes[r] + w + 1,
+                                                            sizes[lengths.length]
+                                                        ),
+                                                Collectors.toUnmodifiableList()
+                                            ),
+                                            list ->
+                                                selector
+                                                    .value()
+                                                    .apply(list.size())
+                                                    .mapToObj(list::get)
+                                                    .toList()
+                                        )
+                                    )
+                                )
+                                .entrySet()
+                                .stream();
+                        })
+                        .collect(
+                            Collectors.groupingBy(
+                                Map.Entry::getKey,
+                                Collectors.flatMapping(
+                                    e -> e.getValue().stream(),
+                                    Collectors.mapping(
+                                        String::valueOf,
+                                        Collectors.mapping(" "::concat, Collectors.joining())
+                                    )
+                                )
+                            )
+                        );
+                    return totals
+                        .entrySet()
+                        .stream()
+                        .sorted(comparator.value())
+                        .map(e -> Pair.of(e.getKey(), e.getValue() + selected.get(e.getKey())))
+                        .collect(Collectors.toList());
+                },
+                checker -> {
+                    final Pattern pattern = Pattern.compile(
+                        new String(
+                            Base64.getDecoder().decode("W15ccHtJc0xldHRlcn0nXHB7UGR9" + extra.value()),
+                            StandardCharsets.US_ASCII
+                        ) +
+                        "]+"
+                    );
+                    final String good = String.join("", pattern.split(WordStatTester.POST_LOWER));
+
+                    checker.test(pattern, "To be, or not to be, that is the question:");
+                    checker.test(
+                        pattern,
+                        "Monday's child is fair of face.",
+                        "Tuesday's child is full of grace."
+                    );
+                    checker.test(
+                        pattern,
+                        "Шалтай-Болтай",
+                        "Сидел на стене.",
+                        "Шалтай-Болтай",
+                        "Свалился во сне."
+                    );
+
+                    checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.SIMPLE_DELIMITERS);
+                    checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.SIMPLE_DELIMITERS);
+                    checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.SIMPLE_DELIMITERS);
+                    checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.SIMPLE_DELIMITERS);
+                    checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.ADVANCED_DELIMITERS);
+                    checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.ADVANCED_DELIMITERS);
+                    checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.ADVANCED_DELIMITERS);
+                    checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.ADVANCED_DELIMITERS);
+
+                    checker.randomTest(10, 20, 10, 3, good, WordStatChecker.SIMPLE_DELIMITERS);
+                    checker.randomTest(10, 20, 10, 3, good, WordStatChecker.ADVANCED_DELIMITERS);
+
+                    final int d = TestCounter.DENOMINATOR;
+                    final int d2 = TestCounter.DENOMINATOR2;
+                    checker.randomTest(
+                        100,
+                        1000 / d,
+                        1000 / d2,
+                        1000 / d2,
+                        WordStatChecker.ALL,
+                        WordStatChecker.ADVANCED_DELIMITERS
+                    );
+                    checker.randomTest(10, 1000 / d, 1000 / d2, 1000 / d2, good, WordStatChecker.ADVANCED_DELIMITERS);
+
+                    checker.randomTest(10000 / d, 20, 10, 5, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
+                    checker.randomTest(1000000 / d, 2, 2, 1, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
+
+                    checker.test(pattern, WordStatTester.PRE_LOWER);
+                    checker.test(pattern, WordStatTester.POST_LOWER);
+                }
+            );
+    }
+
+    /**
+     * Formats one occurrence of a word.
+     *
+     * @param <T> type of the formatted occurrence
+     */
+    @FunctionalInterface
+    public interface Extractor<T> {
+        /**
+         * Formats the occurrence described by the arguments.
+         *
+         * @param l 1-based line number
+         * @param li 1-based index of the word in its line
+         * @param lt number of words in the line
+         * @param gi 1-based index of the word in the whole text
+         * @param gt number of words in the whole text
+         * @return formatted occurrence
+         */
+        T select(int l, int li, int lt, int gi, int gt);
+    }
+}