package wspp;

import base.ExtendedRandom;
import base.Named;
import base.Pair;
import base.TestCounter;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import wordStat.WordStatChecker;
import wordStat.WordStatTester;

/**
 * Factory of "Wspp" test variants: each variant pairs a reference solution
 * (word counts plus selected per-occurrence values) with a fixed suite of
 * hand-written and random inputs.
 *
 * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
 */
public final class WsppTester {

    /** Utility class; not instantiable. */
    private WsppTester() {}

    /**
     * Creates a test variant named
     * {@code "Wspp" + comparator + selector + extractor + extra} (the names of the arguments).
     *
     * <p>The reference answer for an input {@code text} (an array of lines, each an array of
     * words) is built as follows:
     * <ol>
     *   <li>words are lower-cased with {@link Locale#ROOT} and counted;</li>
     *   <li>within every line, the occurrences of each word are converted to values by
     *       {@code extractor} and {@code selector} chooses which of those per-line values
     *       are kept (by index in the per-line list);</li>
     *   <li>kept values of a word are concatenated over all lines, in line order, each
     *       prefixed with a single space;</li>
     *   <li>words are ordered by {@code comparator}; ties keep first-occurrence order since
     *       the counts are held in a {@link LinkedHashMap} and stream sorting is stable;</li>
     *   <li>each word yields {@code Pair.of(word, count + values)}.</li>
     * </ol>
     *
     * <p>NOTE(review): the generic type arguments of this signature were reconstructed from
     * how the arguments are used in the body; {@code Consumer<TestCounter>} and
     * {@code Named<String>} should be confirmed against {@code WordStatChecker.test} and the
     * callers.
     *
     * @param comparator ordering of the {@code word -> count} entries in the answer
     * @param selector maps the number of occurrences of a word in a line to the indices
     *     (0-based, within that line's occurrence list) of the occurrences to report
     * @param extractor converts the position of an occurrence to the reported value
     * @param extra Base64 text appended to the Base64-encoded head of the delimiter character
     *     class before decoding, i.e. additional characters that are <em>not</em> delimiters
     * @param <T> type of the values produced by {@code extractor}
     * @return action running the variant against the given counter
     */
    public static <T> Consumer<TestCounter> variant(
        final Named<Comparator<Map.Entry<String, Integer>>> comparator,
        final Named<IntFunction<IntStream>> selector,
        final Named<Extractor<T>> extractor,
        final Named<String> extra
    ) {
        return counter -> WordStatChecker.test(
            counter,
            "Wspp" + comparator.name() + selector.name() + extractor.name() + extra.name(),
            text -> {
                final Map<String, Integer> totals = countWords(text);
                // offsets[r] = number of words before line r; the last element is the total.
                final int[] offsets = lineOffsets(text);
                final int total = offsets[text.length];

                final Map<String, String> selected = IntStream.range(0, text.length)
                    .boxed()
                    .flatMap(r -> {
                        final String[] line = text[r];
                        // Per line: word -> values of the occurrences chosen by the selector.
                        return IntStream.range(0, line.length)
                            .boxed()
                            .collect(
                                Collectors.groupingBy(
                                    w -> line[w].toLowerCase(Locale.ROOT),
                                    Collectors.collectingAndThen(
                                        Collectors.mapping(
                                            w -> extractor
                                                .value()
                                                .select(
                                                    r + 1,
                                                    w + 1,
                                                    line.length,
                                                    offsets[r] + w + 1,
                                                    total
                                                ),
                                            Collectors.toUnmodifiableList()
                                        ),
                                        list -> selector
                                            .value()
                                            .apply(list.size())
                                            .mapToObj(list::get)
                                            .toList()
                                    )
                                )
                            )
                            .entrySet()
                            .stream();
                    })
                    // Merge the per-line results: word -> " v1 v2 ..." in line order.
                    .collect(
                        Collectors.groupingBy(
                            Map.Entry::getKey,
                            Collectors.flatMapping(
                                e -> e.getValue().stream(),
                                Collectors.mapping(
                                    String::valueOf,
                                    Collectors.mapping(" "::concat, Collectors.joining())
                                )
                            )
                        )
                    );

                return totals
                    .entrySet()
                    .stream()
                    .sorted(comparator.value())
                    // Integer + String: the count immediately followed by " v1 v2 ...".
                    .map(e -> Pair.of(e.getKey(), e.getValue() + selected.get(e.getKey())))
                    .collect(Collectors.toList());
            },
            checker -> {
                // The literal is Base64 for "[^\p{IsLetter}'\p{Pd}"; together with the decoded
                // extra part and the closing "]+" it forms the delimiter pattern: a run of
                // characters that are neither letters, apostrophes, dash punctuation nor extras.
                final Pattern pattern = Pattern.compile(
                    new String(
                        Base64.getDecoder().decode("W15ccHtJc0xldHRlcn0nXHB7UGR9" + extra.value()),
                        StandardCharsets.US_ASCII
                    ) + "]+"
                );
                // All non-delimiter characters of POST_LOWER, used as an alphabet below.
                final String good = String.join("", pattern.split(WordStatTester.POST_LOWER));

                // Hand-written inputs.
                checker.test(pattern, "To be, or not to be, that is the question:");
                checker.test(
                    pattern,
                    "Monday's child is fair of face.",
                    "Tuesday's child is full of grace."
                );
                checker.test(
                    pattern,
                    "Шалтай-Болтай",
                    "Сидел на стене.",
                    "Шалтай-Болтай",
                    "Свалился во сне."
                );

                // Small random inputs; the meaning of the numeric arguments is defined by
                // the checker's randomTest, which is not visible in this file.
                checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.SIMPLE_DELIMITERS);
                checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.SIMPLE_DELIMITERS);
                checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.SIMPLE_DELIMITERS);
                checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.SIMPLE_DELIMITERS);
                checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.ADVANCED_DELIMITERS);
                checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.ADVANCED_DELIMITERS);
                checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.ADVANCED_DELIMITERS);
                checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.ADVANCED_DELIMITERS);
                checker.randomTest(10, 20, 10, 3, good, WordStatChecker.SIMPLE_DELIMITERS);
                checker.randomTest(10, 20, 10, 3, good, WordStatChecker.ADVANCED_DELIMITERS);

                // Larger random inputs, scaled down by the counter's denominators.
                final int d = TestCounter.DENOMINATOR;
                final int d2 = TestCounter.DENOMINATOR2;
                checker.randomTest(
                    100, 1000 / d, 1000 / d2, 1000 / d2,
                    WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS
                );
                checker.randomTest(
                    10, 1000 / d, 1000 / d2, 1000 / d2,
                    good, WordStatChecker.ADVANCED_DELIMITERS
                );
                checker.randomTest(10000 / d, 20, 10, 5, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
                checker.randomTest(1000000 / d, 2, 2, 1, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);

                checker.test(pattern, WordStatTester.PRE_LOWER);
                checker.test(pattern, WordStatTester.POST_LOWER);
            }
        );
    }

    /** Counts lower-cased words of the text, keeping the order of their first occurrence. */
    private static Map<String, Integer> countWords(final String[][] text) {
        return Arrays.stream(text)
            .flatMap(Arrays::stream)
            .map(word -> word.toLowerCase(Locale.ROOT))
            .collect(Collectors.toMap(Function.identity(), k -> 1, Integer::sum, LinkedHashMap::new));
    }

    /** Returns prefix sums of line lengths: element {@code i} is the number of words before line {@code i}. */
    private static int[] lineOffsets(final String[][] text) {
        final int[] offsets = new int[text.length + 1];
        for (int i = 0; i < text.length; i++) {
            offsets[i + 1] = offsets[i] + text[i].length;
        }
        return offsets;
    }

    /**
     * Converts the position of a word occurrence to the value reported for it.
     *
     * @param <T> type of the reported value
     */
    @FunctionalInterface
    public interface Extractor<T> {
        /**
         * Produces the value for one occurrence. The argument descriptions below reflect how
         * {@link #variant} invokes this method.
         *
         * @param l 1-based number of the line containing the occurrence
         * @param li 1-based index of the word within its line
         * @param lt number of words in that line
         * @param gi 1-based index of the word within the whole text
         * @param gt total number of words in the text
         * @return value to report for the occurrence
         */
        T select(int l, int li, int lt, int gi, int gt);
    }
}