Upload files to "java/wspp"
This commit is contained in:
289
java/wspp/WsppTester.java
Normal file
289
java/wspp/WsppTester.java
Normal file
@@ -0,0 +1,289 @@
|
||||
package wspp;
|
||||
|
||||
import base.ExtendedRandom;
|
||||
import base.Named;
|
||||
import base.Pair;
|
||||
import base.TestCounter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.IntFunction;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import wordStat.WordStatChecker;
|
||||
import wordStat.WordStatTester;
|
||||
|
||||
/**
|
||||
* @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
|
||||
*/
|
||||
public final class WsppTester {
|
||||
|
||||
private WsppTester() {}
|
||||
|
||||
public static <T> Consumer<TestCounter> variant(
|
||||
final Named<Comparator<Map.Entry<String, Integer>>> comparator,
|
||||
final Named<IntFunction<IntStream>> selector,
|
||||
final Named<Extractor<T>> extractor,
|
||||
final Named<String> extra
|
||||
) {
|
||||
// Stream "magic" code. You do not expect to understand it
|
||||
return counter ->
|
||||
WordStatChecker.test(
|
||||
counter,
|
||||
"Wspp" +
|
||||
comparator.name() +
|
||||
selector.name() +
|
||||
extractor.name() +
|
||||
extra.name(),
|
||||
text -> {
|
||||
final Map<String, Integer> totals = Arrays.stream(text)
|
||||
.flatMap(Arrays::stream)
|
||||
.map(word -> word.toLowerCase(Locale.ROOT))
|
||||
.collect(
|
||||
Collectors.toMap(
|
||||
Function.identity(),
|
||||
k -> 1,
|
||||
Integer::sum,
|
||||
LinkedHashMap::new
|
||||
)
|
||||
);
|
||||
final int[] lengths = Arrays.stream(text)
|
||||
.mapToInt(a -> a.length)
|
||||
.toArray();
|
||||
final int[] sizes = new int[lengths.length + 1];
|
||||
int start = 0;
|
||||
for (int i = 0; i < lengths.length; i++) {
|
||||
sizes[i] = start;
|
||||
start += lengths[i];
|
||||
}
|
||||
sizes[lengths.length] = start;
|
||||
|
||||
final Map<String, String> selected = IntStream.range(
|
||||
0,
|
||||
text.length
|
||||
)
|
||||
.boxed()
|
||||
.flatMap(r -> {
|
||||
final String[] line = text[r];
|
||||
return IntStream.range(0, line.length)
|
||||
.boxed()
|
||||
.collect(
|
||||
Collectors.groupingBy(
|
||||
w -> line[w].toLowerCase(Locale.ROOT),
|
||||
Collectors.collectingAndThen(
|
||||
Collectors.mapping(
|
||||
w ->
|
||||
extractor
|
||||
.value()
|
||||
.select(
|
||||
r + 1,
|
||||
w + 1,
|
||||
line.length,
|
||||
sizes[r] + w + 1,
|
||||
sizes[lengths.length]
|
||||
),
|
||||
Collectors.toUnmodifiableList()
|
||||
),
|
||||
list ->
|
||||
selector
|
||||
.value()
|
||||
.apply(list.size())
|
||||
.mapToObj(list::get)
|
||||
.toList()
|
||||
)
|
||||
)
|
||||
)
|
||||
.entrySet()
|
||||
.stream();
|
||||
})
|
||||
.collect(
|
||||
Collectors.groupingBy(
|
||||
Map.Entry::getKey,
|
||||
Collectors.flatMapping(
|
||||
e -> e.getValue().stream(),
|
||||
Collectors.mapping(
|
||||
String::valueOf,
|
||||
Collectors.mapping(
|
||||
" "::concat,
|
||||
Collectors.joining()
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
return totals
|
||||
.entrySet()
|
||||
.stream()
|
||||
.sorted(comparator.value())
|
||||
.map(e ->
|
||||
Pair.of(
|
||||
e.getKey(),
|
||||
e.getValue() + selected.get(e.getKey())
|
||||
)
|
||||
)
|
||||
.collect(Collectors.toList());
|
||||
},
|
||||
checker -> {
|
||||
final Pattern pattern = Pattern.compile(
|
||||
new String(
|
||||
Base64.getDecoder().decode(
|
||||
"W15ccHtJc0xldHRlcn0nXHB7UGR9" +
|
||||
extra.value()
|
||||
),
|
||||
StandardCharsets.US_ASCII
|
||||
) +
|
||||
"]+"
|
||||
);
|
||||
final String good = String.join(
|
||||
"",
|
||||
pattern.split(WordStatTester.POST_LOWER)
|
||||
);
|
||||
|
||||
checker.test(
|
||||
pattern,
|
||||
"To be, or not to be, that is the question:"
|
||||
);
|
||||
checker.test(
|
||||
pattern,
|
||||
"Monday's child is fair of face.",
|
||||
"Tuesday's child is full of grace."
|
||||
);
|
||||
checker.test(
|
||||
pattern,
|
||||
"Шалтай-Болтай",
|
||||
"Сидел на стене.",
|
||||
"Шалтай-Болтай",
|
||||
"Свалился во сне."
|
||||
);
|
||||
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
ExtendedRandom.ENGLISH,
|
||||
WordStatChecker.SIMPLE_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
10,
|
||||
3,
|
||||
5,
|
||||
5,
|
||||
ExtendedRandom.RUSSIAN,
|
||||
WordStatChecker.SIMPLE_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
ExtendedRandom.GREEK,
|
||||
WordStatChecker.SIMPLE_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
WordStatChecker.DASH,
|
||||
WordStatChecker.SIMPLE_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
ExtendedRandom.ENGLISH,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
10,
|
||||
3,
|
||||
5,
|
||||
5,
|
||||
ExtendedRandom.RUSSIAN,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
ExtendedRandom.GREEK,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
3,
|
||||
10,
|
||||
10,
|
||||
3,
|
||||
WordStatChecker.DASH,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
|
||||
checker.randomTest(
|
||||
10,
|
||||
20,
|
||||
10,
|
||||
3,
|
||||
good,
|
||||
WordStatChecker.SIMPLE_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
10,
|
||||
20,
|
||||
10,
|
||||
3,
|
||||
good,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
|
||||
final int d = TestCounter.DENOMINATOR;
|
||||
final int d2 = TestCounter.DENOMINATOR2;
|
||||
checker.randomTest(
|
||||
100,
|
||||
1000 / d,
|
||||
1000 / d2,
|
||||
1000 / d2,
|
||||
WordStatChecker.ALL,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
10,
|
||||
1000 / d,
|
||||
1000 / d2,
|
||||
1000 / d2,
|
||||
good,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
|
||||
checker.randomTest(
|
||||
10000 / d,
|
||||
20,
|
||||
10,
|
||||
5,
|
||||
WordStatChecker.ALL,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
checker.randomTest(
|
||||
1000000 / d,
|
||||
2,
|
||||
2,
|
||||
1,
|
||||
WordStatChecker.ALL,
|
||||
WordStatChecker.ADVANCED_DELIMITERS
|
||||
);
|
||||
|
||||
checker.test(pattern, WordStatTester.PRE_LOWER);
|
||||
checker.test(pattern, WordStatTester.POST_LOWER);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
public interface Extractor<T> {
|
||||
T select(int l, int li, int lt, int gi, int gt);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user