Upload files to "java/wspp"

This commit is contained in:
2026-04-13 10:43:29 +03:00
parent 86978ea70a
commit 84c5957a91
5 changed files with 753 additions and 0 deletions

127
java/wspp/WsppMiddle.java Normal file
View File

@@ -0,0 +1,127 @@
package wspp;
import java.io.*;
import java.util.*;
/**
 * Word statistics, "Middle" variant.
 *
 * <p>Reads the input file, splits every line into words (maximal runs of letters,
 * digits, dash punctuation, {@code '}, {@code $} and {@code _}), lower-cases them and
 * writes one output line per distinct word: the word, its total number of occurrences,
 * and, for each line that contains the word, the file-wide 1-based index of the middle
 * occurrence on that line (element {@code count / 2} of that line's occurrences).
 * Words are written ordered by length, ties broken by first occurrence.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WsppMiddle {

    /**
     * Entry point.
     *
     * @param args exactly two elements: input file path and output file path
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println(
                "usage: java WsppMiddle <inputFilePath> <outputFilePath>"
            );
            // Without this return the args[0] access below would throw.
            return;
        }
        final String inputFileName = args[0];
        final String outputFileName = args[1];
        // Insertion order is kept so that first-occurrence order is available if needed.
        final Map<String, WordInfo> words = new LinkedHashMap<>();

        // Explicit UTF-8: do not depend on the platform default charset.
        try (
            BufferedReader reader = new BufferedReader(
                new FileReader(
                    inputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            );
            BufferedWriter writer = new BufferedWriter(
                new FileWriter(
                    outputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            )
        ) {
            int wordPos = 1;
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                wordPos = indexLine(words, line, lineNumber, wordPos);
                lineNumber++;
            }

            List<WordInfo> sortedWords = new ArrayList<>(words.values());
            sortedWords.sort(
                Comparator.comparingInt((WordInfo w) -> w.word.length())
                    .thenComparingInt(w -> w.firstOccurrence)
            );

            for (WordInfo info : sortedWords) {
                var perLine = info.lineOccurrences;
                int total = 0;
                StringBuilder middles = new StringBuilder();
                for (int key : perLine.keySet()) {
                    var occurrences = perLine.get(key);
                    total += occurrences.getLength();
                    middles
                        .append(' ')
                        .append(occurrences.get(occurrences.getLength() / 2));
                }
                writer.write(info.word + " " + total + middles);
                writer.write("\n");
            }
        } catch (IOException e) {
            // The failure may come from reading or from writing; report the cause on stderr.
            System.err.println(
                "I/O error while processing '" + inputFileName +
                "' -> '" + outputFileName + "': " + e.getMessage()
            );
        }
    }

    /**
     * Splits one line into words and records every occurrence.
     *
     * @param words      accumulated per-word statistics, updated in place
     * @param line       raw line of the input file (not lower-cased)
     * @param lineNumber 1-based number of this line
     * @param nextPos    file-wide 1-based index to assign to the first word of this line
     * @return the file-wide index to assign to the next word after this line
     */
    private static int indexLine(
        Map<String, WordInfo> words,
        String line,
        int lineNumber,
        int nextPos
    ) {
        int start = -1;
        // i == line.length() acts as a sentinel delimiter that flushes the trailing word.
        for (int i = 0; i <= line.length(); i++) {
            if (i < line.length() && isWordChar(line.charAt(i))) {
                if (start < 0) {
                    start = i;
                }
            } else if (start >= 0) {
                // Lower-case after splitting and with a fixed locale, so that case
                // mapping can neither change word boundaries nor depend on the JVM locale.
                String word = line.substring(start, i).toLowerCase(Locale.ROOT);
                addOccurrence(words, word, lineNumber, nextPos++);
                start = -1;
            }
        }
        return nextPos;
    }

    /** Tells whether {@code c} belongs to a word rather than to a delimiter. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.isDigit(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\''
            || c == '$'
            || c == '_';
    }

    /** Records that {@code word} occurs on {@code lineNumber} at file-wide index {@code position}. */
    private static void addOccurrence(
        Map<String, WordInfo> words,
        String word,
        int lineNumber,
        int position
    ) {
        WordInfo info = words.get(word);
        if (info == null) {
            info = new WordInfo(word, position);
            words.put(word, info);
        }
        var perLine = info.lineOccurrences;
        if (!perLine.containsKey(lineNumber)) {
            perLine.put(lineNumber, new IntList());
        }
        perLine.get(lineNumber).put(position);
    }
}

119
java/wspp/WsppPos.java Normal file
View File

@@ -0,0 +1,119 @@
package wspp;
import java.io.*;
import java.util.*;
/**
 * Word statistics, "Pos" variant.
 *
 * <p>Reads the input file, splits every line into words (maximal runs of letters,
 * digits, dash punctuation, {@code '}, {@code $} and {@code _}), lower-cases them and
 * writes one output line per distinct word, in order of first appearance: the word,
 * its total number of occurrences, and every occurrence as
 * {@code <lineNumber>:<position>}, where the position is the 1-based index of the
 * word counted from the end of the file.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WsppPos {

    /**
     * Entry point.
     *
     * @param args exactly two elements: input file path and output file path
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println(
                "usage: java WsppPos <inputFilePath> <outputFilePath>"
            );
            // Without this return the args[0] access below would throw.
            return;
        }
        final String inputFileName = args[0];
        final String outputFileName = args[1];
        // LinkedHashMap: output order is the order of first appearance.
        final Map<String, WordInfo> words = new LinkedHashMap<>();

        // Explicit UTF-8: do not depend on the platform default charset.
        try (
            BufferedReader reader = new BufferedReader(
                new FileReader(
                    inputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            );
            BufferedWriter writer = new BufferedWriter(
                new FileWriter(
                    outputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            )
        ) {
            int wordPos = 1;
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                wordPos = indexLine(words, line, lineNumber, wordPos);
                lineNumber++;
            }
            // wordPos is the index the next word would get, i.e. one past the total.
            final int totalWords = wordPos - 1;

            for (Map.Entry<String, WordInfo> entry : words.entrySet()) {
                var perLine = entry.getValue().lineOccurrences;
                int total = 0;
                StringBuilder positions = new StringBuilder();
                for (int key : perLine.keySet()) {
                    var occurrences = perLine.get(key);
                    total += occurrences.getLength();
                    for (int i = 0; i < occurrences.getLength(); i++) {
                        // Convert the forward 1-based index into a 1-based index from the end.
                        int fromEnd = totalWords - occurrences.get(i) + 1;
                        positions.append(' ').append(key).append(':').append(fromEnd);
                    }
                }
                writer.write(entry.getKey() + " " + total + positions);
                writer.write("\n");
            }
        } catch (IOException e) {
            // The failure may come from reading or from writing; report the cause on stderr.
            System.err.println(
                "I/O error while processing '" + inputFileName +
                "' -> '" + outputFileName + "': " + e.getMessage()
            );
        }
    }

    /**
     * Splits one line into words and records every occurrence.
     *
     * @param words      accumulated per-word statistics, updated in place
     * @param line       raw line of the input file (not lower-cased)
     * @param lineNumber 1-based number of this line
     * @param nextPos    file-wide 1-based index to assign to the first word of this line
     * @return the file-wide index to assign to the next word after this line
     */
    private static int indexLine(
        Map<String, WordInfo> words,
        String line,
        int lineNumber,
        int nextPos
    ) {
        int start = -1;
        // i == line.length() acts as a sentinel delimiter that flushes the trailing word.
        for (int i = 0; i <= line.length(); i++) {
            if (i < line.length() && isWordChar(line.charAt(i))) {
                if (start < 0) {
                    start = i;
                }
            } else if (start >= 0) {
                // Lower-case after splitting and with a fixed locale, so that case
                // mapping can neither change word boundaries nor depend on the JVM locale.
                String word = line.substring(start, i).toLowerCase(Locale.ROOT);
                addOccurrence(words, word, lineNumber, nextPos++);
                start = -1;
            }
        }
        return nextPos;
    }

    /** Tells whether {@code c} belongs to a word rather than to a delimiter. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.isDigit(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\''
            || c == '$'
            || c == '_';
    }

    /** Records that {@code word} occurs on {@code lineNumber} at file-wide index {@code position}. */
    private static void addOccurrence(
        Map<String, WordInfo> words,
        String word,
        int lineNumber,
        int position
    ) {
        WordInfo info = words.get(word);
        if (info == null) {
            info = new WordInfo(word);
            words.put(word, info);
        }
        var perLine = info.lineOccurrences;
        if (!perLine.containsKey(lineNumber)) {
            perLine.put(lineNumber, new IntList());
        }
        perLine.get(lineNumber).put(position);
    }
}

129
java/wspp/WsppPosition.java Normal file
View File

@@ -0,0 +1,129 @@
package wspp;
import java.io.*;
import java.util.*;
/**
 * Word statistics, "Position" variant.
 *
 * <p>Reads the input file, splits every line into words (maximal runs of letters,
 * digits, dash punctuation, {@code '}, {@code $} and {@code _}), lower-cases them and
 * writes one output line per distinct word: the word, its total number of occurrences,
 * and every occurrence as {@code <lineNumber>:<position>}, where the position is the
 * 1-based index of the word counted from the end of the file.
 * Words are written ordered by length, ties broken by first occurrence.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WsppPosition {

    /**
     * Entry point.
     *
     * @param args exactly two elements: input file path and output file path
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println(
                "usage: java WsppPosition <inputFilePath> <outputFilePath>"
            );
            // Without this return the args[0] access below would throw.
            return;
        }
        final String inputFileName = args[0];
        final String outputFileName = args[1];
        final Map<String, WordInfo> words = new LinkedHashMap<>();

        // Explicit UTF-8: do not depend on the platform default charset.
        try (
            BufferedReader reader = new BufferedReader(
                new FileReader(
                    inputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            );
            BufferedWriter writer = new BufferedWriter(
                new FileWriter(
                    outputFileName,
                    java.nio.charset.StandardCharsets.UTF_8
                )
            )
        ) {
            int wordPos = 1;
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                wordPos = indexLine(words, line, lineNumber, wordPos);
                lineNumber++;
            }
            // wordPos is the index the next word would get, i.e. one past the total.
            final int totalWords = wordPos - 1;

            List<WordInfo> sortedWords = new ArrayList<>(words.values());
            sortedWords.sort(
                Comparator.comparingInt((WordInfo w) -> w.word.length())
                    .thenComparingInt(w -> w.firstOccurrence)
            );

            for (WordInfo info : sortedWords) {
                var perLine = info.lineOccurrences;
                int total = 0;
                StringBuilder positions = new StringBuilder();
                for (int key : perLine.keySet()) {
                    var occurrences = perLine.get(key);
                    total += occurrences.getLength();
                    for (int i = 0; i < occurrences.getLength(); i++) {
                        // Convert the forward 1-based index into a 1-based index from the end.
                        int fromEnd = totalWords - occurrences.get(i) + 1;
                        positions.append(' ').append(key).append(':').append(fromEnd);
                    }
                }
                writer.write(info.word + " " + total + positions);
                writer.write("\n");
            }
        } catch (IOException e) {
            // The failure may come from reading or from writing; report the cause on stderr.
            System.err.println(
                "I/O error while processing '" + inputFileName +
                "' -> '" + outputFileName + "': " + e.getMessage()
            );
        }
    }

    /**
     * Splits one line into words and records every occurrence.
     *
     * @param words      accumulated per-word statistics, updated in place
     * @param line       raw line of the input file (not lower-cased)
     * @param lineNumber 1-based number of this line
     * @param nextPos    file-wide 1-based index to assign to the first word of this line
     * @return the file-wide index to assign to the next word after this line
     */
    private static int indexLine(
        Map<String, WordInfo> words,
        String line,
        int lineNumber,
        int nextPos
    ) {
        int start = -1;
        // i == line.length() acts as a sentinel delimiter that flushes the trailing word.
        for (int i = 0; i <= line.length(); i++) {
            if (i < line.length() && isWordChar(line.charAt(i))) {
                if (start < 0) {
                    start = i;
                }
            } else if (start >= 0) {
                // Lower-case after splitting and with a fixed locale, so that case
                // mapping can neither change word boundaries nor depend on the JVM locale.
                String word = line.substring(start, i).toLowerCase(Locale.ROOT);
                addOccurrence(words, word, lineNumber, nextPos++);
                start = -1;
            }
        }
        return nextPos;
    }

    /** Tells whether {@code c} belongs to a word rather than to a delimiter. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.isDigit(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\''
            || c == '$'
            || c == '_';
    }

    /** Records that {@code word} occurs on {@code lineNumber} at file-wide index {@code position}. */
    private static void addOccurrence(
        Map<String, WordInfo> words,
        String word,
        int lineNumber,
        int position
    ) {
        WordInfo info = words.get(word);
        if (info == null) {
            info = new WordInfo(word, position);
            words.put(word, info);
        }
        var perLine = info.lineOccurrences;
        if (!perLine.containsKey(lineNumber)) {
            perLine.put(lineNumber, new IntList());
        }
        perLine.get(lineNumber).put(position);
    }
}

89
java/wspp/WsppTest.java Normal file
View File

@@ -0,0 +1,89 @@
package wspp;
import base.Named;
import base.Selector;
import java.util.Comparator;
import java.util.Map;
import java.util.function.IntFunction;
import java.util.stream.IntStream;
/**
 * Launcher that registers the Wspp test variants and dispatches to the one
 * chosen on the command line via {@link Selector}.
 *
 * <p>Each variant is assembled from four named parts: a word ordering, a selector of
 * occurrences within a line, an extractor producing the reported value, and an extra
 * Base64 fragment passed to {@link WsppTester#variant}. The parts' names are
 * concatenated by the tester after the {@code "Wspp"} prefix.
 *
 * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
 */
public final class WsppTest {
    // === Base

    /** Ordering in which all entries compare equal. */
    private static final Named<Comparator<Map.Entry<String, Integer>>> INPUT =
        Named.of("", Comparator.comparingInt(entry -> 0));

    /** Selects every occurrence index {@code 0 .. count-1}. */
    private static final Named<IntFunction<IntStream>> ALL = Named.of(
        "",
        count -> IntStream.range(0, count)
    );

    /** Reports the fourth extractor argument (the global index) unchanged. */
    private static final Named<WsppTester.Extractor<Object>> WSPP = Named.of(
        "",
        (line, indexInLine, lineTotal, globalIndex, globalTotal) -> globalIndex
    );

    /** No extra fragment. */
    private static final Named<String> NONE = Named.of("", "");

    // === 3637

    /** Orders entries by the length of the word (the entry key). */
    private static final Named<Comparator<Map.Entry<String, Integer>>> LENGTH =
        Named.of(
            "",
            Comparator.comparingInt(
                (Map.Entry<String, Integer> entry) -> entry.getKey().length()
            )
        );

    /** Selects only the last occurrence index, {@code count - 1}. */
    private static final Named<IntFunction<IntStream>> LAST = Named.of(
        "Last",
        count -> IntStream.of(count - 1)
    );

    /** Extra Base64 fragment; the literal decodes to {@code \p{Nd}$_}. */
    private static final Named<String> JAVA = Named.of("", "XHB7TmR9JF8=");

    // === 3839

    /** Selects only the middle occurrence index, {@code count / 2}. */
    private static final Named<IntFunction<IntStream>> MIDDLE = Named.of(
        "Middle",
        count -> IntStream.of(count / 2)
    );

    // === 3435

    /**
     * Reports {@code <line>:<position>}, where the position is the global index
     * mirrored against the global total ({@code total - index + 1}).
     */
    public static final WsppTester.Extractor<String> POSITION = (
        line,
        indexInLine,
        lineTotal,
        globalIndex,
        globalTotal
    ) -> line + ":" + (globalTotal - globalIndex + 1);

    // === Common

    /** All registered variants, keyed by the variant name given on the command line. */
    public static final Selector SELECTOR = new Selector(WsppTester.class)
        .variant("Base", WsppTester.variant(INPUT, ALL, WSPP, NONE))
        .variant("3637", WsppTester.variant(LENGTH, LAST, WSPP, JAVA))
        .variant("3839", WsppTester.variant(LENGTH, MIDDLE, WSPP, JAVA))
        .variant(
            "3435",
            WsppTester.variant(LENGTH, ALL, Named.of("Position", POSITION), JAVA)
        )
        .variant(
            "3233",
            WsppTester.variant(INPUT, ALL, Named.of("Pos", POSITION), JAVA)
        )
        .variant("4142", WsppTester.variant(LENGTH, LAST, WSPP, JAVA))
        .variant(
            "4749",
            WsppTester.variant(LENGTH, ALL, Named.of("Position", POSITION), JAVA)
        );

    /** Not instantiable: static launcher only. */
    private WsppTest() {}

    /**
     * Runs the variant(s) named in {@code args}.
     *
     * @param args command-line arguments forwarded to {@link Selector#main}
     */
    public static void main(final String... args) {
        SELECTOR.main(args);
    }
}

289
java/wspp/WsppTester.java Normal file
View File

@@ -0,0 +1,289 @@
package wspp;
import base.ExtendedRandom;
import base.Named;
import base.Pair;
import base.TestCounter;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import wordStat.WordStatChecker;
import wordStat.WordStatTester;
/**
 * Builds parameterised test variants for the Wspp family of programs.
 *
 * <p>A variant is described by a word ordering, a selector of occurrence indices
 * within a line, an extractor that turns an occurrence into the reported value, and
 * an extra Base64 fragment that extends the delimiter pattern.
 *
 * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
 */
public final class WsppTester {
// Static factory holder; never instantiated.
private WsppTester() {}
/**
 * Creates a test variant.
 *
 * @param comparator ordering applied to the (word, total count) entries of the answer
 * @param selector maps the number of occurrences of a word on one line to the
 * indices of the occurrences that must be reported for that line
 * @param extractor converts one occurrence into the value to report
 * @param extra Base64 text appended to the Base64-encoded pattern prefix before decoding
 * @param <T> type of the value produced by the extractor
 * @return a test action to be run against a {@link TestCounter}
 */
public static <T> Consumer<TestCounter> variant(
final Named<Comparator<Map.Entry<String, Integer>>> comparator,
final Named<IntFunction<IntStream>> selector,
final Named<Extractor<T>> extractor,
final Named<String> extra
) {
// Stream "magic" code. You do not expect to understand it
return counter ->
WordStatChecker.test(
counter,
// Name built from the variant parts, e.g. "Wspp" + "Middle".
// NOTE(review): presumably the name of the solution class under test — confirm in WordStatChecker.
"Wspp" +
comparator.name() +
selector.name() +
extractor.name() +
extra.name(),
// Reference answer: text is an array of lines, each an array of words.
text -> {
// Total occurrences per lower-cased word; LinkedHashMap keeps first-appearance order.
final Map<String, Integer> totals = Arrays.stream(text)
.flatMap(Arrays::stream)
.map(word -> word.toLowerCase(Locale.ROOT))
.collect(
Collectors.toMap(
Function.identity(),
k -> 1,
Integer::sum,
LinkedHashMap::new
)
);
// Number of words on each line.
final int[] lengths = Arrays.stream(text)
.mapToInt(a -> a.length)
.toArray();
// Prefix sums: sizes[i] is the number of words before line i,
// sizes[lengths.length] is the total number of words.
final int[] sizes = new int[lengths.length + 1];
int start = 0;
for (int i = 0; i < lengths.length; i++) {
sizes[i] = start;
start += lengths[i];
}
sizes[lengths.length] = start;
// For every word: the reported values of the selected occurrences,
// each prefixed with a space and concatenated.
final Map<String, String> selected = IntStream.range(
0,
text.length
)
.boxed()
.flatMap(r -> {
// Per line r: group word indices by lower-cased word.
final String[] line = text[r];
return IntStream.range(0, line.length)
.boxed()
.collect(
Collectors.groupingBy(
w -> line[w].toLowerCase(Locale.ROOT),
Collectors.collectingAndThen(
Collectors.mapping(
// Arguments: 1-based line number, 1-based index in line,
// words in line, 1-based global index, total words.
w ->
extractor
.value()
.select(
r + 1,
w + 1,
line.length,
sizes[r] + w + 1,
sizes[lengths.length]
),
Collectors.toUnmodifiableList()
),
// Keep only the occurrences whose indices the selector picks
// for this line's occurrence count.
list ->
selector
.value()
.apply(list.size())
.mapToObj(list::get)
.toList()
)
)
)
.entrySet()
.stream();
})
// Merge the per-line results of the same word into one string.
.collect(
Collectors.groupingBy(
Map.Entry::getKey,
Collectors.flatMapping(
e -> e.getValue().stream(),
Collectors.mapping(
String::valueOf,
Collectors.mapping(
" "::concat,
Collectors.joining()
)
)
)
)
);
// Answer: (word, "<total> <v1> <v2> ...") pairs ordered by the variant's comparator.
return totals
.entrySet()
.stream()
.sorted(comparator.value())
.map(e ->
Pair.of(
e.getKey(),
e.getValue() + selected.get(e.getKey())
)
)
.collect(Collectors.toList());
},
// Test scenarios run against the checker.
checker -> {
// The Base64 prefix decodes to [^\p{IsLetter}'\p{Pd} ; the decoded extra
// fragment and the closing "]+" complete a negated character class,
// so the pattern matches runs of non-word characters.
final Pattern pattern = Pattern.compile(
new String(
Base64.getDecoder().decode(
"W15ccHtJc0xldHRlcn0nXHB7UGR9" +
extra.value()
),
StandardCharsets.US_ASCII
) +
"]+"
);
// POST_LOWER with everything matched by the pattern removed.
final String good = String.join(
"",
pattern.split(WordStatTester.POST_LOWER)
);
// Fixed texts.
checker.test(
pattern,
"To be, or not to be, that is the question:"
);
checker.test(
pattern,
"Monday's child is fair of face.",
"Tuesday's child is full of grace."
);
checker.test(
pattern,
"Шалтай-Болтай",
"Сидел на стене.",
"Шалтай-Болтай",
"Свалился во сне."
);
// Random texts. NOTE(review): the meaning of the four numeric arguments is
// defined by WordStatChecker.randomTest, which is not visible here — confirm there.
// The last two arguments appear to be the word alphabet and the delimiter set.
checker.randomTest(
3,
10,
10,
3,
ExtendedRandom.ENGLISH,
WordStatChecker.SIMPLE_DELIMITERS
);
checker.randomTest(
10,
3,
5,
5,
ExtendedRandom.RUSSIAN,
WordStatChecker.SIMPLE_DELIMITERS
);
checker.randomTest(
3,
10,
10,
3,
ExtendedRandom.GREEK,
WordStatChecker.SIMPLE_DELIMITERS
);
checker.randomTest(
3,
10,
10,
3,
WordStatChecker.DASH,
WordStatChecker.SIMPLE_DELIMITERS
);
checker.randomTest(
3,
10,
10,
3,
ExtendedRandom.ENGLISH,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
10,
3,
5,
5,
ExtendedRandom.RUSSIAN,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
3,
10,
10,
3,
ExtendedRandom.GREEK,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
3,
10,
10,
3,
WordStatChecker.DASH,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
10,
20,
10,
3,
good,
WordStatChecker.SIMPLE_DELIMITERS
);
checker.randomTest(
10,
20,
10,
3,
good,
WordStatChecker.ADVANCED_DELIMITERS
);
// Larger random tests; sizes are divided by the TestCounter denominators.
final int d = TestCounter.DENOMINATOR;
final int d2 = TestCounter.DENOMINATOR2;
checker.randomTest(
100,
1000 / d,
1000 / d2,
1000 / d2,
WordStatChecker.ALL,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
10,
1000 / d,
1000 / d2,
1000 / d2,
good,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
10000 / d,
20,
10,
5,
WordStatChecker.ALL,
WordStatChecker.ADVANCED_DELIMITERS
);
checker.randomTest(
1000000 / d,
2,
2,
1,
WordStatChecker.ALL,
WordStatChecker.ADVANCED_DELIMITERS
);
// Fixed texts taken from WordStatTester.
checker.test(pattern, WordStatTester.PRE_LOWER);
checker.test(pattern, WordStatTester.POST_LOWER);
}
);
}
/**
 * Converts one word occurrence into the value reported for it.
 *
 * @param <T> type of the reported value
 */
@FunctionalInterface
public interface Extractor<T> {
/**
 * Produces the value for one occurrence. As called from {@code variant}:
 *
 * @param l 1-based line number
 * @param li 1-based index of the word within its line
 * @param lt number of words on the line
 * @param gi 1-based index of the word within the whole text
 * @param gt total number of words in the text
 * @return the value to report for this occurrence
 */
T select(int l, int li, int lt, int gi, int gt);
}
}