update
This commit is contained in:
15
java/wordStat/WordInfo.java
Normal file
15
java/wordStat/WordInfo.java
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
 * Mutable per-word statistics holder.
 *
 * <p>Stores the word text, the number of occurrences counted so far and the
 * index that was assigned to the word when it was first seen. Only
 * {@link #count} is meant to change after construction.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordInfo {

    /** The word (or word fragment) this entry describes; fixed at construction. */
    final String word;

    /** Number of occurrences; incremented in place by callers. */
    int count;

    /** Index assigned at the first occurrence; fixed at construction. */
    final int firstIndex;

    /**
     * Creates an entry.
     *
     * @param word       the word text
     * @param count      the initial occurrence count
     * @param firstIndex the index assigned at the first occurrence
     */
    WordInfo(String word, int count, int firstIndex) {
        this.word = word;
        this.count = count;
        this.firstIndex = firstIndex;
    }
}
|
||||
68
java/wordStat/WordStat.java
Normal file
68
java/wordStat/WordStat.java
Normal file
@@ -0,0 +1,68 @@
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts how often each word occurs in a text file.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes. Words are lower-cased before counting and are written to the
 * output file as {@code word count} lines in order of first appearance.
 * Both files are processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStat {

    /**
     * Reads the input file, counts its words and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println("usage: java WordStat <inputFile> <outputFile>");
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> wordCount = countWords(inputFileName);

            // The writer is opened only after reading succeeded, so a read
            // failure never leaves a truncated output file behind.
            try (PrintWriter writer = new PrintWriter(outputFileName, StandardCharsets.UTF_8)) {
                for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
                    writer.println(entry.getKey() + " " + entry.getValue());
                }
            }
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts its lower-cased words.
     *
     * @param inputFileName the file to read as UTF-8
     * @return word counts, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> countWords(String inputFileName) throws IOException {
        // LinkedHashMap iterates in first-insertion order, which is the output order.
        Map<String, Integer> wordCount = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (Reader r = new BufferedReader(new FileReader(inputFileName, StandardCharsets.UTF_8))) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, wordCount);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, wordCount);
        return wordCount;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> wordCount) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);
        wordCount.merge(word, 1, Integer::sum);
    }
}
|
||||
127
java/wordStat/WordStatChecker.java
Normal file
127
java/wordStat/WordStatChecker.java
Normal file
@@ -0,0 +1,127 @@
|
||||
package wordStat;
|
||||
|
||||
import base.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
|
||||
*/
|
||||
public final class WordStatChecker extends BaseChecker {
|
||||
public static final String DASH = "-֊־‒–—―⸗⸚⸺〰゠︱︲﹘﹣-'";
|
||||
public static final String SIMPLE_DELIMITERS = " \t";
|
||||
public static final String ADVANCED_DELIMITERS = " \t!\"#%&()*+,./:;<=>?@[\\]^`{|}~ ¡¦§¨©«¬\u00AD®¯°±²³´¶·¸¹»¼½¾¿×÷˂˃˄˅˒˓˔˕˖˗˘˙˚˛˜˝";
|
||||
public static final String ALL = ExtendedRandom.RUSSIAN + ExtendedRandom.ENGLISH + ExtendedRandom.GREEK + DASH;
|
||||
private static final Pattern PATTERN = Pattern.compile("[^\\p{IsLetter}'\\p{Pd}]+");
|
||||
public static final Runner.Packages RUNNER = Runner.packages("", "wordstat", "wspp");
|
||||
|
||||
private final Function<String[][], ? extends List<? extends Pair<?, ?>>> processor;
|
||||
|
||||
private final MainChecker main;
|
||||
|
||||
private WordStatChecker(
|
||||
final String className,
|
||||
final Function<String[][], ? extends List<? extends Pair<?, ?>>> processor,
|
||||
final TestCounter counter
|
||||
) {
|
||||
super(counter);
|
||||
main = new MainChecker(RUNNER.files(className));
|
||||
this.processor = processor;
|
||||
}
|
||||
|
||||
public static void test(
|
||||
final TestCounter counter,
|
||||
final String className,
|
||||
final Function<String[][], ? extends List<? extends Pair<?, ?>>> processor,
|
||||
final Consumer<WordStatChecker> tests
|
||||
) {
|
||||
tests.accept(new WordStatChecker(className, processor, counter));
|
||||
}
|
||||
|
||||
public void test(final String... lines) {
|
||||
test(PATTERN, lines);
|
||||
}
|
||||
|
||||
public void test(final Pattern pattern, final String... lines) {
|
||||
final String[][] data = Arrays.stream(lines)
|
||||
.map(line -> Arrays.stream(pattern.split(line)).filter(Predicate.not(String::isEmpty)).toArray(String[]::new))
|
||||
.toArray(String[][]::new);
|
||||
test(lines, processor.apply(data));
|
||||
}
|
||||
|
||||
private void randomTest(
|
||||
final int wordLength,
|
||||
final int totalWords,
|
||||
final int wordsPerLine,
|
||||
final int lines,
|
||||
final String chars,
|
||||
final String delimiters,
|
||||
final Function<String[][], List<? extends Pair<?, ?>>> processor
|
||||
) {
|
||||
final String[] words = generateWords(wordLength, totalWords, chars);
|
||||
final String[][] text = generateTest(lines, words, wordsPerLine);
|
||||
test(input(text, delimiters), processor.apply(text));
|
||||
}
|
||||
|
||||
public void randomTest(
|
||||
final int wordLength,
|
||||
final int totalWords,
|
||||
final int wordsPerLine,
|
||||
final int lines,
|
||||
final String chars,
|
||||
final String delimiters
|
||||
) {
|
||||
randomTest(wordLength, totalWords, wordsPerLine, lines, chars, delimiters, processor::apply);
|
||||
}
|
||||
|
||||
private void test(final String[] text, final List<? extends Pair<?, ?>> expected) {
|
||||
final List<String> expectedList = expected.stream()
|
||||
.map(p -> p.first() + " " + p.second())
|
||||
.collect(Collectors.toList());
|
||||
main.testEquals(counter, Arrays.asList(text), expectedList);
|
||||
}
|
||||
|
||||
public void test(final String[][] text, final String delimiters, final List<Pair<String, Integer>> answer) {
|
||||
test(input(text, delimiters), answer);
|
||||
}
|
||||
|
||||
private String[] generateWords(final int wordLength, final int totalWords, final String chars) {
|
||||
final String allChars = chars.chars().anyMatch(Character::isUpperCase)
|
||||
? chars : chars + chars.toUpperCase(Locale.ROOT);
|
||||
return IntStream.range(0, totalWords)
|
||||
.mapToObj(i -> random().randomString(allChars, wordLength / 2, wordLength))
|
||||
.toArray(String[]::new);
|
||||
}
|
||||
|
||||
private String[][] generateTest(final int lines, final String[] words, final int wordsPerLine) {
|
||||
final String[][] text = new String[lines][];
|
||||
for (int i = 0; i < text.length; i++) {
|
||||
text[i] = new String[random().nextInt(wordsPerLine / 2, wordsPerLine)];
|
||||
for (int j = 0; j < text[i].length; j++) {
|
||||
text[i][j] = random().randomItem(words);
|
||||
}
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
private String[] input(final String[][] text, final String delimiters) {
|
||||
final String[] input = new String[text.length];
|
||||
for (int i = 0; i < text.length; i++) {
|
||||
final String[] line = text[i];
|
||||
final StringBuilder sb = new StringBuilder(random().randomString(delimiters));
|
||||
for (final String word : line) {
|
||||
sb.append(word).append(random().randomString(delimiters));
|
||||
}
|
||||
input[i] = sb.toString();
|
||||
}
|
||||
return input;
|
||||
}
|
||||
}
|
||||
83
java/wordStat/WordStatLength.java
Normal file
83
java/wordStat/WordStatLength.java
Normal file
@@ -0,0 +1,83 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts how often each word occurs in a text file and lists the words by length.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes, lower-cased before counting. The output file contains
 * {@code word count} lines ordered by word length; words of equal length
 * keep the order of their first appearance. Both files are processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStatLength {

    /**
     * Reads the input file, counts its words and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println(
                "usage: java WordStatLength <inputFile> <outputFile>"
            );
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> counts = readCounts(inputFileName);
            writeByLength(outputFileName, counts);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts its words.
     *
     * @return counts keyed by word, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> readCounts(String inputFileName) throws IOException {
        // LinkedHashMap remembers first-insertion order, which is the tie-break order.
        Map<String, Integer> counts = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (
            Reader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputFileName), "UTF-8")
            )
        ) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, counts);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, counts);
        return counts;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> counts) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);
        counts.merge(word, 1, Integer::sum);
    }

    /** Writes {@code key count} lines ordered by key length, ties in map iteration order. */
    private static void writeByLength(String outputFileName, Map<String, Integer> counts)
        throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        // List.sort is stable, so equal lengths keep first-appearance order.
        entries.sort(
            Comparator.comparingInt((Map.Entry<String, Integer> e) -> e.getKey().length())
        );

        try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
            for (Map.Entry<String, Integer> entry : entries) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
        }
    }
}
|
||||
117
java/wordStat/WordStatLengthAffix.java
Normal file
117
java/wordStat/WordStatLengthAffix.java
Normal file
@@ -0,0 +1,117 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts the prefixes and suffixes of the words of a text file and lists them by length.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes, lower-cased before processing. Every word longer than one
 * character contributes its first {@code length / 2} characters (prefix) and
 * its last {@code length / 2} characters (suffix), in that order. The output
 * file contains {@code affix count} lines ordered by affix length; affixes of
 * equal length keep the order of their first appearance. Both files are
 * processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStatLengthAffix {

    /**
     * Reads the input file, counts the affixes and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println(
                "usage: java WordStatLengthAffix <inputFile> <outputFile>"
            );
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> counts = readCounts(inputFileName);
            writeByLength(outputFileName, counts);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts the affixes of its words.
     *
     * @return counts keyed by affix, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> readCounts(String inputFileName) throws IOException {
        // LinkedHashMap remembers first-insertion order, which is the tie-break order.
        Map<String, Integer> counts = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (
            Reader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputFileName), "UTF-8")
            )
        ) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, counts);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, counts);
        return counts;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the affixes of the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> counts) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);

        // One-character words have empty halves and are skipped.
        if (word.length() != 1) {
            int half = word.length() / 2;
            counts.merge(word.substring(0, half), 1, Integer::sum);
            counts.merge(word.substring(word.length() - half), 1, Integer::sum);
        }
    }

    /** Writes {@code key count} lines ordered by key length, ties in map iteration order. */
    private static void writeByLength(String outputFileName, Map<String, Integer> counts)
        throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        // List.sort is stable, so equal lengths keep first-appearance order.
        entries.sort(
            Comparator.comparingInt((Map.Entry<String, Integer> e) -> e.getKey().length())
        );

        try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
            for (Map.Entry<String, Integer> entry : entries) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
        }
    }
}
|
||||
93
java/wordStat/WordStatLengthMiddle.java
Normal file
93
java/wordStat/WordStatLengthMiddle.java
Normal file
@@ -0,0 +1,93 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts the middle parts of the words of a text file and lists them by length.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes, lower-cased before processing. Every word of at least seven
 * characters contributes what remains after dropping its first three and last
 * three characters; shorter words are ignored. The output file contains
 * {@code middle count} lines ordered by length; entries of equal length keep
 * the order of their first appearance. Both files are processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStatLengthMiddle {

    /** Number of characters dropped from each end of a word. */
    private static final int TRIM = 3;

    /**
     * Reads the input file, counts the word middles and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println(
                "usage: java WordStatLengthMiddle <inputFile> <outputFile>"
            );
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> counts = readCounts(inputFileName);
            writeByLength(outputFileName, counts);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts the middles of its words.
     *
     * @return counts keyed by word middle, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> readCounts(String inputFileName) throws IOException {
        // LinkedHashMap remembers first-insertion order, which is the tie-break order.
        Map<String, Integer> counts = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (
            Reader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputFileName), "UTF-8")
            )
        ) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, counts);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, counts);
        return counts;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the middle of the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> counts) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);

        // Only words with a non-empty middle (length >= 2 * TRIM + 1) are counted.
        if (word.length() >= 2 * TRIM + 1) {
            counts.merge(word.substring(TRIM, word.length() - TRIM), 1, Integer::sum);
        }
    }

    /** Writes {@code key count} lines ordered by key length, ties in map iteration order. */
    private static void writeByLength(String outputFileName, Map<String, Integer> counts)
        throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        // List.sort is stable, so equal lengths keep first-appearance order.
        entries.sort(
            Comparator.comparingInt((Map.Entry<String, Integer> e) -> e.getKey().length())
        );

        try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
            for (Map.Entry<String, Integer> entry : entries) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
        }
    }
}
|
||||
96
java/wordStat/WordStatLengthPrefix.java
Normal file
96
java/wordStat/WordStatLengthPrefix.java
Normal file
@@ -0,0 +1,96 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts the prefixes of the words of a text file and lists them by length.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes, lower-cased before processing. Every word longer than one
 * character contributes its first {@code length / 2} characters. The output
 * file contains {@code prefix count} lines ordered by prefix length; prefixes
 * of equal length keep the order of their first appearance. Both files are
 * processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStatLengthPrefix {

    /**
     * Reads the input file, counts the prefixes and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println(
                "usage: java WordStatLengthPrefix <inputFile> <outputFile>"
            );
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> counts = readCounts(inputFileName);
            writeByLength(outputFileName, counts);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts the prefixes of its words.
     *
     * @return counts keyed by prefix, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> readCounts(String inputFileName) throws IOException {
        // LinkedHashMap remembers first-insertion order, which is the tie-break order.
        Map<String, Integer> counts = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (
            Reader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputFileName), "UTF-8")
            )
        ) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, counts);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, counts);
        return counts;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the prefix of the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> counts) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);

        // One-character words have an empty prefix and are skipped.
        if (word.length() != 1) {
            counts.merge(word.substring(0, word.length() / 2), 1, Integer::sum);
        }
    }

    /** Writes {@code key count} lines ordered by key length, ties in map iteration order. */
    private static void writeByLength(String outputFileName, Map<String, Integer> counts)
        throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        // List.sort is stable, so equal lengths keep first-appearance order.
        entries.sort(
            Comparator.comparingInt((Map.Entry<String, Integer> e) -> e.getKey().length())
        );

        try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
            for (Map.Entry<String, Integer> entry : entries) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
        }
    }
}
|
||||
95
java/wordStat/WordStatLengthSuffix.java
Normal file
95
java/wordStat/WordStatLengthSuffix.java
Normal file
@@ -0,0 +1,95 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts the suffixes of the words of a text file and lists them by length.
 *
 * <p>A word is a maximal run of letters, dash-punctuation characters and
 * apostrophes, lower-cased before processing. Every word longer than one
 * character contributes its last {@code length / 2} characters. The output
 * file contains {@code suffix count} lines ordered by suffix length; suffixes
 * of equal length keep the order of their first appearance. Both files are
 * processed as UTF-8.
 *
 * @author Nikita Doschennikov (me@fymio.us)
 */
public class WordStatLengthSuffix {

    /**
     * Reads the input file, counts the suffixes and writes the statistics.
     *
     * @param args exactly two elements: the input file name, then the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println(
                "usage: java WordStatLengthSuffix <inputFile> <outputFile>"
            );
            // Stop here: indexing args below would throw ArrayIndexOutOfBoundsException.
            return;
        }

        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            Map<String, Integer> counts = readCounts(inputFileName);
            writeByLength(outputFileName, counts);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads the file character by character and counts the suffixes of its words.
     *
     * @return counts keyed by suffix, iterated in order of first appearance
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> readCounts(String inputFileName) throws IOException {
        // LinkedHashMap remembers first-insertion order, which is the tie-break order.
        Map<String, Integer> counts = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (
            Reader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputFileName), "UTF-8")
            )
        ) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordChar(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, counts);
                }
            }
        }

        // A word that runs up to the end of the file has no trailing separator.
        flushWord(sb, counts);
        return counts;
    }

    /** Tells whether the character belongs to a word: letter, dash punctuation or apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c)
            || Character.getType(c) == Character.DASH_PUNCTUATION
            || c == '\'';
    }

    /** Counts the suffix of the word accumulated in the buffer, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> counts) {
        if (sb.length() == 0) {
            return;
        }
        String word = sb.toString().toLowerCase();
        sb.setLength(0);

        // One-character words have an empty suffix and are skipped.
        if (word.length() != 1) {
            counts.merge(word.substring(word.length() - word.length() / 2), 1, Integer::sum);
        }
    }

    /** Writes {@code key count} lines ordered by key length, ties in map iteration order. */
    private static void writeByLength(String outputFileName, Map<String, Integer> counts)
        throws IOException {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        // List.sort is stable, so equal lengths keep first-appearance order.
        entries.sort(
            Comparator.comparingInt((Map.Entry<String, Integer> e) -> e.getKey().length())
        );

        try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
            for (Map.Entry<String, Integer> entry : entries) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
        }
    }
}
|
||||
70
java/wordStat/WordStatTest.java
Normal file
70
java/wordStat/WordStatTest.java
Normal file
@@ -0,0 +1,70 @@
|
||||
package wordStat;
|
||||
|
||||
import base.Named;
|
||||
import base.Selector;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Tests for <a href="https://www.kgeorgiy.info/courses/prog-intro/homeworks.html#wordstat">Word Statistics</a> homework
|
||||
* of <a href="https://www.kgeorgiy.info/courses/prog-intro/">Introduction to Programming</a> course.
|
||||
*
|
||||
* @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
|
||||
*/
|
||||
public final class WordStatTest {
|
||||
// === Base
|
||||
private static final Named<Function<String, Stream<String>>> ID = Named.of("", Stream::of);
|
||||
private static final WordStatTester.Variant BASE = new WordStatTester.Variant("", false, Comparator.comparingInt(p -> 0));
|
||||
|
||||
|
||||
// === 3637
|
||||
public static final int SIZE = 3;
|
||||
private static final WordStatTester.Variant LENGTH = new WordStatTester.Variant("Length", false, Comparator.comparingInt(p -> p.first().length()));
|
||||
private static final Named<Function<String, Stream<String>>> MIDDLE =
|
||||
size("Middle", SIZE * 2 + 1, s -> Stream.of(s.substring(SIZE, s.length() - SIZE)));
|
||||
|
||||
static Named<Function<String, Stream<String>>> size(
|
||||
final String name,
|
||||
final int length,
|
||||
final Function<String, Stream<String>> f
|
||||
) {
|
||||
return Named.of(name, s -> s.length() >= length ? f.apply(s) : Stream.empty());
|
||||
}
|
||||
|
||||
// === 3839
|
||||
private static final Named<Function<String, Stream<String>>> AFFIX = size(
|
||||
"Affix",
|
||||
2,
|
||||
s -> Stream.of(s.substring(0, s.length() / 2), s.substring(s.length() - s.length() / 2))
|
||||
);
|
||||
|
||||
// === 3536
|
||||
private static final Named<Function<String, Stream<String>>> SUFFIX =
|
||||
size("Suffix", 2, s -> Stream.of(s.substring(s.length() - s.length() / 2)));
|
||||
|
||||
// === 4749
|
||||
private static final Named<Function<String, Stream<String>>> PREFIX =
|
||||
size("Prefix", 2, s -> Stream.of(s.substring(0, s.length() / 2)));
|
||||
|
||||
// === Common
|
||||
public static final Selector SELECTOR = new Selector(WordStatTester.class)
|
||||
.variant("Base", BASE.with(ID))
|
||||
.variant("3637", LENGTH.with(MIDDLE))
|
||||
.variant("3839", LENGTH.with(AFFIX))
|
||||
.variant("3435", LENGTH.with(SUFFIX))
|
||||
.variant("3233", LENGTH.with(ID))
|
||||
.variant("4142", LENGTH.with(MIDDLE))
|
||||
.variant("4749", LENGTH.with(PREFIX))
|
||||
|
||||
;
|
||||
|
||||
private WordStatTest() {
|
||||
// Utility class
|
||||
}
|
||||
|
||||
public static void main(final String... args) {
|
||||
SELECTOR.main(args);
|
||||
}
|
||||
}
|
||||
100
java/wordStat/WordStatTester.java
Normal file
100
java/wordStat/WordStatTester.java
Normal file
@@ -0,0 +1,100 @@
|
||||
package wordStat;
|
||||
|
||||
import base.ExtendedRandom;
|
||||
import base.Named;
|
||||
import base.Pair;
|
||||
import base.TestCounter;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
|
||||
*/
|
||||
public final class WordStatTester {
|
||||
public static final String PRE_LOWER = chars()
|
||||
.filter(s -> s.toLowerCase(Locale.ROOT).length() == 1)
|
||||
.collect(Collectors.joining());
|
||||
public static final String POST_LOWER = chars()
|
||||
.collect(Collectors.joining())
|
||||
.toLowerCase();
|
||||
|
||||
private WordStatTester() {
|
||||
}
|
||||
|
||||
private static Stream<String> chars() {
|
||||
return IntStream.range(' ', Character.MAX_VALUE)
|
||||
.filter(ch -> !Character.isSurrogate((char) ch))
|
||||
.filter(ch -> Character.getType(ch) != Character.NON_SPACING_MARK)
|
||||
.filter(ch -> Character.getType(ch) != Character.DIRECTIONALITY_NONSPACING_MARK)
|
||||
.mapToObj(Character::toString);
|
||||
}
|
||||
|
||||
/* package-private */ record Variant(String name, boolean reverse, Comparator<Pair<String, Integer>> c) {
|
||||
public Consumer<TestCounter> with(final Named<Function<String, Stream<String>>> split) {
|
||||
return counter -> WordStatChecker.test(
|
||||
counter,
|
||||
"WordStat" + name + split.name(),
|
||||
text -> answer(split.value(), text),
|
||||
checker -> {
|
||||
checker.test("To be, or not to be, that is the question:");
|
||||
checker.test("Monday's child is fair of face.", "Tuesday's child is full of grace.");
|
||||
checker.test("Шалтай-Болтай", "Сидел на стене.", "Шалтай-Болтай", "Свалился во сне.");
|
||||
checker.test(
|
||||
"27 октября — 300-й день григорианскому календарю. До конца года остаётся 65 дней.",
|
||||
"До 15 октября 1582 года — 27 октября по юлианскому календарю, с 15 октября 1582 года — 27 октября по григорианскому календарю.",
|
||||
"В XX и XXI веках соответствует 14 октября по юлианскому календарю[1].",
|
||||
"(c) Wikipedia"
|
||||
);
|
||||
checker.test("23 октября — Всемирный день психического здоровья", "Тема 2025 года: Психическое здоровье на рабочем месте");
|
||||
|
||||
checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.SIMPLE_DELIMITERS);
|
||||
checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.SIMPLE_DELIMITERS);
|
||||
checker.randomTest(4, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.SIMPLE_DELIMITERS);
|
||||
checker.randomTest(4, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.SIMPLE_DELIMITERS);
|
||||
checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(3, 10, 10, 10, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
|
||||
final int d = TestCounter.DENOMINATOR;
|
||||
final int d2 = TestCounter.DENOMINATOR;
|
||||
checker.randomTest(10, 10000 / d, 10, 10, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(10, 1, 10, 10, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(10, 1000 / d, 100 / d2, 100 / d2, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(4, 1000 / d, 10, 3000 / d, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(4, 1000 / d, 3000 / d, 10, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(10000 / d, 20, 10, 5, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
checker.randomTest(1000000 / d, 2, 2, 1, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS);
|
||||
|
||||
checker.test(PRE_LOWER);
|
||||
checker.test(POST_LOWER);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private List<Pair<String, Integer>> answer(final Function<String, Stream<String>> split, final String[][] text) {
|
||||
final List<String> parts = Arrays.stream(text)
|
||||
.flatMap(Arrays::stream)
|
||||
.filter(Predicate.not(String::isEmpty))
|
||||
.flatMap(split)
|
||||
.peek(s -> {assert !s.isBlank();})
|
||||
.collect(Collectors.toList());
|
||||
if (reverse()) {
|
||||
Collections.reverse(parts);
|
||||
}
|
||||
return parts.stream()
|
||||
.collect(Collectors.toMap(String::toLowerCase, v -> 1, Integer::sum, LinkedHashMap::new))
|
||||
.entrySet().stream()
|
||||
.map(Pair::of)
|
||||
.sorted(c)
|
||||
.toList();
|
||||
}
|
||||
}
|
||||
}
|
||||
7
java/wordStat/package-info.java
Normal file
7
java/wordStat/package-info.java
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
|
||||
* Tests for <a href="https://www.kgeorgiy.info/courses/prog-intro/homeworks.html#wordstat">Word Statistics</a> homework
|
||||
* of <a href="https://www.kgeorgiy.info/courses/prog-intro/">Introduction to Programming</a> course.
|
||||
*
|
||||
* @author Georgiy Korneev (kgeorgiy@kgeorgiy.info)
|
||||
*/
|
||||
package wordStat;
|
||||
Reference in New Issue
Block a user