From 3253af7194d9ad8af2e781479a9f7d7b69302aec Mon Sep 17 00:00:00 2001 From: random-randoms Date: Wed, 21 May 2025 19:36:01 +0300 Subject: [PATCH 1/6] optimize rsm edges storage --- .../src/main/kotlin/org/ucfs/gss/GssNode.kt | 2 - .../ucfs/intersection/IntersectionEngine.kt | 22 +--- .../src/main/kotlin/org/ucfs/optbench/Main.kt | 33 ++++++ .../kotlin/org/ucfs/optbench/TestGenerator.kt | 52 +++++++++ .../kotlin/org/ucfs/optbench/TestSource.kt | 102 +++++++++++++++++ .../src/main/kotlin/org/ucfs/optbench/Util.kt | 52 +++++++++ .../optbench/testsource/Dyck3TestGenerator.kt | 88 +++++++++++++++ .../optbench/testsource/DyckTestGenerator.kt | 63 +++++++++++ .../testsource/ExpressionTestGenerator.kt | 105 ++++++++++++++++++ .../testsource/NonSquareTestGenerator.kt | 78 +++++++++++++ .../testsource/PalindromeTestGenerator.kt | 61 ++++++++++ .../testsource/StrangeAStarTestGenerator.kt | 44 ++++++++ .../testsource/UnequalBlocksTestGenerator.kt | 71 ++++++++++++ .../org/ucfs/optbench/testsource/Util.kt | 19 ++++ .../src/main/kotlin/org/ucfs/rsm/RsmState.kt | 41 ++++--- 15 files changed, 802 insertions(+), 31 deletions(-) create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/Main.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/Util.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt 
create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt create mode 100644 solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt diff --git a/solver/src/main/kotlin/org/ucfs/gss/GssNode.kt b/solver/src/main/kotlin/org/ucfs/gss/GssNode.kt index c0817b4a7..e1b29c38c 100644 --- a/solver/src/main/kotlin/org/ucfs/gss/GssNode.kt +++ b/solver/src/main/kotlin/org/ucfs/gss/GssNode.kt @@ -20,7 +20,6 @@ data class GssNode( * Pointer to vertex in input graph */ val inputPosition: InputNodeType, - ) { val id: Int = lastId++ val popped = ArrayList>() @@ -34,5 +33,4 @@ data class GssNode( outgoingEdges.add(edge) return popped } - } diff --git a/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt b/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt index ce9338808..c522fa5e9 100644 --- a/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt +++ b/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt @@ -3,12 +3,8 @@ package org.ucfs.intersection import org.ucfs.descriptors.Descriptor import org.ucfs.input.ILabel import org.ucfs.parser.IGll -import org.ucfs.rsm.symbol.ITerminal -import org.ucfs.rsm.symbol.Nonterminal object IntersectionEngine : IIntersectionEngine { - - /** * Process outgoing edges from input position in given descriptor, according to processing logic, represented as * separate functions for both outgoing terminal and nonterminal edges from rsmState in descriptor @@ -19,24 +15,18 @@ object IntersectionEngine : IIntersectionEngine { gll: IGll, descriptor: Descriptor, ) { - for (inputEdge in gll.ctx.input.getEdges(descriptor.inputPosition)) { - val inputTerminal = inputEdge.label.terminal - val rsmEdge = descriptor.rsmState.terminalEdgesStorage.find { - it.symbol == inputTerminal - } - if (rsmEdge != null) { - gll.handleTerminalEdge( - descriptor, inputEdge, rsmEdge.destinationState, rsmEdge.symbol as ITerminal - ) - } + val terminal = inputEdge.label.terminal 
?: continue + val destination = descriptor.rsmState.terminalEdgesStorage[terminal] ?: continue + gll.handleTerminalEdge(descriptor, inputEdge, destination, terminal) } for (nonterminalEdge in descriptor.rsmState.nonterminalEdgesStorage) { gll.handleNonterminalEdge( - descriptor, nonterminalEdge.destinationState, nonterminalEdge.symbol as Nonterminal + descriptor, + nonterminalEdge.value, + nonterminalEdge.key, ) } } } - diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt new file mode 100644 index 000000000..bb6143434 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt @@ -0,0 +1,33 @@ +package org.ucfs.optbench + +import org.ucfs.optbench.testsource.* +import java.io.File + +fun main() { + val generators = + listOf( + // StrangeAStarTestGenerator(), + Dyck3TestGenerator(), + DyckTestGenerator(), + ExpressionTestGenerator(), + PalindromeTestGenerator(), + UnequalBlocksTestGenerator(), + NonSquareTestGenerator(), + ) + + sequence { + generators.forEach { + var size = 10 + val tests = 100 + + while (true) { + val src = it.generateSource(228, size, tests) + val result = src.run() + yield(result) + println(result) + if (result.totalRuntime > 10000) break + size = size * 3 / 2 + } + } + }.toList().dumpToCsv(File("unoptimized.csv")) +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt new file mode 100644 index 000000000..07f658c32 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt @@ -0,0 +1,52 @@ +package org.ucfs.optbench + +import org.ucfs.grammar.combinator.Grammar +import kotlin.random.Random + +interface TestGenerator { + val grammar: Grammar + val name: String + val generator: SingleTestGenerator +} + +interface SingleTestGenerator { + fun generateTest( + seed: Int, + size: Int, + ): Test +} + +class AcceptRejectUniformGenerator( + val generateAccept: (Int, Int) -> Test, + val 
generateReject: (Int, Int) -> Test, +) : SingleTestGenerator { + override fun generateTest( + seed: Int, + size: Int, + ): Test = if (Random(seed).nextBoolean()) generateAccept(seed, size) else generateReject(seed, size) +} + +class TrivialGenerator(val generate: (Int, Int) -> Test) : SingleTestGenerator { + override fun generateTest( + seed: Int, + size: Int, + ): Test = generate(seed, size) +} + +fun TestGenerator.generateSource( + seed: Int, + size: Int, + number: Int, +): TestSource = + TestSource( + grammar, + Random(seed) + .let { + rnd -> + sequence { + repeat(number) { yield(generator.generateTest(rnd.nextInt(), size)) } + }.toList() + }, + name, + size, + ) diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt new file mode 100644 index 000000000..45898ecf5 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -0,0 +1,102 @@ +package org.ucfs.optbench + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.input.IInputGraph +import org.ucfs.input.LinearInput +import org.ucfs.input.TerminalInputLabel +import org.ucfs.parser.Gll +import org.ucfs.sppf.node.RangeSppfNode +import java.io.File +import kotlin.system.measureTimeMillis + +data class Test(val input: String, val size: Int, val output: RecognizerOutput) + +typealias ParserOutput = RangeSppfNode? 
+ +fun ParserOutput.checkRecognize(input: IInputGraph): RecognizerOutput = + if (this == null || inputRange == null) { + RecognizerOutput.Reject + } else { + (input.isFinal(inputRange.to) && input.isStart(inputRange.from)).toRecognizerOutput() + } + +fun runGll( + input: IInputGraph, + grammar: Grammar, +) = Gll.gll(grammar.rsm, input).parse() + +fun runTest( + test: Test, + grammar: Grammar, +): Triple, RecognizerOutput> { + var result: ParserOutput + val input = LinearInput.buildFromString(test.input) + val time = measureTimeMillis { result = runGll(input, grammar) } + return Triple(time, result, result.checkRecognize(input)) +} + +enum class RecognizerOutput { Accept, Reject } + +fun Boolean.toRecognizerOutput() = if (this) RecognizerOutput.Accept else RecognizerOutput.Reject + +data class SingleTest( + val grammar: String, + val input: String, + val expected: RecognizerOutput, + val actual: RecognizerOutput, +) + +data class TestResult( + val name: String, + val tests: Int, + val size: Int, + val totalRuntime: Long, + val misses: List, +) { + override fun toString(): String { + val missesString = if (isOk()) "" else " | misses: ${misses.size}" + return "name: ${name.chars(20)} | " + + "size: ${size.chars(6)} | " + + "tests: ${tests.chars(4)} | " + + "initial: ${totalRuntime.chars(8)}ms | " + + missesString + } +} + +fun TestResult.isOk() = misses.isEmpty() + +data class TestSource(val grammar: Grammar, val inputs: Collection, val name: String, val size: Int) { + fun run(): TestResult { + var totalRuntime: Long = 0 + val misses = mutableListOf() + inputs.forEach { + val actual = runTest(it, grammar) + totalRuntime += actual.first + val test = SingleTest(name, it.input, it.output, actual.third) + if (actual.third != it.output) misses.add(test) + } + return TestResult( + name, + inputs.size, + size, + totalRuntime, + misses, + ) + } +} + +fun List.dumpToCsv(file: File) { + val bw = file.bufferedWriter() + bw.write("name,size,runtime\r\n") + + forEach { + 
bw.write(it.name) + bw.write(",") + bw.write(it.size.toString()) + bw.write(",") + bw.write(it.totalRuntime.toString()) + bw.write("\r\n") + } + + bw.close() +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Util.kt b/solver/src/main/kotlin/org/ucfs/optbench/Util.kt new file mode 100644 index 000000000..9b69b27b5 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/Util.kt @@ -0,0 +1,52 @@ +package org.ucfs.optbench + +import org.ucfs.rsm.symbol.Term +import java.lang.String.format +import kotlin.collections.HashSet + +const val lineEndSymbol = "$" +val lineEnd = Term(lineEndSymbol) + +fun String.repeat(n: Int) = + generateSequence { this } + .take(n) + .fold("") { acc, s -> acc + s } + +fun String.chars(n: Int) = if (length < n) " ".repeat(n - length) + this else this + +fun Number.chars(n: Int) = format("%${n}d", this) + +fun parserOutputSame( + left: ParserOutput, + right: ParserOutput, +): Boolean { + val visitedLeft = HashSet() + val visitedRight = HashSet() + + fun checkSame( + left: ParserOutput, + right: ParserOutput, + ): Boolean { + if (left == null && right == null) return true + if (left == null || right == null) return false + + if (visitedLeft.contains(left.id) && visitedRight.contains(right.id)) return true + if (visitedLeft.contains(left.id) || visitedRight.contains(right.id)) return false + + visitedLeft.add(left.id) + visitedRight.add(right.id) + + if (left.type != right.type) return false + if (left.rsmRange != right.rsmRange) return false + if (left.inputRange != right.inputRange) return false + if (left.children.size != right.children.size) return false + + left.children.forEachIndexed { index, it -> if (!checkSame(it, right.children[index])) return false } + + return true + } + + return checkSame(left, right) +} + +infix fun ParserOutput.differsFrom(other: ParserOutput) = !parserOutputSame(this, other) diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt 
b/solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt new file mode 100644 index 000000000..ea5f1c1f5 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt @@ -0,0 +1,88 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.* +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class Dyck3Grammar : Grammar() { + val SS by Nt() + val S by Nt() + val Round by Nt() + val Square by Nt() + val Angle by Nt() + + init { + setStart(SS) + Round /= Term("(") * S * Term(")") * S + Square /= Term("[") * S * Term("]") * S + Angle /= Term("<") * S * Term(">") * S + S /= Epsilon or Round or Square or Angle + SS /= S * lineEnd + } +} + +class Dyck3TestGenerator : TestGenerator { + override val name = "Dyck-3" + override val grammar = Dyck3Grammar() + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> genDyck3Ok(seed, size) + lineEndToken with RecognizerOutput.Accept }, + { seed, size -> genDyck3Fail(seed, size) + lineEndToken with RecognizerOutput.Reject }, + ) +} + +private val lBrace = listOf("( " of 1, "[ " of 1, "< " of 1) +private val rBrace = listOf(") " of 1, "] " of 1, "> " of 1) + +private fun genDyck3Ok( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + val brace = rnd.nextInt(0, 3) + if (size == 0) return "" of 0 + val left = rnd.nextInt(0, size) + val right = size - left - 1 + return lBrace[brace] + genDyck3Ok(seed + left, left) + rBrace[brace] + genDyck3Ok(seed + right, right) +} + +private fun genDyck3Fail( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + val brace = rnd.nextInt(0, 3) + if (size == 0) { + throw Exception("Dyck-3 cannot fail empty string") + } + val left = rnd.nextInt(0, size) + val right = size - left - 1 + val failLeft = rnd.nextBoolean() + if (rnd.nextBoolean() && size > 1) { // Fail later + return if 
(right == 0 || failLeft && left > 0) { + lBrace[brace] + + genDyck3Fail(seed + left, left) + + rBrace[brace] + + genDyck3Ok(seed + right, right) + } else { + lBrace[brace] + + genDyck3Ok(seed + left, left) + + rBrace[brace] + + genDyck3Fail(seed + right, right) + } + } + // Fail now + if (rnd.nextBoolean()) { // different brace types, like: (] + return lBrace[brace] + + genDyck3Ok(seed + left, left) + + rBrace[(brace + 1) % 3] + + genDyck3Ok(seed + right, right) + } + // braces look wrong sides, like: )) + return rBrace[brace] + + genDyck3Ok(seed + left, left) + + rBrace[brace] + + genDyck3Ok(seed + right, right) +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt new file mode 100644 index 000000000..5dfffbf00 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt @@ -0,0 +1,63 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.* +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class DyckGrammar : Grammar() { + val S by Nt() + val SS by Nt() + private val lBrace = Term("(") + private val rBrace = Term(")") + + init { + setStart(SS) + S /= Epsilon or lBrace * S * rBrace * S // * many(S) + SS /= S * lineEnd + } +} + +class DyckTestGenerator : TestGenerator { + override val grammar = DyckGrammar() + override val name = "Dyck-1" + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> genDyckOk(seed, size) + lineEndToken with RecognizerOutput.Accept }, + { seed, size -> genDyckFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, + ) +} + +private val lBrace = "( " of 1 +private val rBrace = ") " of 1 + +private fun genDyckOk( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + if (size == 0) return "" of 0 + val left = rnd.nextInt(0, size) + val 
right = size - left - 1 + return lBrace + genDyckOk(seed + left, left) + rBrace + genDyckOk(seed + right, right) +} + +private fun genDyckFail( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + if (size == 0) throw Exception("Dyck cannot fail empty string") + val left = rnd.nextInt(0, size) + val right = size - left - 1 + if (size == 1 || rnd.nextBoolean()) { + return rBrace + genDyckOk(seed + left, left) + rBrace + genDyckOk(seed + right, right) + } + + return if (left == 0 || rnd.nextBoolean() && right > 0) { + lBrace + genDyckOk(seed + left, left) + rBrace + genDyckFail(seed + right, right) + } else { + lBrace + genDyckFail(seed + left, left) + rBrace + genDyckOk(seed + right, right) + } +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt new file mode 100644 index 000000000..24ce32b4f --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt @@ -0,0 +1,105 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.Nt +import org.ucfs.grammar.combinator.regexp.or +import org.ucfs.grammar.combinator.regexp.times +import org.ucfs.optbench.* +import org.ucfs.optbench.lineEnd +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class ExpressionGrammar : Grammar() { + val SS by Nt() + val Statement by Nt() + val Expression by Nt() + val Multiplier by Nt() + val Summand by Nt() + val Variable by Nt() + + val Assign = Term(":=") + val Mul = Term("*") + val Div = Term("/") + val Plus = Term("+") + val Minus = Term("-") + val LBrace = Term("(") + val RBrace = Term(")") + val X = Term("x") + val Y = Term("y") + val Z = Term("z") + + // grammar intentionally made right-associative not to be left-recursive + init { + setStart(SS) + SS /= Statement * lineEnd + Variable /= X or Y or Z + Statement /= Variable * 
Assign * Expression + Multiplier /= Variable or LBrace * Expression * RBrace + Summand /= Multiplier or Multiplier * Mul * Summand or Multiplier * Div * Summand + Expression /= Summand or Summand * Plus * Expression or Summand * Minus * Expression + } +} + +class ExpressionTestGenerator : TestGenerator { + override val grammar = ExpressionGrammar() + override val name = "Expression" + override val generator = + TrivialGenerator { + seed, size -> + (generateStatement(seed, size) + lineEndToken) with RecognizerOutput.Accept + } +} + +val lineEndToken = lineEndSymbol of 1 +private val variables = listOf("x " of 1, "y " of 1, "z " of 1) +private val assign = ":= " of 1 +private val mulSigns = listOf("* " of 1, "/ " of 1) +private val plusSigns = listOf("+ " of 1, "- " of 1) +private val lBrace = "( " of 1 +private val rBrace = ") " of 1 + +private fun generateVariable(seed: Int) = variables[Random(seed).nextInt(0, 3)] + +private fun generateMultiplier( + seed: Int, + size: Int, +): CountedInput { + if (size == 0) throw Exception("cannot create multiplier of zero size") + if (size == 1) return generateVariable(seed + 1) + return lBrace + generateExpression(seed + 1, size) + rBrace +} + +private fun generateSummand( + seed: Int, + size: Int, +): CountedInput { + if (size == 0) throw Exception("cannot create summand of zero size") + if (size == 1) return generateMultiplier(seed, size) + val rnd = Random(seed) + val split = rnd.nextInt(0, size) + val mulSign = mulSigns[rnd.nextInt(2)] + if (split == 0) return generateMultiplier(seed, size) + return generateSummand(seed + 1, split) + mulSign + generateMultiplier(seed + 2, size - split) +} + +private fun generateExpression( + seed: Int, + size: Int, +): CountedInput { + if (size == 0) throw Exception("cannot create expression of zero size") + if (size == 1) return generateMultiplier(seed, size) + val rnd = Random(seed) + val split = rnd.nextInt(0, size - 1) + val plusSign = plusSigns[rnd.nextInt(2)] + if (split == 0) return 
generateSummand(seed + 1, size) + return generateExpression(seed + 1, split) + plusSign + generateSummand(seed + 2, size - split) +} + +private fun generateStatement( + seed: Int, + size: Int, +): CountedInput { + if (size < 2) throw Exception("cannot create statement of size less than three") + val variable = generateVariable(seed + 1) + return variable + assign + generateExpression(seed + 2, size - 1) +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt new file mode 100644 index 000000000..ebb8d8691 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt @@ -0,0 +1,78 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.Nt +import org.ucfs.grammar.combinator.regexp.or +import org.ucfs.grammar.combinator.regexp.times +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class NonSquareGrammar : Grammar() { + val SS by Nt() + val S by Nt() + val A by Nt() + val B by Nt() + + var a = Term("a") + var b = Term("b") + + init { + setStart(SS) + SS /= S * lineEnd + A /= a * A * a or a * A * b or b * A * a or b * A * b or a + B /= a * B * a or a * B * b or b * B * a or b * B * b or b + S /= A * B or B * A or A or B + } +} + +class NonSquareTestGenerator : TestGenerator { + override val name = "Non square" + override val grammar = NonSquareGrammar() + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> generateNonSquareAccept(seed, size) + lineEndToken with RecognizerOutput.Accept }, + { seed, size -> generateNonSquareReject(seed, size) + lineEndToken with RecognizerOutput.Reject }, + ) +} + +private val a = "a " of 1 +private val b = "b " of 1 + +private val letters = listOf(a, b) + +private fun generateAny( + seed: Int, + size: Int, +): CountedInput { + var ans = empty + 
val rnd = Random(seed) + + repeat(size) { ans += letters[rnd.nextInt(2)] } + + return ans +} + +private fun generateNonSquareReject( + seed: Int, + size: Int, +): CountedInput = generateAny(seed, size) * 2 + +private fun generateNonSquareAccept( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + val hack = rnd.nextInt(size) + val left = generateAny(seed, hack) + val right = generateAny(seed, size - hack - 1) + + val hackLetters = if (rnd.nextBoolean()) a to b else b to a + + // return hacked string + if (rnd.nextBoolean()) return left + hackLetters.first + right + left + hackLetters.second + right + + val letter = letters[rnd.nextInt(2)] + val basic = generateAny(seed, size) + return basic + letter + basic // return odd length +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt new file mode 100644 index 000000000..708ed0646 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt @@ -0,0 +1,61 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.Epsilon +import org.ucfs.grammar.combinator.regexp.Nt +import org.ucfs.grammar.combinator.regexp.or +import org.ucfs.grammar.combinator.regexp.times +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class PalindromeGrammar : Grammar() { + val SS by Nt() + val S by Nt() + + val a = Term("a") + val b = Term("b") + val c = Term("c") + + init { + setStart(SS) + SS /= S * lineEnd + S /= Epsilon or a or b or c or a * S * a or b * S * b or c * S * c + } +} + +class PalindromeTestGenerator : TestGenerator { + override val grammar = PalindromeGrammar() + override val name = "Palindrome" + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> generatePalindrome(seed, size) + lineEndToken with 
RecognizerOutput.Accept }, + { seed, size -> generatePalindromeFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, + ) +} + +private val letters = listOf("a " of 1, "b " of 1, "c " of 1) + +private fun generatePalindrome( + seed: Int, + size: Int, +): CountedInput { + if (size == 0) return empty + val letter = letters[Random(seed).nextInt(3)] + if (size == 1) return letter + return letter + generatePalindrome(seed + 1, size - 2) + letter +} + +private fun generatePalindromeFail( + seed: Int, + size: Int, +): CountedInput { + if (size < 2) throw Exception("too short to fail") + val rnd = Random(seed) + val letterIndex = rnd.nextInt(3) + if (size < 4 || rnd.nextInt(10) == 0) { // fail now + return letters[letterIndex] + generatePalindrome(seed + 1, size - 2) + letters[(letterIndex + 1) % 3] + } + // fail later + return letters[letterIndex] + generatePalindromeFail(seed + 1, size - 2) + letters[letterIndex] +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt new file mode 100644 index 000000000..970ab060d --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt @@ -0,0 +1,44 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.* +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random +import kotlin.text.repeat + +class StrangeAStar : Grammar() { + val SS by Nt() + val S by Nt() + + init { + setStart(SS) + S /= Epsilon or Term("a") * S * S * S * S * S * S * S * S * S * many(S) + SS /= S * lineEnd + } +} + +class StrangeAStarTestGenerator : TestGenerator { + override val grammar = StrangeAStar() + override val name = "Strange A*" + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> Test(genAStarAccept(seed, size) + lineEndSymbol, size + 1, 
RecognizerOutput.Accept) }, + { seed, size -> Test(genAStartReject(seed, size) + lineEndSymbol, size + 1, RecognizerOutput.Reject) }, + ) +} + +fun genAStarAccept( + seed: Int, + size: Int, +): String { + return "a ".repeat(size) +} + +fun genAStartReject( + seed: Int, + size: Int, +): String { + val fail = Random(seed).nextInt(0, size) + return "a ".repeat(fail) + "b " + "a ".repeat(size - fail - 1) +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt new file mode 100644 index 000000000..63a2b4249 --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt @@ -0,0 +1,71 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.regexp.* +import org.ucfs.optbench.* +import org.ucfs.rsm.symbol.Term +import kotlin.random.Random + +class UnequalBlocksGrammar : Grammar() { + val SS by Nt() + val S by Nt() + val E by Nt() + val A by Nt() + val B by Nt() + + val a = Term("a") + val b = Term("b") + + init { + setStart(SS) + SS /= S * lineEnd + A /= a or a * A + B /= b or b * B + E /= A or B + S /= E or a * S * b + } +} + +class UnequalBlocksTestGenerator : TestGenerator { + override val name = "Unequal blocks" + override val grammar = UnequalBlocksGrammar() + override val generator = + AcceptRejectUniformGenerator( + { seed, size -> generateUBSuccess(seed, size) + lineEndToken with RecognizerOutput.Accept }, + { seed, size -> generateUBFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, + ) +} + +private val a = "a " of 1 +private val b = "b " of 1 + +private fun generateUBFail( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + // generate equal blocks + if (size <= 3 || rnd.nextBoolean()) return a * size + b * size + val smaller = rnd.nextInt(size - 1) + val hack = rnd.nextInt(1, size - 1) + // generate 
something that is not a^nb^m + return if (rnd.nextBoolean()) { + a * smaller + b * hack + a + b * (size - hack - 1) + } else { + a * hack + b + a * (size - hack - 1) + b * smaller + } +} + +private fun generateUBSuccess( + seed: Int, + size: Int, +): CountedInput { + val rnd = Random(seed) + if (size == 0) throw Exception("Cannot create unequal blocks of size 0") + val smaller = rnd.nextInt(size - 1) + return if (rnd.nextBoolean()) { + a * smaller + b * size + } else { + a * size + b * smaller + } +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt b/solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt new file mode 100644 index 000000000..6d8455c8f --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt @@ -0,0 +1,19 @@ +package org.ucfs.optbench.testsource + +import org.ucfs.optbench.RecognizerOutput +import org.ucfs.optbench.Test +import org.ucfs.optbench.repeat + +data class CountedInput(val input: String, val tokens: Int) { + operator fun plus(other: CountedInput) = CountedInput(input + other.input, tokens + other.tokens) +} + +infix fun CountedInput.with(output: RecognizerOutput) = Test(input, tokens, output) + +infix fun String.of(tokens: Int) = CountedInput(this, tokens) + +val empty = "" of 0 + +fun CountedInput.repeat(times: Int) = CountedInput(input.repeat(times), tokens * times) + +operator fun CountedInput.times(times: Int) = repeat(times) diff --git a/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt b/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt index dfd042d19..9ec6c70c1 100644 --- a/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt +++ b/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt @@ -7,42 +7,57 @@ import org.ucfs.rsm.symbol.ITerminal import org.ucfs.rsm.symbol.Nonterminal import org.ucfs.rsm.symbol.Symbol import java.util.* -import kotlin.collections.ArrayList +import kotlin.collections.HashMap -data class RsmEdge(val symbol: Symbol, val destinationState: RsmState) -//data class 
TerminalRsmEdge(val terminal: Term, val destinationState: RsmState) -//data class NonterminalRsmEdge(val nonterminal: Nonterminal, val destinationState: RsmState) +// data class TerminalRsmEdge(val terminal: Term, val destinationState: RsmState) +// data class NonterminalRsmEdge(val nonterminal: Nonterminal, val destinationState: RsmState) data class RsmState( val nonterminal: Nonterminal, val isStart: Boolean = false, val isFinal: Boolean = false, - var numId: Int = nonterminal.getNextRsmStateId() + var numId: Int = nonterminal.getNextRsmStateId(), ) { val id: String = "${nonterminal.name}_${(numId)}" val outgoingEdges get() = terminalEdgesStorage.plus(nonterminalEdgesStorage) - val terminalEdgesStorage = ArrayList() - - val nonterminalEdgesStorage = ArrayList() + val terminalEdgesStorage = HashMap() + val nonterminalEdgesStorage = HashMap() /** * Adds edge from current rsmState to given destinationState via given symbol, terminal or nonterminal * @param symbol - symbol to store on edge * @param destinationState */ - fun addEdge(symbol: Symbol, destinationState: RsmState) { + fun addEdge( + symbol: Symbol, + destinationState: RsmState, + ) { when (symbol) { - is ITerminal -> terminalEdgesStorage.add(RsmEdge(symbol, destinationState)) - is Nonterminal -> nonterminalEdgesStorage.add(RsmEdge(symbol, destinationState)) + is ITerminal -> addTerminalEdge(symbol, destinationState) + is Nonterminal -> addNonterminalEdge(symbol, destinationState) else -> throw RsmException("Unsupported type of symbol") } } - protected fun getNewState(regex: Regexp): RsmState { + private fun addTerminalEdge( + terminal: ITerminal, + destination: RsmState, + ) { + terminalEdgesStorage[terminal] = destination + } + + private fun addNonterminalEdge( + nonterminal: Nonterminal, + destinationState: RsmState, + ) { + nonterminalEdgesStorage[nonterminal] = destinationState + } + + private fun getNewState(regex: Regexp): RsmState { return RsmState(this.nonterminal, isStart = false, 
regex.acceptEpsilon()) } @@ -87,4 +102,4 @@ data class RsmState( } } } -} \ No newline at end of file +} From 04787eca1837e0967fc51e689bd0ea0cc47de995 Mon Sep 17 00:00:00 2001 From: random-randoms Date: Wed, 21 May 2025 20:16:52 +0300 Subject: [PATCH 2/6] make time measurement in nanoseconds cache descriptor hash --- solver/src/main/kotlin/org/ucfs/descriptors/Descriptor.kt | 7 +++++-- solver/src/main/kotlin/org/ucfs/optbench/Main.kt | 4 ++-- solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt | 6 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/solver/src/main/kotlin/org/ucfs/descriptors/Descriptor.kt b/solver/src/main/kotlin/org/ucfs/descriptors/Descriptor.kt index b31854a05..9e90486b7 100644 --- a/solver/src/main/kotlin/org/ucfs/descriptors/Descriptor.kt +++ b/solver/src/main/kotlin/org/ucfs/descriptors/Descriptor.kt @@ -3,6 +3,7 @@ package org.ucfs.descriptors import org.ucfs.gss.GssNode import org.ucfs.rsm.RsmState import org.ucfs.sppf.node.RangeSppfNode +import java.util.Objects /** * Descriptor represents current parsing stage @@ -26,6 +27,8 @@ data class Descriptor( * to derivation trees, stored on edges of GSS, it corresponds to return from recursive function */ val sppfNode: RangeSppfNode, +) { + private val hash = Objects.hash(inputPosition, gssNode, rsmState, sppfNode) -) - + override fun hashCode() = hash +} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt index bb6143434..6136d4cf3 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt @@ -25,9 +25,9 @@ fun main() { val result = src.run() yield(result) println(result) - if (result.totalRuntime > 10000) break + if (result.totalRuntime > 10000000000) break size = size * 3 / 2 } } - }.toList().dumpToCsv(File("unoptimized.csv")) + }.toList().dumpToCsv(File("cache_hash.csv")) } diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt 
b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt index 45898ecf5..f03bc60d8 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -7,7 +7,7 @@ import org.ucfs.input.TerminalInputLabel import org.ucfs.parser.Gll import org.ucfs.sppf.node.RangeSppfNode import java.io.File -import kotlin.system.measureTimeMillis +import kotlin.system.measureNanoTime data class Test(val input: String, val size: Int, val output: RecognizerOutput) @@ -31,7 +31,7 @@ fun runTest( ): Triple, RecognizerOutput> { var result: ParserOutput val input = LinearInput.buildFromString(test.input) - val time = measureTimeMillis { result = runGll(input, grammar) } + val time = measureNanoTime { result = runGll(input, grammar) } return Triple(time, result, result.checkRecognize(input)) } @@ -58,7 +58,7 @@ data class TestResult( return "name: ${name.chars(20)} | " + "size: ${size.chars(6)} | " + "tests: ${tests.chars(4)} | " + - "initial: ${totalRuntime.chars(8)}ms | " + + "initial: ${totalRuntime.chars(15)}ns | " + missesString } } From 22715fad03fe4626ed9237992bba9207462e4b36 Mon Sep 17 00:00:00 2001 From: random-randoms Date: Sat, 24 May 2025 12:06:13 +0300 Subject: [PATCH 3/6] improve benchmarks --- .../src/main/kotlin/org/ucfs/optbench/Main.kt | 74 ++++++++++++------- .../kotlin/org/ucfs/optbench/TestSource.kt | 6 +- 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt index 6136d4cf3..3d891b945 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt @@ -2,32 +2,54 @@ package org.ucfs.optbench import org.ucfs.optbench.testsource.* import java.io.File +import kotlin.random.Random -fun main() { - val generators = - listOf( - // StrangeAStarTestGenerator(), - Dyck3TestGenerator(), - DyckTestGenerator(), - ExpressionTestGenerator(), - 
PalindromeTestGenerator(), - UnequalBlocksTestGenerator(), - NonSquareTestGenerator(), - ) - - sequence { - generators.forEach { - var size = 10 - val tests = 100 - - while (true) { - val src = it.generateSource(228, size, tests) - val result = src.run() - yield(result) - println(result) - if (result.totalRuntime > 10000000000) break - size = size * 3 / 2 - } +fun bench( + what: TestGenerator, + initial: Int, + cap: Int, + tests: Int = 100, +): List { + var size = initial + + val seed = Random.nextInt() + + return sequence { + while (size < cap) { + what + .generateSource(seed, size, tests) + .run() + .also { println(it) } + .also { yield(it) } + size = size * 3 / 2 } - }.toList().dumpToCsv(File("cache_hash.csv")) + }.toList() +} + +fun warmup() { + ExpressionTestGenerator().generateSource(100, 1000, 100).run() +} + +fun benchDyck() = bench(DyckTestGenerator(), 100, 20000, 50) + +fun benchDyck3() = bench(Dyck3TestGenerator(), 100, 20000, 50) + +fun benchExpression() = bench(ExpressionTestGenerator(), 100, 20000, 50) + +fun benchPalindrome() = bench(PalindromeTestGenerator(), 100, 15000, 50) + +fun benchUnequalBlocks() = bench(UnequalBlocksTestGenerator(), 100, 600, 50) + +fun benchNonSquare() = bench(NonSquareTestGenerator(), 100, 600, 50) + +fun main() { + warmup() + ( + benchDyck() + + benchDyck3() + + benchExpression() + + benchPalindrome() + + benchUnequalBlocks() + + benchNonSquare() + ).dumpToCsv(File("cache_hash.csv")) } diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt index f03bc60d8..45e60d3f2 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -53,12 +53,14 @@ data class TestResult( val totalRuntime: Long, val misses: List, ) { + val averageRuntime = totalRuntime / tests + override fun toString(): String { val missesString = if (isOk()) "" else " | misses: ${misses.size}" return "name: 
${name.chars(20)} | " + "size: ${size.chars(6)} | " + "tests: ${tests.chars(4)} | " + - "initial: ${totalRuntime.chars(15)}ns | " + + "initial: ${averageRuntime.chars(15)}ns | " + missesString } } @@ -94,7 +96,7 @@ fun List.dumpToCsv(file: File) { bw.write(",") bw.write(it.size.toString()) bw.write(",") - bw.write(it.totalRuntime.toString()) + bw.write(it.averageRuntime.toString()) bw.write("\r\n") } From 652bd4a1bff78f01415b891133eddb61fbe188d3 Mon Sep 17 00:00:00 2001 From: random-randoms Date: Sat, 24 May 2025 17:49:06 +0300 Subject: [PATCH 4/6] create symbol registry to make symbols lighter and faster --- .../org/ucfs/grammar/combinator/regexp/Nt.kt | 10 +- .../main/kotlin/org/ucfs/input/DotParser.kt | 19 ++- solver/src/main/kotlin/org/ucfs/input/Edge.kt | 6 +- .../main/kotlin/org/ucfs/input/IInputGraph.kt | 20 ++-- .../main/kotlin/org/ucfs/input/InputGraph.kt | 23 ++-- .../main/kotlin/org/ucfs/input/LinearInput.kt | 26 ++--- .../kotlin/org/ucfs/input/SymbolRegistry.kt | 43 +++++++ .../kotlin/org/ucfs/input/utils/DotWriter.kt | 9 +- .../input/utils/dot/GraphFromDotVisitor.kt | 35 +++--- .../ucfs/intersection/IIntersectionEngine.kt | 7 +- .../ucfs/intersection/IntersectionEngine.kt | 13 ++- .../src/main/kotlin/org/ucfs/optbench/Main.kt | 2 +- .../kotlin/org/ucfs/optbench/TestSource.kt | 9 +- solver/src/main/kotlin/org/ucfs/parser/Gll.kt | 107 ++++++++++------- .../src/main/kotlin/org/ucfs/parser/IGll.kt | 108 +++++++++++------- .../kotlin/org/ucfs/parser/context/Context.kt | 14 +-- .../src/main/kotlin/org/ucfs/rsm/RsmState.kt | 16 ++- .../src/main/kotlin/org/ucfs/rsm/RsmWrite.kt | 44 ++++--- .../kotlin/org/ucfs/rsm/symbol/Nonterminal.kt | 14 ++- .../main/kotlin/org/ucfs/rsm/symbol/Term.kt | 14 ++- .../main/kotlin/org/ucfs/sppf/SppfStorage.kt | 56 +++++---- 21 files changed, 375 insertions(+), 220 deletions(-) create mode 100644 solver/src/main/kotlin/org/ucfs/input/SymbolRegistry.kt diff --git a/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Nt.kt 
b/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Nt.kt index a9d5f6949..d117f5fec 100644 --- a/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Nt.kt +++ b/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Nt.kt @@ -6,7 +6,7 @@ import org.ucfs.rsm.symbol.Nonterminal import kotlin.reflect.KProperty open class Nt() : DerivedSymbol { - private lateinit var name : String + private lateinit var name: String constructor(lhs: Regexp) : this() { rsmDescription = lhs } @@ -25,7 +25,10 @@ open class Nt() : DerivedSymbol { nonterm.startState.buildRsmBox(rsmDescription) } - operator fun getValue(grammar: Grammar, property: KProperty<*>): Nt = this + operator fun getValue( + grammar: Grammar, + property: KProperty<*>, + ): Nt = this operator fun divAssign(lhs: Regexp) { if (isInitialized()) { @@ -35,7 +38,8 @@ open class Nt() : DerivedSymbol { } operator fun provideDelegate( - grammar: Grammar, property: KProperty<*> + grammar: Grammar, + property: KProperty<*>, ): Nt { name = property.name nonterm = Nonterminal(property.name) diff --git a/solver/src/main/kotlin/org/ucfs/input/DotParser.kt b/solver/src/main/kotlin/org/ucfs/input/DotParser.kt index 818f61000..25436277f 100644 --- a/solver/src/main/kotlin/org/ucfs/input/DotParser.kt +++ b/solver/src/main/kotlin/org/ucfs/input/DotParser.kt @@ -9,8 +9,7 @@ import java.io.File import java.io.IOException class DotParser { - - fun parseDotFile(filePath: String): InputGraph { + fun parseDotFile(filePath: String): InputGraph { val file = File(filePath) if (!file.exists()) { @@ -19,15 +18,15 @@ class DotParser { return parseDot(file.readText()) } - - fun parseDot(dotView: String): InputGraph { - val realParser = DotParser( - CommonTokenStream( - DotLexer( - CharStreams.fromString(dotView) - ) + fun parseDot(dotView: String): InputGraph { + val realParser = + DotParser( + CommonTokenStream( + DotLexer( + CharStreams.fromString(dotView), + ), + ), ) - ) return GraphFromDotVisitor().visitGraph(realParser.graph()) 
} } diff --git a/solver/src/main/kotlin/org/ucfs/input/Edge.kt b/solver/src/main/kotlin/org/ucfs/input/Edge.kt index 3124e70ad..5578c3f2a 100644 --- a/solver/src/main/kotlin/org/ucfs/input/Edge.kt +++ b/solver/src/main/kotlin/org/ucfs/input/Edge.kt @@ -1,6 +1,6 @@ package org.ucfs.input -data class Edge( - val label: LabelType, +data class Edge( + val label: LightSymbol, val targetVertex: VertexType, -) \ No newline at end of file +) diff --git a/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt b/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt index 4fe5788d7..4e834d024 100644 --- a/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt +++ b/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt @@ -3,9 +3,8 @@ package org.ucfs.input /** * Input graph interface * @param VertexType - type of vertex in input graph - * @param LabelType - type of label on edges in input graph */ -interface IInputGraph { +interface IInputGraph { /** * @return collection of all starting vertices */ @@ -23,13 +22,12 @@ interface IInputGraph { */ fun addVertex(vertex: VertexType) - /** * Returns all outgoing edges from given vertex * @param from - vertex to retrieve outgoing edges from * @return Collection of outgoing edges */ - fun getEdges(from: VertexType): MutableList> + fun getEdges(from: VertexType): MutableList> /** * Adds edge to graph @@ -37,7 +35,11 @@ interface IInputGraph { * @param label - value to store on the edge * @param to - head of the edge */ - fun addEdge(from: VertexType, label: LabelType, to: VertexType) + fun addEdge( + from: VertexType, + label: LightSymbol, + to: VertexType, + ) /** * Removes edge from graph @@ -45,7 +47,11 @@ interface IInputGraph { * @param label - value, stored on the edge * @param to - head of the edge */ - fun removeEdge(from: VertexType, label: LabelType, to: VertexType) + fun removeEdge( + from: VertexType, + label: LightSymbol, + to: VertexType, + ) /** * @param vertex - vertex to check @@ -60,4 +66,4 @@ interface IInputGraph { fun 
isFinal(vertex: VertexType): Boolean fun removeVertex(vertex: VertexType) -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt b/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt index a8a4e009a..0916330cc 100644 --- a/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt +++ b/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt @@ -1,12 +1,11 @@ package org.ucfs.input -open class InputGraph : IInputGraph { - +open class InputGraph : IInputGraph { var name = "G" val vertices: MutableSet = HashSet() - val edges: MutableMap>> = HashMap() + val edges: MutableMap>> = HashMap() val startVertices: MutableSet = HashSet() @@ -28,11 +27,15 @@ open class InputGraph : IInputGraph> { + override fun getEdges(from: VertexType): MutableList> { return edges.getOrDefault(from, ArrayList()) } - override fun addEdge(from: VertexType, label: LabelType, to: VertexType) { + override fun addEdge( + from: VertexType, + label: LightSymbol, + to: VertexType, + ) { val edge = Edge(label, to) if (!edges.containsKey(from)) edges[from] = ArrayList() @@ -40,12 +43,16 @@ open class InputGraph : IInputGraph : InputGraph() { - +open class LinearInput : InputGraph() { override fun toString(): String { - if(startVertices.isEmpty()){ - return "${this.javaClass}: empty" - } + if (startVertices.isEmpty()) + { + return "${this.javaClass}: empty" + } var v: VertexType = startVertices.first() val sb = StringBuilder() - while(v != null){ + while (v != null) { val e = edges[v]?.first() ?: break sb.append("\n") sb.append(e.label) @@ -23,21 +23,21 @@ open class LinearInput : InputGraph { - val inputGraph = LinearInput() + fun buildFromString(input: String): IInputGraph { + val inputGraph = LinearInput() var curVertexId = 0 inputGraph.addStartVertex(curVertexId) inputGraph.addVertex(curVertexId) for (x in input.trim().split(SPACE).filter { it.isNotEmpty() }) { - if (x.isNotEmpty()) { - inputGraph.addEdge(curVertexId, TerminalInputLabel(Term(x)), ++curVertexId) - 
inputGraph.addVertex(curVertexId) - } + val light = SymbolRegistry.registerTerminal(Term(x)) + inputGraph.addEdge(curVertexId, light, ++curVertexId) + inputGraph.addVertex(curVertexId) } return inputGraph } + const val SPACE = " " } -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/input/SymbolRegistry.kt b/solver/src/main/kotlin/org/ucfs/input/SymbolRegistry.kt new file mode 100644 index 000000000..fa76817ff --- /dev/null +++ b/solver/src/main/kotlin/org/ucfs/input/SymbolRegistry.kt @@ -0,0 +1,43 @@ +package org.ucfs.input + +import org.ucfs.rsm.symbol.ITerminal +import org.ucfs.rsm.symbol.Nonterminal + +@JvmInline +value class LightSymbol(val index: Int) + +val Eps = LightSymbol(0) + +object SymbolRegistry { + private var terminalIndex = 1 + private var nonterminalIndex = 1 + private val terminals = HashMap() + private val nonterminals = HashMap() + private val sourceTerminals = arrayListOf() + private val sourceNonterminals = arrayListOf() + + fun registerTerminal(iTerminal: ITerminal): LightSymbol = + terminals[iTerminal] ?: LightSymbol(terminalIndex++) + .also { terminals[iTerminal] = it } + .also { sourceTerminals.add(iTerminal) } + + fun registerNonterminal(nonterminal: Nonterminal): LightSymbol = + nonterminals[nonterminal] ?: LightSymbol(-nonterminalIndex++) + .also { nonterminals[nonterminal] = it } + .also { sourceNonterminals.add(nonterminal) } + + fun getITerminal(terminal: LightSymbol): ITerminal = sourceTerminals[terminal.index - 1] + + fun getNonterminal(nonterminal: LightSymbol): Nonterminal = sourceNonterminals[-1 - nonterminal.index] +} + +val LightSymbol.terminal + get() = SymbolRegistry.getITerminal(this) + +val LightSymbol.nonTerminal + get() = SymbolRegistry.getNonterminal(this) + +val LightSymbol.symbol + get() = if (isNonterminal()) nonTerminal else terminal + +fun LightSymbol.isNonterminal() = index < 0 diff --git a/solver/src/main/kotlin/org/ucfs/input/utils/DotWriter.kt 
b/solver/src/main/kotlin/org/ucfs/input/utils/DotWriter.kt index ec8783ca0..6325d557d 100644 --- a/solver/src/main/kotlin/org/ucfs/input/utils/DotWriter.kt +++ b/solver/src/main/kotlin/org/ucfs/input/utils/DotWriter.kt @@ -1,13 +1,14 @@ package org.ucfs.input.utils - import org.ucfs.input.InputGraph -import org.ucfs.input.TerminalInputLabel +import org.ucfs.input.terminal import java.lang.StringBuilder class DotWriter { - - fun getDotView(graph: InputGraph, isDirected: Boolean = true): String { + fun getDotView( + graph: InputGraph, + isDirected: Boolean = true, + ): String { val builder = StringBuilder() val graphType = if (isDirected) "digraph" else "graph" val connector = if (isDirected) "->" else "--" diff --git a/solver/src/main/kotlin/org/ucfs/input/utils/dot/GraphFromDotVisitor.kt b/solver/src/main/kotlin/org/ucfs/input/utils/dot/GraphFromDotVisitor.kt index bc668bab7..c907ea76f 100644 --- a/solver/src/main/kotlin/org/ucfs/input/utils/dot/GraphFromDotVisitor.kt +++ b/solver/src/main/kotlin/org/ucfs/input/utils/dot/GraphFromDotVisitor.kt @@ -1,33 +1,34 @@ package org.ucfs.input.utils.dot import org.ucfs.input.InputGraph -import org.ucfs.input.TerminalInputLabel +import org.ucfs.input.LightSymbol +import org.ucfs.input.SymbolRegistry import org.ucfs.rsm.symbol.Term -class GraphFromDotVisitor : DotBaseVisitor>() { - lateinit var graph: InputGraph +class GraphFromDotVisitor : DotBaseVisitor>() { + lateinit var graph: InputGraph - override fun visitGraph(ctx: DotParser.GraphContext?): InputGraph { + override fun visitGraph(ctx: DotParser.GraphContext?): InputGraph { graph = InputGraph() super.visitGraph(ctx) ctx?.id_()?.let { graph.name = it.text } return graph } - private fun getNodeId(vertexView: String): Int { return vertexView.toInt() } - private fun parseSimpleEdge(edgeView: String): TerminalInputLabel { + private fun parseSimpleEdge(edgeView: String): LightSymbol { val viewWithoutQuotes = edgeView.substring(1, edgeView.length - 1) - return 
TerminalInputLabel(Term(viewWithoutQuotes)) + return SymbolRegistry.registerTerminal(Term(viewWithoutQuotes)) } - override fun visitEdge_stmt(ctx: DotParser.Edge_stmtContext?): InputGraph { - val tos = ctx?.edgeRHS()?.node_id() - //we don't handle subgraph here - ?: return super.visitEdge_stmt(ctx) + override fun visitEdge_stmt(ctx: DotParser.Edge_stmtContext?): InputGraph { + val tos = + ctx?.edgeRHS()?.node_id() + // we don't handle subgraph here + ?: return super.visitEdge_stmt(ctx) if (tos.size > 1) { throw Exception("we can't handle transitives in dot yet!") } @@ -39,8 +40,9 @@ class GraphFromDotVisitor : DotBaseVisitor>( val from = getNodeId(ctx.node_id().text) val attrs = ctx.attr_list().attr() ?: throw Exception("we can't handle edges without labels yet!") - val labelNode = attrs.find { it.label_name.text == "label" } - ?: throw Exception("we can't handle edges without labels yet!") + val labelNode = + attrs.find { it.label_name.text == "label" } + ?: throw Exception("we can't handle edges without labels yet!") graph.addVertex(from) graph.addVertex(to) graph.addEdge(from, parseSimpleEdge(labelNode.label_value.text), to) @@ -49,13 +51,12 @@ class GraphFromDotVisitor : DotBaseVisitor>( return graph } - override fun visitNode_stmt(ctx: DotParser.Node_stmtContext?): InputGraph { + override fun visitNode_stmt(ctx: DotParser.Node_stmtContext?): InputGraph { if (ctx?.node_id()?.text == "start") { return super.visitNode_stmt(ctx) - } - //add node info + // add node info super.visitNode_stmt(ctx) return graph } -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/intersection/IIntersectionEngine.kt b/solver/src/main/kotlin/org/ucfs/intersection/IIntersectionEngine.kt index 7747d850f..2a0d881c8 100644 --- a/solver/src/main/kotlin/org/ucfs/intersection/IIntersectionEngine.kt +++ b/solver/src/main/kotlin/org/ucfs/intersection/IIntersectionEngine.kt @@ -1,12 +1,11 @@ package org.ucfs.intersection import org.ucfs.descriptors.Descriptor -import 
org.ucfs.input.ILabel import org.ucfs.parser.IGll interface IIntersectionEngine { - fun handleEdges( - gll: IGll, + fun handleEdges( + gll: IGll, descriptor: Descriptor, ) -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt b/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt index c522fa5e9..89e77b428 100644 --- a/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt +++ b/solver/src/main/kotlin/org/ucfs/intersection/IntersectionEngine.kt @@ -1,7 +1,7 @@ package org.ucfs.intersection import org.ucfs.descriptors.Descriptor -import org.ucfs.input.ILabel +import org.ucfs.input.Eps import org.ucfs.parser.IGll object IntersectionEngine : IIntersectionEngine { @@ -11,12 +11,13 @@ object IntersectionEngine : IIntersectionEngine { * @param gll - Gll parser instance * @param descriptor - descriptor, represents current parsing stage */ - override fun handleEdges( - gll: IGll, + override fun handleEdges( + gll: IGll, descriptor: Descriptor, ) { for (inputEdge in gll.ctx.input.getEdges(descriptor.inputPosition)) { - val terminal = inputEdge.label.terminal ?: continue + val terminal = inputEdge.label + if (terminal == Eps) continue val destination = descriptor.rsmState.terminalEdgesStorage[terminal] ?: continue gll.handleTerminalEdge(descriptor, inputEdge, destination, terminal) } @@ -24,8 +25,8 @@ object IntersectionEngine : IIntersectionEngine { for (nonterminalEdge in descriptor.rsmState.nonterminalEdgesStorage) { gll.handleNonterminalEdge( descriptor, - nonterminalEdge.value, - nonterminalEdge.key, + nonterminalEdge.second, + nonterminalEdge.first, ) } } diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt index 3d891b945..f416f2b5a 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt @@ -51,5 +51,5 @@ fun main() { benchPalindrome() + benchUnequalBlocks() + 
benchNonSquare() - ).dumpToCsv(File("cache_hash.csv")) + ).dumpToCsv(File("symbol_registry.csv")) } diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt index 45e60d3f2..57926bd0e 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt +++ b/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -3,7 +3,6 @@ package org.ucfs.optbench import org.ucfs.grammar.combinator.Grammar import org.ucfs.input.IInputGraph import org.ucfs.input.LinearInput -import org.ucfs.input.TerminalInputLabel import org.ucfs.parser.Gll import org.ucfs.sppf.node.RangeSppfNode import java.io.File @@ -13,7 +12,7 @@ data class Test(val input: String, val size: Int, val output: RecognizerOutput) typealias ParserOutput = RangeSppfNode? -fun ParserOutput.checkRecognize(input: IInputGraph): RecognizerOutput = +fun ParserOutput.checkRecognize(input: IInputGraph): RecognizerOutput = if (this == null || inputRange == null) { RecognizerOutput.Reject } else { @@ -21,7 +20,7 @@ fun ParserOutput.checkRecognize(input: IInputGraph } fun runGll( - input: IInputGraph, + input: IInputGraph, grammar: Grammar, ) = Gll.gll(grammar.rsm, input).parse() @@ -75,7 +74,9 @@ data class TestSource(val grammar: Grammar, val inputs: Collection, val na val actual = runTest(it, grammar) totalRuntime += actual.first val test = SingleTest(name, it.input, it.output, actual.third) - if (actual.third != it.output) misses.add(test) + if (actual.third != it.output) { + misses.add(test) + } } return TestResult( name, diff --git a/solver/src/main/kotlin/org/ucfs/parser/Gll.kt b/solver/src/main/kotlin/org/ucfs/parser/Gll.kt index 6359a3f59..2a1078c69 100644 --- a/solver/src/main/kotlin/org/ucfs/parser/Gll.kt +++ b/solver/src/main/kotlin/org/ucfs/parser/Gll.kt @@ -3,7 +3,6 @@ package org.ucfs.parser import org.ucfs.descriptors.Descriptor import org.ucfs.gss.GssEdge import org.ucfs.input.IInputGraph -import org.ucfs.input.ILabel import 
org.ucfs.intersection.IIntersectionEngine import org.ucfs.intersection.IntersectionEngine import org.ucfs.parser.context.Context @@ -15,10 +14,10 @@ import org.ucfs.sppf.node.* * @param VertexType - type of vertex in input graph * @param LabelType - type of label on edges in input graph */ -class Gll private constructor( - override var ctx: Context, private val engine: IIntersectionEngine -) : IGll { - +class Gll private constructor( + override var ctx: Context, + private val engine: IIntersectionEngine, +) : IGll { companion object { /** * Creates instance of incremental Gll @@ -26,43 +25,61 @@ class Gll private constructor( * @param inputGraph - input graph * @return default instance of gll parser */ - fun gll( - startState: RsmState, inputGraph: IInputGraph - ): Gll { + fun gll( + startState: RsmState, + inputGraph: IInputGraph, + ): Gll { return Gll(Context(startState, inputGraph), IntersectionEngine) } } private fun getEpsilonRange(descriptor: Descriptor): RangeSppfNode { - val input = InputRange( - descriptor.inputPosition, - descriptor.inputPosition, - ) - val rsm = RsmRange( - descriptor.rsmState, - descriptor.rsmState, - ) + val input = + InputRange( + descriptor.inputPosition, + descriptor.inputPosition, + ) + val rsm = + RsmRange( + descriptor.rsmState, + descriptor.rsmState, + ) return ctx.sppfStorage.addEpsilonNode(input, rsm, descriptor.gssNode.rsm) } private fun handlePoppedGssEdge( - poppedGssEdge: GssEdge, descriptor: Descriptor, childSppf: RangeSppfNode + poppedGssEdge: GssEdge, + descriptor: Descriptor, + childSppf: RangeSppfNode, ) { val leftRange = poppedGssEdge.matchedRange - val startRsmState = if (poppedGssEdge.matchedRange.type is EmptyType) poppedGssEdge.gssNode.rsm - else poppedGssEdge.matchedRange.rsmRange!!.to - val rightRange = ctx.sppfStorage.addNonterminalNode( - InputRange( - descriptor.gssNode.inputPosition, descriptor.inputPosition - ), RsmRange( - startRsmState, - poppedGssEdge.state, - ), descriptor.gssNode.rsm, childSppf - ) + 
val startRsmState = + if (poppedGssEdge.matchedRange.type is EmptyType) { + poppedGssEdge.gssNode.rsm + } else { + poppedGssEdge.matchedRange.rsmRange!!.to + } + val rightRange = + ctx.sppfStorage.addNonterminalNode( + InputRange( + descriptor.gssNode.inputPosition, + descriptor.inputPosition, + ), + RsmRange( + startRsmState, + poppedGssEdge.state, + ), + descriptor.gssNode.rsm, + childSppf, + ) val newRange = ctx.sppfStorage.addIntermediateNode(leftRange, rightRange) - val newDescriptor = Descriptor( - descriptor.inputPosition, poppedGssEdge.gssNode, poppedGssEdge.state, newRange - ) + val newDescriptor = + Descriptor( + descriptor.inputPosition, + poppedGssEdge.gssNode, + poppedGssEdge.state, + newRange, + ) ctx.descriptors.add(newDescriptor) } @@ -73,17 +90,24 @@ class Gll private constructor( override fun handleDescriptor(descriptor: Descriptor) { ctx.descriptors.addToHandled(descriptor) if (descriptor.rsmState.isFinal) { - val matchedRange = if (descriptor.sppfNode.type is EmptyType) { - val node = getEpsilonRange(descriptor) - //TODO fix - // dirty hack: in fact it's equivavelnt descriptors - // but only initial was added in handlet set - ctx.descriptors.addToHandled(Descriptor(descriptor.inputPosition, - descriptor.gssNode, descriptor.rsmState, node)) - node - } else { - descriptor.sppfNode - } + val matchedRange = + if (descriptor.sppfNode.type is EmptyType) { + val node = getEpsilonRange(descriptor) + // TODO fix + // dirty hack: in fact it's equivavelnt descriptors + // but only initial was added in handlet set + ctx.descriptors.addToHandled( + Descriptor( + descriptor.inputPosition, + descriptor.gssNode, + descriptor.rsmState, + node, + ), + ) + node + } else { + descriptor.sppfNode + } for (poppedEdge in ctx.gss.pop(descriptor, matchedRange)) { handlePoppedGssEdge(poppedEdge, descriptor, matchedRange) } @@ -95,4 +119,3 @@ class Gll private constructor( engine.handleEdges(this, descriptor) } } - diff --git 
a/solver/src/main/kotlin/org/ucfs/parser/IGll.kt b/solver/src/main/kotlin/org/ucfs/parser/IGll.kt index fd7576418..d629d0ad3 100644 --- a/solver/src/main/kotlin/org/ucfs/parser/IGll.kt +++ b/solver/src/main/kotlin/org/ucfs/parser/IGll.kt @@ -1,13 +1,9 @@ package org.ucfs.parser import org.ucfs.descriptors.Descriptor -import org.ucfs.input.Edge -import org.ucfs.input.IInputGraph -import org.ucfs.input.ILabel +import org.ucfs.input.* import org.ucfs.parser.context.Context import org.ucfs.rsm.RsmState -import org.ucfs.rsm.symbol.ITerminal -import org.ucfs.rsm.symbol.Nonterminal import org.ucfs.sppf.node.* /** @@ -15,11 +11,11 @@ import org.ucfs.sppf.node.* * @param InputNodeType - type of vertex in input graph * @param LabelType - type of label on edges in input graph */ -interface IGll { +interface IGll { /** * Parser configuration */ - var ctx: Context + var ctx: Context /** * Main parsing loop. Iterates over available descriptors and processes them @@ -49,68 +45,96 @@ interface IGll { * Creates descriptors for all starting vertices in input graph * @param input - input graph */ - fun initDescriptors(input: IInputGraph) { + fun initDescriptors(input: IInputGraph) { for (startVertex in input.getInputStartVertices()) { - val gssNode = ctx.gss.getOrCreateNode(startVertex, ctx.startState) - val startDescriptor = Descriptor( - startVertex, gssNode, ctx.startState, getEmptyRange() - ) + val startDescriptor = + Descriptor( + startVertex, + gssNode, + ctx.startState, + getEmptyRange(), + ) ctx.descriptors.add(startDescriptor) } } fun handleNonterminalEdge( - descriptor: Descriptor, destinationRsmState: RsmState, edgeNonterminal: Nonterminal + descriptor: Descriptor, + destinationRsmState: RsmState, + edgeNonterminal: LightSymbol, ) { - val rsmStartState = edgeNonterminal.startState - val (newGssNode, positionToPops) = ctx.gss.addEdge( - descriptor.gssNode, destinationRsmState, descriptor.inputPosition, rsmStartState, descriptor.sppfNode - ) + val rsmStartState = 
edgeNonterminal.nonTerminal.startState + val (newGssNode, positionToPops) = + ctx.gss.addEdge( + descriptor.gssNode, + destinationRsmState, + descriptor.inputPosition, + rsmStartState, + descriptor.sppfNode, + ) - var newDescriptor = Descriptor( - descriptor.inputPosition, newGssNode, rsmStartState, getEmptyRange() - ) + var newDescriptor = + Descriptor( + descriptor.inputPosition, + newGssNode, + rsmStartState, + getEmptyRange(), + ) ctx.descriptors.add(newDescriptor) for (rangeToPop in positionToPops) { val leftSubRange = descriptor.sppfNode - val rightSubRange = ctx.sppfStorage.addNonterminalNode( - rangeToPop.inputRange!!, RsmRange( - descriptor.rsmState, destinationRsmState - ), rsmStartState + val rightSubRange = + ctx.sppfStorage.addNonterminalNode( + rangeToPop.inputRange!!, + RsmRange( + descriptor.rsmState, + destinationRsmState, + ), + rsmStartState, ) val newSppfNode = ctx.sppfStorage.addIntermediateNode(leftSubRange, rightSubRange) - //TODO why these parameters??? - newDescriptor = Descriptor( - rangeToPop.inputRange!!.to, descriptor.gssNode, destinationRsmState, newSppfNode - ) + // TODO why these parameters??? 
+ newDescriptor = + Descriptor( + rangeToPop.inputRange.to, + descriptor.gssNode, + destinationRsmState, + newSppfNode, + ) ctx.descriptors.add(newDescriptor) } } - fun handleTerminalEdge( descriptor: Descriptor, - inputEdge: Edge, + inputEdge: Edge, destinationRsmState: RsmState, - terminal: ITerminal + terminal: LightSymbol, ) { - var terminalSppfNode = ctx.sppfStorage.addNode( - InputRange( - descriptor.inputPosition, + val terminalSppfNode = + ctx.sppfStorage.addNode( + InputRange( + descriptor.inputPosition, + inputEdge.targetVertex, + ), + RsmRange( + descriptor.rsmState, + destinationRsmState, + ), + terminal, + ) + val intermediateOrTerminalSppf = ctx.sppfStorage.addIntermediateNode(descriptor.sppfNode, terminalSppfNode) + val descriptorForTerminal = + Descriptor( inputEdge.targetVertex, - ), RsmRange( - descriptor.rsmState, + descriptor.gssNode, destinationRsmState, - ), terminal - ) - val intermediateOrTerminalSppf = ctx.sppfStorage.addIntermediateNode(descriptor.sppfNode, terminalSppfNode) - val descriptorForTerminal = Descriptor( - inputEdge.targetVertex, descriptor.gssNode, destinationRsmState, intermediateOrTerminalSppf - ) + intermediateOrTerminalSppf, + ) ctx.descriptors.add(descriptorForTerminal) } } diff --git a/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt b/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt index a18eefef4..f09936b74 100644 --- a/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt +++ b/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt @@ -1,29 +1,23 @@ package org.ucfs.parser.context -import org.ucfs.descriptors.Descriptor import org.ucfs.descriptors.DescriptorsStorage import org.ucfs.gss.GraphStructuredStack import org.ucfs.input.IInputGraph -import org.ucfs.input.ILabel import org.ucfs.rsm.RsmState import org.ucfs.sppf.SppfStorage import org.ucfs.sppf.node.RangeSppfNode - /** * @param InputNodeType - type of vertex in input graph * @param LabelType - type of label on edges in input graph */ 
-class Context ( +class Context( /** * Starting state of accepting Nonterminal in RSM */ val startState: RsmState, - val input: IInputGraph - - + val input: IInputGraph, ) { - /** * Collection of descriptors */ @@ -33,8 +27,8 @@ class Context ( * Derivation trees storage */ val sppfStorage: SppfStorage = SppfStorage() - + val gss: GraphStructuredStack = GraphStructuredStack() var parseResult: RangeSppfNode? = null -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt b/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt index 9ec6c70c1..9219402a7 100644 --- a/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt +++ b/solver/src/main/kotlin/org/ucfs/rsm/RsmState.kt @@ -3,6 +3,8 @@ package org.ucfs.rsm import org.ucfs.grammar.combinator.regexp.Empty import org.ucfs.grammar.combinator.regexp.Nt import org.ucfs.grammar.combinator.regexp.Regexp +import org.ucfs.input.LightSymbol +import org.ucfs.input.SymbolRegistry import org.ucfs.rsm.symbol.ITerminal import org.ucfs.rsm.symbol.Nonterminal import org.ucfs.rsm.symbol.Symbol @@ -23,9 +25,9 @@ data class RsmState( val outgoingEdges get() = terminalEdgesStorage.plus(nonterminalEdgesStorage) - val terminalEdgesStorage = HashMap() + val terminalEdgesStorage = HashMap() - val nonterminalEdgesStorage = HashMap() + val nonterminalEdgesStorage = ArrayList>() /** * Adds edge from current rsmState to given destinationState via given symbol, terminal or nonterminal @@ -47,14 +49,16 @@ data class RsmState( terminal: ITerminal, destination: RsmState, ) { - terminalEdgesStorage[terminal] = destination + val light = SymbolRegistry.registerTerminal(terminal) + terminalEdgesStorage[light] = destination } private fun addNonterminalEdge( nonterminal: Nonterminal, destinationState: RsmState, ) { - nonterminalEdgesStorage[nonterminal] = destinationState + val light = SymbolRegistry.registerNonterminal(nonterminal) + nonterminalEdgesStorage.add(light to destinationState) } private fun getNewState(regex: 
Regexp): RsmState { @@ -88,14 +92,14 @@ data class RsmState( when (symbol) { is ITerminal -> { - state?.addEdge(symbol, destinationState) + state?.addTerminalEdge(symbol, destinationState) } is Nt -> { if (!symbol.isInitialized()) { throw IllegalArgumentException("Not initialized Nt used in description of \"${symbol.nonterm.name}\"") } - state?.addEdge(symbol.nonterm, destinationState) + state?.addNonterminalEdge(symbol.nonterm, destinationState) } } } diff --git a/solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt b/solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt index 0e36732e6..636bc1bff 100644 --- a/solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt +++ b/solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt @@ -1,5 +1,8 @@ package org.ucfs.rsm +import org.ucfs.input.isNonterminal +import org.ucfs.input.nonTerminal +import org.ucfs.input.symbol import org.ucfs.rsm.symbol.Nonterminal import org.ucfs.rsm.symbol.Symbol import org.ucfs.rsm.symbol.Term @@ -17,8 +20,8 @@ private fun getAllStates(startState: RsmState): HashSet { states.add(state) for ((symbol, destState) in state.outgoingEdges) { - if (symbol is Nonterminal) { - queue.addLast(symbol.startState) + if (symbol.isNonterminal()) { + queue.addLast(symbol.nonTerminal.startState) } queue.addLast(destState) queue.addLast(destState.nonterminal.startState) @@ -36,7 +39,10 @@ fun getView(symbol: Symbol): String { } } -fun writeRsmToTxt(startState: RsmState, pathToTXT: String) { +fun writeRsmToTxt( + startState: RsmState, + pathToTXT: String, +) { val states = getAllStates(startState) File(pathToTXT).printWriter().use { out -> out.println( @@ -47,7 +53,7 @@ fun writeRsmToTxt(startState: RsmState, pathToTXT: String) { |isFinal=${startState.isFinal} |)""" .trimMargin() - .replace("\n", "") + .replace("\n", ""), ) states.forEach { state -> @@ -59,7 +65,7 @@ fun writeRsmToTxt(startState: RsmState, pathToTXT: String) { |isFinal=${state.isFinal} |)""" .trimMargin() - .replace("\n", "") + .replace("\n", ""), ) } @@ -73,20 +79,24 @@ 
fun writeRsmToTxt(startState: RsmState, pathToTXT: String) { for (state in states) { for ((symbol, destState) in state.outgoingEdges) { - val (typeView, symbolView, typeLabel) = getSymbolView(symbol) - out.println( - """${typeView}Edge( + val (typeView, symbolView, typeLabel) = getSymbolView(symbol.symbol) + out.println( + """${typeView}Edge( |tail=${state.id}, |head=${destState.id}, |$typeLabel=$typeView("$symbolView") - |)""".trimMargin().replace("\n", "") - ) + |) + """.trimMargin().replace("\n", ""), + ) } } } } -fun writeRsmToDot(startState: RsmState, filePath: String) { +fun writeRsmToDot( + startState: RsmState, + filePath: String, +) { val states = getAllStates(startState) val boxes: HashMap> = HashMap() @@ -106,7 +116,15 @@ fun writeRsmToDot(startState: RsmState, filePath: String) { states.forEach { state -> val shape = if (state.isFinal) "doublecircle" else "circle" val color = - if (state == startState) "purple" else if (state.isStart) "green" else if (state.isFinal) "red" else "black" + if (state == startState) { + "purple" + } else if (state.isStart) { + "green" + } else if (state.isFinal) { + "red" + } else { + "black" + } val id = state.id val name = state.nonterminal.name out.println("$id [label = \"$name,$id\", shape = $shape, color = $color]") @@ -114,7 +132,7 @@ fun writeRsmToDot(startState: RsmState, filePath: String) { states.forEach { state -> state.outgoingEdges.forEach { (symbol, destState) -> - out.println("${state.id} -> ${destState.id} [label = \"${getView(symbol)}\"]") + out.println("${state.id} -> ${destState.id} [label = \"${getView(symbol.symbol)}\"]") } } diff --git a/solver/src/main/kotlin/org/ucfs/rsm/symbol/Nonterminal.kt b/solver/src/main/kotlin/org/ucfs/rsm/symbol/Nonterminal.kt index cdf4af191..97ef24842 100644 --- a/solver/src/main/kotlin/org/ucfs/rsm/symbol/Nonterminal.kt +++ b/solver/src/main/kotlin/org/ucfs/rsm/symbol/Nonterminal.kt @@ -3,10 +3,22 @@ package org.ucfs.rsm.symbol import org.ucfs.rsm.RsmState import 
java.util.* +private object NonterminalIdGenerator { + private var id = 0 + + fun getId() = id++ +} + data class Nonterminal(val name: String?) : Symbol { lateinit var startState: RsmState private var rsmStateLastId = 0 - override fun toString() = "Nonterminal(${name ?: this.hashCode()})" + private val id = NonterminalIdGenerator.getId() + + override fun toString() = "Nonterminal(${name ?: this.hashCode()}@$id)" + + override fun hashCode() = id + + override fun equals(other: Any?) = other != null && other is Nonterminal && other.id == id fun getNextRsmStateId(): Int { val id = rsmStateLastId diff --git a/solver/src/main/kotlin/org/ucfs/rsm/symbol/Term.kt b/solver/src/main/kotlin/org/ucfs/rsm/symbol/Term.kt index a72eb6ad0..9c90ca85e 100644 --- a/solver/src/main/kotlin/org/ucfs/rsm/symbol/Term.kt +++ b/solver/src/main/kotlin/org/ucfs/rsm/symbol/Term.kt @@ -5,18 +5,22 @@ import org.ucfs.parser.ParsingException data class Term(val value: TerminalType) : ITerminal, DerivedSymbol { override fun toString() = value.toString() + override fun getComparator(): Comparator { - //TODO improve comparable interfaces - //TODO replace this logic in 'generator' subproject + // TODO improve comparable interfaces + // TODO replace this logic in 'generator' subproject return object : Comparator { - override fun compare(a: ITerminal, b: ITerminal): Int { + override fun compare( + a: ITerminal, + b: ITerminal, + ): Int { if (a !is Term<*> || b !is Term<*>) { throw ParsingException( - "used comparator for $javaClass, " + "but got elements of ${a.javaClass}$ and ${b.javaClass}\$" + "used comparator for $javaClass, " + "but got elements of ${a.javaClass}$ and ${b.javaClass}\$", ) } return a.value.toString().compareTo(b.value.toString()) } } } -} \ No newline at end of file +} diff --git a/solver/src/main/kotlin/org/ucfs/sppf/SppfStorage.kt b/solver/src/main/kotlin/org/ucfs/sppf/SppfStorage.kt index 94194b664..6e7fa5dd3 100644 --- a/solver/src/main/kotlin/org/ucfs/sppf/SppfStorage.kt +++ 
b/solver/src/main/kotlin/org/ucfs/sppf/SppfStorage.kt @@ -1,7 +1,8 @@ package org.ucfs.sppf +import org.ucfs.input.LightSymbol +import org.ucfs.input.terminal import org.ucfs.rsm.RsmState -import org.ucfs.rsm.symbol.ITerminal import org.ucfs.sppf.node.* /** @@ -13,9 +14,8 @@ open class SppfStorage { */ private val createdSppfNodes: HashMap, RangeSppfNode> = HashMap() - private fun addNode(node: RangeSppfNode): RangeSppfNode { - return createdSppfNodes.getOrPut(node, { node }) + return createdSppfNodes.getOrPut(node) { node } } /** @@ -25,45 +25,59 @@ open class SppfStorage { input: InputRange, rsm: RsmRange, startState: RsmState, - childSppf: RangeSppfNode? = null + childSppf: RangeSppfNode? = null, ): RangeSppfNode { - return if (childSppf == null) addNode(input, rsm, NonterminalType(startState)) - else addNode(input, rsm, NonterminalType(startState), listOf(childSppf)) + return if (childSppf == null) { + addNode(input, rsm, NonterminalType(startState)) + } else { + addNode(input, rsm, NonterminalType(startState), listOf(childSppf)) + } } fun addEpsilonNode( input: InputRange, rsmRange: RsmRange, - rsmState: RsmState + rsmState: RsmState, ): RangeSppfNode { return addNode( - input, rsmRange, EpsilonNonterminalType(rsmState)) + input, + rsmRange, + EpsilonNonterminalType(rsmState), + ) } /** * Add temrminal node */ fun addNode( - input: InputRange, rsm: RsmRange, terminal: ITerminal + input: InputRange, + rsm: RsmRange, + terminal: LightSymbol, ): RangeSppfNode { - return addNode(input, rsm, TerminalType(terminal)) + return addNode(input, rsm, TerminalType(terminal.terminal)) } fun addIntermediateNode( leftSubtree: RangeSppfNode, - rightSubtree: RangeSppfNode + rightSubtree: RangeSppfNode, ): RangeSppfNode { if (leftSubtree.type is EmptyType) { return rightSubtree } return addNode( InputRange( - leftSubtree.inputRange!!.from, rightSubtree.inputRange!!.to - ), RsmRange( - leftSubtree.rsmRange!!.from, rightSubtree.rsmRange!!.to - ), IntermediateType( - 
leftSubtree.rsmRange.to, leftSubtree.inputRange.to - ), listOf(leftSubtree, rightSubtree) + leftSubtree.inputRange!!.from, + rightSubtree.inputRange!!.to, + ), + RsmRange( + leftSubtree.rsmRange!!.from, + rightSubtree.rsmRange!!.to, + ), + IntermediateType( + leftSubtree.rsmRange.to, + leftSubtree.inputRange.to, + ), + listOf(leftSubtree, rightSubtree), ) } @@ -71,7 +85,7 @@ open class SppfStorage { input: InputRange, rsm: RsmRange, rangeType: RangeType, - children: List> = listOf() + children: List> = listOf(), ): RangeSppfNode { val rangeNode = addNode(RangeSppfNode(input, rsm, Range)) val valueRsm = if (rangeType is TerminalType<*>) null else rsm @@ -79,11 +93,11 @@ open class SppfStorage { if (!rangeNode.children.contains(valueNode)) { rangeNode.children.add(valueNode) } - for(child in children){ - if (!valueNode.children.contains(child)){ + for (child in children) { + if (!valueNode.children.contains(child)) { valueNode.children.add(child) } } return rangeNode } -} \ No newline at end of file +} From 0d427223c08088443a35033ea53adc5c37dad620 Mon Sep 17 00:00:00 2001 From: random-randoms Date: Sun, 25 May 2025 18:48:17 +0300 Subject: [PATCH 5/6] move benchmarks to own module --- opt-benchmarks/build.gradle.kts | 22 ++++++ .../src/main/kotlin/org/ucfs/optbench/Main.kt | 70 +++++++++++++++++++ .../org/ucfs/optbench/TestConfiguration.kt | 13 ++++ .../kotlin/org/ucfs/optbench/TestGenerator.kt | 26 +++++++ .../kotlin/org/ucfs/optbench/TestSource.kt | 26 ++++--- .../src/main/kotlin/org/ucfs/optbench/Util.kt | 4 +- .../optbench/testsource/Dyck3TestGenerator.kt | 24 +++++-- .../optbench/testsource/DyckTestGenerator.kt | 24 +++++-- .../testsource/ExpressionTestGenerator.kt | 12 ++-- .../testsource/NonSquareTestGenerator.kt | 24 +++++-- .../testsource/PalindromeTestGenerator.kt | 24 +++++-- .../testsource/StrangeAStarTestGenerator.kt | 24 +++++-- .../testsource/UnequalBlocksTestGenerator.kt | 24 +++++-- .../org/ucfs/optbench/testsource/Util.kt | 0 settings.gradle.kts | 1 
+ .../ucfs/descriptors/DescriptorsStorage.kt | 13 ++-- .../org/ucfs/gss/GraphStructuredStack.kt | 24 +++---- .../main/kotlin/org/ucfs/input/IInputGraph.kt | 5 ++ .../main/kotlin/org/ucfs/input/InputGraph.kt | 2 + .../src/main/kotlin/org/ucfs/optbench/Main.kt | 55 --------------- .../kotlin/org/ucfs/optbench/TestGenerator.kt | 52 -------------- .../kotlin/org/ucfs/parser/context/Context.kt | 4 +- 22 files changed, 283 insertions(+), 190 deletions(-) create mode 100644 opt-benchmarks/build.gradle.kts create mode 100644 opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt create mode 100644 opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt create mode 100644 opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/TestSource.kt (80%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/Util.kt (93%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt (82%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt (75%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt (93%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt (75%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt (73%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt (57%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt (72%) rename {solver => opt-benchmarks}/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt (100%) delete mode 100644 solver/src/main/kotlin/org/ucfs/optbench/Main.kt delete mode 100644 solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt diff --git a/opt-benchmarks/build.gradle.kts 
b/opt-benchmarks/build.gradle.kts new file mode 100644 index 000000000..eb8dad3fd --- /dev/null +++ b/opt-benchmarks/build.gradle.kts @@ -0,0 +1,22 @@ +plugins { + kotlin("jvm") version "1.9.20" +} + +group = "org.example" +version = "unspecified" + +repositories { + mavenCentral() +} + +dependencies { + implementation(project(":solver")) + testImplementation("org.jetbrains.kotlin:kotlin-test") +} + +tasks.test { + useJUnitPlatform() +} +kotlin { + jvmToolchain(11) +} diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt new file mode 100644 index 000000000..8ba588403 --- /dev/null +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt @@ -0,0 +1,70 @@ +package org.ucfs.optbench + +import org.ucfs.optbench.testsource.* +import java.io.File +import kotlin.random.Random + +fun warmup() { + ExpressionAcceptTestGenerator().generateSource(100, 1000, 100).run() +} + +fun bench( + generator: TestGenerator, + config: TestConfiguration, +): List { + val seed = Random.nextInt() + + return sequence { + config.runs.forEach { run -> + generator.generateSource(seed, run.first, run.second).run().also { println(it) }.also { yield(it) } + } + }.toList() +} + +fun benchMany( + generators: List, + config: TestConfiguration, +) = generators.fold(listOf()) { acc, it -> acc + bench(it, config) } + +object Configurations { + val longBig = sizesOf(500, 1000, 2000, 5000, 10000, 20000, 50000) with 200 + val longMedium = sizesOf(500, 1000, 2000, 5000, 10000, 20000) with 100 + val longSmall = sizesOf(500, 1000, 2000, 5000, 10000, 20000) with 25 + val longTiny = sizesOf(500, 1000, 2000, 5000, 10000) with 25 + val shortBig = sizesOf(100, 200, 300, 400, 500, 800, 1000) with 200 + val shortMedium = sizesOf(100, 200, 300, 400, 500) with 100 + val shortSmall = sizesOf(100, 200, 300, 400, 500) with 25 + val shortTiny = sizesOf(100, 200, 300, 400) with 10 + val veryShortTiny = sizesOf(50, 60, 70, 80, 90) with 10 +} + 
+object Generators { + val DyckAccept = DyckAcceptTestGenerator() + val DyckReject = DyckRejectTestGenerator() + val Dyck3Accept = Dyck3AcceptTestGenerator() + val Dyck3Reject = Dyck3RejectTestGenerator() + val ExpressionAccept = ExpressionAcceptTestGenerator() + val PalindromeAccept = PalindromeAcceptTestGenerator() + val PalindromeReject = PalindromeRejectTestGenerator() + val UnequalAccept = UnequalBlocksAcceptTestGenerator() + val UnequalReject = UnequalBlocksRejectTestGenerator() + val NonSquareAccept = NonSquareAcceptTestGenerator() + val NonSquareReject = NonSquareRejectTestGenerator() + val AStartAccept = StrangeAStarAcceptTestGenerator() + val AStartReject = StrangeAStarRejectTestGenerator() + + val fast = listOf(DyckAccept, DyckReject, Dyck3Accept, Dyck3Reject, ExpressionAccept) + val medium = listOf(PalindromeAccept, PalindromeReject) + val slow = listOf(UnequalAccept, NonSquareAccept, UnequalReject, NonSquareReject) + val verySlow = listOf(AStartAccept, AStartReject) +} + +fun main() { + warmup() + ( + benchMany(Generators.fast, Configurations.longSmall) + + benchMany(Generators.medium, Configurations.longTiny) + + benchMany(Generators.slow, Configurations.shortTiny) + // benchMany(Generators.verySlow, Configurations.veryShortTiny) + ).dumpToCsv(File("optimized.csv")) +} diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt new file mode 100644 index 000000000..5851e882b --- /dev/null +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt @@ -0,0 +1,13 @@ +package org.ucfs.optbench + +data class TestConfiguration(val runs: List>) + +data class TestSizes(val numbers: List) + +fun sizesOf(vararg sizes: Int) = TestSizes(sizes.toList()) + +infix fun TestSizes.with(number: Int) = TestConfiguration(numbers.map { it to number }) + +fun configOf(vararg runs: Pair) = TestConfiguration(runs.toList()) + +operator fun 
TestConfiguration.plus(other: TestConfiguration) = TestConfiguration(runs + other.runs) diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt new file mode 100644 index 000000000..2bef7530d --- /dev/null +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt @@ -0,0 +1,26 @@ +package org.ucfs.optbench + +import org.ucfs.grammar.combinator.Grammar +import kotlin.random.Random + +interface TestGenerator { + val name: String + val grammar: Grammar + + fun generate( + seed: Int, + size: Int, + ): Test +} + +fun TestGenerator.generateSource( + seed: Int, + size: Int, + number: Int, +): TestSource = + TestSource( + grammar, + Random(seed).let { rnd -> sequence { repeat(number) { yield(generate(rnd.nextInt(), size)) } }.toList() }, + name, + size, + ) diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt similarity index 80% rename from solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt index 57926bd0e..db84c2099 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/TestSource.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -6,6 +6,7 @@ import org.ucfs.input.LinearInput import org.ucfs.parser.Gll import org.ucfs.sppf.node.RangeSppfNode import java.io.File +import kotlin.math.sqrt import kotlin.system.measureNanoTime data class Test(val input: String, val size: Int, val output: RecognizerOutput) @@ -16,7 +17,7 @@ fun ParserOutput.checkRecognize(input: IInputGraph): RecognizerOutput if (this == null || inputRange == null) { RecognizerOutput.Reject } else { - (input.isFinal(inputRange.to) && input.isStart(inputRange.from)).toRecognizerOutput() + (input.isFinal(inputRange!!.to) && input.isStart(inputRange!!.from)).toRecognizerOutput() } fun runGll( @@ -49,17 +50,19 @@ data class 
TestResult( val name: String, val tests: Int, val size: Int, - val totalRuntime: Long, + val runtimes: List, val misses: List, ) { - val averageRuntime = totalRuntime / tests + val averageRuntime = runtimes.sum() / tests + val mse = sqrt(runtimes.sumOf { it * it }) / runtimes.size override fun toString(): String { val missesString = if (isOk()) "" else " | misses: ${misses.size}" return "name: ${name.chars(20)} | " + "size: ${size.chars(6)} | " + "tests: ${tests.chars(4)} | " + - "initial: ${averageRuntime.chars(15)}ns | " + + "average: ${averageRuntime.chars(15)}ns | " + + "mse: ${mse.chars(15)}ns" + missesString } } @@ -68,11 +71,11 @@ fun TestResult.isOk() = misses.isEmpty() data class TestSource(val grammar: Grammar, val inputs: Collection, val name: String, val size: Int) { fun run(): TestResult { - var totalRuntime: Long = 0 + val runtimes = mutableListOf() val misses = mutableListOf() inputs.forEach { val actual = runTest(it, grammar) - totalRuntime += actual.first + runtimes.add(actual.first.toDouble()) val test = SingleTest(name, it.input, it.output, actual.third) if (actual.third != it.output) { misses.add(test) @@ -82,7 +85,7 @@ data class TestSource(val grammar: Grammar, val inputs: Collection, val na name, inputs.size, size, - totalRuntime, + runtimes, misses, ) } @@ -90,15 +93,10 @@ data class TestSource(val grammar: Grammar, val inputs: Collection, val na fun List.dumpToCsv(file: File) { val bw = file.bufferedWriter() - bw.write("name,size,runtime\r\n") + bw.write("name,size,avg,mse\r\n") forEach { - bw.write(it.name) - bw.write(",") - bw.write(it.size.toString()) - bw.write(",") - bw.write(it.averageRuntime.toString()) - bw.write("\r\n") + bw.write("${it.name},${it.size},${it.averageRuntime},${it.mse}\r\n") } bw.close() diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Util.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Util.kt similarity index 93% rename from solver/src/main/kotlin/org/ucfs/optbench/Util.kt rename to 
opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Util.kt index 9b69b27b5..d06012192 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/Util.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Util.kt @@ -14,7 +14,9 @@ fun String.repeat(n: Int) = fun String.chars(n: Int) = if (length < n) " ".repeat(n - length) + this else this -fun Number.chars(n: Int) = format("%${n}d", this) +fun Int.chars(n: Int) = format("%${n}d", this) + +fun Double.chars(n: Int) = format("%.0f", this).chars(n) fun parserOutputSame( left: ParserOutput, diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt similarity index 82% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt index ea5f1c1f5..6cdf5e12f 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/Dyck3TestGenerator.kt @@ -23,14 +23,24 @@ class Dyck3Grammar : Grammar() { } } -class Dyck3TestGenerator : TestGenerator { - override val name = "Dyck-3" +class Dyck3AcceptTestGenerator : TestGenerator { + override val name = "Dyck-3 Accept" override val grammar = Dyck3Grammar() - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> genDyck3Ok(seed, size) + lineEndToken with RecognizerOutput.Accept }, - { seed, size -> genDyck3Fail(seed, size) + lineEndToken with RecognizerOutput.Reject }, - ) + + override fun generate( + seed: Int, + size: Int, + ) = genDyck3Ok(seed, size) + lineEndToken with RecognizerOutput.Accept +} + +class Dyck3RejectTestGenerator : TestGenerator { + override val name = "Dyck-3 Reject" + override val grammar = Dyck3Grammar() + + override fun generate( + seed: Int, + size: Int, + ) = genDyck3Fail(seed, size) + lineEndToken with RecognizerOutput.Reject } 
private val lBrace = listOf("( " of 1, "[ " of 1, "< " of 1) diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt similarity index 75% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt index 5dfffbf00..d8bb3c0e0 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/DyckTestGenerator.kt @@ -19,14 +19,24 @@ class DyckGrammar : Grammar() { } } -class DyckTestGenerator : TestGenerator { +class DyckAcceptTestGenerator : TestGenerator { override val grammar = DyckGrammar() - override val name = "Dyck-1" - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> genDyckOk(seed, size) + lineEndToken with RecognizerOutput.Accept }, - { seed, size -> genDyckFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, - ) + override val name = "Dyck-1 Accept" + + override fun generate( + seed: Int, + size: Int, + ) = genDyckOk(seed, size) + lineEndToken with RecognizerOutput.Accept +} + +class DyckRejectTestGenerator : TestGenerator { + override val grammar = DyckGrammar() + override val name = "Dyck-1 Reject" + + override fun generate( + seed: Int, + size: Int, + ) = genDyckFail(seed, size) + lineEndToken with RecognizerOutput.Reject } private val lBrace = "( " of 1 diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt similarity index 93% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt index 24ce32b4f..f237a7804 100644 --- 
a/solver/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/ExpressionTestGenerator.kt @@ -40,14 +40,14 @@ class ExpressionGrammar : Grammar() { } } -class ExpressionTestGenerator : TestGenerator { +class ExpressionAcceptTestGenerator : TestGenerator { override val grammar = ExpressionGrammar() override val name = "Expression" - override val generator = - TrivialGenerator { - seed, size -> - (generateStatement(seed, size) + lineEndToken) with RecognizerOutput.Accept - } + + override fun generate( + seed: Int, + size: Int, + ) = (generateStatement(seed, size) + lineEndToken) with RecognizerOutput.Accept } val lineEndToken = lineEndSymbol of 1 diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt similarity index 75% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt index ebb8d8691..6d315dede 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/NonSquareTestGenerator.kt @@ -26,14 +26,24 @@ class NonSquareGrammar : Grammar() { } } -class NonSquareTestGenerator : TestGenerator { - override val name = "Non square" +class NonSquareAcceptTestGenerator : TestGenerator { + override val name = "Non Square Accept" override val grammar = NonSquareGrammar() - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> generateNonSquareAccept(seed, size) + lineEndToken with RecognizerOutput.Accept }, - { seed, size -> generateNonSquareReject(seed, size) + lineEndToken with RecognizerOutput.Reject }, - ) + + override fun generate( + seed: Int, + size: Int, + ) = generateNonSquareAccept(seed, size) + lineEndToken with 
RecognizerOutput.Accept +} + +class NonSquareRejectTestGenerator : TestGenerator { + override val name = "Non Square Reject" + override val grammar = NonSquareGrammar() + + override fun generate( + seed: Int, + size: Int, + ) = generateNonSquareReject(seed, size) + lineEndToken with RecognizerOutput.Reject } private val a = "a " of 1 diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt similarity index 73% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt index 708ed0646..6d7fb5e16 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/PalindromeTestGenerator.kt @@ -24,14 +24,24 @@ class PalindromeGrammar : Grammar() { } } -class PalindromeTestGenerator : TestGenerator { +class PalindromeAcceptTestGenerator : TestGenerator { override val grammar = PalindromeGrammar() - override val name = "Palindrome" - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> generatePalindrome(seed, size) + lineEndToken with RecognizerOutput.Accept }, - { seed, size -> generatePalindromeFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, - ) + override val name = "Palindrome Accept" + + override fun generate( + seed: Int, + size: Int, + ) = generatePalindrome(seed, size) + lineEndToken with RecognizerOutput.Accept +} + +class PalindromeRejectTestGenerator : TestGenerator { + override val grammar = PalindromeGrammar() + override val name = "Palindrome Reject" + + override fun generate( + seed: Int, + size: Int, + ) = generatePalindromeFail(seed, size) + lineEndToken with RecognizerOutput.Reject } private val letters = listOf("a " of 1, "b " of 1, "c " of 1) diff --git 
a/solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt similarity index 57% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt index 970ab060d..f8592e077 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/StrangeAStarTestGenerator.kt @@ -18,14 +18,24 @@ class StrangeAStar : Grammar() { } } -class StrangeAStarTestGenerator : TestGenerator { +class StrangeAStarAcceptTestGenerator : TestGenerator { override val grammar = StrangeAStar() - override val name = "Strange A*" - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> Test(genAStarAccept(seed, size) + lineEndSymbol, size + 1, RecognizerOutput.Accept) }, - { seed, size -> Test(genAStartReject(seed, size) + lineEndSymbol, size + 1, RecognizerOutput.Reject) }, - ) + override val name = "Strange A* Accept" + + override fun generate( + seed: Int, + size: Int, + ) = Test(genAStarAccept(seed, size) + lineEndSymbol, size + 1, RecognizerOutput.Accept) +} + +class StrangeAStarRejectTestGenerator : TestGenerator { + override val grammar = StrangeAStar() + override val name = "Strange A* Reject" + + override fun generate( + seed: Int, + size: Int, + ) = Test(genAStartReject(seed, size) + lineEndSymbol, size + 1, RecognizerOutput.Reject) } fun genAStarAccept( diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt similarity index 72% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt rename to 
opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt index 63a2b4249..8ba848baa 100644 --- a/solver/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/UnequalBlocksTestGenerator.kt @@ -26,14 +26,25 @@ class UnequalBlocksGrammar : Grammar() { } } -class UnequalBlocksTestGenerator : TestGenerator { - override val name = "Unequal blocks" +class UnequalBlocksAcceptTestGenerator : TestGenerator { + override val name = "Unequal Blocks Accept" override val grammar = UnequalBlocksGrammar() - override val generator = - AcceptRejectUniformGenerator( - { seed, size -> generateUBSuccess(seed, size) + lineEndToken with RecognizerOutput.Accept }, - { seed, size -> generateUBFail(seed, size) + lineEndToken with RecognizerOutput.Reject }, - ) + + override fun generate( + seed: Int, + size: Int, + ) = generateUBSuccess(seed, size) + lineEndToken with RecognizerOutput.Accept +} + +/** Produces inputs from generateUBFail, labelled RecognizerOutput.Reject (the counterpart of the Accept generator). */ +class UnequalBlocksRejectTestGenerator : TestGenerator { + override val name = "Unequal blocks Reject" + override val grammar = UnequalBlocksGrammar() + + override fun generate( + seed: Int, + size: Int, + ) = generateUBFail(seed, size) + lineEndToken with RecognizerOutput.Reject } private val a = "a " of 1 diff --git a/solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt similarity index 100% rename from solver/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt rename to opt-benchmarks/src/main/kotlin/org/ucfs/optbench/testsource/Util.kt diff --git a/settings.gradle.kts b/settings.gradle.kts index 9d660d937..598428cf0 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -7,3 +7,4 @@ include("benchmarks") include("generator") include("test-shared") include("examples") +include("opt-benchmarks") diff --git a/solver/src/main/kotlin/org/ucfs/descriptors/DescriptorsStorage.kt
b/solver/src/main/kotlin/org/ucfs/descriptors/DescriptorsStorage.kt index 4e79bb00c..956986a1f 100644 --- a/solver/src/main/kotlin/org/ucfs/descriptors/DescriptorsStorage.kt +++ b/solver/src/main/kotlin/org/ucfs/descriptors/DescriptorsStorage.kt @@ -6,11 +6,11 @@ import org.ucfs.parser.ParsingException * Collection of default descriptors * @param VertexType - type of vertex in input graph */ -open class DescriptorsStorage { +open class DescriptorsStorage(initialCapacity: Int = 10) { /** * Collection of already handled descriptors, accessible via descriptor's hashcode */ - private val handledDescriptors = HashSet>() + private val handledDescriptors = HashSet>(initialCapacity) private val descriptorsToHandle = ArrayDeque>() @@ -31,10 +31,12 @@ open class DescriptorsStorage { } fun add(descriptor: Descriptor) { - if(!handledDescriptors.contains(descriptor)){ - descriptorsToHandle.addLast(descriptor) - } + if (!handledDescriptors.contains(descriptor)) + { + descriptorsToHandle.addLast(descriptor) + } } + /** * Gets next descriptor to handle * @return default descriptor if there is available one, null otherwise @@ -46,4 +48,3 @@ open class DescriptorsStorage { return null } } - diff --git a/solver/src/main/kotlin/org/ucfs/gss/GraphStructuredStack.kt b/solver/src/main/kotlin/org/ucfs/gss/GraphStructuredStack.kt index db0a9abcf..8dea4c7b9 100644 --- a/solver/src/main/kotlin/org/ucfs/gss/GraphStructuredStack.kt +++ b/solver/src/main/kotlin/org/ucfs/gss/GraphStructuredStack.kt @@ -4,12 +4,15 @@ import org.ucfs.descriptors.Descriptor import org.ucfs.rsm.RsmState import org.ucfs.sppf.node.RangeSppfNode -class GraphStructuredStack { - val nodes = HashMap, GssNode>() +class GraphStructuredStack(initialCapacity: Int = 10) { + val nodes = HashMap, GssNode>(initialCapacity) - fun getOrCreateNode(input: InputNode, rsm: RsmState): GssNode { + fun getOrCreateNode( + input: InputNode, + rsm: RsmState, + ): GssNode { val node = GssNode(rsm, input) - return nodes.getOrPut(node, {node}) 
+ return nodes.getOrPut(node) { node } } fun addEdge( @@ -17,12 +20,11 @@ class GraphStructuredStack { rsmStateToReturn: RsmState, inputToContinue: InputNode, rsmStateToContinue: RsmState, - matcherRange: RangeSppfNode + matcherRange: RangeSppfNode, ): GssResult { val addedNode = getOrCreateNode(inputToContinue, rsmStateToContinue) val edge = GssEdge(gssNode, rsmStateToReturn, matcherRange) - // There is no need to check GSS edges duplication. // "Faster, Practical GLL Parsing", Ali Afroozeh and Anastasia Izmaylova // p.13: "There is at most one call to the create function with the same arguments. @@ -31,22 +33,20 @@ class GraphStructuredStack { return GssResult(addedNode, popped) } - /** * return outgoing edges */ fun pop( - descriptor: Descriptor, range: RangeSppfNode + descriptor: Descriptor, + range: RangeSppfNode, ): ArrayList> { val gssNode = descriptor.gssNode gssNode.popped.add(range) return gssNode.outgoingEdges } - } data class GssResult( - val gssNode: GssNode, val popped: ArrayList> + val gssNode: GssNode, + val popped: ArrayList>, ) - - diff --git a/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt b/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt index 4e834d024..181e361fd 100644 --- a/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt +++ b/solver/src/main/kotlin/org/ucfs/input/IInputGraph.kt @@ -5,6 +5,11 @@ package org.ucfs.input * @param VertexType - type of vertex in input graph */ interface IInputGraph { + /** + * @return number of vertices in the graph + */ + fun verticesNumber(): Int + /** * @return collection of all starting vertices */ diff --git a/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt b/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt index 0916330cc..e586f5498 100644 --- a/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt +++ b/solver/src/main/kotlin/org/ucfs/input/InputGraph.kt @@ -9,6 +9,8 @@ open class InputGraph : IInputGraph { val startVertices: MutableSet = HashSet() + override fun verticesNumber(): Int = 
vertices.size + override fun getInputStartVertices(): MutableSet { return startVertices } diff --git a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt b/solver/src/main/kotlin/org/ucfs/optbench/Main.kt deleted file mode 100644 index f416f2b5a..000000000 --- a/solver/src/main/kotlin/org/ucfs/optbench/Main.kt +++ /dev/null @@ -1,55 +0,0 @@ -package org.ucfs.optbench - -import org.ucfs.optbench.testsource.* -import java.io.File -import kotlin.random.Random - -fun bench( - what: TestGenerator, - initial: Int, - cap: Int, - tests: Int = 100, -): List { - var size = initial - - val seed = Random.nextInt() - - return sequence { - while (size < cap) { - what - .generateSource(seed, size, tests) - .run() - .also { println(it) } - .also { yield(it) } - size = size * 3 / 2 - } - }.toList() -} - -fun warmup() { - ExpressionTestGenerator().generateSource(100, 1000, 100).run() -} - -fun benchDyck() = bench(DyckTestGenerator(), 100, 20000, 50) - -fun benchDyck3() = bench(Dyck3TestGenerator(), 100, 20000, 50) - -fun benchExpression() = bench(ExpressionTestGenerator(), 100, 20000, 50) - -fun benchPalindrome() = bench(PalindromeTestGenerator(), 100, 15000, 50) - -fun benchUnequalBlocks() = bench(UnequalBlocksTestGenerator(), 100, 600, 50) - -fun benchNonSquare() = bench(NonSquareTestGenerator(), 100, 600, 50) - -fun main() { - warmup() - ( - benchDyck() + - benchDyck3() + - benchExpression() + - benchPalindrome() + - benchUnequalBlocks() + - benchNonSquare() - ).dumpToCsv(File("symbol_registry.csv")) -} diff --git a/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt b/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt deleted file mode 100644 index 07f658c32..000000000 --- a/solver/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt +++ /dev/null @@ -1,52 +0,0 @@ -package org.ucfs.optbench - -import org.ucfs.grammar.combinator.Grammar -import kotlin.random.Random - -interface TestGenerator { - val grammar: Grammar - val name: String - val generator: SingleTestGenerator 
-} - -interface SingleTestGenerator { - fun generateTest( - seed: Int, - size: Int, - ): Test -} - -class AcceptRejectUniformGenerator( - val generateAccept: (Int, Int) -> Test, - val generateReject: (Int, Int) -> Test, -) : SingleTestGenerator { - override fun generateTest( - seed: Int, - size: Int, - ): Test = if (Random(seed).nextBoolean()) generateAccept(seed, size) else generateReject(seed, size) -} - -class TrivialGenerator(val generate: (Int, Int) -> Test) : SingleTestGenerator { - override fun generateTest( - seed: Int, - size: Int, - ): Test = generate(seed, size) -} - -fun TestGenerator.generateSource( - seed: Int, - size: Int, - number: Int, -): TestSource = - TestSource( - grammar, - Random(seed) - .let { - rnd -> - sequence { - repeat(number) { yield(generator.generateTest(rnd.nextInt(), size)) } - }.toList() - }, - name, - size, - ) diff --git a/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt b/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt index f09936b74..cfde379aa 100644 --- a/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt +++ b/solver/src/main/kotlin/org/ucfs/parser/context/Context.kt @@ -21,14 +21,14 @@ class Context( /** * Collection of descriptors */ - val descriptors: DescriptorsStorage = DescriptorsStorage() + val descriptors: DescriptorsStorage = DescriptorsStorage(input.verticesNumber() * 3) /** * Derivation trees storage */ val sppfStorage: SppfStorage = SppfStorage() - val gss: GraphStructuredStack = GraphStructuredStack() + val gss: GraphStructuredStack = GraphStructuredStack(input.verticesNumber() * 3) var parseResult: RangeSppfNode? 
= null } From 141bcc6c86848ee9d151beb5e303c1bd5d249a30 Mon Sep 17 00:00:00 2001 From: random-randoms Date: Mon, 26 May 2025 21:53:17 +0300 Subject: [PATCH 6/6] add bench dsl --- .../main/kotlin/org/ucfs/optbench/BenchDsl.kt | 38 ++++++++++++++++ .../src/main/kotlin/org/ucfs/optbench/Main.kt | 43 +++++-------------- .../kotlin/org/ucfs/optbench/TestSource.kt | 2 +- 3 files changed, 49 insertions(+), 34 deletions(-) create mode 100644 opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt new file mode 100644 index 000000000..f22faf7cd --- /dev/null +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt @@ -0,0 +1,38 @@ +package org.ucfs.optbench + +import org.ucfs.optbench.testsource.ExpressionAcceptTestGenerator +import java.io.File +import kotlin.random.Random + +class BenchmarkScope { + val testResults = mutableListOf() + + fun dump(file: File) = testResults.dumpToCsv(file) +} + +fun benchmark(run: BenchmarkScope.() -> Unit): BenchmarkScope { + val bs = BenchmarkScope() + bs.run() + return bs +} + +fun BenchmarkScope.bench( + generator: TestGenerator, + config: TestConfiguration, +) { + val seed = Random.nextInt() + + testResults += + sequence { + config.runs.forEach { run -> + generator.generateSource(seed, run.first, run.second).run().also { println(it) }.also { yield(it) } + } + }.toList() +} + +fun BenchmarkScope.benchMany( + generators: List, + config: TestConfiguration, +) = generators.forEach { bench(it, config) } + +fun BenchmarkScope.warmup() = ExpressionAcceptTestGenerator().generateSource(100, 1000, 100).run() diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt index 8ba588403..df390a6f1 100644 --- a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt @@ 
-2,39 +2,17 @@ package org.ucfs.optbench import org.ucfs.optbench.testsource.* import java.io.File -import kotlin.random.Random - -fun warmup() { - ExpressionAcceptTestGenerator().generateSource(100, 1000, 100).run() -} - -fun bench( - generator: TestGenerator, - config: TestConfiguration, -): List { - val seed = Random.nextInt() - - return sequence { - config.runs.forEach { run -> - generator.generateSource(seed, run.first, run.second).run().also { println(it) }.also { yield(it) } - } - }.toList() -} - -fun benchMany( - generators: List, - config: TestConfiguration, -) = generators.fold(listOf()) { acc, it -> acc + bench(it, config) } object Configurations { val longBig = sizesOf(500, 1000, 2000, 5000, 10000, 20000, 50000) with 200 + val longBigCapped = sizesOf(500, 1000, 5000, 10000, 15000) with 200 val longMedium = sizesOf(500, 1000, 2000, 5000, 10000, 20000) with 100 - val longSmall = sizesOf(500, 1000, 2000, 5000, 10000, 20000) with 25 - val longTiny = sizesOf(500, 1000, 2000, 5000, 10000) with 25 + val longSmall = sizesOf(500, 1000, 2000, 5000, 10000, 20000, 50000) with 25 + val longTiny = sizesOf(500, 1000, 5000, 10000, 15000) with 25 val shortBig = sizesOf(100, 200, 300, 400, 500, 800, 1000) with 200 val shortMedium = sizesOf(100, 200, 300, 400, 500) with 100 val shortSmall = sizesOf(100, 200, 300, 400, 500) with 25 - val shortTiny = sizesOf(100, 200, 300, 400) with 10 + val shortTiny = sizesOf(100, 200, 300, 400, 500) with 10 val veryShortTiny = sizesOf(50, 60, 70, 80, 90) with 10 } @@ -60,11 +38,10 @@ object Generators { } fun main() { - warmup() - ( - benchMany(Generators.fast, Configurations.longSmall) + - benchMany(Generators.medium, Configurations.longTiny) + - benchMany(Generators.slow, Configurations.shortTiny) - // benchMany(Generators.verySlow, Configurations.veryShortTiny) - ).dumpToCsv(File("optimized.csv")) + benchmark { + warmup() + benchMany(Generators.fast, Configurations.longBig) + benchMany(Generators.medium, Configurations.longBigCapped) 
+ benchMany(Generators.slow, Configurations.shortMedium) + }.dump(File("optimized.csv")) } diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt index db84c2099..380aa2e15 100644 --- a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt +++ b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt @@ -54,7 +54,7 @@ data class TestResult( val misses: List, ) { val averageRuntime = runtimes.sum() / tests - val mse = sqrt(runtimes.sumOf { it * it }) / runtimes.size + val mse = sqrt(runtimes.sumOf { (it - averageRuntime) * (it - averageRuntime) }) / runtimes.size override fun toString(): String { val missesString = if (isOk()) "" else " | misses: ${misses.size}"