FormalLanguageConstrainedPathQuerying · Random-Randoms · May 21, 2025 · May 21, 2025 · May 24, 2025 · May 24, 2025
diff --git a/opt-benchmarks/build.gradle.kts b/opt-benchmarks/build.gradle.kts
@@ -0,0 +1,22 @@
+// Build script for the opt-benchmarks module (Gradle Kotlin DSL).
+plugins {
+    // Kotlin/JVM plugin, pinned to version 1.9.20 for this module.
+    kotlin("jvm") version "1.9.20"
+}
+
+// NOTE(review): "org.example" / "unspecified" look like project-template defaults — confirm they are intended.
+group = "org.example"
+version = "unspecified"
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    // The benchmark code is compiled against the ":solver" project of this build.
+    implementation(project(":solver"))
+    testImplementation("org.jetbrains.kotlin:kotlin-test")
+}
+
+tasks.test {
+    // Execute tests on the JUnit Platform.
+    useJUnitPlatform()
+}
+// Use a Java 11 toolchain for Kotlin compilation.
+kotlin {
+    jvmToolchain(11)
+}
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/BenchDsl.kt
@@ -0,0 +1,38 @@
+package org.ucfs.optbench
+
+import org.ucfs.optbench.testsource.ExpressionAcceptTestGenerator
+import java.io.File
+import kotlin.random.Random
+
+/**
+ * Receiver of the [benchmark] DSL: accumulates the [TestResult] of every benchmark run
+ * appended to it and can write them out as CSV.
+ */
+class BenchmarkScope {
+    /** Results collected so far, in the order they were appended. */
+    val testResults = mutableListOf<TestResult>()
+
+    /** Writes all collected results to [file] using [dumpToCsv]. */
+    fun dump(file: File) {
+        testResults.dumpToCsv(file)
+    }
+}
+
+/**
+ * Entry point of the benchmark DSL.
+ *
+ * Creates a fresh [BenchmarkScope], executes [run] with that scope as its receiver and
+ * returns the scope so the caller can inspect or dump the collected results.
+ */
+fun benchmark(run: BenchmarkScope.() -> Unit): BenchmarkScope = BenchmarkScope().apply(run)
+
+/**
+ * Benchmarks [generator] once per `(size, number)` pair in [config].
+ *
+ * Each run's [TestResult] is printed as soon as it is available; all results of this call are
+ * appended to [BenchmarkScope.testResults] after the last run has finished. One randomly drawn
+ * seed is shared by every run of the call.
+ *
+ * NOTE(review): the seed is neither printed nor stored, so a run cannot be reproduced later.
+ */
+fun BenchmarkScope.bench(
+    generator: TestGenerator,
+    config: TestConfiguration,
+) {
+    val seed = Random.nextInt()
+
+    testResults +=
+        config.runs.map { (size, number) ->
+            generator.generateSource(seed, size, number).run().also(::println)
+        }
+}
+
+/** Calls [bench] with the same [config] for every generator in [generators], in list order. */
+fun BenchmarkScope.benchMany(
+    generators: List<TestGenerator>,
+    config: TestConfiguration,
+) {
+    for (generator in generators) {
+        bench(generator, config)
+    }
+}
+
+/**
+ * Runs a fixed workload before the real benchmarks: 100 tests of size 1000 generated by
+ * [ExpressionAcceptTestGenerator] from seed 100. The result is returned but not added to
+ * [BenchmarkScope.testResults].
+ */
+fun BenchmarkScope.warmup() =
+    ExpressionAcceptTestGenerator()
+        .generateSource(seed = 100, size = 1000, number = 100)
+        .run()
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Main.kt
@@ -0,0 +1,47 @@
+package org.ucfs.optbench
+
+import org.ucfs.optbench.testsource.*
+import java.io.File
+
+/**
+ * Predefined [TestConfiguration]s.
+ *
+ * Every configuration is a list of input sizes combined, via [with], with the number of tests
+ * generated for each size.
+ */
+object Configurations {
+    // Size lists shared by several configurations below.
+    private val longSizes = sizesOf(500, 1000, 2000, 5000, 10000, 20000, 50000)
+    private val cappedSizes = sizesOf(500, 1000, 5000, 10000, 15000)
+    private val shortSizes = sizesOf(100, 200, 300, 400, 500)
+
+    val longBig = longSizes with 200
+    val longBigCapped = cappedSizes with 200
+    val longMedium = sizesOf(500, 1000, 2000, 5000, 10000, 20000) with 100
+    val longSmall = longSizes with 25
+    val longTiny = cappedSizes with 25
+    val shortBig = sizesOf(100, 200, 300, 400, 500, 800, 1000) with 200
+    val shortMedium = shortSizes with 100
+    val shortSmall = shortSizes with 25
+    val shortTiny = shortSizes with 10
+    val veryShortTiny = sizesOf(50, 60, 70, 80, 90) with 10
+}
+
+/**
+ * One shared instance of every test generator used by the benchmarks, plus lists that group
+ * them for [benchMany].
+ */
+object Generators {
+    val DyckAccept = DyckAcceptTestGenerator()
+    val DyckReject = DyckRejectTestGenerator()
+    val Dyck3Accept = Dyck3AcceptTestGenerator()
+    val Dyck3Reject = Dyck3RejectTestGenerator()
+    val ExpressionAccept = ExpressionAcceptTestGenerator()
+    val PalindromeAccept = PalindromeAcceptTestGenerator()
+    val PalindromeReject = PalindromeRejectTestGenerator()
+    val UnequalAccept = UnequalBlocksAcceptTestGenerator()
+    val UnequalReject = UnequalBlocksRejectTestGenerator()
+    val NonSquareAccept = NonSquareAcceptTestGenerator()
+    val NonSquareReject = NonSquareRejectTestGenerator()
+    // NOTE(review): "AStart" is presumably a typo for "AStar" (the classes are StrangeAStar*) — confirm before renaming.
+    val AStartAccept = StrangeAStarAcceptTestGenerator()
+    val AStartReject = StrangeAStarRejectTestGenerator()
+
+    // Groups; the names presumably reflect how long the generators take to benchmark — TODO confirm.
+    val fast = listOf(DyckAccept, DyckReject, Dyck3Accept, Dyck3Reject, ExpressionAccept)
+    val medium = listOf(PalindromeAccept, PalindromeReject)
+    val slow = listOf(UnequalAccept, NonSquareAccept, UnequalReject, NonSquareReject)
+    val verySlow = listOf(AStartAccept, AStartReject)
+}
+
+/**
+ * Runs the warm-up followed by the fast, medium and slow generator groups, then writes the
+ * collected results to `optimized.csv` in the working directory.
+ */
+fun main() {
+    val scope =
+        benchmark {
+            warmup()
+            benchMany(Generators.fast, Configurations.longBig)
+            benchMany(Generators.medium, Configurations.longBigCapped)
+            benchMany(Generators.slow, Configurations.shortMedium)
+        }
+    val target = File("optimized.csv")
+    scope.dump(target)
+}
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestConfiguration.kt
@@ -0,0 +1,13 @@
+package org.ucfs.optbench
+
+/** Benchmark plan: each entry of [runs] is an `(input size, number of tests)` pair. */
+data class TestConfiguration(val runs: List<Pair<Int, Int>>)
+
+/** List of input sizes that still has to be combined with a test count (see [with]). */
+data class TestSizes(val numbers: List<Int>)
+
+/** Wraps the given [sizes], in argument order, into a [TestSizes]. */
+fun sizesOf(vararg sizes: Int): TestSizes {
+    val numbers: List<Int> = sizes.toList()
+    return TestSizes(numbers)
+}
+
+/**
+ * Builds a [TestConfiguration] with one run per size of this [TestSizes], each paired with the
+ * same [number].
+ */
+infix fun TestSizes.with(number: Int): TestConfiguration {
+    val runs = numbers.map { size -> Pair(size, number) }
+    return TestConfiguration(runs)
+}
+
+/** Builds a [TestConfiguration] directly from the given [runs], kept in argument order. */
+fun configOf(vararg runs: Pair<Int, Int>): TestConfiguration {
+    val runList: List<Pair<Int, Int>> = runs.toList()
+    return TestConfiguration(runList)
+}
+
+/** Concatenates two configurations: the runs of this one followed by the runs of [other]. */
+operator fun TestConfiguration.plus(other: TestConfiguration): TestConfiguration {
+    val combined = this.runs + other.runs
+    return TestConfiguration(combined)
+}
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestGenerator.kt
@@ -0,0 +1,26 @@
+package org.ucfs.optbench
+
+import org.ucfs.grammar.combinator.Grammar
+import kotlin.random.Random
+
+/**
+ * Source of benchmark inputs for one grammar.
+ *
+ * [generateSource] uses an implementation to build a [TestSource] holding many generated tests.
+ */
+interface TestGenerator {
+    /** Label copied into the [TestSource] built by [generateSource]. */
+    val name: String
+
+    /** Grammar copied into the [TestSource] built by [generateSource]. */
+    val grammar: Grammar
+
+    /**
+     * Produces a single [Test].
+     *
+     * @param seed value drawn per test by [generateSource]; presumably seeds the implementation's
+     *   own randomness — confirm against implementations.
+     * @param size requested size of the test; its exact meaning is implementation-defined.
+     */
+    fun generate(seed: Int, size: Int): Test
+}
+
+/**
+ * Builds a [TestSource] containing [number] tests of the given [size].
+ *
+ * A [Random] created from [seed] supplies a separate seed for every generated test, so the
+ * same arguments yield the same sequence of per-test seeds.
+ */
+fun TestGenerator.generateSource(
+    seed: Int,
+    size: Int,
+    number: Int,
+): TestSource {
+    val rnd = Random(seed)
+    return TestSource(
+        grammar = grammar,
+        // `until` yields an empty range for a non-positive count, so no test is generated then.
+        inputs = (0 until number).map { generate(rnd.nextInt(), size) },
+        name = name,
+        size = size,
+    )
+}
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/TestSource.kt
@@ -0,0 +1,103 @@
+package org.ucfs.optbench
+
+import org.ucfs.grammar.combinator.Grammar
+import org.ucfs.input.IInputGraph
+import org.ucfs.input.LinearInput
+import org.ucfs.parser.Gll
+import org.ucfs.sppf.node.RangeSppfNode
+import java.io.File
+import kotlin.math.sqrt
+import kotlin.system.measureNanoTime
+
+/** A single generated test: the [input] string to parse, its [size] and the expected verdict [output]. */
+data class Test(val input: String, val size: Int, val output: RecognizerOutput)
+
+/** Result of a parser run: an SPPF node, or `null` when the parser produced none. */
+typealias ParserOutput<T> = RangeSppfNode<T>?
+
+/**
+ * Converts a parser result into an accept/reject verdict for [input].
+ *
+ * The verdict is [RecognizerOutput.Accept] only when the result is non-null, carries an input
+ * range, and that range runs from a start vertex of [input] to a final vertex of [input].
+ *
+ * @param input the graph that was parsed to obtain this result.
+ * @return the recognizer verdict for this result.
+ */
+fun <T> ParserOutput<T>.checkRecognize(input: IInputGraph<T>): RecognizerOutput {
+    // Read the range exactly once: a missing result or a missing range is a rejection, and
+    // holding the value in a local removes the need for `!!` on a re-read property.
+    val range = this?.inputRange ?: return RecognizerOutput.Reject
+    return (input.isFinal(range.to) && input.isStart(range.from)).toRecognizerOutput()
+}
+
+/** Creates a GLL parser for the RSM of [grammar] over [input] and returns the result of its `parse()` call. */
+fun runGll(
+    input: IInputGraph<Int>,
+    grammar: Grammar,
+) = Gll
+    .gll(grammar.rsm, input)
+    .parse()
+
+/**
+ * Parses the input of [test] with [grammar] and measures how long the parse takes.
+ *
+ * Only the [runGll] call is timed; building the linear input from the test string happens
+ * before the measurement starts.
+ *
+ * @return the elapsed time in nanoseconds, the raw parser output and the verdict derived
+ *   from it by [checkRecognize].
+ */
+fun runTest(
+    test: Test,
+    grammar: Grammar,
+): Triple<Long, ParserOutput<Int>, RecognizerOutput> {
+    val graph = LinearInput.buildFromString(test.input)
+    val parsed: ParserOutput<Int>
+    val elapsedNanos = measureNanoTime { parsed = runGll(graph, grammar) }
+    val verdict = parsed.checkRecognize(graph)
+    return Triple(elapsedNanos, parsed, verdict)
+}
+
+/** Verdict of a recognizer on one input: the input is either accepted or rejected. */
+enum class RecognizerOutput { Accept, Reject }
+
+/** Maps `true` to [RecognizerOutput.Accept] and `false` to [RecognizerOutput.Reject]. */
+fun Boolean.toRecognizerOutput() =
+    when (this) {
+        true -> RecognizerOutput.Accept
+        false -> RecognizerOutput.Reject
+    }
+
+/**
+ * Record of one executed test, kept when the [actual] verdict differs from the [expected] one.
+ *
+ * Note that [TestSource.run] stores the source's name in [grammar].
+ */
+data class SingleTest(
+    val grammar: String,
+    val input: String,
+    val expected: RecognizerOutput,
+    val actual: RecognizerOutput,
+)
+
+/**
+ * Aggregated outcome of running all tests of one [TestSource].
+ *
+ * @property name label of the source that was run.
+ * @property tests number of tests that were run.
+ * @property size input size the tests were generated for.
+ * @property runtimes per-test parse times in nanoseconds.
+ * @property misses tests whose verdict differed from the expected one.
+ */
+data class TestResult(
+    val name: String,
+    val tests: Int,
+    val size: Int,
+    val runtimes: List<Double>,
+    val misses: List<SingleTest>,
+) {
+    /**
+     * Arithmetic mean of [runtimes]. Divides by the number of samples (not by [tests]) so it
+     * uses the same denominator as [mse]; `NaN` when [runtimes] is empty.
+     */
+    val averageRuntime = runtimes.sum() / runtimes.size
+
+    /**
+     * `sqrt(sum of squared deviations from the mean) / n`, which equals the population standard
+     * deviation divided by `sqrt(n)`, i.e. the standard error of the mean.
+     */
+    val mse = sqrt(runtimes.sumOf { (it - averageRuntime) * (it - averageRuntime) }) / runtimes.size
+
+    /** One-line, column-aligned summary; the miss count is appended only when there are misses. */
+    override fun toString(): String {
+        val missesString = if (isOk()) "" else " | misses: ${misses.size}"
+        return "name: ${name.chars(20)} | " +
+            "size: ${size.chars(6)} | " +
+            "tests: ${tests.chars(4)} | " +
+            "average: ${averageRuntime.chars(15)}ns | " +
+            "mse: ${mse.chars(15)}ns" +
+            missesString
+    }
+}
+
+/** `true` when no test of this result produced an unexpected verdict. */
+fun TestResult.isOk(): Boolean {
+    return misses.isEmpty()
+}
+
+/**
+ * A batch of tests of one [size] that are all run against the same [grammar].
+ *
+ * @property grammar grammar every input is parsed with.
+ * @property inputs tests to run.
+ * @property name label copied into the resulting [TestResult] and into recorded misses.
+ * @property size input size copied into the resulting [TestResult].
+ */
+data class TestSource(val grammar: Grammar, val inputs: Collection<Test>, val name: String, val size: Int) {
+    /**
+     * Runs every test in [inputs], in iteration order, and collects the per-test parse times
+     * together with all tests whose verdict differs from the expected one.
+     */
+    fun run(): TestResult {
+        val runtimes = mutableListOf<Double>()
+        val misses = mutableListOf<SingleTest>()
+        for (test in inputs) {
+            val (elapsedNanos, _, verdict) = runTest(test, grammar)
+            runtimes += elapsedNanos.toDouble()
+            if (verdict != test.output) {
+                // The source name is what gets stored in SingleTest.grammar.
+                misses += SingleTest(name, test.input, test.output, verdict)
+            }
+        }
+        return TestResult(
+            name = name,
+            tests = inputs.size,
+            size = size,
+            runtimes = runtimes,
+            misses = misses,
+        )
+    }
+}
+
+/**
+ * Writes these results to [file] as CSV with the header `name,size,avg,mse` and CRLF line
+ * endings, one row per result in list order.
+ *
+ * The writer is closed even when a write fails. Values are written verbatim; no CSV quoting
+ * or escaping is applied.
+ *
+ * @param file destination file, opened with [File.bufferedWriter].
+ */
+fun List<TestResult>.dumpToCsv(file: File) {
+    // `use` closes the writer on both normal and exceptional exit, so a failed write no
+    // longer leaks the file handle.
+    file.bufferedWriter().use { writer ->
+        writer.write("name,size,avg,mse\r\n")
+
+        for (result in this) {
+            writer.write("${result.name},${result.size},${result.averageRuntime},${result.mse}\r\n")
+        }
+    }
+}
diff --git a/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Util.kt b/opt-benchmarks/src/main/kotlin/org/ucfs/optbench/Util.kt
@@ -0,0 +1,54 @@
+package org.ucfs.optbench
+
+import org.ucfs.rsm.symbol.Term
+import java.lang.String.format
+import kotlin.collections.HashSet
+
+/** Text of the end-of-line marker; presumably appended to generated inputs — TODO confirm against the generators. */
+const val lineEndSymbol = "$"
+/** Terminal symbol built from [lineEndSymbol]. */
+val lineEnd = Term(lineEndSymbol)
+
+/**
+ * Returns this string concatenated with itself [n] times (the empty string for `n == 0`).
+ *
+ * Appends into a single builder instead of folding with `+`, which copied the whole
+ * accumulated string on every step.
+ *
+ * @throws IllegalArgumentException if [n] is negative.
+ */
+fun String.repeat(n: Int): String {
+    require(n >= 0) { "Requested element count $n is less than zero." }
+    val piece = this
+    return buildString {
+        for (i in 0 until n) append(piece)
+    }
+}
+
+/** Left-pads this string with spaces to a width of [n]; strings already that long are returned unchanged. */
+fun String.chars(n: Int): String {
+    if (length >= n) return this
+    return " ".repeat(n - length) + this
+}
+
+/** Formats this integer right-aligned in a field of width [n] using the `%<n>d` format specifier. */
+fun Int.chars(n: Int): String {
+    val pattern = "%${n}d"
+    return format(pattern, this)
+}
+
+/** Formats this value with no fractional digits (`%.0f`) and left-pads the text with spaces to width [n]. */
+fun Double.chars(n: Int): String {
+    val rounded = format("%.0f", this)
+    return rounded.chars(n)
+}
+
+/**
+ * Structurally compares two parser outputs.
+ *
+ * Two nodes match when both are `null`, or when they agree on `type`, `rsmRange`, `inputRange`
+ * and number of children and their children match pairwise in order. Nodes are identified by
+ * `id`; a node that is reached again must be paired with the same partner as the first time.
+ * The previous implementation only checked that both nodes had been seen before, which let
+ * a revisit succeed even when the two nodes had been matched with different partners.
+ *
+ * @return `true` when the two outputs have the same structure, `false` otherwise.
+ */
+fun <T> parserOutputSame(
+    left: ParserOutput<T>,
+    right: ParserOutput<T>,
+): Boolean {
+    // Pairing established so far, kept in both directions so it stays one-to-one.
+    val leftToRight = HashMap<Int, Int>()
+    val rightToLeft = HashMap<Int, Int>()
+
+    fun checkSame(
+        a: ParserOutput<T>,
+        b: ParserOutput<T>,
+    ): Boolean {
+        if (a == null && b == null) return true
+        if (a == null || b == null) return false
+
+        // A revisit is fine only if it repeats the pairing recorded on the first visit.
+        val knownPartner = leftToRight[a.id]
+        if (knownPartner != null) return knownPartner == b.id
+        if (rightToLeft.containsKey(b.id)) return false
+
+        // Record the pair before descending so cycles terminate.
+        leftToRight[a.id] = b.id
+        rightToLeft[b.id] = a.id
+
+        if (a.type != b.type) return false
+        if (a.rsmRange != b.rsmRange) return false
+        if (a.inputRange != b.inputRange) return false
+        if (a.children.size != b.children.size) return false
+
+        // `all` stops at the first mismatching pair of children.
+        return a.children.indices.all { index -> checkSame(a.children[index], b.children[index]) }
+    }
+
+    return checkSame(left, right)
+}
+
+/** Negation of [parserOutputSame]: `true` when this output and [other] are not structurally the same. */
+infix fun <T> ParserOutput<T>.differsFrom(other: ParserOutput<T>): Boolean {
+    val same = parserOutputSame(this, other)
+    return !same
+}