DM20 Protocol

Overview Schema Related Servers Score Discussions

test_stt_integration.py•8.95 KiB

"""
Tests for STT (Speech-to-Text) integration in Party Mode.

Verifies:
- Player UI contains mic button and STT JavaScript
- Action endpoint accepts voice-sourced actions identically to typed ones
- Static assets include STT-related styles
- Graceful fallback: text input always works regardless of STT availability
"""

from pathlib import Path

import pytest
from starlette.testclient import TestClient

from dm20_protocol.party.server import PartyServer

from .conftest import PLAYER_IDS


# ===================================================================
# Static Asset Tests
# ===================================================================


class TestSTTStaticAssets:
    """Verify STT elements are present in player UI static files.

    Every test here is a plain substring check against the raw text of
    ``index.html``, ``app.js`` or ``style.css``; nothing is parsed or run.
    """

    @pytest.fixture(autouse=True)
    def _load_static(self) -> None:
        """Load the three static assets onto ``self`` before each test.

        The static directory is located relative to this test file: two
        levels above the file's own directory, then
        ``src/dm20_protocol/party/static``.
        """
        static_dir = (
            Path(__file__).resolve().parents[2]
            / "src"
            / "dm20_protocol"
            / "party"
            / "static"
        )
        # Decode explicitly instead of relying on the locale's preferred
        # encoding, so the checks behave the same on every platform.
        # NOTE(review): assumes the assets are UTF-8 (the usual web default).
        self.html = (static_dir / "index.html").read_text(encoding="utf-8")
        self.js = (static_dir / "app.js").read_text(encoding="utf-8")
        self.css = (static_dir / "style.css").read_text(encoding="utf-8")

    def test_html_contains_mic_button(self) -> None:
        """Mic button must be in the action bar HTML."""
        assert 'class="action-bar__mic"' in self.html
        assert 'class="mic-icon"' in self.html
        assert 'class="mic-listening-dot"' in self.html

    def test_html_mic_button_hidden_by_default(self) -> None:
        """Mic button starts hidden (shown by JS if STT is supported)."""
        assert 'class="action-bar__mic"' in self.html
        # The button should have a hidden attribute by default. This is a
        # proximity heuristic: look for "hidden" within 100 characters on
        # either side of the class attribute rather than parsing the tag.
        mic_idx = self.html.index('class="action-bar__mic"')
        mic_context = self.html[max(0, mic_idx - 100) : mic_idx + 100]
        assert "hidden" in mic_context

    def test_html_has_transcript_preview(self) -> None:
        """Voice transcript preview element must exist."""
        assert 'class="voice-transcript-preview"' in self.html

    def test_js_contains_stt_initialization(self) -> None:
        """JavaScript must contain STT setup code."""
        assert "initSTT" in self.js
        assert "SpeechRecognition" in self.js
        assert "webkitSpeechRecognition" in self.js

    def test_js_contains_stt_feature_detection(self) -> None:
        """JS must check for browser STT support before enabling."""
        assert "sttSupported" in self.js

    def test_js_contains_mic_toggle(self) -> None:
        """JS must have toggle function for mic button."""
        assert "toggleSTT" in self.js
        assert "startSTT" in self.js
        assert "stopSTT" in self.js

    def test_js_handles_permission_denied(self) -> None:
        """JS must handle microphone permission denial gracefully."""
        assert "not-allowed" in self.js
        assert "sttDenied" in self.js

    def test_js_sends_voice_source_field(self) -> None:
        """JS must include source: 'voice' when submitting voice actions."""
        # Accept either quoting style for the string literal in the JS source.
        assert "source: 'voice'" in self.js or 'source: "voice"' in self.js

    def test_js_submits_voice_action_via_fetch(self) -> None:
        """Voice actions use the same /action endpoint as typed actions."""
        assert "submitVoiceAction" in self.js
        assert "/action" in self.js

    def test_js_shows_interim_transcription(self) -> None:
        """JS must show interim transcription as preview."""
        assert "interimResults" in self.js
        assert "showTranscriptPreview" in self.js

    def test_css_contains_mic_styles(self) -> None:
        """CSS must include mic button and listening indicator styles."""
        assert ".action-bar__mic" in self.css
        assert ".action-bar__mic--listening" in self.css
        assert ".mic-listening-dot" in self.css

    def test_css_contains_transcript_preview_styles(self) -> None:
        """CSS must include voice transcript preview styles."""
        assert ".voice-transcript-preview" in self.css

    def test_css_has_mic_pulse_animation(self) -> None:
        """CSS must include a pulse animation for listening state."""
        assert "mic-pulse" in self.css

    def test_text_input_always_present(self) -> None:
        """Text input must always exist regardless of STT support (fallback)."""
        assert 'class="action-bar__input"' in self.html
        assert 'class="action-bar__send"' in self.html


# ===================================================================
# Server Integration Tests
# ===================================================================


class TestSTTServerIntegration:
    """Verify the server handles voice-sourced actions correctly.

    Each test drives the server's ASGI app through a ``TestClient`` and
    authenticates with a bearer token taken from the ``e2e_tokens`` fixture.
    """

    def test_action_with_voice_source_accepted(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Server must accept actions with source: 'voice' field."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["thorin"]

        resp = client.post(
            "/action",
            json={"action": "I cast fireball", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data["success"] is True
        assert data["player_id"] == "thorin"
        assert data["action_id"]

    def test_voice_action_queued_identically(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Voice actions must be queued identically to typed actions."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["elara"]

        # Submit typed action
        resp1 = client.post(
            "/action",
            json={"action": "I search the chest"},
            headers={"Authorization": f"Bearer {token}"},
        )
        # Check the status first so a rejected request fails here with a
        # clear assertion instead of a KeyError on the missing "action_id".
        assert resp1.status_code == 200
        typed_id = resp1.json()["action_id"]

        # Submit voice action
        resp2 = client.post(
            "/action",
            json={"action": "I open the door", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp2.status_code == 200
        voice_id = resp2.json()["action_id"]

        # Both should be in the queue. The IDs are compared as sets, so the
        # check does not depend on the order in which pop() returns them.
        action1 = e2e_server.action_queue.pop()
        action2 = e2e_server.action_queue.pop()
        assert action1 is not None
        assert action2 is not None
        assert {action1["id"], action2["id"]} == {typed_id, voice_id}

    def test_empty_voice_action_rejected(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Empty transcriptions must be rejected."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["vex"]

        resp = client.post(
            "/action",
            json={"action": "", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 400
        assert "Empty action" in resp.json()["error"]

    def test_whitespace_voice_action_rejected(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Whitespace-only transcriptions must be rejected."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["gorm"]

        resp = client.post(
            "/action",
            json={"action": "   ", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 400

    def test_player_page_serves_with_mic_button(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Player page served by the server must include mic button."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["thorin"]

        # This route takes the token as a query parameter, not a header.
        resp = client.get(f"/play?token={token}")
        assert resp.status_code == 200
        html = resp.text
        assert "action-bar__mic" in html
        assert "mic-icon" in html


# ===================================================================
# Combat Gating with STT
# ===================================================================


class TestSTTCombatGating:
    """Check that voice actions pass through the same turn gate as typed ones."""

    def test_voice_action_uses_same_gating_as_typed(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """A typed and a voice submission must both be let through.

        No TurnManager is active in this scenario, so neither kind of action
        should be blocked. The action endpoint does not distinguish between
        sources, so _check_turn_gate applies to both in the same way.
        """
        http = TestClient(e2e_server.app)
        auth = {"Authorization": f"Bearer {e2e_tokens['thorin']}"}

        # Same action text twice: first typed (no source), then voice.
        payloads = (
            {"action": "I search the room"},
            {"action": "I search the room", "source": "voice"},
        )
        statuses = [
            http.post("/action", json=payload, headers=auth).status_code
            for payload in payloads
        ]

        # With no turn gating in effect, both submissions succeed.
        assert statuses == [200, 200]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_stt_integration.py•8.95 KiB

"""
Tests for STT (Speech-to-Text) integration in Party Mode.

Verifies:
- Player UI contains mic button and STT JavaScript
- Action endpoint accepts voice-sourced actions identically to typed ones
- Static assets include STT-related styles
- Graceful fallback: text input always works regardless of STT availability
"""

from pathlib import Path

import pytest
from starlette.testclient import TestClient

from dm20_protocol.party.server import PartyServer

from .conftest import PLAYER_IDS


# ===================================================================
# Static Asset Tests
# ===================================================================


class TestSTTStaticAssets:
    """Verify STT elements are present in player UI static files.

    Every test here is a plain substring check against the raw text of
    ``index.html``, ``app.js`` or ``style.css``; nothing is parsed or run.
    """

    @pytest.fixture(autouse=True)
    def _load_static(self) -> None:
        """Load the three static assets onto ``self`` before each test.

        The static directory is located relative to this test file: two
        levels above the file's own directory, then
        ``src/dm20_protocol/party/static``.
        """
        static_dir = (
            Path(__file__).resolve().parents[2]
            / "src"
            / "dm20_protocol"
            / "party"
            / "static"
        )
        # Decode explicitly instead of relying on the locale's preferred
        # encoding, so the checks behave the same on every platform.
        # NOTE(review): assumes the assets are UTF-8 (the usual web default).
        self.html = (static_dir / "index.html").read_text(encoding="utf-8")
        self.js = (static_dir / "app.js").read_text(encoding="utf-8")
        self.css = (static_dir / "style.css").read_text(encoding="utf-8")

    def test_html_contains_mic_button(self) -> None:
        """Mic button must be in the action bar HTML."""
        assert 'class="action-bar__mic"' in self.html
        assert 'class="mic-icon"' in self.html
        assert 'class="mic-listening-dot"' in self.html

    def test_html_mic_button_hidden_by_default(self) -> None:
        """Mic button starts hidden (shown by JS if STT is supported)."""
        assert 'class="action-bar__mic"' in self.html
        # The button should have a hidden attribute by default. This is a
        # proximity heuristic: look for "hidden" within 100 characters on
        # either side of the class attribute rather than parsing the tag.
        mic_idx = self.html.index('class="action-bar__mic"')
        mic_context = self.html[max(0, mic_idx - 100) : mic_idx + 100]
        assert "hidden" in mic_context

    def test_html_has_transcript_preview(self) -> None:
        """Voice transcript preview element must exist."""
        assert 'class="voice-transcript-preview"' in self.html

    def test_js_contains_stt_initialization(self) -> None:
        """JavaScript must contain STT setup code."""
        assert "initSTT" in self.js
        assert "SpeechRecognition" in self.js
        assert "webkitSpeechRecognition" in self.js

    def test_js_contains_stt_feature_detection(self) -> None:
        """JS must check for browser STT support before enabling."""
        assert "sttSupported" in self.js

    def test_js_contains_mic_toggle(self) -> None:
        """JS must have toggle function for mic button."""
        assert "toggleSTT" in self.js
        assert "startSTT" in self.js
        assert "stopSTT" in self.js

    def test_js_handles_permission_denied(self) -> None:
        """JS must handle microphone permission denial gracefully."""
        assert "not-allowed" in self.js
        assert "sttDenied" in self.js

    def test_js_sends_voice_source_field(self) -> None:
        """JS must include source: 'voice' when submitting voice actions."""
        # Accept either quoting style for the string literal in the JS source.
        assert "source: 'voice'" in self.js or 'source: "voice"' in self.js

    def test_js_submits_voice_action_via_fetch(self) -> None:
        """Voice actions use the same /action endpoint as typed actions."""
        assert "submitVoiceAction" in self.js
        assert "/action" in self.js

    def test_js_shows_interim_transcription(self) -> None:
        """JS must show interim transcription as preview."""
        assert "interimResults" in self.js
        assert "showTranscriptPreview" in self.js

    def test_css_contains_mic_styles(self) -> None:
        """CSS must include mic button and listening indicator styles."""
        assert ".action-bar__mic" in self.css
        assert ".action-bar__mic--listening" in self.css
        assert ".mic-listening-dot" in self.css

    def test_css_contains_transcript_preview_styles(self) -> None:
        """CSS must include voice transcript preview styles."""
        assert ".voice-transcript-preview" in self.css

    def test_css_has_mic_pulse_animation(self) -> None:
        """CSS must include a pulse animation for listening state."""
        assert "mic-pulse" in self.css

    def test_text_input_always_present(self) -> None:
        """Text input must always exist regardless of STT support (fallback)."""
        assert 'class="action-bar__input"' in self.html
        assert 'class="action-bar__send"' in self.html


# ===================================================================
# Server Integration Tests
# ===================================================================


class TestSTTServerIntegration:
    """Verify the server handles voice-sourced actions correctly.

    Each test drives the server's ASGI app through a ``TestClient`` and
    authenticates with a bearer token taken from the ``e2e_tokens`` fixture.
    """

    def test_action_with_voice_source_accepted(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Server must accept actions with source: 'voice' field."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["thorin"]

        resp = client.post(
            "/action",
            json={"action": "I cast fireball", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data["success"] is True
        assert data["player_id"] == "thorin"
        assert data["action_id"]

    def test_voice_action_queued_identically(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Voice actions must be queued identically to typed actions."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["elara"]

        # Submit typed action
        resp1 = client.post(
            "/action",
            json={"action": "I search the chest"},
            headers={"Authorization": f"Bearer {token}"},
        )
        # Check the status first so a rejected request fails here with a
        # clear assertion instead of a KeyError on the missing "action_id".
        assert resp1.status_code == 200
        typed_id = resp1.json()["action_id"]

        # Submit voice action
        resp2 = client.post(
            "/action",
            json={"action": "I open the door", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp2.status_code == 200
        voice_id = resp2.json()["action_id"]

        # Both should be in the queue. The IDs are compared as sets, so the
        # check does not depend on the order in which pop() returns them.
        action1 = e2e_server.action_queue.pop()
        action2 = e2e_server.action_queue.pop()
        assert action1 is not None
        assert action2 is not None
        assert {action1["id"], action2["id"]} == {typed_id, voice_id}

    def test_empty_voice_action_rejected(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Empty transcriptions must be rejected."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["vex"]

        resp = client.post(
            "/action",
            json={"action": "", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 400
        assert "Empty action" in resp.json()["error"]

    def test_whitespace_voice_action_rejected(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Whitespace-only transcriptions must be rejected."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["gorm"]

        resp = client.post(
            "/action",
            json={"action": "   ", "source": "voice"},
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resp.status_code == 400

    def test_player_page_serves_with_mic_button(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """Player page served by the server must include mic button."""
        client = TestClient(e2e_server.app)
        token = e2e_tokens["thorin"]

        # This route takes the token as a query parameter, not a header.
        resp = client.get(f"/play?token={token}")
        assert resp.status_code == 200
        html = resp.text
        assert "action-bar__mic" in html
        assert "mic-icon" in html


# ===================================================================
# Combat Gating with STT
# ===================================================================


class TestSTTCombatGating:
    """Check that voice actions pass through the same turn gate as typed ones."""

    def test_voice_action_uses_same_gating_as_typed(
        self, e2e_server: PartyServer, e2e_tokens: dict[str, str]
    ) -> None:
        """A typed and a voice submission must both be let through.

        No TurnManager is active in this scenario, so neither kind of action
        should be blocked. The action endpoint does not distinguish between
        sources, so _check_turn_gate applies to both in the same way.
        """
        http = TestClient(e2e_server.app)
        auth = {"Authorization": f"Bearer {e2e_tokens['thorin']}"}

        # Same action text twice: first typed (no source), then voice.
        payloads = (
            {"action": "I search the room"},
            {"action": "I search the room", "source": "voice"},
        )
        statuses = [
            http.post("/action", json=payload, headers=auth).status_code
            for payload in payloads
        ]

        # With no turn gating in effect, both submissions succeed.
        assert statuses == [200, 200]