Fletcher (Optimized, Modular)
A faster, cleaner, modular split of your Fletcher assistant: streaming TTS while the model is still generating (near-zero delay), microphone input, screen analysis, Spotify controls (Windows), app launch/close, and optional macOS Calendar reading.
1) Folder structure
Create this layout (names matter):
fletcher/
├─ __init__.py
├─ [Link]
├─ fletcher/
│  ├─ __init__.py
│  ├─ utils.py
│  ├─ config.py
│  ├─ [Link]
│  ├─ [Link]
│  ├─ [Link]
│  ├─ [Link]
│  ├─ [Link]
│  └─ [Link]
└─ [Link]
You can place everything inside a single top-level folder (named fletcher) as shown.
2) Install prerequisites
Use a new venv if possible to avoid version conflicts.
# Windows (PowerShell)
python -m venv .venv
.\.venv\Scripts\Activate.ps1
pip install --upgrade pip
pip install pyqt5 openai pygame SpeechRecognition psutil pillow
# PyAudio (for mic) – Windows tips:
pip install pipwin
pipwin install pyaudio
# macOS/Linux (zsh/bash)
python3 -m venv .venv
source .venv/bin/activate
pip install --upgrade pip
pip install pyqt5 openai pygame SpeechRecognition psutil pillow pyaudio
If PyAudio gives you trouble on macOS, run brew install portaudio first, then pip install pyaudio.
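To confirm everything installed cleanly, you can try importing each package (a minimal sketch; note that several import names differ from their pip names):

python -c "import PyQt5, openai, pygame, speech_recognition, psutil, PIL, pyaudio; print('all imports OK')"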
3) Set your API keys (no hardcoding)
Set environment variables before running:
# Windows (PowerShell)
$env:OPENAI_API_KEY = "sk-..."
$env:ELEVENLABS_API_KEY = "eleven-..."
$env:ELEVENLABS_VOICE_ID = "NOpBlnGInO9m6vDvFkFC" # or your voice id
# macOS/Linux (zsh/bash)
export OPENAI_API_KEY="sk-..."
export ELEVENLABS_API_KEY="eleven-..."
export ELEVENLABS_VOICE_ID="NOpBlnGInO9m6vDvFkFC"
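Before launching, you can sanity-check that Python actually sees the keys (a minimal sketch; run it in the same shell where you set the variables):

import os
for var in ("OPENAI_API_KEY", "ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"):
    print(var, "set" if os.getenv(var) else "MISSING")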
4) Run it
python -m [Link]
On first run, macOS may ask you to grant Calendar and Microphone access. For Calendar, allow Terminal (or your IDE) under System Settings → Privacy & Security.
5) Usage (commands)
Type in the chat:
• open notepad, open chrome, close notepad (Windows examples)
• next song, previous song, pause, play (Windows Spotify)
• what is on my screen (takes a screenshot and describes it)
• any plans for this week? (macOS Calendar)
• fletcher exit to quit
Click the 🎤 button to speak.
6) Code — files
fletcher/__init__.py
# Empty on purpose; makes this a package.
fletcher/utils.py

import os
import platform


def is_windows() -> bool:
    return platform.system() == "Windows"


def is_macos() -> bool:
    return platform.system() == "Darwin"


def desktop_path() -> str:
    return os.path.join(os.path.expanduser("~"), "Desktop")
fletcher/config.py

import os
from .utils import is_windows

# Models
OPENAI_MODEL_TEXT = os.getenv("OPENAI_MODEL_TEXT", "gpt-4o-mini")
OPENAI_MODEL_VISION = os.getenv("OPENAI_MODEL_VISION", "gpt-4o")

# Keys (required)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
ELEVEN_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
ELEVEN_VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID", "NOpBlnGInO9m6vDvFkFC")
ELEVEN_MODEL_ID = os.getenv("ELEVENLABS_MODEL_ID", "eleven_monolingual_v1")

# TTS
TTS_STABILITY = float(os.getenv("TTS_STABILITY", 0.4))
TTS_SIMILARITY = float(os.getenv("TTS_SIMILARITY", 0.92))
TTS_MAX_CHARS = int(os.getenv("TTS_MAX_CHARS", 220))

# App registry for quick open
APP_REGISTRY = {
    "notepad": "notepad.exe",
    "calc": "calc.exe",
    "calculator": "calc.exe",
    "firefox": "firefox.exe",
    "chrome": "chrome.exe",
    "word": "winword.exe",
    "excel": "excel.exe",
    "paint": "mspaint.exe",
}

# Windows media keys (virtual-key codes)
MEDIA_KEYS = {"next": 0xB0, "prev": 0xB1, "playpause": 0xB3} if is_windows() else {}
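For context, 0xB0/0xB1/0xB3 are the Windows virtual-key codes VK_MEDIA_NEXT_TRACK, VK_MEDIA_PREV_TRACK, and VK_MEDIA_PLAY_PAUSE. A minimal sketch of how such codes can be injected with ctypes (press_media_key is an illustrative name, not part of the original code):

import ctypes

KEYEVENTF_KEYUP = 0x0002  # key-release flag for keybd_event


def press_media_key(vk_code: int) -> None:
    # Simulate a press and release of a media virtual key (Windows only).
    ctypes.windll.user32.keybd_event(vk_code, 0, 0, 0)
    ctypes.windll.user32.keybd_event(vk_code, 0, KEYEVENTF_KEYUP, 0)

# e.g. press_media_key(MEDIA_KEYS["playpause"]) toggles Spotify playback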
fletcher/[Link]
import threading
from typing import Callable, List, Dict

from openai import OpenAI

from . import config

# One client per process
_client = OpenAI(api_key=config.OPENAI_API_KEY) if config.OPENAI_API_KEY else None


def stream_chat(
    history: List[Dict[str, str]],
    user_text: str,
    on_chunk: Callable[[str], None],
    on_done: Callable[[str], None],
    on_error: Callable[[str], None],
    tts_streamer=None,
):
    """Stream a reply and forward chunks to the UI and the TTS streamer."""
    def _job():
        if _client is None:
            on_error("OpenAI key missing. Set OPENAI_API_KEY.")
            return
        history.append({"role": "user", "content": user_text})
        reply = ""
        try:
            stream = _client.chat.completions.create(
                model=config.OPENAI_MODEL_TEXT,
                messages=history,
                stream=True,
            )
            for chunk in stream:
                delta = chunk.choices[0].delta
                piece = getattr(delta, "content", None)
                if not piece:
                    continue
                reply += piece
                on_chunk(piece)  # UI
                if tts_streamer:
                    tts_streamer.feed(piece)  # speak as it arrives
            history.append({"role": "assistant", "content": reply})
            on_done(reply)
        except Exception as e:
            on_error(f"AI error: {e}")

    threading.Thread(target=_job, daemon=True).start()
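For reference, here is how stream_chat can be exercised outside the GUI (a minimal sketch: the callbacks just print to stdout, the system prompt is illustrative, and stream_chat is assumed to be imported from the module above):

import time

history = [{"role": "system", "content": "You are Fletcher, a helpful desktop assistant."}]

stream_chat(
    history,
    "Say hello in one sentence.",
    on_chunk=lambda piece: print(piece, end="", flush=True),  # stream to stdout
    on_done=lambda reply: print("\n[done]"),
    on_error=print,
)
time.sleep(10)  # the worker is a daemon thread; keep the process alive while it streams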
fletcher/[Link]
import os
import re
import time
import queue
import tempfile
import threading
from typing import Optional

import pygame

from . import config
from .utils import is_windows, is_macos

# Try ElevenLabs lazily so the app can still run without it
try:
    from elevenlabs import generate, save, set_api_key, VoiceSettings
    _HAVE_ELEVEN = True
except Exception:
    _HAVE_ELEVEN = False


class TTSStreamer:
    """Aggregates tiny stream chunks into sentence-sized audio with minimal delay."""

    def __init__(self):
        self._q: "queue.Queue[str]" = queue.Queue()
        self._buf = ""
        self._stop = threading.Event()
        self._lock = threading.Lock()
        if _HAVE_ELEVEN and config.ELEVEN_API_KEY:
            set_api_key(config.ELEVEN_API_KEY)
        else:
            # No TTS; the streamer becomes a no-op
            pass

        # Configure the pygame audio driver per OS (helps startup reliability)
        if is_windows():
            os.environ.setdefault("SDL_AUDIODRIVER", "directsound")
        elif is_macos():
            os.environ.setdefault("SDL_AUDIODRIVER", "coreaudio")
        else:
            os.environ.setdefault("SDL_AUDIODRIVER", "alsa")

        self._th = threading.Thread(target=self._loop, daemon=True)
        self._th.start()

    def feed(self, text: str):
        if text:
            self._q.put(text)

    def say(self, text: str):
        """Queue a full sentence immediately (bypasses aggregation)."""
        for part in self._split_sentences(text):
            self._q.put(part)

    def shutdown(self):
        self._stop.set()
        self._q.put("")

    # ---- internals ----

    def _loop(self):
        while not self._stop.is_set():
            try:
                piece = self._q.get(timeout=0.2)
            except queue.Empty:
                # If the buffer holds a pending sentence, flush it after a short idle
                if self._buf:
                    self._flush_if_ready(force=True)
                continue
            self._buf += piece
            self._flush_if_ready()

    def _flush_if_ready(self, force: bool = False):
        if not self._buf:
            return
        # Heuristic: speak when we hit a sentence end, the max size, or a short idle
        sentence_done = bool(re.search(r"[.!?…](\s|$)", self._buf))
        too_long = len(self._buf) >= config.TTS_MAX_CHARS
        if force or sentence_done or too_long:
            chunk = self._take_speakable_chunk(self._buf)
            self._buf = self._buf[len(chunk):]
            if chunk.strip():
                self._speak(chunk.strip())

    @staticmethod
    def _split_sentences(text: str):
        return re.split(r"(?<=[.!?…])\s+", text)

    @staticmethod
    def _take_speakable_chunk(text: str) -> str:
        # Grab up to a full sentence or TTS_MAX_CHARS
        sentences = re.split(r"(?<=[.!?…])\s+", text)
        out = ""
        for s in sentences:
            if len(out) + len(s) + 1 > config.TTS_MAX_CHARS:
                break
            out = (out + " " + s).strip()
            if re.search(r"[.!?…]$", s):
                break
        return out or text[: config.TTS_MAX_CHARS]

    def _speak(self, text: str):
        if not _HAVE_ELEVEN or not config.ELEVEN_API_KEY:
            return  # no-op if TTS is not configured
        tmp_path: Optional[str] = None
        try:
            audio = generate(
                text=text,
                voice=config.ELEVEN_VOICE_ID,
                model=config.ELEVEN_MODEL_ID,
                voice_settings=VoiceSettings(
                    stability=config.TTS_STABILITY,
                    similarity_boost=config.TTS_SIMILARITY,
                ),
            )
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
                save(audio, tmp.name)
                tmp_path = tmp.name
            with self._lock:
                if not pygame.mixer.get_init():
                    pygame.mixer.init()
                pygame.mixer.music.load(tmp_path)
                pygame.mixer.music.play()
                while pygame.mixer.music.get_busy():
                    time.sleep(0.05)
        except Exception:
            # Stay silent on TTS failures to avoid spamming the UI
            pass
        finally:
            if tmp_path and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except Exception:
                    pass
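To exercise the streamer in isolation (a minimal sketch; it needs ELEVENLABS_API_KEY to actually produce audio, otherwise it silently no-ops):

import time

tts = TTSStreamer()
tts.feed("Hello there! ")        # flushed at the sentence boundary
tts.feed("This sentence arrives ")
tts.feed("in several pieces.")   # flushed once the final "." arrives
time.sleep(5)                    # give the worker thread time to fetch and play audio
tts.shutdown()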
fletcher/[Link]
import os
import base64
import tempfile
from typing import Optional

from PIL import ImageGrab
from openai import OpenAI

from . import config

_client = OpenAI(api_key=config.OPENAI_API_KEY) if config.OPENAI_API_KEY else None


def analyze_screen() -> str:
    if _client is None:
        return "OpenAI key missing. Set OPENAI_API_KEY."
    try:
        img = ImageGrab.grab()
    except Exception as e:
        return f"Couldn't capture screen: {e}"
    tmp = os.path.join(tempfile.gettempdir(), "fletcher_screen.png")
    try:
        img.save(tmp)
        with open(tmp, "rb") as f:
            img_b64 = base64.b64encode(f.read()).decode("utf-8")
        resp = _client.chat.completions.create(
            model=config.OPENAI_MODEL_VISION,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe and explain what is visible in this screenshot."},
                    {"type": "