Implement Push-to-Talk feature and enhance audio input settings in AssistantView and CustomizeView. Update README for API key instructions and improve audio processing logic in OpenAI SDK. Adjust pnpm-lock.yaml for dependency updates.

2026-01-16 00:41:58 +03:00 · 2026-01-16 00:41:58 +03:00 · 656e8f0932
commit 656e8f0932
parent 669c019fd8
10 changed files with 5190 additions and 4095 deletions
--- a/README.md
+++ b/README.md
@ -1,10 +1,5 @@
-<img width="1299" height="424" alt="cd (1)" src="https://github.com/user-attachments/assets/b25fff4d-043d-4f38-9985-f832ae0d0f6e" />
-
-## Recall.ai - API for desktop recording
-
-If you’re looking for a hosted desktop recording API, consider checking out [Recall.ai](https://www.recall.ai/product/desktop-recording-sdk/?utm_source=github&utm_medium=sponsorship&utm_campaign=sohzm-cheating-daddy), an API that records Zoom, Google Meet, Microsoft Teams, in-person meetings, and more.
-
-This project is sponsored by Recall.ai.
+<!-- <img width="1299" height="424" alt="cd (1)" src="https://github.com/user-attachments/assets/b25fff4d-043d-4f38-9985-f832ae0d0f6e" /> -->
+# Mastermind

 ---

@ -14,33 +9,36 @@ This project is sponsored by Recall.ai.
 > [!NOTE]  
 > During testing it wont answer if you ask something, you need to simulate interviewer asking question, which it will answer

-A real-time AI assistant that provides contextual help during video calls, interviews, presentations, and meetings using screen capture and audio analysis.
+A real-time AI assistant that provides contextual help during video calls, interviews, presentations, and meetings using screen capture and audio analysis. It is a fork of the [Cheating Daddy](https://github.com/sohzm/cheating-daddy) project.

 ## Features

- **Live AI Assistance**: Real-time help powered by Google Gemini 2.0 Flash Live
+- **Live AI Assistance**: Real-time help powered by Gemini API / OpenAI SDK / OpenAI Realtime API, so you can choose which one you want to use
 - **Screen & Audio Capture**: Analyzes what you see and hear for contextual responses
 - **Multiple Profiles**: Interview, Sales Call, Business Meeting, Presentation, Negotiation
- **Transparent Overlay**: Always-on-top window that can be positioned anywhere
+- **Transparent Overlay**: Always-on-top window that can be positioned anywhere; if something goes wrong, you can hide it without stopping the session and losing context!
 - **Click-through Mode**: Make window transparent to clicks when needed
 - **Cross-platform**: Works on macOS, Windows, and Linux (kinda, dont use, just for testing rn)

 ## Setup

-1. **Get a Gemini API Key**: Visit [Google AI Studio](https://aistudio.google.com/apikey)
-2. **Install Dependencies**: `npm install`
-3. **Run the App**: `npm start`
+1. **Get an API Key**: Visit [Google AI Studio](https://aistudio.google.com/apikey) or [OpenAI](https://platform.openai.com/docs/api-reference) or any other OpenAI-compatible API!
+2. **Install Dependencies**: `pnpm install`
+3. **Run the App**: `pnpm start`

 ## Usage

-1. Enter your Gemini API key in the main window
+1. Enter your API key in the main window, then select the provider and model you want to use in preferences
 2. Choose your profile and language in settings
-3. Click "Start Session" to begin
-4. Position the window using keyboard shortcuts
-5. The AI will provide real-time assistance based on your screen and what interview asks
+3. Click "Start Session" to begin. To use push-to-talk mode, enable it in preferences
+4. Position the window using keyboard shortcuts, or use your mouse to move it
+5. The AI will provide real-time assistance based on your screen and system audio/microphone input. You can also send text messages to the AI by pressing Enter

 ## Keyboard Shortcuts

+> [!NOTE]  
+> All keyboard shortcuts are customizable in settings. You can check some default shortcuts below.
+
 - **Window Movement**: `Ctrl/Cmd + Arrow Keys` - Move window
 - **Click-through**: `Ctrl/Cmd + M` - Toggle mouse events
 - **Close/Back**: `Ctrl/Cmd + \` - Close window or go back
@ -48,13 +46,13 @@ A real-time AI assistant that provides contextual help during video calls, inter

 ## Audio Capture

- **macOS**: [SystemAudioDump](https://github.com/Mohammed-Yasin-Mulla/Sound) for system audio
- **Windows**: Loopback audio capture
+- **macOS**: [SystemAudioDump](https://github.com/Mohammed-Yasin-Mulla/Sound) for system audio capture; microphone input is also supported
+- **Windows**: Loopback audio capture; microphone input is also supported
 - **Linux**: Microphone input

 ## Requirements

 - Electron-compatible OS (macOS, Windows, Linux)
- Gemini API key
+- AI Provider API key
 - Screen recording permissions
 - Microphone/audio permissions
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/src/components/views/AssistantView.js
+++ b/src/components/views/AssistantView.js
@ -366,6 +366,57 @@ export class AssistantView extends LitElement {
        .region-select-btn span {
            margin-left: 4px;
        }
+
+        .ptt-toggle-btn {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            background: transparent;
+            color: var(--text-secondary);
+            border: 1px solid var(--border-color);
+            padding: 6px 12px;
+            border-radius: 20px;
+            font-size: 12px;
+            cursor: pointer;
+            transition: all 0.15s ease;
+        }
+
+        .ptt-toggle-btn:hover {
+            background: var(--hover-background);
+            color: var(--text-color);
+            border-color: var(--text-color);
+        }
+
+        .ptt-toggle-btn.active {
+            color: var(--error-color);
+            border-color: var(--error-color);
+        }
+
+        .ptt-indicator {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            font-size: 11px;
+            color: var(--text-secondary);
+            margin-bottom: 6px;
+        }
+
+        .ptt-dot {
+            width: 8px;
+            height: 8px;
+            border-radius: 50%;
+            background: var(--border-color);
+            box-shadow: 0 0 0 1px var(--border-color);
+        }
+
+        .ptt-dot.active {
+            background: var(--error-color);
+            box-shadow: 0 0 0 1px var(--error-color);
+        }
+
+        .ptt-label {
+            font-family: 'SF Mono', Monaco, monospace;
+        }
    `;

    static properties = {
@ -377,6 +428,9 @@ export class AssistantView extends LitElement {
        flashCount: { type: Number },
        flashLiteCount: { type: Number },
        aiProvider: { type: String },
+        pushToTalkActive: { type: Boolean },
+        audioInputMode: { type: String },
+        pushToTalkKeybind: { type: String },
    };

    constructor() {
@ -388,6 +442,9 @@ export class AssistantView extends LitElement {
        this.flashCount = 0;
        this.flashLiteCount = 0;
        this.aiProvider = 'gemini';
+        this.pushToTalkActive = false;
+        this.audioInputMode = 'auto';
+        this.pushToTalkKeybind = '';
    }

    getProfileNames() {
@ -507,6 +564,7 @@ export class AssistantView extends LitElement {

        // Load limits on mount
        this.loadLimits();
+        this.loadPushToTalkKeybind();

        // Set up IPC listeners for keyboard shortcuts
        if (window.require) {
@ -532,10 +590,17 @@ export class AssistantView extends LitElement {
                this.scrollResponseDown();
            };

+            this.handlePushToTalkState = (event, state) => {
+                this.pushToTalkActive = state?.active ?? false;
+                this.audioInputMode = state?.inputMode ?? 'auto';
+                this.requestUpdate();
+            };
+
            ipcRenderer.on('navigate-previous-response', this.handlePreviousResponse);
            ipcRenderer.on('navigate-next-response', this.handleNextResponse);
            ipcRenderer.on('scroll-response-up', this.handleScrollUp);
            ipcRenderer.on('scroll-response-down', this.handleScrollDown);
+            ipcRenderer.on('push-to-talk-state', this.handlePushToTalkState);
        }
    }

@ -557,6 +622,9 @@ export class AssistantView extends LitElement {
            if (this.handleScrollDown) {
                ipcRenderer.removeListener('scroll-response-down', this.handleScrollDown);
            }
+            if (this.handlePushToTalkState) {
+                ipcRenderer.removeListener('push-to-talk-state', this.handlePushToTalkState);
+            }
        }
    }

@ -584,6 +652,15 @@ export class AssistantView extends LitElement {
        }
    }

+    async loadPushToTalkKeybind() {
+        if (window.cheatingDaddy?.storage?.getKeybinds) {
+            const isMac = window.cheatingDaddy?.isMacOS || navigator.platform.includes('Mac');
+            const defaultKeybind = isMac ? 'Ctrl+Space' : 'Ctrl+Space';
+            const keybinds = await window.cheatingDaddy.storage.getKeybinds();
+            this.pushToTalkKeybind = keybinds?.pushToTalk || defaultKeybind;
+        }
+    }
+
    getTotalUsed() {
        return this.flashCount + this.flashLiteCount;
    }
@ -608,6 +685,14 @@ export class AssistantView extends LitElement {
        }
    }

+    handlePushToTalkToggle() {
+        if (!window.require) {
+            return;
+        }
+        const { ipcRenderer } = window.require('electron');
+        ipcRenderer.send('push-to-talk-toggle');
+    }
+
    scrollToBottom() {
        setTimeout(() => {
            const container = this.shadowRoot.querySelector('.response-container');
@ -649,10 +734,26 @@ export class AssistantView extends LitElement {

    render() {
        const responseCounter = this.getResponseCounter();
+        const showPushToTalk = this.aiProvider === 'openai-sdk' && this.audioInputMode === 'push-to-talk';
+        const keybindLabel = this.pushToTalkKeybind || 'Hotkey';
+        const pushToTalkLabel = this.pushToTalkActive
+            ? 'Recording...'
+            : `Press ${keybindLabel} to start/stop`;
+        const pushToTalkButtonLabel = this.pushToTalkActive ? 'Stop' : 'Record';

        return html`
            <div class="response-container" id="responseContainer"></div>

+            ${showPushToTalk
+                ? html`
+                      <div class="ptt-indicator">
+                          <span class="ptt-dot ${this.pushToTalkActive ? 'active' : ''}"></span>
+                          <span>Push-to-Talk:</span>
+                          <span class="ptt-label">${pushToTalkLabel}</span>
+                      </div>
+                  `
+                : ''}
+
            <div class="text-input-container">
                <button class="nav-button" @click=${this.navigateToPreviousResponse} ?disabled=${this.currentResponseIndex <= 0}>
                    <svg width="24px" height="24px" stroke-width="1.7" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
@ -671,6 +772,17 @@ export class AssistantView extends LitElement {
                <input type="text" id="textInput" placeholder="Type a message to the AI..." @keydown=${this.handleTextKeydown} />

                <div class="capture-buttons">
+                    ${showPushToTalk
+                        ? html`
+                              <button
+                                  class="ptt-toggle-btn ${this.pushToTalkActive ? 'active' : ''}"
+                                  @click=${this.handlePushToTalkToggle}
+                                  title="Toggle Push-to-Talk recording"
+                              >
+                                  ${pushToTalkButtonLabel}
+                              </button>
+                          `
+                        : ''}
                    <button class="region-select-btn" @click=${this.handleRegionSelect} title="Select region to analyze (like Win+Shift+S)">
                        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
                            <path
--- a/src/components/views/CustomizeView.js
+++ b/src/components/views/CustomizeView.js
@ -537,6 +537,7 @@ export class CustomizeView extends LitElement {
            color: var(--error-color);
            border-left: 2px solid var(--error-color);
        }
+
    `;

    static properties = {
@ -549,6 +550,7 @@ export class CustomizeView extends LitElement {
        backgroundTransparency: { type: Number },
        fontSize: { type: Number },
        theme: { type: String },
+        audioInputMode: { type: String },
        onProfileChange: { type: Function },
        onLanguageChange: { type: Function },
        onImageQualityChange: { type: Function },
@ -587,6 +589,7 @@ export class CustomizeView extends LitElement {

        // Audio mode default
        this.audioMode = 'speaker_only';
+        this.audioInputMode = 'auto';

        // Custom prompt
        this.customPrompt = '';
@ -795,6 +798,7 @@ export class CustomizeView extends LitElement {
            this.backgroundTransparency = prefs.backgroundTransparency ?? 0.8;
            this.fontSize = prefs.fontSize ?? 20;
            this.audioMode = prefs.audioMode ?? 'speaker_only';
+            this.audioInputMode = prefs.audioInputMode ?? 'auto';
            this.customPrompt = prefs.customPrompt ?? '';
            this.theme = prefs.theme ?? 'dark';
            this.aiProvider = prefs.aiProvider ?? 'gemini';
@ -820,6 +824,7 @@ export class CustomizeView extends LitElement {

            this.updateBackgroundTransparency();
            this.updateFontSize();
+            this.notifyPushToTalkSettings();
            this.requestUpdate();
        } catch (error) {
            console.error('Error loading settings:', error);
@ -832,6 +837,10 @@ export class CustomizeView extends LitElement {
        resizeLayout();
    }

+    disconnectedCallback() {
+        super.disconnectedCallback();
+    }
+
    getProfiles() {
        return [
            {
@ -944,6 +953,28 @@ export class CustomizeView extends LitElement {
        this.requestUpdate();
    }

+    async handleAudioInputModeChange(e) {
+        this.audioInputMode = e.target.value;
+        await cheatingDaddy.storage.updatePreference('audioInputMode', e.target.value);
+        this.notifyPushToTalkSettings();
+        this.requestUpdate();
+    }
+
+    notifyPushToTalkSettings() {
+        if (!window.require) {
+            return;
+        }
+        try {
+            const { ipcRenderer } = window.require('electron');
+            ipcRenderer.send('update-push-to-talk-settings', {
+                inputMode: this.audioInputMode,
+            });
+            ipcRenderer.send('update-keybinds', this.keybinds);
+        } catch (error) {
+            console.error('Failed to notify push-to-talk settings:', error);
+        }
+    }
+
    async handleThemeChange(e) {
        this.theme = e.target.value;
        await cheatingDaddy.theme.save(this.theme);
@ -965,6 +996,7 @@ export class CustomizeView extends LitElement {
            nextResponse: isMac ? 'Cmd+]' : 'Ctrl+]',
            scrollUp: isMac ? 'Cmd+Shift+Up' : 'Ctrl+Shift+Up',
            scrollDown: isMac ? 'Cmd+Shift+Down' : 'Ctrl+Shift+Down',
+            pushToTalk: isMac ? 'Ctrl+Space' : 'Ctrl+Space',
        };
    }

@ -1050,6 +1082,11 @@ export class CustomizeView extends LitElement {
                name: 'Scroll Response Down',
                description: 'Scroll the AI response content down',
            },
+            {
+                key: 'pushToTalk',
+                name: 'Push-to-Talk',
+                description: 'Activate audio recording (OpenAI SDK only)',
+            },
        ];
    }

@ -1319,6 +1356,9 @@ export class CustomizeView extends LitElement {
    }

    renderAudioSection() {
+        const isPushToTalkAvailable = this.aiProvider === 'openai-sdk';
+        const pushToTalkDisabled = !isPushToTalkAvailable;
+
        return html`
            <div class="content-header">Audio Settings</div>
            <div class="form-grid">
@ -1331,6 +1371,28 @@ export class CustomizeView extends LitElement {
                    </select>
                    <div class="form-description">Choose which audio sources to capture for the AI.</div>
                </div>
+                <div class="form-group">
+                    <label class="form-label">Audio Input Mode</label>
+                    <select
+                        class="form-control"
+                        .value=${this.audioInputMode}
+                        @change=${this.handleAudioInputModeChange}
+                        ?disabled=${pushToTalkDisabled}
+                    >
+                        <option value="auto">Automatic (Always Listening)</option>
+                        <option value="push-to-talk">Push-to-Talk (Hotkey Activated)</option>
+                    </select>
+                    <div class="form-description">
+                        ${pushToTalkDisabled
+                            ? 'Push-to-Talk is available only with the OpenAI SDK provider.'
+                            : this.audioInputMode === 'auto'
+                              ? 'Audio is continuously recorded and transcribed when silence is detected.'
+                              : 'Audio recording starts when you press and hold/toggle the hotkey.'}
+                    </div>
+                </div>
+                ${this.audioInputMode === 'push-to-talk'
+                    ? html`<div class="form-description">Use the Push-to-Talk hotkey (toggle) to start/stop recording.</div>`
+                    : ''}
            </div>
        `;
    }
--- a/src/components/views/HelpView.js
+++ b/src/components/views/HelpView.js
@ -295,7 +295,7 @@ export class HelpView extends LitElement {
                        <span>Community & Support</span>
                    </div>
                    <div class="community-links">
-                        <div class="community-link" @click=${() => this.handleExternalLinkClick('https://cheatingdaddy.com')}>
+                        <!-- <div class="community-link" @click=${() => this.handleExternalLinkClick('https://cheatingdaddy.com')}>
                            <svg
                                viewBox="0 0 24 24"
                                fill="none"
@ -312,8 +312,8 @@ export class HelpView extends LitElement {
                                ></path>
                            </svg>
                            Website
-                        </div>
-                        <div class="community-link" @click=${() => this.handleExternalLinkClick('https://github.com/sohzm/cheating-daddy')}>
+                        </div> -->
+                        <div class="community-link" @click=${() => this.handleExternalLinkClick('https://github.com/ShiftyX1/Mastermind')}>
                            <svg
                                viewBox="0 0 24 24"
                                fill="none"
@ -329,7 +329,7 @@ export class HelpView extends LitElement {
                            </svg>
                            GitHub
                        </div>
-                        <div class="community-link" @click=${() => this.handleExternalLinkClick('https://discord.gg/GCBdubnXfJ')}>
+                        <!-- <div class="community-link" @click=${() => this.handleExternalLinkClick('https://discord.gg/GCBdubnXfJ')}>
                            <svg
                                viewBox="0 0 24 24"
                                fill="none"
@ -353,7 +353,7 @@ export class HelpView extends LitElement {
                                ></path>
                            </svg>
                            Discord
-                        </div>
+                        </div> -->
                    </div>
                </div>

--- a/src/storage.js
+++ b/src/storage.js
@ -33,6 +33,7 @@ const DEFAULT_PREFERENCES = {
    selectedImageQuality: 'medium',
    advancedMode: false,
    audioMode: 'speaker_only',
+    audioInputMode: 'auto',
    fontSize: 'medium',
    backgroundTransparency: 0.8,
    googleSearchEnabled: false,
--- a/src/utils/ai-provider-manager.js
+++ b/src/utils/ai-provider-manager.js
@ -186,6 +186,7 @@ async function initializeAISession(customPrompt = '', profile = 'interview', lan
        try {
            await openaiSdkProvider.initializeOpenAISDK(providerConfig);
            openaiSdkProvider.setSystemPrompt(systemPrompt);
+            openaiSdkProvider.updatePushToTalkSettings(prefs.audioInputMode || 'auto');
            sendToRenderer('update-status', 'Ready (OpenAI SDK)');
            return true;
        } catch (error) {
@ -325,6 +326,16 @@ function setupAIProviderIpcHandlers(geminiSessionRef) {
        saveConversationTurn(transcription, response);
    });

+    ipcMain.on('push-to-talk-toggle', () => {
+        if (currentProvider === 'openai-sdk') {
+            openaiSdkProvider.togglePushToTalk();
+        }
+    });
+
+    ipcMain.on('update-push-to-talk-settings', (event, { inputMode } = {}) => {
+        openaiSdkProvider.updatePushToTalkSettings(inputMode || 'auto');
+    });
+
    ipcMain.handle('initialize-ai-session', async (event, customPrompt, profile, language) => {
        return await initializeAISession(customPrompt, profile, language);
    });
--- a/src/utils/openai-sdk.js
+++ b/src/utils/openai-sdk.js
@ -14,6 +14,8 @@ let openaiClient = null;
 let currentConfig = null;
 let conversationMessages = [];
 let isProcessing = false;
+let audioInputMode = 'auto';
+let isPushToTalkActive = false;

 // macOS audio capture
 let systemAudioProc = null;
@ -294,6 +296,18 @@ async function processAudioChunk(base64Audio, mimeType) {
    const now = Date.now();
    const buffer = Buffer.from(base64Audio, 'base64');

+    if (audioInputMode === 'push-to-talk') {
+        if (!isPushToTalkActive) {
+            return { success: true, ignored: true };
+        }
+
+        // In push-to-talk mode we only buffer while active
+        audioChunks.push(buffer);
+        lastAudioTime = now;
+
+        return { success: true, buffering: true };
+    }
+
    // Track first chunk time for duration-based flushing
    if (audioChunks.length === 0) {
        firstChunkTime = now;
@ -380,6 +394,97 @@ async function flushAudioAndTranscribe() {
    }
 }

+function notifyPushToTalkState() {
+    sendToRenderer('push-to-talk-state', {
+        active: isPushToTalkActive,
+        inputMode: audioInputMode,
+    });
+}
+
+function resetRealtimeAudioBuffer() {
+    audioChunks = [];
+    firstChunkTime = 0;
+    lastAudioTime = 0;
+
+    if (silenceCheckTimer) {
+        clearTimeout(silenceCheckTimer);
+        silenceCheckTimer = null;
+    }
+    if (windowsTranscriptionTimer) {
+        clearInterval(windowsTranscriptionTimer);
+        windowsTranscriptionTimer = null;
+    }
+}
+
+function updateTranscriptionTimerForPushToTalk() {
+    if (audioInputMode === 'push-to-talk') {
+        stopTranscriptionTimer();
+        return;
+    }
+
+    if (systemAudioProc && !transcriptionTimer) {
+        startTranscriptionTimer();
+    }
+}
+
+async function setPushToTalkActive(active) {
+    const wasActive = isPushToTalkActive;
+    isPushToTalkActive = active;
+
+    if (active) {
+        // Starting recording - clear any old buffers
+        resetRealtimeAudioBuffer();
+        audioBuffer = Buffer.alloc(0);
+        console.log('Push-to-Talk: Recording started');
+        sendToRenderer('update-status', 'Recording...');
+    }
+
+    notifyPushToTalkState();
+
+    // When user stops recording in PTT mode, send audio for transcription
+    if (!active && wasActive && audioInputMode === 'push-to-talk') {
+        console.log('Push-to-Talk: Recording stopped, transcribing...');
+        sendToRenderer('update-status', 'Transcribing...');
+
+        // For browser-based audio (Windows)
+        if (audioChunks.length > 0) {
+            await flushAudioAndTranscribe();
+        }
+        // For macOS SystemAudioDump
+        if (audioBuffer.length > 0) {
+            await transcribeBufferedAudio(true); // Force transcription
+        }
+
+        sendToRenderer('update-status', 'Listening...');
+    }
+}
+
+async function togglePushToTalk() {
+    if (isPushToTalkActive) {
+        await setPushToTalkActive(false);
+    } else {
+        await setPushToTalkActive(true);
+    }
+}
+
+function updatePushToTalkSettings(inputMode) {
+    if (inputMode) {
+        audioInputMode = inputMode;
+    }
+
+    if (audioInputMode !== 'push-to-talk' && isPushToTalkActive) {
+        isPushToTalkActive = false;
+    }
+
+    if (audioInputMode !== 'push-to-talk') {
+        resetRealtimeAudioBuffer();
+        audioBuffer = Buffer.alloc(0);
+    }
+
+    notifyPushToTalkState();
+    updateTranscriptionTimerForPushToTalk();
+}
+
 function clearConversation() {
    const systemMessage = conversationMessages.find(m => m.role === 'system');
    conversationMessages = systemMessage ? [systemMessage] : [];
@ -403,6 +508,7 @@ function closeOpenAISDK() {
    conversationMessages = [];
    audioChunks = [];
    isProcessing = false;
+    isPushToTalkActive = false;

    // Clear timers
    if (silenceCheckTimer) {
@ -414,6 +520,7 @@ function closeOpenAISDK() {
        windowsTranscriptionTimer = null;
    }

+    notifyPushToTalkState();
    sendToRenderer('update-status', 'Disconnected');
 }

@ -461,11 +568,16 @@ function hasSpeech(buffer, threshold = 500) {
    return rms > threshold;
 }

-async function transcribeBufferedAudio() {
+async function transcribeBufferedAudio(forcePTT = false) {
    if (audioBuffer.length === 0 || isProcessing) {
        return;
    }

+    // In push-to-talk mode, only transcribe when explicitly requested (forcePTT=true)
+    if (audioInputMode === 'push-to-talk' && !forcePTT) {
+        return;
+    }
+
    // Calculate audio duration
    const bytesPerSample = 2;
    const audioDurationMs = (audioBuffer.length / bytesPerSample / SAMPLE_RATE) * 1000;
@ -475,7 +587,8 @@ async function transcribeBufferedAudio() {
    }

    // Check if there's actual speech in the audio (Voice Activity Detection)
-    if (!hasSpeech(audioBuffer)) {
+    // Skip VAD check in PTT mode - user explicitly wants to transcribe
+    if (!forcePTT && !hasSpeech(audioBuffer)) {
        // Clear buffer if it's just silence/noise
        audioBuffer = Buffer.alloc(0);
        return;
@ -487,7 +600,9 @@ async function transcribeBufferedAudio() {

    try {
        console.log(`Transcribing ${audioDurationMs.toFixed(0)}ms of audio...`);
+        if (!forcePTT) {
            sendToRenderer('update-status', 'Transcribing...');
+        }

        const transcription = await transcribeAudio(currentBuffer, 'audio/wav');

@ -497,14 +612,20 @@ async function transcribeBufferedAudio() {

            // Send to chat
            await sendTextMessage(transcription);
+        } else if (forcePTT) {
+            console.log('Push-to-Talk: No speech detected in recording');
        }

+        if (!forcePTT) {
            sendToRenderer('update-status', 'Listening...');
+        }
    } catch (error) {
        console.error('Transcription error:', error);
+        if (!forcePTT) {
            sendToRenderer('update-status', 'Listening...');
        }
    }
+}

 async function startMacOSAudioCapture() {
    if (process.platform !== 'darwin') return false;
@ -598,6 +719,10 @@ async function startMacOSAudioCapture() {
            // Convert stereo to mono
            const monoChunk = CHANNELS === 2 ? convertStereoToMono(chunk) : chunk;

+            if (audioInputMode === 'push-to-talk' && !isPushToTalkActive) {
+                continue;
+            }
+
            // Add to audio buffer for transcription
            audioBuffer = Buffer.concat([audioBuffer, monoChunk]);

@ -643,7 +768,7 @@ async function startMacOSAudioCapture() {
    });

    // Start periodic transcription
-    startTranscriptionTimer();
+    updateTranscriptionTimerForPushToTalk();

    sendToRenderer('update-status', 'Listening...');

@ -651,6 +776,10 @@ async function startMacOSAudioCapture() {
 }

 function startTranscriptionTimer() {
+    // Don't start auto-transcription timer in push-to-talk mode
+    if (audioInputMode === 'push-to-talk') {
+        return;
+    }
    stopTranscriptionTimer();
    transcriptionTimer = setInterval(transcribeBufferedAudio, TRANSCRIPTION_INTERVAL_MS);
 }
@ -682,6 +811,8 @@ module.exports = {
    sendImageMessage,
    processAudioChunk,
    flushAudioAndTranscribe,
+    togglePushToTalk,
+    updatePushToTalkSettings,
    clearConversation,
    closeOpenAISDK,
    startMacOSAudioCapture,
--- a/src/utils/renderer.js
+++ b/src/utils/renderer.js
@ -186,6 +186,10 @@ ipcRenderer.on('update-status', (event, status) => {
    cheatingDaddy.setStatus(status);
 });

+ipcRenderer.on('push-to-talk-toggle', () => {
+    ipcRenderer.send('push-to-talk-toggle');
+});
+
 async function startCapture(screenshotIntervalSeconds = 5, imageQuality = 'medium') {
    // Store the image quality for manual screenshots
    currentImageQuality = imageQuality;
--- a/src/utils/window.js
+++ b/src/utils/window.js
@ -9,6 +9,7 @@ let windowResizing = false;
 let resizeAnimation = null;
 const RESIZE_ANIMATION_DURATION = 500; // milliseconds

+
 function createWindow(sendToRenderer, geminiSessionRef) {
    // Get layout preference (default to 'normal')
    let windowWidth = 1100;
@ -155,6 +156,7 @@ function getDefaultKeybinds() {
        scrollUp: isMac ? 'Cmd+Shift+Up' : 'Ctrl+Shift+Up',
        scrollDown: isMac ? 'Cmd+Shift+Down' : 'Ctrl+Shift+Down',
        emergencyErase: isMac ? 'Cmd+Shift+E' : 'Ctrl+Shift+E',
+        pushToTalk: isMac ? 'Ctrl+Space' : 'Ctrl+Space',
    };
 }

@ -164,6 +166,10 @@ function updateGlobalShortcuts(keybinds, mainWindow, sendToRenderer, geminiSessi
    // Unregister all existing shortcuts
    globalShortcut.unregisterAll();

+    const prefs = storage.getPreferences();
+    const audioInputMode = prefs.audioInputMode || 'auto';
+    const enablePushToTalk = audioInputMode === 'push-to-talk';
+
    const primaryDisplay = screen.getPrimaryDisplay();
    const { width, height } = primaryDisplay.workAreaSize;
    const moveIncrement = Math.floor(Math.min(width, height) * 0.1);
@ -343,6 +349,18 @@ function updateGlobalShortcuts(keybinds, mainWindow, sendToRenderer, geminiSessi
            console.error(`Failed to register emergencyErase (${keybinds.emergencyErase}):`, error);
        }
    }
+
+    // Register push-to-talk shortcut (OpenAI SDK only, gated by preferences)
+    if (keybinds.pushToTalk && enablePushToTalk) {
+        try {
+            globalShortcut.register(keybinds.pushToTalk, () => {
+                sendToRenderer('push-to-talk-toggle');
+            });
+            console.log(`Registered pushToTalk (toggle): ${keybinds.pushToTalk}`);
+        } catch (error) {
+            console.error(`Failed to register pushToTalk (${keybinds.pushToTalk}):`, error);
+        }
+    }
 }

 function setupWindowIpcHandlers(mainWindow, sendToRenderer, geminiSessionRef) {