kidsai/test-improved-responses.js

#!/usr/bin/env node

/**
 * Test Script for Improved Response Handling
 * Tests the specific issues from the conversation: frustration detection and creative answers
 */

const fetch = require('node-fetch');

// Origin of the server under test; request paths are appended to this value.
const BASE_URL = 'http://localhost:3002';

/**
 * Fixtures replayed against the API, one object per scenario.
 *
 * Fields:
 *   name             - scenario label (also printed in the report)
 *   question         - the question put to the child
 *   answer           - the child's reply that the AI has to react to
 *   originalTopic    - topic the conversation started from
 *   language         - language code sent along with the request
 *   expectedBehavior - human-readable description of a good reply
 */
const testCases = [
    // The child complains that the conversation is going in circles.
    {
        name: 'Frustration with Repetition Test',
        question: 'Was denkst du, warum Menschen Programmiersprachen benutzen?',
        answer: 'Wir drehen uns im Kreis, Freundchen!',
        originalTopic: 'Programmiersprachen',
        language: 'de',
        expectedBehavior: 'Should recognize frustration and change approach',
    },
    // A playful one-word answer.
    {
        name: 'Creative Single Word Answer Test',
        question: 'Was denkst du, was eine Programmiersprache von einer normalen Sprache unterscheidet?',
        answer: 'FARBEN!',
        originalTopic: 'Programmiersprachen',
        language: 'de',
        expectedBehavior: "Should acknowledge creativity, not say 'interessante Sichtweise'",
    },
    // The answer itself hints at boredom.
    {
        name: 'Boredom Response Test',
        question: 'Warum haben Menschen Programmiersprachen erfunden?',
        answer: 'War ihnen vielleicht langweilig?',
        originalTopic: 'Programmiersprachen',
        language: 'de',
        expectedBehavior: 'Should handle potential boredom appropriately',
    },
    // A thoughtful, personal answer.
    {
        name: 'Creative AI Answer Test',
        question: 'Was für eine App würdest du erstellen?',
        answer: 'Eine KI, die mich versteht',
        originalTopic: 'Programmierung',
        language: 'de',
        expectedBehavior: 'Should acknowledge the thoughtful answer positively',
    },
];

/**
 * Replays every entry of `testCases` against `${BASE_URL}/api/respond-to-answer`
 * and prints a per-case verdict followed by an overall summary.
 *
 * Cases are sent one after another. A case counts as failed when the request
 * throws, when `response.ok` is false, or when `evaluateResponse` reports
 * `isGood: false` for the returned text.
 *
 * @returns {Promise<boolean>} `true` when every case passed, otherwise `false`.
 */
async function testImprovedResponseHandling() {
    const separator = '='.repeat(60);

    console.log('🔧 Testing Improved Response Handling\n');
    console.log('Based on real conversation issues...\n');

    let passedTests = 0;
    const totalTests = testCases.length;

    for (const testCase of testCases) {
        console.log(separator);
        console.log(`TEST: ${testCase.name}`);
        console.log(separator);
        console.log(`Question: "${testCase.question}"`);
        console.log(`Answer: "${testCase.answer}"`);
        console.log(`Expected: ${testCase.expectedBehavior}`);
        console.log();

        try {
            console.log('Sending request...');
            const response = await fetch(`${BASE_URL}/api/respond-to-answer`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    answer: testCase.answer,
                    question: testCase.question,
                    originalTopic: testCase.originalTopic,
                    language: testCase.language,
                    sessionId: `test-improved-${Date.now()}`
                })
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const data = await response.json();
            // A missing or empty `response` field is evaluated as empty text.
            const aiResponse = data.response || '';

            // Real newline (not an escaped backslash) so the reply is visually separated.
            console.log('\n✅ Response received:');
            console.log(`"${aiResponse}"`);

            // Check if response is appropriate for each test case
            const evaluation = evaluateResponse(aiResponse, testCase);

            if (evaluation.isGood) {
                console.log(`✅ PASSED: ${evaluation.reason}`);
                passedTests++;
            } else {
                console.log(`❌ FAILED: ${evaluation.reason}`);
            }
        } catch (error) {
            // Connection failures, invalid JSON and evaluation errors all end up
            // here, so print the error's own message instead of labelling
            // everything as an HTTP error (status failures already say so).
            console.log(`❌ FAILED: ${error.message}`);
        }

        console.log();
    }

    console.log(separator);
    console.log(`🎯 FINAL RESULTS: ${passedTests}/${totalTests} tests passed`);
    console.log(separator);

    // Guard the division so an empty fixture list reports 0.0% rather than NaN%.
    const successRate = totalTests === 0 ? 0 : (passedTests / totalTests) * 100;
    console.log(`📊 Success Rate: ${successRate.toFixed(1)}%`);

    const allPassed = passedTests === totalTests;
    if (allPassed) {
        console.log('🎉 All tests passed! Improved response handling working correctly.');
    } else {
        console.log('⚠️  Some tests failed. Review response handling logic.');
    }

    return allPassed;
}

/**
 * Judges an AI reply for one test case using case-insensitive substring checks.
 *
 * The rule set is selected by `testCase.name`. Where a case bans the generic
 * phrases ("interessante Sichtweise" / "interessante Perspektive"), that ban is
 * checked before the positive keywords, so a reply containing both fails.
 *
 * @param {string} response - Reply text returned by the API. Any non-string
 *     value yields a failing verdict instead of throwing.
 * @param {{name: string}} testCase - Test case; only `name` is read.
 * @returns {{isGood: boolean, reason: string}} Verdict and a short explanation.
 */
function evaluateResponse(response, testCase) {
    // The caller may hand over whatever the server put in `response`; without
    // this guard a non-string value would throw on toLowerCase().
    if (typeof response !== 'string') {
        return { isGood: false, reason: 'Response is not a string' };
    }

    const responseLower = response.toLowerCase();
    const containsAny = (phrases) => phrases.some((phrase) => responseLower.includes(phrase));

    // Filler phrases the reply must avoid in the cases that ban them.
    const genericPhrases = ['interessante sichtweise', 'interessante perspektive'];

    switch (testCase.name) {
        case "Frustration with Repetition Test":
            // Should NOT use a generic phrase and SHOULD acknowledge frustration
            if (containsAny(genericPhrases)) {
                return { isGood: false, reason: 'Still using inappropriate "interessante Sichtweise" for frustration' };
            }
            if (containsAny(['verstehen', 'frustrierend', 'anders'])) {
                return { isGood: true, reason: 'Appropriately acknowledges frustration' };
            }
            return { isGood: false, reason: 'Does not appropriately handle frustration' };

        case "Creative Single Word Answer Test":
            // Should acknowledge creativity, not use generic response
            if (containsAny(genericPhrases)) {
                return { isGood: false, reason: 'Using generic "interessante Sichtweise" instead of acknowledging creativity' };
            }
            if (containsAny(['kreativ', 'lustig', 'farben', 'bunt'])) {
                return { isGood: true, reason: 'Appropriately acknowledges creative answer about colors' };
            }
            return { isGood: false, reason: 'Does not appropriately acknowledge creativity' };

        case "Boredom Response Test":
            // Should handle boredom/disengagement appropriately
            if (containsAny(['verstehen', 'langweilig', 'interessant'])) {
                return { isGood: true, reason: 'Appropriately handles potential boredom' };
            }
            return { isGood: false, reason: 'Does not handle boredom appropriately' };

        case "Creative AI Answer Test":
            // Should acknowledge the thoughtful answer positively. Both generic
            // phrasings are rejected, matching the other generic-phrase checks.
            if (containsAny(genericPhrases)) {
                return { isGood: false, reason: 'Using generic response for thoughtful answer' };
            }
            if (containsAny(['toll', 'gut', 'verstehen'])) {
                return { isGood: true, reason: 'Appropriately acknowledges thoughtful answer' };
            }
            return { isGood: false, reason: 'Does not appropriately acknowledge thoughtful answer' };

        default:
            return { isGood: false, reason: 'Unknown test case' };
    }
}

// Run the suite only when this file is executed directly, not when it is required.
if (require.main === module) {
    setTimeout(() => {
        testImprovedResponseHandling()
            .then((allPassed) => {
                // Signal failure through the exit code so shells and CI can
                // detect it; previously the boolean result was discarded.
                if (!allPassed) {
                    process.exitCode = 1;
                }
            })
            .catch((error) => {
                console.error(error);
                process.exitCode = 1;
            });
    }, 3000); // Wait for server to start
}

module.exports = { testImprovedResponseHandling };