// ice/profanity-filter.js

/**
 * Comprehensive Server-Side Profanity Filter for Ice Watch
 * Filters inappropriate language with database-backed custom word management
 */

const sqlite3 = require('sqlite3').verbose();
const path = require('path');

/**
 * Server-side profanity filter backed by a SQLite table of custom words.
 *
 * Detection combines a built-in word list with custom words loaded from the
 * `profanity_words` table. Each word is compiled into two regular expressions:
 * a plain whole-word match and an "advanced" match that tolerates leetspeak
 * substitutions and separator characters between letters.
 */
class ProfanityFilter {
    /**
     * Static factory method for creating and initializing a ProfanityFilter
     * @param {string|null} dbPath - Optional path to database file
     * @returns {Promise<ProfanityFilter>} Fully initialized ProfanityFilter instance
     */
    static async create(dbPath = null) {
        const filter = new ProfanityFilter(dbPath);
        await filter.initialize();
        return filter;
    }

    /**
     * Opens the database and compiles patterns for the built-in words.
     * Custom words are only available after initialize() has resolved.
     * @param {string|null} dbPath - Optional path to database file; defaults to
     *   `profanity.db` next to this module
     */
    constructor(dbPath = null) {
        // Initialize separate database for profanity filter
        const defaultDbPath = path.join(__dirname, 'profanity.db');
        this.dbPath = dbPath || defaultDbPath;
        this.db = new sqlite3.Database(this.dbPath);
        this.isInitialized = false;

        // Base profanity words - comprehensive list
        this.baseProfanityWords = [
            // Common profanity
            'damn', 'hell', 'crap', 'shit', 'fuck', 'ass', 'bitch', 'bastard',
            'piss', 'whore', 'slut', 'retard', 'fag', 'gay', 'homo', 'tranny',
            'dickhead', 'asshole', 'motherfucker', 'cocksucker', 'twat', 'cunt',

            // Racial slurs and hate speech
            'nigger', 'nigga', 'spic', 'wetback', 'chink', 'gook', 'kike',
            'raghead', 'towelhead', 'beaner', 'cracker', 'honkey', 'whitey',
            'kyke', 'jigaboo', 'coon', 'darkie', 'mammy', 'pickaninny',

            // Sexual content
            'penis', 'vagina', 'boob', 'tit', 'cock', 'dick', 'pussy', 'cum',
            'sex', 'porn', 'nude', 'naked', 'horny', 'masturbate', 'orgasm',
            'blowjob', 'handjob', 'anal', 'penetration', 'erection', 'climax',

            // Violence and threats
            'kill', 'murder', 'shoot', 'bomb', 'terrorist', 'suicide', 'rape',
            'violence', 'assault', 'attack', 'threat', 'harm', 'hurt', 'pain',
            'stab', 'strangle', 'torture', 'execute', 'assassinate', 'slaughter',

            // Drugs and substances
            'weed', 'marijuana', 'cocaine', 'heroin', 'meth', 'drugs', 'high',
            'stoned', 'drunk', 'alcohol', 'beer', 'liquor', 'vodka', 'whiskey',
            'ecstasy', 'lsd', 'crack', 'dope', 'pot', 'joint', 'bong',

            // Religious/cultural insults
            'jesus christ', 'goddamn', 'christ almighty', 'holy shit', 'god damn',
            'for christ sake', 'jesus fucking christ', 'holy fuck',

            // Body parts (inappropriate context)
            'testicles', 'balls', 'scrotum', 'clitoris', 'labia', 'anus',
            'rectum', 'butthole', 'nipples', 'breasts',

            // Misc inappropriate
            'wtf', 'omfg', 'stfu', 'gtfo', 'milf', 'dilf', 'thot', 'simp',
            'incel', 'chad', 'beta', 'alpha male', 'mansplain', 'karen'
        ];

        // Leetspeak and common substitutions
        this.leetMap = {
            '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '6': 'g', '7': 't',
            '8': 'b', '9': 'g', '@': 'a', '$': 's', '!': 'i', '+': 't', '*': 'a',
            '%': 'a', '(': 'c', ')': 'c', '&': 'a', '#': 'h', '|': 'l', '\\': '/'
        };

        // Initialize custom words array
        this.customWords = [];

        // Build initial patterns with base words only
        this.patterns = this.buildPatterns();
    }

    /**
     * Async initialization method that must be called after construction.
     * Creates the table if needed and loads custom words; a no-op once it has
     * completed successfully.
     * @returns {Promise<void>}
     * @throws Rejects with the underlying error if table creation or loading fails
     */
    async initialize() {
        if (this.isInitialized) {
            return;
        }

        try {
            // The table must exist before custom words can be read from it
            await this.initializeDatabaseAsync();

            // Load custom words from database
            await this.loadCustomWords();

            this.isInitialized = true;
            console.log('ProfanityFilter initialization completed successfully');
        } catch (error) {
            console.error('Error during ProfanityFilter initialization:', error);
            throw error;
        }
    }

    /**
     * SQL used by both table-initialization methods so they cannot drift apart.
     * @private
     * @returns {string} CREATE TABLE IF NOT EXISTS statement for profanity_words
     */
    _createTableSql() {
        return `CREATE TABLE IF NOT EXISTS profanity_words (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            word TEXT NOT NULL UNIQUE,
            severity TEXT DEFAULT 'medium',
            category TEXT DEFAULT 'custom',
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            created_by TEXT DEFAULT 'admin'
        )`;
    }

    /**
     * Promise wrapper around `db.run`.
     * @private
     * @param {string} sql - Statement to execute
     * @param {Array} [params] - Bound parameters
     * @returns {Promise<{lastID: number, changes: number}>} Statement metadata
     */
    _run(sql, params = []) {
        return new Promise((resolve, reject) => {
            // A regular function is required: sqlite3 exposes lastID/changes on `this`
            this.db.run(sql, params, function onRunComplete(err) {
                if (err) {
                    reject(err);
                    return;
                }
                resolve({ lastID: this.lastID, changes: this.changes });
            });
        });
    }

    /**
     * Promise wrapper around `db.all`.
     * @private
     * @param {string} sql - Query to execute
     * @param {Array} [params] - Bound parameters
     * @returns {Promise<Array<Object>>} Result rows
     */
    _all(sql, params = []) {
        return new Promise((resolve, reject) => {
            this.db.all(sql, params, (err, rows) => {
                if (err) {
                    reject(err);
                    return;
                }
                resolve(rows);
            });
        });
    }

    /**
     * Initialize the database table for custom profanity words (async version)
     * @returns {Promise<void>} Resolves once the table exists
     */
    async initializeDatabaseAsync() {
        return new Promise((resolve, reject) => {
            this.db.serialize(() => {
                this.db.run(this._createTableSql(), (err) => {
                    if (err) {
                        console.error('Error creating profanity_words table:', err);
                        reject(err);
                    } else {
                        console.log('Profanity words table initialized successfully');
                        resolve();
                    }
                });
            });
        });
    }

    /**
     * Initialize the database table for custom profanity words (legacy sync version).
     * Fire-and-forget: errors are logged, not reported to the caller.
     * @deprecated Use initializeDatabaseAsync() instead
     */
    initializeDatabase() {
        this.db.serialize(() => {
            this.db.run(this._createTableSql(), (err) => {
                if (err) {
                    console.error('Error creating profanity_words table:', err);
                } else {
                    console.log('Profanity words table initialized successfully');
                }
            });
        });
    }

    /**
     * Load custom words from database and rebuild the detection patterns.
     * If the query or the pattern build fails, the previous in-memory word list
     * and patterns are kept.
     * @returns {Promise<void>}
     */
    async loadCustomWords() {
        let rows;
        try {
            rows = await this._all('SELECT word, severity, category FROM profanity_words', []);
        } catch (err) {
            console.error('Error loading custom profanity words:', err);
            throw err;
        }

        const previousWords = this.customWords;
        this.customWords = rows.map(row => ({
            word: String(row.word).toLowerCase(),
            severity: row.severity,
            category: row.category
        }));

        try {
            // Rebuild patterns with custom words
            this.patterns = this.buildPatterns();
        } catch (err) {
            // Reject instead of throwing inside a database callback, and keep
            // the word list consistent with the patterns still in use.
            this.customWords = previousWords;
            console.error('Error loading custom profanity words:', err);
            throw err;
        }

        console.log(`Loaded ${this.customWords.length} custom profanity words`);
    }

    /**
     * Reload custom words after a successful database mutation so that the
     * in-memory patterns reflect the change immediately. A reload failure is
     * logged but does not fail the mutation, which has already been committed.
     * @private
     * @returns {Promise<void>}
     */
    async _refreshCustomWords() {
        try {
            await this.loadCustomWords();
        } catch (err) {
            console.error('Error refreshing profanity patterns after database change:', err);
        }
    }

    /**
     * Check if the filter is fully initialized and warn if not
     * @private
     */
    _checkInitialization() {
        if (!this.isInitialized) {
            console.warn('⚠️  ProfanityFilter: Using base words only - custom words not loaded yet. Call initialize() or use ProfanityFilter.create() for full functionality.');
        }
    }

    /**
     * Assemble base and custom word records without the initialization warning.
     * Used internally so that building patterns during construction or
     * initialization does not emit a spurious warning.
     * @private
     * @returns {Array<{word: string, severity: string, category: string}>}
     */
    _collectWords() {
        const baseWords = this.baseProfanityWords.map(word => ({
            word: word.toLowerCase(),
            severity: this.getSeverity(word),
            category: 'base'
        }));

        return [...baseWords, ...this.customWords];
    }

    /**
     * Get all profanity words (base + custom)
     * @returns {Array<{word: string, severity: string, category: string}>}
     */
    getAllWords() {
        this._checkInitialization();
        return this._collectWords();
    }

    /**
     * Escape a string for literal use inside a regular expression, including
     * inside a character class (hence the extra `-`).
     * @private
     * @param {string} text - Raw text
     * @returns {string} Regex-safe text
     */
    _escapeRegex(text) {
        return text.replace(/[.*+?^${}()|[\]\\-]/g, '\\$&');
    }

    /**
     * Build regex patterns for profanity detection.
     *
     * Every word yields a plain whole-word regex and an "advanced" regex that
     * accepts leetspeak substitutes per character and optional separators
     * between characters. Empty words are skipped because they would compile to
     * a pattern that matches at every word boundary.
     * @returns {Array<{regex: RegExp, advancedRegex: RegExp, word: string, severity: string, category: string}>}
     */
    buildPatterns() {
        // "Not preceded by a word character", written without lookbehind:
        // either a word boundary followed by a word char, or a non-boundary
        // followed by a non-word char. Identical to \b for words starting with
        // a letter, but also anchors matches that start with e.g. '@' or '$'.
        const leftEdge = '(?:\\b(?=\\w)|\\B(?=\\W))';
        // "Not followed by a word character"; identical to \b after a letter.
        const rightEdge = '(?!\\w)';
        const separator = '[\\s\\-_\\.,;:!?]*?';

        return this._collectWords()
            .filter(wordObj => typeof wordObj.word === 'string' && wordObj.word.length > 0)
            .map(wordObj => {
                const word = wordObj.word;

                // Plain whole-word detection
                const simpleRegex = new RegExp(`${leftEdge}${this._escapeRegex(word)}${rightEdge}`, 'gi');

                // Detection tolerant of character substitutions and separators
                const advancedPattern = word.split('').map(char => {
                    const substitutes = this.getCharSubstitutes(char);
                    if (substitutes.length > 1) {
                        return `[${substitutes.map(s => this._escapeRegex(s)).join('')}]`;
                    }
                    // Single characters must be escaped too, otherwise a word
                    // containing '(' or '.' yields an invalid or over-broad regex
                    return this._escapeRegex(char);
                }).join(separator);

                const advancedRegex = new RegExp(`${leftEdge}${advancedPattern}${rightEdge}`, 'gi');

                return {
                    regex: simpleRegex,
                    advancedRegex: advancedRegex,
                    word: word,
                    severity: wordObj.severity,
                    category: wordObj.category
                };
            });
    }

    /**
     * Get character substitutes including leetspeak
     * @param {string} char - Single character of a filtered word
     * @returns {string[]} The character itself followed by its known substitutes, without duplicates
     */
    getCharSubstitutes(char) {
        // Leetspeak keys that normalize to this character
        const fromLeetMap = Object.keys(this.leetMap).filter(leet => this.leetMap[leet] === char);

        // Additional look-alikes that the leet map assigns to a different letter
        const charMap = {
            'a': ['@', '4', '*'],
            'e': ['3'],
            'i': ['1', '!', '|'],
            'o': ['0'],
            's': ['$', '5'],
            't': ['7', '+'],
            'g': ['6', '9'],
            'b': ['8'],
            'l': ['1', '|']
        };
        const fromCharMap = charMap[char] || [];

        return [...new Set([char, ...fromLeetMap, ...fromCharMap])]; // Remove duplicates
    }

    /**
     * Normalize text to handle various obfuscation attempts: lowercases,
     * maps leetspeak characters to letters, joins runs of 3-5 spaced-out single
     * letters, and replaces remaining punctuation with single spaces.
     * @param {string} text - Raw input
     * @returns {string} Normalized text, or '' for empty / non-string input
     */
    normalizeText(text) {
        if (!text || typeof text !== 'string') return '';

        let normalized = text.toLowerCase();

        // Replace leetspeak characters literally (no regex needed)
        Object.keys(this.leetMap).forEach(leet => {
            normalized = normalized.split(leet).join(this.leetMap[leet]);
        });

        // Handle spaced out words (f u c k -> fuck), longest runs first
        normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4$5');
        normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4');
        normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3');

        // Remove excessive punctuation but keep word boundaries
        normalized = normalized.replace(/[^\w\s]/g, ' ').replace(/\s+/g, ' ').trim();

        return normalized;
    }

    /**
     * Run one pattern against the normalized and the lowercased original text.
     * Order: simple regex on normalized, simple on original, advanced on
     * normalized, advanced on original. Uses String#match, which resets the
     * shared global regex's lastIndex, so results never depend on earlier calls.
     * @private
     * @param {{regex: RegExp, advancedRegex?: RegExp}} pattern - Compiled pattern
     * @param {string} normalizedText - Output of normalizeText()
     * @param {string} originalText - Lowercased original text
     * @returns {string[]|null} Matched substrings, or null when nothing matched
     */
    _findMatches(pattern, normalizedText, originalText) {
        let found = normalizedText.match(pattern.regex) || originalText.match(pattern.regex);

        if (!found && pattern.advancedRegex) {
            found = normalizedText.match(pattern.advancedRegex) ||
                    originalText.match(pattern.advancedRegex);
        }

        return found || null;
    }

    /**
     * Check if text contains profanity. Uses the same matching steps as
     * analyzeProfanity(), so both methods agree on every input.
     * @param {string} text - Text to check
     * @returns {boolean} true if any pattern matches
     */
    containsProfanity(text) {
        if (!text || typeof text !== 'string') return false;

        this._checkInitialization();

        const normalizedText = this.normalizeText(text);
        const originalText = text.toLowerCase();

        return this.patterns.some(pattern =>
            this._findMatches(pattern, normalizedText, originalText) !== null
        );
    }

    /**
     * Get detailed analysis of profanity in text
     * @param {string} text - Text to analyze
     * @param {string} [replacement='*'] - Mask unit; each masked word becomes
     *   this string repeated max(3, word length) times
     * @returns {{hasProfanity: boolean, matches: Array<Object>, severity: string, count: number, filtered: string}}
     *   `matches` holds one entry per distinct word; `severity` is the highest
     *   severity among them ('none' when clean); `filtered` is the masked text
     */
    analyzeProfanity(text, replacement = '*') {
        if (!text || typeof text !== 'string') {
            return {
                hasProfanity: false,
                matches: [],
                severity: 'none',
                count: 0,
                filtered: ''
            };
        }

        this._checkInitialization();

        const maskUnit = typeof replacement === 'string' ? replacement : '*';
        const normalizedText = this.normalizeText(text);
        const originalText = text.toLowerCase();
        const matches = [];
        let filteredText = text;

        this.patterns.forEach(pattern => {
            const found = this._findMatches(pattern, normalizedText, originalText);
            if (!found) {
                return;
            }

            // Only add if not already detected
            if (!matches.some(m => m.word === pattern.word)) {
                matches.push({
                    word: pattern.word,
                    matches: found,
                    severity: pattern.severity,
                    category: pattern.category
                });
            }

            // Mask only boundary-anchored occurrences so that innocent words
            // containing the term (e.g. "class" for "ass") are left intact.
            // Function replacers avoid '$' being interpreted in the mask.
            const mask = maskUnit.repeat(Math.max(3, pattern.word.length));
            filteredText = filteredText.replace(pattern.regex, () => mask);
            if (pattern.advancedRegex) {
                filteredText = filteredText.replace(pattern.advancedRegex, () => mask);
            }
        });

        const hasProfanity = matches.length > 0;
        const maxSeverity = hasProfanity ?
            Math.max(...matches.map(m => this.getSeverityLevel(m.severity))) : 0;

        return {
            hasProfanity,
            matches,
            severity: this.getSeverityName(maxSeverity),
            count: matches.length,
            filtered: filteredText
        };
    }

    /**
     * Filter profanity from text
     * @param {string} text - Text to filter
     * @param {string} [replacement='*'] - Mask unit repeated to cover each word
     * @returns {string} Text with detected words masked ('' for empty / non-string input)
     */
    filterProfanity(text, replacement = '*') {
        return this.analyzeProfanity(text, replacement).filtered;
    }

    /**
     * Get severity level for a word
     * @param {string} word - Word to classify (case-insensitive)
     * @returns {'high'|'medium'|'low'} 'low' for any word not listed below
     */
    getSeverity(word) {
        // High severity: hate speech, extreme profanity, threats
        const highSeverity = [
            'nigger', 'nigga', 'kill', 'murder', 'shoot', 'bomb', 'terrorist',
            'rape', 'kike', 'fag', 'raghead', 'towelhead', 'motherfucker',
            'cunt', 'cocksucker', 'jigaboo', 'coon', 'execute', 'assassinate'
        ];

        // Medium severity: sexual content, moderate profanity
        const mediumSeverity = [
            'fuck', 'shit', 'bitch', 'whore', 'slut', 'penis', 'vagina',
            'cock', 'dick', 'pussy', 'cum', 'sex', 'porn', 'asshole',
            'dickhead', 'twat', 'blowjob', 'handjob'
        ];

        const key = word.toLowerCase();
        if (highSeverity.includes(key)) return 'high';
        if (mediumSeverity.includes(key)) return 'medium';
        return 'low';
    }

    /**
     * Get numeric severity level
     * @param {string} severity - 'high', 'medium' or 'low'
     * @returns {number} 3, 2 or 1 respectively; 0 for anything else
     */
    getSeverityLevel(severity) {
        switch (severity) {
            case 'high': return 3;
            case 'medium': return 2;
            case 'low': return 1;
            default: return 0;
        }
    }

    /**
     * Get severity name from level
     * @param {number} level - Numeric level as returned by getSeverityLevel()
     * @returns {string} 'high', 'medium', 'low', or 'none' for any other value
     */
    getSeverityName(level) {
        switch (level) {
            case 3: return 'high';
            case 2: return 'medium';
            case 1: return 'low';
            default: return 'none';
        }
    }

    /**
     * Add a custom word to the database and refresh the in-memory patterns.
     * @param {string} word - Word to add; stored lowercased and trimmed
     * @param {string} [severity='medium']
     * @param {string} [category='custom']
     * @param {string} [createdBy='admin']
     * @returns {Promise<{id: number, word: string, severity: string, category: string, created_by: string}>}
     * @throws Rejects if the word is empty / not a string, already exists, or the insert fails
     */
    async addCustomWord(word, severity = 'medium', category = 'custom', createdBy = 'admin') {
        if (typeof word !== 'string' || word.trim() === '') {
            // An empty word would compile to a pattern that matches everything
            throw new Error('Word must be a non-empty string');
        }
        const normalizedWord = word.toLowerCase().trim();

        let result;
        try {
            result = await this._run(
                'INSERT INTO profanity_words (word, severity, category, created_by) VALUES (?, ?, ?, ?)',
                [normalizedWord, severity, category, createdBy]
            );
        } catch (err) {
            if (err && typeof err.message === 'string' && err.message.includes('UNIQUE constraint failed')) {
                throw new Error('Word already exists in the filter');
            }
            throw err;
        }

        console.log(`Added custom profanity word: ${normalizedWord}`);
        await this._refreshCustomWords();

        return {
            id: result.lastID,
            word: normalizedWord,
            severity,
            category,
            created_by: createdBy
        };
    }

    /**
     * Remove a custom word from the database and refresh the in-memory patterns.
     * @param {number} wordId - Row id of the word
     * @returns {Promise<{deleted: boolean, changes: number}>}
     * @throws Rejects with 'Word not found' if no row has that id, or with the database error
     */
    async removeCustomWord(wordId) {
        const result = await this._run('DELETE FROM profanity_words WHERE id = ?', [wordId]);

        if (result.changes === 0) {
            throw new Error('Word not found');
        }

        console.log(`Removed custom profanity word with ID: ${wordId}`);
        await this._refreshCustomWords();

        return { deleted: true, changes: result.changes };
    }

    /**
     * Get all custom words from database
     * @returns {Promise<Array<Object>>} Rows with id, word, severity, category,
     *   created_at and created_by, newest first
     */
    async getCustomWords() {
        return this._all(
            'SELECT id, word, severity, category, created_at, created_by FROM profanity_words ORDER BY created_at DESC',
            []
        );
    }

    /**
     * Update a custom word and refresh the in-memory patterns.
     * Fields omitted from `updates` (undefined or null) keep their stored value.
     * @param {number} wordId - Row id of the word
     * @param {{word?: string, severity?: string, category?: string}} updates - Fields to change
     * @returns {Promise<{updated: boolean, changes: number}>}
     * @throws Rejects if no field is given, the new word is empty / not a
     *   string, no row has that id, or the update fails
     */
    async updateCustomWord(wordId, updates) {
        const { word, severity, category } = updates || {};
        const isMissing = value => value === undefined || value === null;

        if (isMissing(word) && isMissing(severity) && isMissing(category)) {
            throw new Error('No update fields provided');
        }

        let normalizedWord = null;
        if (!isMissing(word)) {
            if (typeof word !== 'string' || word.trim() === '') {
                throw new Error('Word must be a non-empty string');
            }
            normalizedWord = word.toLowerCase().trim();
        }

        // COALESCE keeps the stored value for every field bound as NULL
        const result = await this._run(
            'UPDATE profanity_words SET word = COALESCE(?, word), severity = COALESCE(?, severity), category = COALESCE(?, category) WHERE id = ?',
            [
                normalizedWord,
                isMissing(severity) ? null : severity,
                isMissing(category) ? null : category,
                wordId
            ]
        );

        if (result.changes === 0) {
            throw new Error('Word not found');
        }

        console.log(`Updated custom profanity word with ID: ${wordId}`);
        await this._refreshCustomWords();

        return { updated: true, changes: result.changes };
    }

    /**
     * Close the database connection. Errors are logged, not thrown.
     */
    close() {
        if (this.db) {
            this.db.close((err) => {
                if (err) {
                    console.error('Error closing profanity filter database:', err);
                }
            });
        }
    }

}

module.exports = ProfanityFilter;