// ice/services/ProfanityFilterService.js

/**
 * Refactored Profanity Filter Service that uses the ProfanityWord model
 */

class ProfanityFilterService {
    /**
     * Create a filter backed by a profanity-word model.
     *
     * The filter is not usable for detection until `initialize()` (or
     * `loadCustomWords()`) has built `this.patterns`; before that,
     * `containsProfanity()` returns false and `analyzeProfanity()` returns an
     * empty result.
     *
     * @param {object} profanityWordModel - Data-access object. This class calls
     *   its `loadWords()`, `create()`, `delete()`, `getAll()` and `update()` methods.
     */
    constructor(profanityWordModel) {
        this.profanityWordModel = profanityWordModel;
        this.isInitialized = false;

        // Base profanity words - comprehensive list
        this.baseProfanityWords = [
            // Common profanity
            'damn', 'hell', 'crap', 'shit', 'fuck', 'ass', 'bitch', 'bastard',
            'piss', 'whore', 'slut', 'retard', 'fag', 'gay', 'homo', 'tranny',
            'dickhead', 'asshole', 'motherfucker', 'cocksucker', 'twat', 'cunt',

            // Racial slurs and hate speech
            'nigger', 'nigga', 'spic', 'wetback', 'chink', 'gook', 'kike',
            'raghead', 'towelhead', 'beaner', 'cracker', 'honkey', 'whitey',
            'kyke', 'jigaboo', 'coon', 'darkie', 'mammy', 'pickaninny',

            // Sexual content
            'penis', 'vagina', 'boob', 'tit', 'cock', 'dick', 'pussy', 'cum',
            'sex', 'porn', 'nude', 'naked', 'horny', 'masturbate', 'orgasm',
            'blowjob', 'handjob', 'anal', 'penetration', 'erection', 'climax',

            // Violence and threats
            'kill', 'murder', 'shoot', 'bomb', 'terrorist', 'suicide', 'rape',
            'violence', 'assault', 'attack', 'threat', 'harm', 'hurt', 'pain',
            'stab', 'strangle', 'torture', 'execute', 'assassinate', 'slaughter',

            // Drugs and substances
            'weed', 'marijuana', 'cocaine', 'heroin', 'meth', 'drugs', 'high',
            'stoned', 'drunk', 'alcohol', 'beer', 'liquor', 'vodka', 'whiskey',
            'ecstasy', 'lsd', 'crack', 'dope', 'pot', 'joint', 'bong',

            // Religious/cultural insults
            'jesus christ', 'goddamn', 'christ almighty', 'holy shit', 'god damn',
            'for christ sake', 'jesus fucking christ', 'holy fuck',

            // Body parts (inappropriate context)
            'testicles', 'balls', 'scrotum', 'clitoris', 'labia', 'anus',
            'rectum', 'butthole', 'nipples', 'breasts',

            // Misc inappropriate
            'wtf', 'omfg', 'stfu', 'gtfo', 'milf', 'dilf', 'thot', 'simp',
            'incel', 'chad', 'beta', 'alpha male', 'mansplain', 'karen'
        ];

        // Leetspeak and common substitutions (substitute character -> plain character)
        this.leetMap = {
            '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '6': 'g', '7': 't',
            '8': 'b', '9': 'g', '@': 'a', '$': 's', '!': 'i', '+': 't', '*': 'a',
            '%': 'a', '(': 'c', ')': 'c', '&': 'a', '#': 'h', '|': 'l', '\\': '/'
        };

        // Custom words loaded from the model: [{ word, severity, category }]
        this.customWords = [];

        // Compiled patterns; stays null until loadCustomWords() has run
        this.patterns = null;
    }

    /**
     * Initialize the filter by loading custom words and building the patterns.
     * Does nothing when the service is already initialized.
     *
     * @returns {Promise<void>}
     * @throws Rethrows any error raised while loading custom words.
     */
    async initialize() {
        if (this.isInitialized) {
            return;
        }

        try {
            await this.loadCustomWords();
            this.isInitialized = true;
            console.log('ProfanityFilterService initialization completed successfully');
        } catch (error) {
            console.error('Error during ProfanityFilterService initialization:', error);
            throw error;
        }
    }

    /**
     * Load custom words through the model's `loadWords()` and rebuild the patterns.
     * Each row's `word` is stored lower-cased.
     *
     * @returns {Promise<void>}
     * @throws Rethrows any error from the model or from pattern building.
     */
    async loadCustomWords() {
        try {
            const rows = await this.profanityWordModel.loadWords();

            this.customWords = rows.map(row => ({
                word: row.word.toLowerCase(),
                severity: row.severity,
                category: row.category
            }));

            console.log(`Loaded ${this.customWords.length} custom profanity words`);
            this.patterns = this.buildPatterns(); // Rebuild patterns with custom words
        } catch (err) {
            console.error('Error loading custom profanity words:', err);
            throw err;
        }
    }

    /**
     * Build one regex pattern per base and custom word.
     *
     * Every character of a word accepts its leetspeak substitutes from
     * `leetMap`, and any run of whitespace, `-`, `_`, `*` or `.` is allowed
     * between consecutive characters. The whole pattern is wrapped in `\b`.
     *
     * @returns {Array<{word: string, pattern: RegExp, severity: string, category: string}>}
     *   Patterns ordered longest word first.
     */
    buildPatterns() {
        const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const separator = '[\\s\\-\\_\\*\\.]*';

        // Reverse lookup built once: plain character -> substitutes that map to it.
        const substitutesByLetter = new Map();
        for (const [substitute, letter] of Object.entries(this.leetMap)) {
            if (!substitutesByLetter.has(letter)) {
                substitutesByLetter.set(letter, []);
            }
            substitutesByLetter.get(letter).push(substitute);
        }

        // A Set drops custom words that repeat a base word (they would report
        // every hit twice). Blank words are skipped because `\b\b` matches the
        // empty string and would make the exec loop in analyzeProfanity spin.
        const allWords = [...new Set([
            ...this.baseProfanityWords,
            ...this.customWords.map(w => w.word)
        ])].filter(word => typeof word === 'string' && word.trim().length > 0);

        // Sort by length (longest first) to catch longer variations before shorter ones
        allWords.sort((a, b) => b.length - a.length);

        return allWords.map(word => {
            // Escape per character AFTER splitting. Escaping the whole word
            // first would separate each backslash from the character it
            // protects and corrupt (or invalidate) the pattern.
            const pattern = word
                .split('')
                .map(char => {
                    const leetChars = substitutesByLetter.get(char.toLowerCase()) || [];
                    if (leetChars.length > 0) {
                        return `[${[char, ...leetChars].map(escapeRegExp).join('')}]`;
                    }
                    return escapeRegExp(char);
                })
                .join(separator);

            return {
                word: word,
                pattern: new RegExp(`\\b${pattern}\\b`, 'gi'),
                severity: this.getSeverity(word),
                category: this.getCategory(word)
            };
        });
    }

    /**
     * Get the severity level for a word.
     * A matching custom word's severity wins; otherwise built-in lists decide.
     *
     * @param {string} word
     * @returns {string} The custom severity, or 'high' / 'low' / 'medium'.
     */
    getSeverity(word) {
        const lowered = word.toLowerCase();

        // Check custom words first
        const customWord = this.customWords.find(w => w.word === lowered);
        if (customWord) {
            return customWord.severity;
        }

        // Categorize severity based on type
        const highSeverity = ['nigger', 'nigga', 'cunt', 'fag', 'retard', 'kike', 'spic', 'gook', 'chink'];
        const lowSeverity = ['damn', 'hell', 'crap', 'wtf', 'omfg'];

        if (highSeverity.includes(lowered)) return 'high';
        if (lowSeverity.includes(lowered)) return 'low';
        return 'medium';
    }

    /**
     * Get the category for a word.
     * A matching custom word's category wins; otherwise built-in lists decide,
     * with 'general' as the fallback.
     *
     * @param {string} word
     * @returns {string}
     */
    getCategory(word) {
        const lowered = word.toLowerCase();

        // Check custom words first
        const customWord = this.customWords.find(w => w.word === lowered);
        if (customWord) {
            return customWord.category;
        }

        // NOTE(review): these lists cover only part of baseProfanityWords, so
        // unlisted base words (e.g. 'orgasm', 'stab') fall through to 'general'.
        // Confirm that is intended.
        const categories = {
            racial: ['nigger', 'nigga', 'spic', 'wetback', 'chink', 'gook', 'kike', 'raghead', 'towelhead', 'beaner', 'cracker', 'honkey', 'whitey'],
            sexual: ['penis', 'vagina', 'boob', 'tit', 'cock', 'dick', 'pussy', 'cum', 'sex', 'porn', 'nude', 'naked', 'horny', 'masturbate'],
            violence: ['kill', 'murder', 'shoot', 'bomb', 'terrorist', 'suicide', 'rape', 'violence', 'assault', 'attack'],
            substance: ['weed', 'marijuana', 'cocaine', 'heroin', 'meth', 'drugs', 'high', 'stoned', 'drunk', 'alcohol'],
            general: ['shit', 'fuck', 'ass', 'bitch', 'bastard', 'damn', 'hell', 'crap']
        };

        for (const [category, words] of Object.entries(categories)) {
            if (words.includes(lowered)) {
                return category;
            }
        }

        return 'general';
    }

    /**
     * Normalize text and record where each normalized character came from.
     *
     * Normalization lower-cases the text, collapses every run of whitespace,
     * `-`, `_`, `*` or `.` into a single space, and then replaces leetspeak
     * characters via `leetMap`. Because runs are collapsed, normalized indices
     * differ from original indices; `starts`/`ends` map them back.
     *
     * @private
     * @param {string} text
     * @returns {{normalized: string, starts: number[], ends: number[]}}
     *   `starts[i]` / `ends[i]` delimit the slice of `text` that produced
     *   `normalized[i]` (end exclusive).
     */
    normalizeWithOffsets(text) {
        const lowered = text.toLowerCase();
        const units = [];

        if (lowered.length === text.length) {
            // Usual case: lower-casing kept every code unit in place.
            for (let i = 0; i < lowered.length; i++) {
                units.push({ char: lowered[i], start: i, end: i + 1 });
            }
        } else {
            // Lower-casing changed the length, so lower-case one code point at
            // a time to keep track of the originating offsets.
            for (let i = 0; i < text.length;) {
                const symbol = String.fromCodePoint(text.codePointAt(i));
                const loweredSymbol = symbol.toLowerCase();
                for (let k = 0; k < loweredSymbol.length; k++) {
                    units.push({ char: loweredSymbol[k], start: i, end: i + symbol.length });
                }
                i += symbol.length;
            }
        }

        const separatorChar = /[\s\-_*.]/;
        const starts = [];
        const ends = [];
        let normalized = '';
        let inSeparatorRun = false;

        for (const { char, start, end } of units) {
            if (separatorChar.test(char)) {
                if (inSeparatorRun) {
                    // Extend the single space that stands for the whole run.
                    ends[ends.length - 1] = end;
                } else {
                    normalized += ' ';
                    starts.push(start);
                    ends.push(end);
                    inSeparatorRun = true;
                }
                continue;
            }

            inSeparatorRun = false;
            const mapped = this.leetMap[char] || char;
            normalized += mapped;
            for (let k = 0; k < mapped.length; k++) {
                starts.push(start);
                ends.push(end);
            }
        }

        return { normalized, starts, ends };
    }

    /**
     * Normalize text for checking: lower-case, collapse separator runs to a
     * single space, then undo leetspeak substitutions.
     *
     * @param {string} text
     * @returns {string} Normalized text, or '' for falsy input.
     */
    normalizeText(text) {
        if (!text) return '';

        return this.normalizeWithOffsets(String(text)).normalized;
    }

    /**
     * Check if text contains profanity.
     *
     * @param {string} text
     * @returns {boolean} false for falsy text or when patterns are not built yet.
     */
    containsProfanity(text) {
        if (!text || !this.patterns) return false;

        const normalized = this.normalizeText(text);
        return this.patterns.some(({ pattern }) => {
            // The stored regexes carry the `g` flag, so test() resumes from
            // lastIndex. Reset it, otherwise a repeated call can miss a match.
            pattern.lastIndex = 0;
            const found = pattern.test(normalized);
            pattern.lastIndex = 0;
            return found;
        });
    }

    /**
     * Analyze text for profanity with detailed results.
     *
     * Matching runs on the normalized text. `matches[i].found` and
     * `matches[i].index` therefore refer to the normalized text, while
     * `filtered` is the ORIGINAL text with each matched region masked.
     *
     * @param {string} text
     * @param {string} [replacementChar='*'] - Emitted once per masked original character.
     * @returns {{hasProfanity: boolean, matches: Array<object>, severity: string, count: number, filtered: string}}
     */
    analyzeProfanity(text, replacementChar = '*') {
        if (!text || !this.patterns) {
            return {
                hasProfanity: false,
                matches: [],
                severity: 'none',
                count: 0,
                filtered: text || ''
            };
        }

        const original = String(text);
        const mask = replacementChar === null || replacementChar === undefined
            ? '*'
            : String(replacementChar);
        const { normalized, starts, ends } = this.normalizeWithOffsets(original);
        const matches = [];
        const spans = []; // [start, end) regions of the original text to mask

        for (const { word, pattern, severity, category } of this.patterns) {
            // Fresh regex per scan so the shared pattern's lastIndex is untouched.
            const regex = new RegExp(pattern.source, 'gi');
            let match;

            while ((match = regex.exec(normalized)) !== null) {
                if (match[0].length === 0) {
                    // Defensive: never loop forever on a zero-length match.
                    regex.lastIndex += 1;
                    continue;
                }

                matches.push({
                    word: word,
                    found: match[0],
                    index: match.index,
                    severity: severity,
                    category: category
                });

                // Translate the normalized range back to original offsets.
                const lastIndex = match.index + match[0].length - 1;
                spans.push([starts[match.index], ends[lastIndex]]);
            }
        }

        // Mask the original text in one pass; overlapping spans are merged.
        spans.sort((a, b) => a[0] - b[0]);
        let filteredText = '';
        let cursor = 0;
        for (const [start, end] of spans) {
            if (end <= cursor) {
                continue;
            }
            const from = Math.max(start, cursor);
            filteredText += original.slice(cursor, from) + mask.repeat(end - from);
            cursor = end;
        }
        filteredText += original.slice(cursor);

        // Determine overall severity
        let overallSeverity = 'none';
        if (matches.length > 0) {
            if (matches.some(m => m.severity === 'high')) {
                overallSeverity = 'high';
            } else if (matches.some(m => m.severity === 'medium')) {
                overallSeverity = 'medium';
            } else {
                overallSeverity = 'low';
            }
        }

        return {
            hasProfanity: matches.length > 0,
            matches: matches,
            severity: overallSeverity,
            count: matches.length,
            filtered: filteredText
        };
    }

    /**
     * Filter profanity from text.
     *
     * @param {string} text
     * @param {string} [replacementChar='*'] - Character used to mask matches.
     * @returns {string} The text with matched regions masked.
     */
    filterProfanity(text, replacementChar = '*') {
        return this.analyzeProfanity(text, replacementChar).filtered;
    }

    /**
     * Add a custom word using the model, then reload the patterns.
     *
     * @param {string} word
     * @param {string} [severity='medium']
     * @param {string} [category='custom']
     * @param {string} [createdBy='admin']
     * @returns {Promise<*>} Whatever the model's `create()` resolves to.
     * @throws {Error} 'Word already exists in the filter' when the underlying
     *   error message contains 'UNIQUE constraint failed'; other errors are rethrown.
     */
    async addCustomWord(word, severity = 'medium', category = 'custom', createdBy = 'admin') {
        try {
            const result = await this.profanityWordModel.create(word, severity, category, createdBy);
            await this.loadCustomWords(); // Reload to update patterns
            return result;
        } catch (err) {
            // Guard the message lookup so a thrown value without a message
            // is rethrown as-is instead of being masked by a TypeError.
            const message = err && typeof err.message === 'string' ? err.message : '';
            if (message.includes('UNIQUE constraint failed')) {
                throw new Error('Word already exists in the filter');
            }
            throw err;
        }
    }

    /**
     * Remove a custom word using the model, then reload the patterns.
     *
     * @param {*} wordId - Identifier passed to the model's `delete()`.
     * @returns {Promise<{deleted: boolean, changes: number}>}
     * @throws {Error} 'Word not found' when the model reports zero changes.
     */
    async removeCustomWord(wordId) {
        const result = await this.profanityWordModel.delete(wordId);
        if (result.changes === 0) {
            throw new Error('Word not found');
        }
        await this.loadCustomWords(); // Reload to update patterns
        return { deleted: true, changes: result.changes };
    }

    /**
     * Get all custom words using the model.
     *
     * @returns {Promise<*>} Whatever the model's `getAll()` resolves to.
     */
    async getCustomWords() {
        return await this.profanityWordModel.getAll();
    }

    /**
     * Update a custom word using the model, then reload the patterns.
     *
     * @param {*} wordId - Identifier passed to the model's `update()`.
     * @param {{word: string, severity: string, category: string}} updates
     * @returns {Promise<{updated: boolean, changes: number}>}
     * @throws {Error} 'Word not found' when the model reports zero changes.
     */
    async updateCustomWord(wordId, updates) {
        const { word, severity, category } = updates;
        const result = await this.profanityWordModel.update(wordId, word, severity, category);
        if (result.changes === 0) {
            throw new Error('Word not found');
        }
        await this.loadCustomWords(); // Reload to update patterns
        return { updated: true, changes: result.changes };
    }
}

// CommonJS export of the class itself; callers construct it with a profanity-word model.
module.exports = ProfanityFilterService;