ice/profanity-filter.js
Deco Vander c7f39e4939 feat: isolate profanity filter with separate database
- Create dedicated ProfanityFilter class with isolated SQLite database
- Separate profanity.db from main application database to prevent SQLITE_MISUSE errors
- Add comprehensive custom word management (CRUD operations)
- Implement advanced profanity detection with leetspeak and pattern matching
- Add admin UI for managing custom profanity words
- Add extensive test suites for both profanity filter and API routes
- Update server.js to use isolated profanity filter
- Add proper database initialization and cleanup methods
- Support in-memory databases for testing

Breaking changes:
- Profanity filter now uses separate database file
- Updated admin API endpoints for profanity management
- Enhanced profanity detection capabilities
2025-07-04 00:03:24 -04:00

512 lines
18 KiB
JavaScript

/**
* Comprehensive Server-Side Profanity Filter for Ice Watch
* Filters inappropriate language with database-backed custom word management
*/
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
/**
 * Profanity detector/filter with a built-in base word list plus custom words
 * stored in a dedicated SQLite database (table `profanity_words`).
 *
 * Detection runs every word through two regexes: a plain whole-word match and
 * an "advanced" match that tolerates leetspeak substitutions and separator
 * characters between letters.
 *
 * Note: addCustomWord / updateCustomWord / removeCustomWord only change the
 * database. The in-memory word list and patterns are refreshed by
 * loadCustomWords().
 */
class ProfanityFilter {
  /**
   * @param {string|null} [dbPath=null] - SQLite file path; falls back to
   *   `profanity.db` next to this module when falsy.
   *
   * Base-word patterns are usable as soon as the constructor returns. Custom
   * words are loaded asynchronously; await `this.ready` to know that the load
   * attempt has finished (it never rejects - failures are logged).
   */
  constructor(dbPath = null) {
    // Initialize separate database for profanity filter
    const defaultDbPath = path.join(__dirname, 'profanity.db');
    this.dbPath = dbPath || defaultDbPath;
    this.db = new sqlite3.Database(this.dbPath);

    // Base profanity words - comprehensive list
    this.baseProfanityWords = [
      // Common profanity
      'damn', 'hell', 'crap', 'shit', 'fuck', 'ass', 'bitch', 'bastard',
      'piss', 'whore', 'slut', 'retard', 'fag', 'gay', 'homo', 'tranny',
      'dickhead', 'asshole', 'motherfucker', 'cocksucker', 'twat', 'cunt',
      // Racial slurs and hate speech
      'nigger', 'nigga', 'spic', 'wetback', 'chink', 'gook', 'kike',
      'raghead', 'towelhead', 'beaner', 'cracker', 'honkey', 'whitey',
      'kyke', 'jigaboo', 'coon', 'darkie', 'mammy', 'pickaninny',
      // Sexual content
      'penis', 'vagina', 'boob', 'tit', 'cock', 'dick', 'pussy', 'cum',
      'sex', 'porn', 'nude', 'naked', 'horny', 'masturbate', 'orgasm',
      'blowjob', 'handjob', 'anal', 'penetration', 'erection', 'climax',
      // Violence and threats
      'kill', 'murder', 'shoot', 'bomb', 'terrorist', 'suicide', 'rape',
      'violence', 'assault', 'attack', 'threat', 'harm', 'hurt', 'pain',
      'stab', 'strangle', 'torture', 'execute', 'assassinate', 'slaughter',
      // Drugs and substances
      'weed', 'marijuana', 'cocaine', 'heroin', 'meth', 'drugs', 'high',
      'stoned', 'drunk', 'alcohol', 'beer', 'liquor', 'vodka', 'whiskey',
      'ecstasy', 'lsd', 'crack', 'dope', 'pot', 'joint', 'bong',
      // Religious/cultural insults
      'jesus christ', 'goddamn', 'christ almighty', 'holy shit', 'god damn',
      'for christ sake', 'jesus fucking christ', 'holy fuck',
      // Body parts (inappropriate context)
      'testicles', 'balls', 'scrotum', 'clitoris', 'labia', 'anus',
      'rectum', 'butthole', 'nipples', 'breasts',
      // Misc inappropriate
      'wtf', 'omfg', 'stfu', 'gtfo', 'milf', 'dilf', 'thot', 'simp',
      'incel', 'chad', 'beta', 'alpha male', 'mansplain', 'karen'
    ];

    // Leetspeak and common substitutions (obfuscated char -> plain char)
    this.leetMap = {
      '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '6': 'g', '7': 't',
      '8': 'b', '9': 'g', '@': 'a', '$': 's', '!': 'i', '+': 't', '*': 'a',
      '%': 'a', '(': 'c', ')': 'c', '&': 'a', '#': 'h', '|': 'l', '\\': '/'
    };

    this.customWords = [];
    // Patterns for the base list are built synchronously so the filter is
    // usable immediately; loadCustomWords() rebuilds them once rows arrive.
    this.patterns = this.buildPatterns();

    // Create the table first, then load custom words, so the SELECT is never
    // issued before the CREATE TABLE has completed. The catch keeps a failed
    // load from surfacing as an unhandled promise rejection.
    this.ready = Promise.resolve(this.initializeDatabase())
      .then(() => this.loadCustomWords())
      .catch((err) => {
        console.error('Profanity filter could not load custom words; using base list only:', err);
      });
  }

  /**
   * Create the `profanity_words` table if it does not exist yet.
   *
   * @returns {Promise<void>} Resolves once the CREATE statement's callback has
   *   run. A failure is logged (as before) and does not reject.
   */
  initializeDatabase() {
    return new Promise((resolve) => {
      this.db.serialize(() => {
        this.db.run(`CREATE TABLE IF NOT EXISTS profanity_words (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          word TEXT NOT NULL UNIQUE,
          severity TEXT DEFAULT 'medium',
          category TEXT DEFAULT 'custom',
          created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
          created_by TEXT DEFAULT 'admin'
        )`, (err) => {
          if (err) {
            console.error('Error creating profanity_words table:', err);
          } else {
            console.log('Profanity words table initialized successfully');
          }
          resolve();
        });
      });
    });
  }

  /**
   * Load custom words from the database into `this.customWords` and rebuild
   * `this.patterns`.
   *
   * @returns {Promise<void>} Rejects with the database error, or with any
   *   error raised while rebuilding the patterns.
   */
  async loadCustomWords() {
    return new Promise((resolve, reject) => {
      this.db.all(
        'SELECT word, severity, category FROM profanity_words',
        [],
        (err, rows) => {
          if (err) {
            console.error('Error loading custom profanity words:', err);
            reject(err);
            return;
          }
          // Anything thrown inside this driver callback would otherwise be an
          // uncaught exception, so convert it into a rejection.
          try {
            this.customWords = rows.map((row) => ({
              word: row.word.toLowerCase(),
              severity: row.severity,
              category: row.category
            }));
            console.log(`Loaded ${this.customWords.length} custom profanity words`);
            this.patterns = this.buildPatterns(); // Rebuild patterns with custom words
            resolve();
          } catch (buildErr) {
            reject(buildErr);
          }
        }
      );
    });
  }

  /**
   * Get all profanity words (base + custom).
   *
   * @returns {Array<{word: string, severity: string, category: string}>}
   *   Base words first (category 'base', severity from getSeverity), followed
   *   by the currently loaded custom words.
   */
  getAllWords() {
    const baseWords = this.baseProfanityWords.map((word) => ({
      word: word.toLowerCase(),
      severity: this.getSeverity(word),
      category: 'base'
    }));
    return [...baseWords, ...this.customWords];
  }

  /**
   * Build the detection regexes for every known word.
   *
   * For each word two case-insensitive global regexes are produced:
   *  - `regex`: the literal word, not adjacent to other word characters;
   *  - `advancedRegex`: each letter widened to its substitutes (see
   *    getCharSubstitutes) with optional separators between letters.
   *
   * Empty / whitespace-only words are skipped: their pattern would match
   * nearly any text.
   *
   * @returns {Array<{regex: RegExp, advancedRegex: RegExp, word: string, severity: string, category: string}>}
   */
  buildPatterns() {
    const escapeRegex = (text) => text.replace(/[.*+?^${}()|[\]\\\-]/g, '\\$&');
    // Lookarounds instead of \b: \b only works when the matched text starts
    // and ends with a word character, which is not true for leet characters
    // such as '$' or '@', nor for words like "c++".
    const wordStart = '(?<!\\w)';
    const wordEnd = '(?!\\w)';
    const separator = '[\\s\\-_\\.,;:!?]*?';

    return this.getAllWords()
      .filter((wordObj) => typeof wordObj.word === 'string' && wordObj.word.trim().length > 0)
      .map((wordObj) => {
        const word = wordObj.word;
        const simpleRegex = new RegExp(`${wordStart}${escapeRegex(word)}${wordEnd}`, 'gi');

        const advancedPattern = word.split('').map((char) => {
          const substitutes = this.getCharSubstitutes(char);
          if (substitutes.length > 1) {
            return `[${substitutes.map(escapeRegex).join('')}]`;
          }
          // A lone character must be escaped too, otherwise a word containing
          // a regex metacharacter produces an invalid pattern.
          return escapeRegex(char);
        }).join(separator);
        const advancedRegex = new RegExp(`${wordStart}${advancedPattern}${wordEnd}`, 'gi');

        return {
          regex: simpleRegex,
          advancedRegex: advancedRegex,
          word: word,
          severity: wordObj.severity,
          category: wordObj.category
        };
      });
  }

  /**
   * Get the characters that may stand in for `char`, including `char` itself.
   *
   * @param {string} char - A single (lower-case) character of a word.
   * @returns {string[]} De-duplicated list: the character, every leetMap key
   *   that maps to it, and the extra entries of the local substitution table.
   */
  getCharSubstitutes(char) {
    const substitutes = [char];

    // Add leetspeak equivalents
    Object.keys(this.leetMap).forEach((leet) => {
      if (this.leetMap[leet] === char) {
        substitutes.push(leet);
      }
    });

    // Add common character substitutions
    const charMap = {
      'a': ['@', '4', '*'],
      'e': ['3'],
      'i': ['1', '!', '|'],
      'o': ['0'],
      's': ['$', '5'],
      't': ['7', '+'],
      'g': ['6', '9'],
      'b': ['8'],
      'l': ['1', '|']
    };
    if (charMap[char]) {
      substitutes.push(...charMap[char]);
    }

    return [...new Set(substitutes)]; // Remove duplicates
  }

  /**
   * Normalize text to undo common obfuscation before matching.
   *
   * Steps: lower-case; replace every leetMap key with its plain character
   * (this also rewrites ordinary digits and punctuation that appear in the
   * map); collapse runs of 3-5 single letters separated by whitespace
   * ("f u c k" -> "fuck"); turn remaining punctuation into spaces and squeeze
   * whitespace.
   *
   * @param {string} text
   * @returns {string} Normalized text, or '' for empty / non-string input.
   */
  normalizeText(text) {
    if (!text || typeof text !== 'string') return '';

    let normalized = text.toLowerCase();

    // Literal replacement of each leet character (no regex needed)
    Object.keys(this.leetMap).forEach((leet) => {
      normalized = normalized.split(leet).join(this.leetMap[leet]);
    });

    // Handle spaced out words (f u c k -> fuck); longest run first
    normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4$5');
    normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4');
    normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3');

    // Remove punctuation but keep word boundaries
    normalized = normalized.replace(/[^\w\s]/g, ' ').replace(/\s+/g, ' ').trim();

    return normalized;
  }

  /**
   * Check whether text contains profanity.
   *
   * Uses the same four checks as analyzeProfanity (simple and advanced regex,
   * each against normalized and lower-cased original text) so both methods
   * agree on every input.
   *
   * @param {string} text
   * @returns {boolean} false for empty / non-string input.
   */
  containsProfanity(text) {
    if (!text || typeof text !== 'string') return false;

    const normalizedText = this.normalizeText(text);
    const originalText = text.toLowerCase();

    // The stored regexes carry the /g flag, so RegExp#test is stateful via
    // lastIndex. Reset it around every test so results never depend on
    // earlier calls.
    const hits = (regex, input) => {
      regex.lastIndex = 0;
      const matched = regex.test(input);
      regex.lastIndex = 0;
      return matched;
    };

    return this.patterns.some((pattern) => {
      if (hits(pattern.regex, normalizedText) || hits(pattern.regex, originalText)) {
        return true;
      }
      return Boolean(pattern.advancedRegex) &&
        (hits(pattern.advancedRegex, normalizedText) || hits(pattern.advancedRegex, originalText));
    });
  }

  /**
   * Get a detailed analysis of profanity in text.
   *
   * @param {string} text
   * @param {string} [replacement='*'] - Mask used in `filtered`. A single
   *   character is repeated to the word's length (minimum 3); a longer string
   *   is inserted as-is once per masked word.
   * @returns {{hasProfanity: boolean, matches: Array<{word: string, matches: string[], severity: string, category: string}>, severity: string, count: number, filtered: string}}
   *   `severity` is the highest severity among the matches ('none' if there
   *   are none); `count` is the number of distinct matched words.
   */
  analyzeProfanity(text, replacement = '*') {
    if (!text || typeof text !== 'string') {
      return {
        hasProfanity: false,
        matches: [],
        severity: 'none',
        count: 0,
        filtered: ''
      };
    }

    const maskToken = typeof replacement === 'string' && replacement.length > 0 ? replacement : '*';
    const buildMask = (length) => (
      maskToken.length === 1 ? maskToken.repeat(Math.max(3, length)) : maskToken
    );

    const normalizedText = this.normalizeText(text);
    const originalText = text.toLowerCase();
    const matches = [];
    const seenWords = new Set();
    let filteredText = text;

    this.patterns.forEach((pattern) => {
      // Simple regex first (normalized, then original), then the advanced one.
      let found = normalizedText.match(pattern.regex) || originalText.match(pattern.regex);
      if (!found && pattern.advancedRegex) {
        found = normalizedText.match(pattern.advancedRegex) || originalText.match(pattern.advancedRegex);
      }
      if (!found) return;

      // Only report each word once
      if (!seenWords.has(pattern.word)) {
        seenWords.add(pattern.word);
        matches.push({
          word: pattern.word,
          matches: found,
          severity: pattern.severity,
          category: pattern.category
        });
      }

      // Mask occurrences in the caller's text. Only the boundary-aware
      // patterns are used, so a standalone match never masks the same letters
      // inside a longer, unrelated word. A replacer function is used so '$'
      // in a custom mask is not treated as a replacement pattern.
      const mask = buildMask(pattern.word.length);
      filteredText = filteredText.replace(pattern.regex, () => mask);
      if (pattern.advancedRegex) {
        filteredText = filteredText.replace(pattern.advancedRegex, () => mask);
      }
    });

    const hasProfanity = matches.length > 0;
    const maxSeverity = hasProfanity
      ? Math.max(...matches.map((m) => this.getSeverityLevel(m.severity)))
      : 0;

    return {
      hasProfanity,
      matches,
      severity: this.getSeverityName(maxSeverity),
      count: matches.length,
      filtered: filteredText
    };
  }

  /**
   * Filter profanity from text.
   *
   * @param {string} text
   * @param {string} [replacement='*'] - Mask; see analyzeProfanity.
   * @returns {string} Text with matched words masked ('' for empty /
   *   non-string input).
   */
  filterProfanity(text, replacement = '*') {
    return this.analyzeProfanity(text, replacement).filtered;
  }

  /**
   * Get the severity of a base word.
   *
   * @param {string} word
   * @returns {'high'|'medium'|'low'} 'low' for any word not in the two lists.
   */
  getSeverity(word) {
    // High severity: hate speech, extreme profanity, threats
    const highSeverity = [
      'nigger', 'nigga', 'kill', 'murder', 'shoot', 'bomb', 'terrorist',
      'rape', 'kike', 'fag', 'raghead', 'towelhead', 'motherfucker',
      'cunt', 'cocksucker', 'jigaboo', 'coon', 'execute', 'assassinate'
    ];
    // Medium severity: sexual content, moderate profanity
    const mediumSeverity = [
      'fuck', 'shit', 'bitch', 'whore', 'slut', 'penis', 'vagina',
      'cock', 'dick', 'pussy', 'cum', 'sex', 'porn', 'asshole',
      'dickhead', 'twat', 'blowjob', 'handjob'
    ];

    const key = word.toLowerCase();
    if (highSeverity.includes(key)) return 'high';
    if (mediumSeverity.includes(key)) return 'medium';
    return 'low';
  }

  /**
   * Get numeric severity level.
   *
   * @param {string} severity
   * @returns {number} 3 (high), 2 (medium), 1 (low) or 0 for anything else.
   */
  getSeverityLevel(severity) {
    switch (severity) {
      case 'high': return 3;
      case 'medium': return 2;
      case 'low': return 1;
      default: return 0;
    }
  }

  /**
   * Get severity name from level.
   *
   * @param {number} level
   * @returns {string} 'high', 'medium', 'low', or 'none' for any other level.
   */
  getSeverityName(level) {
    switch (level) {
      case 3: return 'high';
      case 2: return 'medium';
      case 1: return 'low';
      default: return 'none';
    }
  }

  /**
   * Add a custom word to the database. Does not refresh the in-memory
   * patterns; call loadCustomWords() afterwards to apply it.
   *
   * @param {string} word - Stored lower-cased and trimmed.
   * @param {string} [severity='medium']
   * @param {string} [category='custom']
   * @param {string} [createdBy='admin']
   * @returns {Promise<{id: number, word: string, severity: string, category: string, created_by: string}>}
   *   Rejects if the word is blank / not a string, if it already exists
   *   ('Word already exists in the filter'), or with the database error.
   */
  async addCustomWord(word, severity = 'medium', category = 'custom', createdBy = 'admin') {
    const normalizedWord = typeof word === 'string' ? word.toLowerCase().trim() : '';
    if (!normalizedWord) {
      // A blank word would compile to a pattern that matches almost any text.
      throw new Error('Word must be a non-empty string');
    }

    return new Promise((resolve, reject) => {
      this.db.run(
        'INSERT INTO profanity_words (word, severity, category, created_by) VALUES (?, ?, ?, ?)',
        [normalizedWord, severity, category, createdBy],
        // Must be a regular function: the driver exposes lastID on `this`.
        function(err) {
          if (err) {
            if (err.message.includes('UNIQUE constraint failed')) {
              reject(new Error('Word already exists in the filter'));
            } else {
              reject(err);
            }
            return;
          }
          console.log(`Added custom profanity word: ${normalizedWord}`);
          resolve({
            id: this.lastID,
            word: normalizedWord,
            severity,
            category,
            created_by: createdBy
          });
        }
      );
    });
  }

  /**
   * Remove a custom word from the database. Does not refresh the in-memory
   * patterns; call loadCustomWords() afterwards to apply it.
   *
   * @param {number|string} wordId - Row id of the word.
   * @returns {Promise<{deleted: boolean, changes: number}>} Rejects with
   *   'Word not found' when no row was deleted, or with the database error.
   */
  async removeCustomWord(wordId) {
    return new Promise((resolve, reject) => {
      this.db.run(
        'DELETE FROM profanity_words WHERE id = ?',
        [wordId],
        // Must be a regular function: the driver exposes changes on `this`.
        function(err) {
          if (err) {
            reject(err);
            return;
          }
          if (this.changes === 0) {
            reject(new Error('Word not found'));
            return;
          }
          console.log(`Removed custom profanity word with ID: ${wordId}`);
          resolve({ deleted: true, changes: this.changes });
        }
      );
    });
  }

  /**
   * Get all custom words from the database, newest first.
   *
   * @returns {Promise<Array<object>>} Rows with id, word, severity, category,
   *   created_at and created_by. Rejects with the database error.
   */
  async getCustomWords() {
    return new Promise((resolve, reject) => {
      this.db.all(
        'SELECT id, word, severity, category, created_at, created_by FROM profanity_words ORDER BY created_at DESC',
        [],
        (err, rows) => {
          if (err) {
            reject(err);
            return;
          }
          resolve(rows);
        }
      );
    });
  }

  /**
   * Update a custom word. Fields missing from `updates` keep their stored
   * value. Does not refresh the in-memory patterns; call loadCustomWords()
   * afterwards to apply the change.
   *
   * @param {number|string} wordId - Row id of the word.
   * @param {{word?: string, severity?: string, category?: string}} updates
   * @returns {Promise<{updated: boolean, changes: number}>} Rejects when no
   *   field is supplied, when `word` is supplied but blank / not a string,
   *   with 'Word not found' when no row matched, or with the database error.
   */
  async updateCustomWord(wordId, updates) {
    const { word, severity, category } = updates || {};

    const hasWord = word !== undefined && word !== null;
    let normalizedWord = null;
    if (hasWord) {
      normalizedWord = typeof word === 'string' ? word.toLowerCase().trim() : '';
      if (!normalizedWord) {
        throw new Error('Word must be a non-empty string');
      }
    }
    const nextSeverity = severity === undefined ? null : severity;
    const nextCategory = category === undefined ? null : category;
    if (!hasWord && nextSeverity === null && nextCategory === null) {
      throw new Error('No fields to update');
    }

    return new Promise((resolve, reject) => {
      this.db.run(
        // COALESCE keeps the stored value wherever NULL is bound, which is
        // what makes partial updates possible.
        'UPDATE profanity_words SET word = COALESCE(?, word), severity = COALESCE(?, severity), category = COALESCE(?, category) WHERE id = ?',
        [normalizedWord, nextSeverity, nextCategory, wordId],
        // Must be a regular function: the driver exposes changes on `this`.
        function(err) {
          if (err) {
            reject(err);
            return;
          }
          if (this.changes === 0) {
            reject(new Error('Word not found'));
            return;
          }
          console.log(`Updated custom profanity word with ID: ${wordId}`);
          resolve({ updated: true, changes: this.changes });
        }
      );
    });
  }

  /**
   * Close the database connection.
   *
   * @returns {Promise<void>} Resolves once the close callback has run (or
   *   immediately when there is no connection). A close error is logged and
   *   does not reject.
   */
  close() {
    return new Promise((resolve) => {
      if (!this.db) {
        resolve();
        return;
      }
      this.db.close((err) => {
        if (err) {
          console.error('Error closing profanity filter database:', err);
        }
        resolve();
      });
    });
  }
}
module.exports = ProfanityFilter;