- Create dedicated ProfanityFilter class with isolated SQLite database
- Separate profanity.db from main application database to prevent SQLITE_MISUSE errors
- Add comprehensive custom word management (CRUD operations)
- Implement advanced profanity detection with leetspeak and pattern matching
- Add admin UI for managing custom profanity words
- Add extensive test suites for both profanity filter and API routes
- Update server.js to use isolated profanity filter
- Add proper database initialization and cleanup methods
- Support in-memory databases for testing

Breaking changes:
- Profanity filter now uses separate database file
- Updated admin API endpoints for profanity management
- Enhanced profanity detection capabilities
512 lines
18 KiB
JavaScript
512 lines
18 KiB
JavaScript
/**
 * Comprehensive Server-Side Profanity Filter for Ice Watch
 * Filters inappropriate language with database-backed custom word management
 */
|
|
|
|
const sqlite3 = require('sqlite3').verbose();
|
|
const path = require('path');
|
|
|
|
class ProfanityFilter {
|
|
constructor(dbPath = null) {
|
|
// Initialize separate database for profanity filter
|
|
const defaultDbPath = path.join(__dirname, 'profanity.db');
|
|
this.dbPath = dbPath || defaultDbPath;
|
|
this.db = new sqlite3.Database(this.dbPath);
|
|
|
|
// Base profanity words - comprehensive list
|
|
this.baseProfanityWords = [
|
|
// Common profanity
|
|
'damn', 'hell', 'crap', 'shit', 'fuck', 'ass', 'bitch', 'bastard',
|
|
'piss', 'whore', 'slut', 'retard', 'fag', 'gay', 'homo', 'tranny',
|
|
'dickhead', 'asshole', 'motherfucker', 'cocksucker', 'twat', 'cunt',
|
|
|
|
// Racial slurs and hate speech
|
|
'nigger', 'nigga', 'spic', 'wetback', 'chink', 'gook', 'kike',
|
|
'raghead', 'towelhead', 'beaner', 'cracker', 'honkey', 'whitey',
|
|
'kyke', 'jigaboo', 'coon', 'darkie', 'mammy', 'pickaninny',
|
|
|
|
// Sexual content
|
|
'penis', 'vagina', 'boob', 'tit', 'cock', 'dick', 'pussy', 'cum',
|
|
'sex', 'porn', 'nude', 'naked', 'horny', 'masturbate', 'orgasm',
|
|
'blowjob', 'handjob', 'anal', 'penetration', 'erection', 'climax',
|
|
|
|
// Violence and threats
|
|
'kill', 'murder', 'shoot', 'bomb', 'terrorist', 'suicide', 'rape',
|
|
'violence', 'assault', 'attack', 'threat', 'harm', 'hurt', 'pain',
|
|
'stab', 'strangle', 'torture', 'execute', 'assassinate', 'slaughter',
|
|
|
|
// Drugs and substances
|
|
'weed', 'marijuana', 'cocaine', 'heroin', 'meth', 'drugs', 'high',
|
|
'stoned', 'drunk', 'alcohol', 'beer', 'liquor', 'vodka', 'whiskey',
|
|
'ecstasy', 'lsd', 'crack', 'dope', 'pot', 'joint', 'bong',
|
|
|
|
// Religious/cultural insults
|
|
'jesus christ', 'goddamn', 'christ almighty', 'holy shit', 'god damn',
|
|
'for christ sake', 'jesus fucking christ', 'holy fuck',
|
|
|
|
// Body parts (inappropriate context)
|
|
'testicles', 'balls', 'scrotum', 'clitoris', 'labia', 'anus',
|
|
'rectum', 'butthole', 'nipples', 'breasts',
|
|
|
|
// Misc inappropriate
|
|
'wtf', 'omfg', 'stfu', 'gtfo', 'milf', 'dilf', 'thot', 'simp',
|
|
'incel', 'chad', 'beta', 'alpha male', 'mansplain', 'karen'
|
|
];
|
|
|
|
// Leetspeak and common substitutions
|
|
this.leetMap = {
|
|
'0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '6': 'g', '7': 't',
|
|
'8': 'b', '9': 'g', '@': 'a', '$': 's', '!': 'i', '+': 't', '*': 'a',
|
|
'%': 'a', '(': 'c', ')': 'c', '&': 'a', '#': 'h', '|': 'l', '\\': '/'
|
|
};
|
|
|
|
// Initialize database and load custom words
|
|
this.customWords = [];
|
|
this.initializeDatabase();
|
|
this.loadCustomWords();
|
|
|
|
// Build patterns
|
|
this.patterns = this.buildPatterns();
|
|
}
|
|
|
|
/**
|
|
* Initialize the database table for custom profanity words
|
|
*/
|
|
initializeDatabase() {
|
|
this.db.serialize(() => {
|
|
this.db.run(`CREATE TABLE IF NOT EXISTS profanity_words (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
word TEXT NOT NULL UNIQUE,
|
|
severity TEXT DEFAULT 'medium',
|
|
category TEXT DEFAULT 'custom',
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
created_by TEXT DEFAULT 'admin'
|
|
)`, (err) => {
|
|
if (err) {
|
|
console.error('Error creating profanity_words table:', err);
|
|
} else {
|
|
console.log('Profanity words table initialized successfully');
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Load custom words from database
|
|
*/
|
|
async loadCustomWords() {
|
|
return new Promise((resolve, reject) => {
|
|
this.db.all(
|
|
'SELECT word, severity, category FROM profanity_words',
|
|
[],
|
|
(err, rows) => {
|
|
if (err) {
|
|
console.error('Error loading custom profanity words:', err);
|
|
reject(err);
|
|
return;
|
|
}
|
|
|
|
this.customWords = rows.map(row => ({
|
|
word: row.word.toLowerCase(),
|
|
severity: row.severity,
|
|
category: row.category
|
|
}));
|
|
|
|
console.log(`Loaded ${this.customWords.length} custom profanity words`);
|
|
this.patterns = this.buildPatterns(); // Rebuild patterns with custom words
|
|
resolve();
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Get all profanity words (base + custom)
|
|
*/
|
|
getAllWords() {
|
|
const baseWords = this.baseProfanityWords.map(word => ({
|
|
word: word.toLowerCase(),
|
|
severity: this.getSeverity(word),
|
|
category: 'base'
|
|
}));
|
|
|
|
return [...baseWords, ...this.customWords];
|
|
}
|
|
|
|
/**
|
|
* Build regex patterns for profanity detection
|
|
*/
|
|
buildPatterns() {
|
|
const allWords = this.getAllWords();
|
|
|
|
return allWords.map(wordObj => {
|
|
const word = wordObj.word;
|
|
// Create simple word boundary pattern first
|
|
let pattern = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // Escape special chars
|
|
|
|
// For simple detection, use word boundaries
|
|
const simpleRegex = new RegExp(`\\b${pattern}\\b`, 'gi');
|
|
|
|
// Also create pattern with character substitutions for advanced detection
|
|
const advancedPattern = word.split('').map(char => {
|
|
const substitutes = this.getCharSubstitutes(char);
|
|
if (substitutes.length > 1) {
|
|
// Escape special regex characters in substitutes
|
|
const escapedSubs = substitutes.map(s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
|
|
return `[${escapedSubs.join('')}]`;
|
|
}
|
|
return char;
|
|
}).join('[\\s\\-_\\.,;:!?]*?');
|
|
|
|
const advancedRegex = new RegExp(`\\b${advancedPattern}\\b`, 'gi');
|
|
|
|
return {
|
|
regex: simpleRegex,
|
|
advancedRegex: advancedRegex,
|
|
word: word,
|
|
severity: wordObj.severity,
|
|
category: wordObj.category
|
|
};
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Get character substitutes including leetspeak
|
|
*/
|
|
getCharSubstitutes(char) {
|
|
const substitutes = [char];
|
|
|
|
// Add leetspeak equivalents
|
|
Object.keys(this.leetMap).forEach(leet => {
|
|
if (this.leetMap[leet] === char) {
|
|
substitutes.push(leet);
|
|
}
|
|
});
|
|
|
|
// Add common character substitutions
|
|
const charMap = {
|
|
'a': ['@', '4', '*'],
|
|
'e': ['3'],
|
|
'i': ['1', '!', '|'],
|
|
'o': ['0'],
|
|
's': ['$', '5'],
|
|
't': ['7', '+'],
|
|
'g': ['6', '9'],
|
|
'b': ['8'],
|
|
'l': ['1', '|']
|
|
};
|
|
|
|
if (charMap[char]) {
|
|
substitutes.push(...charMap[char]);
|
|
}
|
|
|
|
return [...new Set(substitutes)]; // Remove duplicates
|
|
}
|
|
|
|
/**
|
|
* Normalize text to handle various obfuscation attempts
|
|
*/
|
|
normalizeText(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
let normalized = text.toLowerCase();
|
|
|
|
// Replace leetspeak characters - escape special regex characters properly
|
|
Object.keys(this.leetMap).forEach(leet => {
|
|
const escapedLeet = leet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
normalized = normalized.replace(new RegExp(escapedLeet, 'g'), this.leetMap[leet]);
|
|
});
|
|
|
|
// Handle spaced out words (f u c k -> fuck) - more comprehensive
|
|
normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4$5');
|
|
normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3$4');
|
|
normalized = normalized.replace(/\b([a-z])\s+([a-z])\s+([a-z])\b/g, '$1$2$3');
|
|
|
|
// Remove excessive punctuation but keep word boundaries
|
|
normalized = normalized.replace(/[^\w\s]/g, ' ').replace(/\s+/g, ' ').trim();
|
|
|
|
return normalized;
|
|
}
|
|
|
|
/**
|
|
* Check if text contains profanity
|
|
*/
|
|
containsProfanity(text) {
|
|
if (!text || typeof text !== 'string') return false;
|
|
|
|
const normalizedText = this.normalizeText(text);
|
|
const originalText = text.toLowerCase();
|
|
|
|
return this.patterns.some(pattern => {
|
|
return pattern.regex.test(normalizedText) ||
|
|
pattern.regex.test(originalText) ||
|
|
(pattern.advancedRegex && pattern.advancedRegex.test(normalizedText));
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Get detailed analysis of profanity in text
|
|
*/
|
|
analyzeProfanity(text) {
|
|
if (!text || typeof text !== 'string') {
|
|
return {
|
|
hasProfanity: false,
|
|
matches: [],
|
|
severity: 'none',
|
|
count: 0,
|
|
filtered: ''
|
|
};
|
|
}
|
|
|
|
const normalizedText = this.normalizeText(text);
|
|
const originalText = text.toLowerCase();
|
|
const matches = [];
|
|
let filteredText = text;
|
|
|
|
this.patterns.forEach(pattern => {
|
|
let found = null;
|
|
let matchedText = '';
|
|
|
|
// Try simple regex first on both normalized and original text
|
|
found = normalizedText.match(pattern.regex);
|
|
if (found) {
|
|
matchedText = normalizedText;
|
|
} else {
|
|
found = originalText.match(pattern.regex);
|
|
if (found) {
|
|
matchedText = originalText;
|
|
}
|
|
}
|
|
|
|
// If not found, try advanced regex
|
|
if (!found && pattern.advancedRegex) {
|
|
found = normalizedText.match(pattern.advancedRegex);
|
|
if (found) {
|
|
matchedText = normalizedText;
|
|
} else {
|
|
found = originalText.match(pattern.advancedRegex);
|
|
if (found) {
|
|
matchedText = originalText;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (found) {
|
|
// Only add if not already detected
|
|
if (!matches.some(m => m.word === pattern.word)) {
|
|
matches.push({
|
|
word: pattern.word,
|
|
matches: found,
|
|
severity: pattern.severity,
|
|
category: pattern.category
|
|
});
|
|
}
|
|
|
|
// Replace in original text - try multiple patterns
|
|
const wordPattern = pattern.word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
const simplePattern = new RegExp(`\\b${wordPattern}\\b`, 'gi');
|
|
filteredText = filteredText.replace(simplePattern, '*'.repeat(Math.max(3, pattern.word.length)));
|
|
|
|
// Also try to replace with the exact matches found
|
|
found.forEach(match => {
|
|
const exactPattern = new RegExp(match.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
|
|
filteredText = filteredText.replace(exactPattern, '*'.repeat(Math.max(3, match.length)));
|
|
});
|
|
|
|
// Also try to replace with advanced pattern
|
|
if (pattern.advancedRegex) {
|
|
filteredText = filteredText.replace(pattern.advancedRegex, '*'.repeat(Math.max(3, pattern.word.length)));
|
|
}
|
|
}
|
|
});
|
|
|
|
const hasProfanity = matches.length > 0;
|
|
const maxSeverity = hasProfanity ?
|
|
Math.max(...matches.map(m => this.getSeverityLevel(m.severity))) : 0;
|
|
|
|
return {
|
|
hasProfanity,
|
|
matches,
|
|
severity: this.getSeverityName(maxSeverity),
|
|
count: matches.length,
|
|
filtered: filteredText
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Filter profanity from text
|
|
*/
|
|
filterProfanity(text, replacement = '*') {
|
|
const analysis = this.analyzeProfanity(text);
|
|
return analysis.filtered;
|
|
}
|
|
|
|
/**
|
|
* Get severity level for a word
|
|
*/
|
|
getSeverity(word) {
|
|
// High severity: hate speech, extreme profanity, threats
|
|
const highSeverity = [
|
|
'nigger', 'nigga', 'kill', 'murder', 'shoot', 'bomb', 'terrorist',
|
|
'rape', 'kike', 'fag', 'raghead', 'towelhead', 'motherfucker',
|
|
'cunt', 'cocksucker', 'jigaboo', 'coon', 'execute', 'assassinate'
|
|
];
|
|
|
|
// Medium severity: sexual content, moderate profanity
|
|
const mediumSeverity = [
|
|
'fuck', 'shit', 'bitch', 'whore', 'slut', 'penis', 'vagina',
|
|
'cock', 'dick', 'pussy', 'cum', 'sex', 'porn', 'asshole',
|
|
'dickhead', 'twat', 'blowjob', 'handjob'
|
|
];
|
|
|
|
if (highSeverity.includes(word.toLowerCase())) return 'high';
|
|
if (mediumSeverity.includes(word.toLowerCase())) return 'medium';
|
|
return 'low';
|
|
}
|
|
|
|
/**
|
|
* Get numeric severity level
|
|
*/
|
|
getSeverityLevel(severity) {
|
|
switch (severity) {
|
|
case 'high': return 3;
|
|
case 'medium': return 2;
|
|
case 'low': return 1;
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get severity name from level
|
|
*/
|
|
getSeverityName(level) {
|
|
switch (level) {
|
|
case 3: return 'high';
|
|
case 2: return 'medium';
|
|
case 1: return 'low';
|
|
default: return 'none';
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Add a custom word to the database
|
|
*/
|
|
async addCustomWord(word, severity = 'medium', category = 'custom', createdBy = 'admin') {
|
|
return new Promise((resolve, reject) => {
|
|
const normalizedWord = word.toLowerCase().trim();
|
|
|
|
this.db.run(
|
|
'INSERT INTO profanity_words (word, severity, category, created_by) VALUES (?, ?, ?, ?)',
|
|
[normalizedWord, severity, category, createdBy],
|
|
function(err) {
|
|
if (err) {
|
|
if (err.message.includes('UNIQUE constraint failed')) {
|
|
reject(new Error('Word already exists in the filter'));
|
|
} else {
|
|
reject(err);
|
|
}
|
|
return;
|
|
}
|
|
|
|
console.log(`Added custom profanity word: ${normalizedWord}`);
|
|
resolve({
|
|
id: this.lastID,
|
|
word: normalizedWord,
|
|
severity,
|
|
category,
|
|
created_by: createdBy
|
|
});
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Remove a custom word from the database
|
|
*/
|
|
async removeCustomWord(wordId) {
|
|
return new Promise((resolve, reject) => {
|
|
this.db.run(
|
|
'DELETE FROM profanity_words WHERE id = ?',
|
|
[wordId],
|
|
function(err) {
|
|
if (err) {
|
|
reject(err);
|
|
return;
|
|
}
|
|
|
|
if (this.changes === 0) {
|
|
reject(new Error('Word not found'));
|
|
return;
|
|
}
|
|
|
|
console.log(`Removed custom profanity word with ID: ${wordId}`);
|
|
resolve({ deleted: true, changes: this.changes });
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Get all custom words from database
|
|
*/
|
|
async getCustomWords() {
|
|
return new Promise((resolve, reject) => {
|
|
this.db.all(
|
|
'SELECT id, word, severity, category, created_at, created_by FROM profanity_words ORDER BY created_at DESC',
|
|
[],
|
|
(err, rows) => {
|
|
if (err) {
|
|
reject(err);
|
|
return;
|
|
}
|
|
resolve(rows);
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Update a custom word
|
|
*/
|
|
async updateCustomWord(wordId, updates) {
|
|
return new Promise((resolve, reject) => {
|
|
const { word, severity, category } = updates;
|
|
|
|
this.db.run(
|
|
'UPDATE profanity_words SET word = ?, severity = ?, category = ? WHERE id = ?',
|
|
[word.toLowerCase().trim(), severity, category, wordId],
|
|
function(err) {
|
|
if (err) {
|
|
reject(err);
|
|
return;
|
|
}
|
|
|
|
if (this.changes === 0) {
|
|
reject(new Error('Word not found'));
|
|
return;
|
|
}
|
|
|
|
console.log(`Updated custom profanity word with ID: ${wordId}`);
|
|
resolve({ updated: true, changes: this.changes });
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Close the database connection
|
|
*/
|
|
close() {
|
|
if (this.db) {
|
|
this.db.close((err) => {
|
|
if (err) {
|
|
console.error('Error closing profanity filter database:', err);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// Export the class itself; each caller constructs its own instance (optionally passing a dbPath).
module.exports = ProfanityFilter;
|