Jump to content

User:Polygnotus/typo.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// I temporarily disabled the ability to detect URLs; see this diff:
//http://en.wiki.x.io/w/index.php?title=User%3APolygnotus%2Ftypo.js&diff=1245273240&oldid=1244969917


// <nowiki>
// http://en.wiki.x.io/w/index.php?title=Giraffe&action=edit&typo=the&typofix=teh
// TODO: perhaps add some unit tests.

(function() {
    // Master switch for diagnostic output; set to true while troubleshooting.
    const DEBUG = false;

    /**
     * Writes the given values to the browser console, prefixed with '[TypoTool]'.
     * Does nothing while DEBUG is false.
     *
     * @param {...*} args - Values forwarded to console.log after the prefix.
     */
    function debug(...args) {
        if (!DEBUG) {
            return;
        }
        console.log('[TypoTool]', ...args);
    }

    debug("Script started");

    const typo = getUrlParameter('typo');
    const typofix = getUrlParameter('typofix');
    if (!typo || !typofix || typo === '' || typofix === '') {
        debug("Typo or typofix parameters missing or empty. Script will not run.");
        return;
    }

    // Set to true by init() so the replacement routine is started at most once,
    // even though init() is wired to more than one page event.
    let hasRun = false;

    // A page whose wikitext matches this pattern is never edited: the editor routine closes the
    // window instead of replacing anything. Alternatives (all case-insensitive):
    //   - "sic" wrapped in [], () or {} (optional inner whitespace), or as a bare word (\bsic\b)
    //   - the entity-encoded forms "&#91;sic&#93;" and "&#93;sic&#93;"
    //   - a {{sic ...}} template
    //   - {{bots}} and {{nobots}}
    //   - any of the templates named in the final alternative (As written, Typo, Proper name, ...),
    //     with or without parameters
    // NOTE(review): "&#93;sic&#93;" uses the closing-bracket entity on both sides and looks like a
    // typo for "&#91;sic&#93;". It is harmless because \bsic\b already matches both — confirm.
    const disqualificationRegex = /((\[\s*sic\s*\]))|(\(\s*sic\s*\))|\{\s*sic\s*\}|\bsic\b|(&#91;sic&#93;)|(&#93;sic&#93;)|((\{\{\s*sic))(.*?)(\}\})|\{\{\s*bots\s*\}\}|\{\{\s*nobots\s*\}\}|\{\{\s*(As written|Typo|Proper name|Notatypo|Not typo|Nat|Propername|Proper noun|Chem name|NAT|Bug workaround)\s*(\|.*?)?\}\}/i;

    // Regions of wikitext in which a typo occurrence must be left alone. shouldReplace()
    // rejects a match whose start index falls inside text matched by one of these patterns.
    // All patterns are case-insensitive and deliberately carry no `g` flag, so they hold
    // no lastIndex state between uses.
    const ignoreRegexes = [
        // <blockquote>...</blockquote>. [\s\S] is used instead of `.` so that a quotation
        // spanning several lines is still recognised (`.` stops at a newline).
        /<blockquote>([\s\S]*?)<\/blockquote>/i,
        // {{DEFAULTSORT:...}}
        /((\{\{\s*DEFAULTSORT\s*:\s*))(.*?)((\}\}))/i,
        // Single-line HTML comment with whitespace padding: <!-- ... -->
        /<!--\s+(.*?)\s+-->/i,
        // [[File:name or [[Image:name, up to the first '.', '|' or ']]'
        /(\[\[(File|Image):(.*?)(\.|\||\]\]))/i,
        // Bare "Image:name." / "Category:name." up to the first '.'
        /Image:(.*?)\./i,
        /Category:(.*?)\./i,
        // Opening <ref name=...> tag (quoted or unquoted name)
        /<\s*ref\s+name\s*=\s*(?:"[^"]*"|'[^']*'|[^\s/>]+)\s*(?:\/?>|\s)/i,
        // <gallery>...</gallery>. Gallery bodies are normally one entry per line, so the
        // body must be allowed to contain newlines. The `i` flag already covers <Gallery>,
        // which is why a single entry replaces the former lower-/upper-case pair.
        /<\s*gallery\s*[\s\S]*?<\/\s*gallery\s*>/i
    ];

    // Earlier, simpler URL pattern, kept for reference only (not used):
    //const urlRegex = /((https?:\/\/)?(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/gi;

    // Matches a URL that starts with "http://", "https://" or "www.". In order, the pattern
    // accepts: optional "user:password@" credentials; then either a dotted IPv4 address (with
    // negative lookaheads rejecting 10.x, 127.x, 169.254.x, 192.168.x and 172.16-31.x) or a
    // host name ending in a top-level label of at least two letters; an optional ":port" of
    // 2-5 digits; and an optional path/query/fragment running to the next whitespace.
    // The regex carries the `g` flag, so exec()/test() advance and remember lastIndex on this
    // shared object between calls; code that stops iterating early must account for that.
    const urlRegex = /\b((?:https?:\/\/|www\.)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?\b)/gi;
    

    // Template names inside which a typo occurrence is left untouched. shouldReplace()
    // compares these case-insensitively against the name of the template enclosing a match.
    // The list is assembled from consecutive segments purely for readability; the resulting
    // array has the same entries in the same order as one flat literal would.
    const ignoreTemplates = [].concat(
        // Judging by the names: quotation, language/transliteration, interlanguage-link
        // and "sic"/"not a typo" style templates, including many redirects.
        [
            "Interlanguage link multi", "Illm", "Bquote", "ILL", "Transl",
            "Quote", "Quotation", "Block quote", "III", "Bq", "Ill2", "Translit",
            "Not translated", "Lang-Latn", "Interlanguage", "\"", "Tlit",
            "Transliterate", "Cita", "Link-interwiki", "Citation bloc", "C quote", "Quotes",
            "Quoteblock", "Cquote2", "Langue", "Interlanguage links", "SIC",
            "ILLM", "LANG", "Blockquotation", "Block quotation", "LAng", "Zitat",
            "Cquotetxt", "Ill-wd", "Ill-WD", "Cquotetext", "PrettyQuotation",
            "Interlanguage link forced", "Lang-xx", "Long quotation", "Epigraph",
            "Red Wikidata link", "InterLanguage Link", "Gquote", "CquoteTxt",
            "Blockquote/old", "ISOtranslit", "RedQ", "Belg", "Coquote", "Imagequote",
            "Block quote next to floating content", "Iw2", "MultiLink", "Gbq",
            "Interlanguage link Wikidata", "Imagequote2", "Interlanguage link",
            "lang", "transliteration", "blockquote", "not a typo", "sic"
        ],
        // Miscellaneous.
        [
            "clarify", "Spoken Wikipedia"
        ],
        // "Multiple image" and what appear to be its many redirects (misspellings included).
        [
            "Multiple image", "Double image", "Triple image", "Doubleimage",
            "Tripleimage", "Multiple images", "Four images", "Auto images", "Autoimages",
            "Dual image", "Mehrere Bilder", "Multipleimage", "Multiple iamge", "MImage",
            "Mimage", "Multimage", "Multiimage", "Mulitple images", "Multi image",
            "Multi Image", "Multimg", "Double image stack", "Vertical images list",
            "Double images", "Multipleimages", "Multiple video", "Mim"
        ],
        // Citations, montages, deletion notices, sort-name helpers.
        [
            "Cite tweet", "cite book", "Photo montage", "Photomontage", "Collage",
            "Proposed deletion/dated", "Proposed deletion", "Wikt-lang",
            "sortname", "sn", "sort name"
        ],
        // "Commons category" and its variants.
        [
            "Commons category", "Commonscat", "Commons cat", "Commons Category",
            "Wikimedia Commons cat", "Cc", "Category commons", "Ccat", "C cat",
            "Commonscategory", "Commonsimages cat", "COMMONSCAT", "Category Commons",
            "Commons-cat", "Wikimedia Commons category"
        ],
        [
            "flagicon image"
        ]
    );

    // Template parameter names whose values are left untouched. shouldReplace() compares
    // these case-insensitively against the name of the parameter a match sits in.
    // Numbered families (image2..image10, flag_p1..flag_p10, ...) are generated instead of
    // being spelled out; the resulting array has the same entries in the same order.
    const ignoreParameters = (function() {
        // prefix + from, prefix + (from + 1), ..., prefix + to
        const numbered = (prefix, from, to) => {
            const names = [];
            for (let n = from; n <= to; n++) {
                names.push(prefix + n);
            }
            return names;
        };

        // last1, first1, last2, first2, ..., last10, first10
        const numberedAuthors = [];
        for (let n = 1; n <= 10; n++) {
            numberedAuthors.push("last" + n, "first" + n);
        }

        return [
            "reason", "trans-title", "first", "last", "name", "photo",
            "image", ...numbered("image", 2, 10),
            "title", "map_image", "image_skyline", "cover", "image_name",
            ...numberedAuthors,
            "logo", "structure1", "structure2", "author", "Ship image", "url",
            "concern", "editor", "editing", "image file", "imagefile", "image_file",
            "archive-url", "lesser", "middle", "quote", "map", "seal",
            "image_map", "image_coat",
            ...numbered("flag_p", 1, 10),
            ...numbered("flag_s", 1, 10),
            ...numbered("image_p", 1, 10),
            ...numbered("image_s", 1, 10),
            ...numbered("ImageFile", 1, 10),
            "range_map", "id"
        ];
    })();

    /**
     * Reads one query-string parameter from the current page URL (location.search).
     *
     * @param {string} name - Parameter name. It is matched literally: every regex
     *     metacharacter in it is escaped before the lookup pattern is built.
     * @returns {string} The decoded value of the first occurrence ('+' is treated as a space),
     *     or '' when the parameter is absent or its value is not valid percent-encoding.
     */
    function getUrlParameter(name) {
        // Escape all regex metacharacters (not just the first '[' / ']') so that a name such
        // as "a.b" cannot accidentally match "aXb".
        const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const pattern = new RegExp('[?&]' + escapedName + '=([^&#]*)');
        const found = pattern.exec(location.search);
        if (found === null) {
            return '';
        }

        try {
            return decodeURIComponent(found[1].replace(/\+/g, ' '));
        } catch (e) {
            if (!(e instanceof URIError)) {
                throw e;
            }
            // A malformed escape (e.g. a truncated "%E0%A4%A") makes decodeURIComponent throw.
            // Treat the parameter as missing rather than letting the whole script die.
            debug("Malformed value for URL parameter", name);
            return '';
        }
    }

    /**
     * Replaces occurrences of `typo` in the wikitext edit box with `typofix`, fills in the edit
     * summary and triggers the "Show changes" button.
     *
     * The edit box (#wpTextbox1) is polled every 500 ms, for up to 20 retries, until it exists
     * and is non-empty. closeWindowWithRetry() is called instead of editing when the text matches
     * disqualificationRegex, when no occurrence was replaced, or when the box never shows up.
     * Each occurrence is offered to shouldReplace(), which can veto it.
     *
     * @param {string} typo - Pattern to look for. It is compiled with new RegExp(typo, 'gi'), so
     *     regex metacharacters in it are interpreted rather than matched literally.
     * @param {string} typofix - Replacement text. Its first letter is upper-cased when the first
     *     character of the matched text equals its own upper-case form; otherwise the whole
     *     replacement is lower-cased.
     */
    function replaceTextInWikitextEditor(typo, typofix) {
        const maxAttempts = 20;
        const attemptInterval = 500; // 0.5 seconds

        // Collects every non-empty match of the typo pattern, in ascending index order.
        function findMatches(text) {
            const typoPattern = new RegExp(typo, 'gi');
            const matches = [];
            let match;
            while ((match = typoPattern.exec(text)) !== null) {
                if (match[0].length === 0) {
                    // A pattern such as "x*" can match the empty string. exec() would then
                    // return the same position forever, so step past it by hand. There is
                    // nothing to replace in an empty match either.
                    typoPattern.lastIndex++;
                    continue;
                }
                matches.push({index: match.index, length: match[0].length, original: match[0]});
            }
            return matches;
        }

        // Builds the replacement for one occurrence, mirroring its leading capitalisation.
        function matchCase(original) {
            const firstChar = original.charAt(0);
            return firstChar === firstChar.toUpperCase()
                ? typofix.charAt(0).toUpperCase() + typofix.slice(1)
                : typofix.toLowerCase();
        }

        function attempt(attemptsLeft) {
            const editTextarea = document.getElementById('wpTextbox1');
            if (!editTextarea || !editTextarea.value) {
                if (attemptsLeft > 0) {
                    setTimeout(function() {
                        attempt(attemptsLeft - 1);
                    }, attemptInterval);
                } else {
                    debug("Edit textarea not found or empty after multiple attempts. Closing window.");
                    closeWindowWithRetry();
                }
                return;
            }

            const currentText = editTextarea.value;

            // Check for automatic disqualification
            if (disqualificationRegex.test(currentText)) {
                debug("Automatic disqualification found. Closing window.");
                closeWindowWithRetry();
                return;
            }

            // Matches must be applied in ascending index order: `offset` is the total length
            // change caused by replacements to the LEFT of the current match, so it is only
            // meaningful when everything already replaced lies before it. (Global exec()
            // matches never overlap, so no extra de-duplication is needed.)
            let newText = currentText;
            let offset = 0;
            let replacementsMade = false;
            for (const match of findMatches(currentText)) {
                const adjustedIndex = match.index + offset;
                if (shouldReplace(newText, adjustedIndex, match.original)) {
                    const replacement = matchCase(match.original);
                    newText = newText.slice(0, adjustedIndex) + replacement + newText.slice(adjustedIndex + match.length);
                    offset += replacement.length - match.length;
                    replacementsMade = true;
                }
            }

            editTextarea.value = newText;

            if (!replacementsMade) {
                debug("No replacements made. Closing window.");
                closeWindowWithRetry();
                return;
            }

            const editSummaryField = document.getElementById('wpSummary');
            if (editSummaryField) {
                editSummaryField.value = typo + ' → ' + typofix;
            }

            const showChangesButton = document.querySelector('input[name="wpDiff"]');
            if (!showChangesButton) {
                debug("Show changes button not found. Unable to submit changes.");
                return;
            }

            showChangesButton.click();
            // NOTE(review): the click above presumably already submits the form; this delayed
            // submit() is kept as a fallback exactly as before — confirm it is still wanted.
            setTimeout(function() {
                const form = showChangesButton.form;
                if (form) form.submit();
            }, 1000);
            debug("Typo replaced and changes submitted.");
        }

        attempt(maxAttempts);
    }

    // Returns the smallest position at or after `from` at which any of `needles` occurs,
    // or -1 when none of them occurs.
    function nearestIndexOf(text, needles, from) {
        let nearest = -1;
        for (const needle of needles) {
            const found = text.indexOf(needle, from);
            if (found !== -1 && (nearest === -1 || found < nearest)) {
                nearest = found;
            }
        }
        return nearest;
    }

    // True when `index` lies after the last `opener` that starts before it AND before the first
    // of `closers` that follows that opener. The closer is searched from the opener, not from
    // `index`, so a region that was already closed before `index` does not count. An opener
    // that is never closed does not count either.
    function isInsideSpan(text, index, opener, closers) {
        const start = text.lastIndexOf(opener, index);
        if (start === -1 || start >= index) {
            return false;
        }
        const end = nearestIndexOf(text, closers, start + opener.length);
        return end !== -1 && index < end;
    }

    // True when the template opened by the '{{' at `openIndex` is listed in ignoreTemplates
    // (case-insensitive). The name runs up to the first '|' or '}}' after it; if neither
    // exists it is taken to end at `index`.
    function isIgnoredTemplateAt(text, openIndex, index) {
        let nameStart = openIndex + 2;
        while (nameStart < text.length && /\s/.test(text[nameStart])) {
            nameStart++;
        }
        let nameEnd = nearestIndexOf(text, ['|', '}}'], nameStart);
        if (nameEnd === -1) {
            nameEnd = index;
        }
        const templateName = text.substring(nameStart, nameEnd).trim().toLowerCase();
        return ignoreTemplates.some(template => template.toLowerCase() === templateName);
    }

    /**
     * Decides whether the typo occurrence that starts at `index` in `text` may be replaced.
     *
     * The occurrence is rejected when it
     *   - sits inside an HTML comment,
     *   - sits inside a <ref ...> opening tag that has a name attribute,
     *   - sits inside a URL (urlRegex),
     *   - sits inside any region matched by one of ignoreRegexes,
     *   - is enclosed, at any nesting level, by a template listed in ignoreTemplates,
     *   - is in the value of a template parameter listed in ignoreParameters,
     *   - equals the fix with its first or its last letter removed,
     *   - sits between "[[File:" and the following "|" or "]]", or
     *   - sits between "| logo =" and the following "|", "]]" or newline.
     *
     * @param {string} text - The full wikitext being edited (with earlier replacements applied).
     * @param {number} index - Start position of the occurrence within `text`.
     * @param {string} match - The matched text itself.
     * @returns {boolean} true when the occurrence may be replaced.
     */
    function shouldReplace(text, index, match) {
        // Check if the match is within an HTML comment
        if (isInsideSpan(text, index, '<!--', ['-->'])) {
            return false;
        }

        // Check if the match is within a ref name <ref name="">
        const refStart = text.lastIndexOf('<ref', index);
        if (refStart !== -1) {
            const refEnd = text.indexOf('>', refStart);
            if (refEnd !== -1 && index > refStart && index < refEnd) {
                const refContent = text.substring(refStart, refEnd);
                // Allow whitespace around '=' ("name = ...") as well as "name=".
                if (/name\s*=/i.test(refContent)) {
                    return false;
                }
            }
        }

        // Check if the match is within a URL. urlRegex is a shared global-flag regex, so its
        // lastIndex must be reset before AND after scanning; otherwise an early exit would
        // make the next call start searching in the middle of the text and miss URLs.
        let insideUrl = false;
        let urlMatch;
        urlRegex.lastIndex = 0;
        while ((urlMatch = urlRegex.exec(text)) !== null) {
            if (urlMatch.index > index) {
                break; // matches come in ascending order; none further on can contain index
            }
            if (index < urlMatch.index + urlMatch[0].length) {
                insideUrl = true;
                break;
            }
        }
        urlRegex.lastIndex = 0;
        if (insideUrl) {
            return false;
        }

        // Check if the match is within any of the ignore regexes. Every occurrence of each
        // pattern is examined (via a private global copy), not just the first one in the text.
        for (const regex of ignoreRegexes) {
            const flags = regex.flags.includes('g') ? regex.flags : regex.flags + 'g';
            const scanner = new RegExp(regex.source, flags);
            let region;
            while ((region = scanner.exec(text)) !== null) {
                if (region[0].length === 0) {
                    scanner.lastIndex++; // never loop on an empty match
                    continue;
                }
                if (region.index > index) {
                    break;
                }
                if (index < region.index + region[0].length) {
                    return false;
                }
            }
        }

        // Check if the match is within an ignored template. Walk left, balancing '}}' against
        // '{{'; every '{{' met at depth 0 opens a template that encloses the match. All
        // enclosing templates are checked, so a match nested in e.g. {{quote|.. {{x|..}} ..}}
        // is still protected by the outer template.
        let templateDepth = 0;
        for (let i = index; i >= 0; i--) {
            if (text.startsWith('}}', i)) {
                templateDepth++;
                i--; // step over both characters of the pair
            } else if (text.startsWith('{{', i)) {
                if (templateDepth > 0) {
                    templateDepth--;
                } else if (isIgnoredTemplateAt(text, i, index)) {
                    return false;
                }
                i--;
            }
        }

        // Check if the match is within an ignored parameter: find the nearest '|' to the left
        // that is not inside a nested template and read the "name =" that follows it.
        let parameterDepth = 0;
        for (let i = index; i >= 0; i--) {
            if (text[i] === '|' && parameterDepth === 0) {
                const parameterContent = text.substring(i + 1, index);
                const equalSignPos = parameterContent.indexOf('=');
                if (equalSignPos !== -1) {
                    const parameterName = parameterContent.substring(0, equalSignPos).trim().toLowerCase();
                    if (ignoreParameters.some(param => param.toLowerCase() === parameterName)) {
                        return false;
                    }
                }
                break;
            } else if (text.startsWith('}}', i)) {
                parameterDepth++;
                i--;
            } else if (text.startsWith('{{', i)) {
                if (parameterDepth === 0) {
                    // Reached the start of the enclosing template without meeting a '|':
                    // the match is not in a parameter value, and anything further left
                    // belongs to unrelated markup.
                    break;
                }
                parameterDepth--;
                i--;
            }
        }

        // Skip if it's just the correct word but with the first letter missing or the last one
        const typofix = getUrlParameter('typofix').toLowerCase();
        const matchLower = match.toLowerCase();
        if (matchLower === typofix.slice(1) || matchLower === typofix.slice(0, -1)) {
            return false;
        }

        // Skip anything between [[File: and | or ]]
        if (isInsideSpan(text, index, '[[File:', ['|', ']]'])) {
            return false;
        }

        // Skip anything between | logo = and | or ]] or a newline
        if (isInsideSpan(text, index, '| logo =', ['|', ']]', '\n'])) {
            return false;
        }

        return true;
    }

    /**
     * Tries to close the current window, retrying every 100 ms while it is still open.
     *
     * window.close() normally does not throw when the browser refuses to close the window —
     * the call is simply ignored — so an exception alone cannot be used to detect failure.
     * After each attempt window.closed is checked, and another attempt is made while the
     * window is still open and attempts remain.
     *
     * @param {number} [attempts=50] - Maximum number of close attempts still allowed.
     */
    function closeWindowWithRetry(attempts = 50) {
        if (attempts <= 0) {
            debug("Failed to close window after multiple attempts.");
            return;
        }

        try {
            window.close();
        } catch (e) {
            debug("Failed to close window. Retrying...");
        }

        // If the close succeeded, this timer either never fires or finds the window closed.
        setTimeout(() => {
            if (!window.closed) {
                closeWindowWithRetry(attempts - 1);
            }
        }, 100);
    }

    /**
     * Entry point: starts the replacement routine with the typo / typofix values read from
     * the URL. It may be called more than once; only the first call has any effect,
     * guarded by the hasRun flag.
     */
    function init() {
        if (!hasRun) {
            hasRun = true;
            replaceTextInWikitextEditor(typo, typofix);
        }
    }

    // Start as early as possible: right away when the document has already been parsed,
    // otherwise on DOMContentLoaded. The load listener is registered as well as a fallback;
    // init() ignores every call after the first.
    if (document.readyState !== "loading") {
        init();
    } else {
        document.addEventListener("DOMContentLoaded", init);
    }
    window.addEventListener("load", init);
})();
// </nowiki>