// From Wikipedia, the free encyclopedia
// Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.

// <nowiki>

// Adjust chset-* style code chart colouration to match Unicode categories.

// Adds a button below the source editor.

//

// I am not selling this and make no guarantees of safety, fitness or that it won't mangle the content.

// You are advised to double check that the scripted process has produced the desired results, and

// clean up where necessary (and manually fix the more complicated cases, such as multiple mappings

// or PUA mappings).

//

// Furthermore, this loads JavaScript code from a third-party source for identifying Unicode character

// category. I cannot guarantee that this will not be compromised. Proceed at your own risk.

//

// Usage: mw.loader.load("//en.wikipedia.org/?action=raw&ctype=text/javascript&title=User:HarJIT/Scripts/unicategorise.js");

//

// Canonical: [[m:w:User:HarJIT/Scripts/unicategorise.js]]



(() => {



/**
 * Split a string in the manner of Python's `str.split(sep, maxsplit)`.
 *
 * Called with one argument (the separator), this is plain `String#split`.
 * Called with two arguments (separator, maxsplit), the first `maxsplit`
 * pieces are split off and the *unsplit remainder* of the string is appended
 * as the final element (JavaScript's own `split(sep, limit)` would discard it).
 *
 * Note: in the two-argument form a remainder element is always appended, so
 * when the separator occurs fewer than `maxsplit` times the result ends with
 * an extra "" (e.g. "abc".pysplit("=", 1) gives ["abc", ""]). Callers in this
 * script index the result directly (`[1]`, `[2]`), so that is left as it is.
 *
 * @returns {string[]} the pieces, with the remainder last when maxsplit is given.
 */
String.prototype.pysplit = function () {
    if (arguments.length == 1) {
        return this.split(arguments[0]);
    }
    var temp = this.split(arguments[0], arguments[1]);
    // Length of everything consumed so far: the split-off pieces re-joined,
    // plus the one separator that precedes the remainder.
    var heading = temp.join(arguments[0]).length + arguments[0].length;
    temp[temp.length] = this.substring(heading);
    return temp;
};



/**
 * Prefix test: does this string begin with `s`?
 *
 * @param {string} s - candidate prefix.
 * @returns {boolean} true when the leading `s.length` characters equal `s`.
 */
String.prototype.startswith = function (s) {
    var head = this.substring(0, s.length);
    return head == s;
};



/**
 * Suffix test: does this string finish with `s`?
 *
 * @param {string} s - candidate suffix.
 * @returns {boolean} true when the trailing `s.length` characters equal `s`.
 */
String.prototype.endswith = function (s) {
    var tailStart = this.length - s.length;
    var tail = this.substring(tailStart);
    return tail == s;
};



/**
 * Substring test: does `s` occur anywhere in this string?
 *
 * @param {string} s - text to look for.
 * @returns {boolean} true when `indexOf` finds `s`.
 */
String.prototype.contains = function (s) {
    var position = this.indexOf(s);
    return position !== -1;
};



// Unicode category table, loaded asynchronously below. Stays null until the
// download has completed and been evaluated; doyogh() polls for that.
var ezh = null;

// Fetch the category data as text. The URL names a specific commit of the
// third-party repository rather than a moving branch.
jQuery.get("https://cdn.jsdelivr.net/gh/slevithan/xregexp@57f919a3ebc58ea2f56cb5b2391b6151483b1709/tools/output/categories.js", (b) => {
    // Keep only what follows the first "=" (the right-hand side of the
    // assignment in the downloaded file) and drop a trailing semicolon.
    b = b.pysplit("=", 1)[1].trim();
    if (b.endswith(";")) {
        b = b.substring(0, b.length-1).trim();
    }
    // SECURITY: this evaluates text obtained from a third-party server as
    // JavaScript. See the warning in the header comment of this script.
    ezh = eval(b); // Isn't in the JSON subset, sadly.
}, "text").fail((jqXHR, textStatus, errorThrown) => {
    // Without this the failure is silent and ezh simply stays null forever.
    console.error("unicategorise: could not load Unicode category data:", textStatus, errorThrown);
});



// Lookup from category name (the `name` field of each loaded entry, e.g. the
// "L", "N", "Co" keys used by fix()) to a RegExp matching one character of
// that category.
var yogh = {};

/**
 * Populate `yogh` from the loaded category table `ezh`.
 *
 * The table arrives asynchronously, so while `ezh` is still null this
 * re-schedules itself every 500 ms and returns without doing anything.
 */
var doyogh = () => {
    if (ezh === null) {
        setTimeout(doyogh, 500);
        return;
    }
    ezh.forEach((e) => {
        if (typeof e.astral != "undefined") {
            // `bmp` is the inside of a character class; `astral`, when
            // present, is added as a further alternative.
            yogh[e.name] = new RegExp("[" + e.bmp + "]|" + e.astral);
        } else {
            yogh[e.name] = new RegExp("[" + e.bmp + "]");
        }
    });
};

doyogh();



// Marker comment that fix() puts in front of a cell whose colour it could not
// decide, or whose code point field had extra space-separated content.
var CHECKMS = "<!-- XXX: check colour -->";

// Note found in some articles. When the page contains it, fix() uses
// "color-intl-box" for uncoloured cells, and the button handler removes the
// note and uses SUMMARY2.
var SHARED = "<!-- Note: Only the shared/invariant EBCDIC cells are colored; international glyphs are not. -->\n";

// Sentence replaced by TO when the page contains SHARED.
var FROM = "Invariant alphanumeric, punctuation, and control characters are shown in color.";

// Replacement text for FROM.
var TO = "Non-invariant characters are shown boxed.";

// Edit summary written to the summary field by default.
var SUMMARY = "regenerate colour codes based on Unicode category ([[User:HarJIT/Scripts/unicategorise.js|script]])";

// Edit summary used instead when the page contained SHARED.
var SUMMARY2 = "box nationalised codes, and regenerate colour codes based on Unicode category ([[User:HarJIT/Scripts/unicategorise.js|script]])";

// Hand-written table header row; the button handler replaces its first
// occurrence with {{chset-table-header|<page title>}}.
var FAKEHEAD = "|-\n| width=\"4%\" |\n! width=\"6%\" | \u20140 || width=\"6%\" | \u20141\n! width=\"6%\" | \u20142 || width=\"6%\" | \u20143\n! width=\"6%\" | \u20144 || width=\"6%\" | \u20145\n! width=\"6%\" | \u20146 || width=\"6%\" | \u20147\n! width=\"6%\" | \u20148 || width=\"6%\" | \u20149\n! width=\"6%\" | \u2014A || width=\"6%\" | \u2014B\n! width=\"6%\" | \u2014C || width=\"6%\" | \u2014D\n! width=\"6%\" | \u2014E || width=\"6%\" | \u2014F";

// Hand-written table footer row; the button handler replaces its first
// occurrence with {{chset-table-footer}}.
var FAKEFOOT = "|-\n||\n!—0||—1||—2||—3||—4||—5||—6||—7||—8||—9||—A||—B||—C||—D||—E||—F";



/**
 * Regenerate the `{{chset-color-*}}` template of each code-chart cell in a
 * page's wikitext from the Unicode category of the cell's code point.
 *
 * The text is cut at every "\n|{{chset-color-" and each piece (one cell) is
 * either passed through unchanged, passed through with CHECKMS prepended
 * (colour could not be decided), or re-emitted with a freshly chosen colour.
 *
 * Relies on `yogh` (category name -> RegExp), the CHECKMS and SHARED
 * constants, and the pysplit/startswith/contains String helpers.
 *
 * @param {string} inp - wikitext of the page.
 * @returns {string} the rewritten wikitext.
 */
var fix = function (inp) {
    var output = "";
    // We need *a* colour template to begin with in order to parse it, even if
    // it is the wrong one. For the places where they aren't used already.
    // Also some EBCDIC pages use lack of colour as a distinguishing mark, which
    // no longer shows up now that -letter is white.
    var iox = (inp.contains(SHARED))?("color-intl-box"):("color-intl");
    inp = inp.replace(/\n\|\s*\|?\{\{[Cc]hset-c(?=ell|trl)/g, "\n|{{chset-" + iox + "}}|{{chset-c");
    // Normalise template capitalisation, then cut into one piece per cell.
    var doz = inp.split("{{Chset-").join("{{chset-").split("\n|{{chset-color-");
    // Everything before the first coloured cell is copied verbatim.
    output += doz[0];
    doz.slice(1).forEach((ii) => {
        // ii is kept untouched for pass-through; i is consumed while parsing.
        var i = ii;
        // Digit suffix of the cell template: "" (cell), "3" (cell3) or "4" (cell4).
        var nombre = "";
        if (i.startswith("undef")) {
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        } else if (i.startswith("intl}}|{{chset-cell3||<u>''")) {
            // Common in APL code pages, including EBCDIC ones.
            output += "\n|{{chset-color-letter" + ii.substring(4);
            return; /* i.e. continue */
        } else if (i.startswith("hangups}}|{{chset-cell3||<u>''")) {
            // What the hell (chset-color-hangups does not exist and afaik never did)
            output += "\n|{{chset-color-letter" + ii.substring(7);
            return; /* i.e. continue */
        } else if (i.substring(0, 40).contains("l4|")) {
            nombre = "4";
        } else if (i.substring(0, 40).contains("l3|")) {
            nombre = "3";
        } else if (!i.substring(0, 40).contains("l|")) {
            // No recognisable cell/ctrl template follows: leave the piece alone.
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        }
        var layout = (i.substring(0, 40).contains("chset-ctrl")) ? ("-ctrl") : ("-cell");
        // Start of the piece, i.e. the existing colour name and any suffix.
        var iii = i.substring(0, 15);
        if (iii.startswith("esc")) {
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        }
        // Preserve an existing -box / -var suffix (and a -box parameter, if any).
        var hilite = ((iii.contains("-box")) ? ("-box") :
                         ((iii.contains("-var")) ? ("-var") : ("")));
        if (hilite == "-box" && iii.contains("-box|")) {
            hilite += "|" + i.pysplit("-box|", 1)[1].pysplit("}", 1)[0];
        }
        var wlink = null;
        if (nombre === "" && i.pysplit("}}", 1)[1].trim().startswith("|[[")) {
            // MIK being _incredibly_ helpful
            i = i.pysplit("|[[", 1)[1];
            wlink = i.pysplit("|", 1)[0];
            i = i.pysplit("|", 2)[2];
        } else {
            // Drop everything up to and including the template name's "l|".
            i = i.pysplit("l" + nombre + "|", 1)[1];
        }
        // First template parameter: the code point field.
        var cpt = i.pysplit("|", 1)[0].pysplit("}}", 1)[0].trim();
        var codept;
        if (wlink !== null) {
            // MIK being _incredibly_ helpful indeed
            var codep = String.fromCodePoint(parseInt(cpt, 16));
            i = i.split("}}]]").join("|[[" + wlink + "|" + codep + "]]}}");
        }
        if (cpt.trim().length === 0 && iii.startswith("ctrl")) {
            // Unmapped controls, common in articles about EBCDIC variants.
            cpt = "0000"; // Kludgy
        }
        // Strip <ref> tags, turn "(" into a "/" separator, blank out ")" and
        // "?", collapse some doubled spaces, then split alternative mappings.
        var cpts = cpt.replace(/<ref[^>]*?(\/>|>[^<]*?<\/ref>)/g, " ");
        cpts = cpts.replace(/\(/, "/").replace(/\)/, " ").replace(/\?/, " ");
        cpts = cpts.replace("  ", " ").replace("  ", " ").replace("  ", " ").replace("  ", " ");
        cpts = cpts.split("/");
        // Distinct colour suffixes suggested by the listed code points.
        var colours = new Set([]);
        var checkmsg = "";
        cpts.forEach((cpt) => {
            cpt = cpt.trim();
            if (cpt.contains(" ")) {
                // Only the first token is used; mark the cell for checking.
                cpt = cpt.pysplit(" ", 1)[0];
                checkmsg = CHECKMS;
            }
            if (cpt.match(/^[0-9a-f]+$/i)) {
                codept = String.fromCodePoint(parseInt(cpt, 16));
            } else {
                console.log(cpt); return; /* i.e. continue */
            }
            if (codept.match(yogh["Co"])) {
                // Private use, which could mean:
                //  (a) An end-user defined character in the source encoding (which would be -misc).
                //  (b) A well-defined character without a standard Unicode mapping (e.g. the Apple
                //      logo in Macintosh, the Windows logo in Wingdings, the radical extender in
                //      x-mac-symbol, several characters in KPS 9566 and LMBCS...).
                // ==> Let a human be the judge here.
                console.log(cpt); return; /* i.e. continue */
            } else if (codept.match(yogh["L"])) {
                if (iii.startswith("alpha")) {
                    // May as well keep it like that for now...
                    colours.add("-alpha");
                } else {
                    colours.add("-letter");
                }
            } else if (codept.match(yogh["N"])) {
                colours.add("-digit");
            } else if (codept.match(yogh["P"])) {
                if (iii.startswith("ext") && (parseInt(cpt, 16) > 0x7F)) {
                    // May as well keep it like that for now...
                    colours.add("-ext-punct");
                } else {
                    colours.add("-punct");
                }
            } else if (codept.match(yogh["S"])) {
                colours.add("-graph");
            } else if (codept.match(yogh["C"])) {
                colours.add("-ctrl");
            } else {
                colours.add("-misc");
            }
        });
        colours = Array.from(colours);
        var colour = null;
        if (colours.length == 0) {
            // Nothing usable was found: keep the cell as it was, flagged.
            output += "\n|" + CHECKMS + "{{chset-color-" + ii;
            return; /* i.e. continue */
        } else if (colours.length == 1) {
            colour = colours[0];
        } else {
            // Several candidates: keep one only if it matches the colour the
            // cell already had; otherwise flag the cell and leave it alone.
            colours.forEach((col) => {
                if (iii.startswith(col.substring(1))) {
                    colour = col;
                }
            });
            if (colour === null) {
                output += "\n|" + CHECKMS + "{{chset-color-" + ii;
                return; /* i.e. continue */
            }
        }
        output += "\n|" + checkmsg + "{{chset-color" + colour + hilite + "}}|{{chset" + layout + nombre + "|" + i;
    });
    return output;
};



// Once the DOM is ready, and only on pages that have the #editpage-copywarn
// element, insert a "Fix chset-color" button just before that element.
jQuery(() => {
    const copywarn = jQuery("#editpage-copywarn");
    if (!copywarn.length) {
        return;
    }

    const button = document.createElement("input");
    button.setAttribute("type", "button");
    button.setAttribute("value", "Fix chset-color");

    const anchor = copywarn[0];
    anchor.parentNode.insertBefore(button, anchor);

    // Clicking runs fix() over the edit box; the box and the summary field
    // are only written when fix() actually changed the text.
    button.onclick = () => {
        const textbox = jQuery("#wpTextbox1")[0];
        const before = textbox.value;
        let after = fix(before);
        if (after == before) {
            return;
        }

        let summary = SUMMARY;
        if (before.contains(SHARED)) {
            // Remove the note and swap the explanatory sentence.
            after = after.split(SHARED).join("").split(FROM).join(TO);
            summary = SUMMARY2;
        }

        // Heading text minus its first "Editing ".length characters.
        const pageTitle = jQuery("#firstHeading")[0].innerText.trim().substring("Editing ".length);
        after = after.replace(FAKEHEAD, "{{chset-table-header|" + pageTitle + "}}");
        after = after.replace(FAKEFOOT, "{{chset-table-footer}}");

        textbox.value = after;
        jQuery("#wpSummary")[0].value = summary;
    };
});



})();



// End: [[m:w:User:HarJIT/Scripts/unicategorise.js]]

// </nowiki>