// From Wikipedia, the free encyclopedia
// Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.

/*

 * <nowiki>

 * This script is a fork of https://en.wikipedia.org/?title=User:Enterprisey/archiver.js&oldid=1113588553

 * which was forked from https://en.wikipedia.org/?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411

 */



/*

 * Documentation of CSS classes.

 *

 * .arky-span is the main custom class of the script.

 * Inside a .arky-span is an archive link, which triggers selection.

 * .arky-span tags also store data (not visible in the UI) associated with

 * corresponding sections: the index of the section and heading level

 * (i.e. ==2==, ===3===, etc)

 * Tags with .arky-span class are also called "archive spans".

 *

 * .arky-selected-section is put onto the whole semantic heading

 * of a section, selected by user for archiving.

 * During selection the class is used for styling (a light-blue background).

 * After clicking "archive ... selected threads" the class is used to

 * find all the archive spans, which live inside the semantic heading.

 */



// Display name of this userscript: used as the notification title, inside the console log prefix and in the edit-summary ad.
const USERSCRIPT_NAME = "Archiver";



/**
 * Forward a message to mw.notify, using the userscript's name as the notification title.
 *
 * @param {*} notificationMessage - Passed to mw.notify unchanged.
 */
function notifyUser(notificationMessage) {
	const notificationOptions = { title: USERSCRIPT_NAME };
	mw.notify(notificationMessage, notificationOptions);
}



// Prefix put in front of every console message emitted through error(), warn(), info() and debug().
const LOG_PREFIX = `[${USERSCRIPT_NAME}]:`;



/**
 * Write to the console at "error" level, with LOG_PREFIX as the first argument.
 *
 * @param {...*} toLog - Values appended after the prefix.
 */
function error(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.error(...prefixedArguments);
}



/**
 * Write to the console at "warn" level, with LOG_PREFIX as the first argument.
 *
 * @param {...*} toLog - Values appended after the prefix.
 */
function warn(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.warn(...prefixedArguments);
}



/**
 * Write to the console at "info" level, with LOG_PREFIX as the first argument.
 *
 * @param {...*} toLog - Values appended after the prefix.
 */
function info(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.info(...prefixedArguments);
}



/**
 * Write to the console at "debug" level, with LOG_PREFIX as the first argument.
 *
 * @param {...*} toLog - Values appended after the prefix.
 */
function debug(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.debug(...prefixedArguments);
}



/**
 * Build the wikitext fragment that credits this userscript in edit summaries.
 *
 * @returns {string} "using [[...]]" with a piped link labelled with USERSCRIPT_NAME.
 */
function constructAd() {
	// TODO maybe also introduce versions + include version in the ad?
	return "using [[User:Andrybak/Archiver|" + USERSCRIPT_NAME + "]]";
}



/**
 * Append the userscript ad, in parentheses, to an edit summary.
 *
 * @param {string} mainEditSummary - The descriptive part of the summary.
 * @returns {string} The summary followed by " (<ad>)".
 */
function constructEditSummary(mainEditSummary) {
	const ad = constructAd();
	return mainEditSummary + " (" + ad + ")";
}



$.when( mw.loader.using(['mediawiki.util','mediawiki.api']), $.ready).done( function () {

	/*

	 * Reference documentation about keys and values in mw.config:

	 * https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config

	 */

	if (!mw.config.get('wgIsArticle')) { // This variable is badly named -- it is not related to a page being a main namespace "article".

		info('Not a wiki page. Aborting.');

		return;

	}

	if (mw.config.get('wgArticleId') === 0 || mw.config.get('wgRevisionId') === 0) {

		info('Either the page does not exist yet or it is a diffonly=yes view. Aborting.');

		return;

	}

	if (mw.config.get("wgNamespaceNumber") % 2 == 0 && mw.config.get("wgNamespaceNumber") != 4) {

		// not a talk page and not project namespace

		info('Not a discussion namespace. Aborting.');

		return;

	}

	if (mw.config.get("wgNamespaceNumber") == -1) {

		// is a special page

		info('This is a "Special:" page. Aborting.');

		return;

	}

	const parserOutput = document.querySelector('#mw-content-text .mw-parser-output');

	if (!parserOutput || $(parserOutput).find(':header').length === 0) {

		info('Nothing to archive here. Aborting.');

		return;

	}

	if (mw.config.get('wgDiffNewId') != null || mw.config.get('wgDiffOldId') != null) {

		info('Detected diff view. Aborting.');

		return;

	}



	mw.util.addCSS(".arky-selected-section { background-color:#D9E9FF } .arky-selected-section .arky-span a { font-weight:bold }");

	// State shared by the functions below.
	// Maps a section index to { start, end } offsets of that section in the page text.
	const sectionCodepointOffsets = {};
	// Wikitext of the page; stays "" until the revisions query in addArchiveLinks() completes.
	let wikiText = "";
	let revStamp; // The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection

	// Label of the archive button while nothing has been selected yet.
	const DEFAULT_ARCHIVE_BUTTON_TEXT = "archive all the selected threads";

	const portletLink = mw.util.addPortletLink("p-cactions", "#", "ⵙCA", "ca-oecaAndrybak", "Enter/exit the archival process", null, null);
	const archiveButton = $(document.createElement("button"));
	let highestArchiveSubpagePromise = null;
	$(portletLink).click(function(e) {
		// The link's href is "#": keep the browser from following it.
		e.preventDefault();
		// Entering or leaving the archival mode starts from a clean selection.
		// Note that removeClass() takes a bare class name, not a selector.
		$(".arky-selected-section").removeClass('arky-selected-section');
		$(".arky-span").toggle();
		// The selection was just cleared, so a label like "archive 3 selected threads"
		// (or "Loading...") would be stale.
		archiveButton
			.prop('disabled', false)
			.text(DEFAULT_ARCHIVE_BUTTON_TEXT)
			.toggle();
		if (highestArchiveSubpagePromise == null) {
			/*
			 * Start searching for the archive subpage with highest number immediately.
			 * Then the click listener on `archiveButton` will wait for this `Promise`.
			 */
			highestArchiveSubpagePromise = findHighestArchiveSubpage();
		}
	});

	archiveButton.text(DEFAULT_ARCHIVE_BUTTON_TEXT)
		.attr("id", 'arky-archive-button')
		.css({
			'position': 'sticky',
			'bottom': 0,
			'width': '100%',
			'font-size': '200%'
		});
	$(document.body).append(archiveButton);
	// The button starts hidden; the portlet link toggles it.
	archiveButton.toggle();

	/*
	 * Click on the big archive button: collect the section indices stored on the
	 * archive spans of all selected headings, wait for the search of the highest
	 * archive subpage, then hand over to doArchive().
	 */
	archiveButton.click(function() {
		const selectedSections = $(".arky-selected-section .arky-span")
			.map(function() {
				return $(this).data("section");
			})
			.toArray();
		if (selectedSections.length === 0) {
			return alert("No threads selected, aborting");
		}
		if (highestArchiveSubpagePromise == null) {
			// Normally the portlet link has already started the search; start it here otherwise.
			highestArchiveSubpagePromise = findHighestArchiveSubpage();
		}

		const buttonTextBeforeWaiting = archiveButton.text();
		// A second click while waiting would run the archival twice.
		archiveButton.prop('disabled', true);
		const timeoutId = setTimeout(() => {
			/*
			 * In case highestArchiveSubpagePromise is taking a long time,
			 * show to the user that stuff is happening.
			 */
			archiveButton.text("Loading...");
		}, 1000);
		// Must run on success AND on failure, otherwise "Loading..." could appear
		// (or stay) after the wait is over, e.g. when the user cancels the prompt.
		const stopWaitingIndicator = () => {
			clearTimeout(timeoutId);
			archiveButton
				.prop('disabled', false)
				.text(buttonTextBeforeWaiting);
		};

		highestArchiveSubpagePromise.then(result => {
			stopWaitingIndicator();
			info("Successful highestArchiveSubpagePromise:", result);
			doArchive(selectedSections, result);
		}, rejection => {
			stopWaitingIndicator();
			warn("Failed highestArchiveSubpagePromise:", rejection);
			// Fall back to a placeholder suggestion, which the user edits in the prompt.
			const currentPageName = mw.config.get("wgPageName");
			doArchive(selectedSections, archiveSpacedSubpageName(currentPageName, "???"));
		});
	}); // end of archiveButton click handler

	addArchiveLinks();



	/**
	 * Integer midpoint between two numbers, rounded down.
	 *
	 * @param {number} lower
	 * @param {number} upper
	 * @returns {number} floor(lower + (upper - lower) / 2)
	 */
	function midPoint(lower, upper) {
		const halfDistance = (upper - lower) / 2;
		return Math.floor(lower + halfDistance);
	}



	/*

	 * Based on https://en.wikipedia.org/wiki/Module:Exponential_search

	 */

	/**
	 * Find the largest number for which `testFunc` resolves to a truthy value.
	 * While no upper bound is known, the probe is doubled after every success;
	 * once a probe fails, the range between the last success and that failure is bisected.
	 *
	 * @param {function(number): Promise<boolean>} testFunc - Asynchronous test of a single number.
	 * @param {number} i - First number to probe.
	 * @param {number} lower - Lower bound used for bisection; presumably known to pass the test.
	 * @param {?number} upper - Number known to fail the test, or a falsy value when unknown.
	 * @returns {Promise<number>} The number that passes while its successor is the failing bound.
	 */
	async function exponentialSearch(testFunc, i, lower, upper) {
		let probe = i;
		let passingBound = lower;
		let failingBound = upper;
		for (;;) {
			if (await testFunc(probe)) {
				if (probe + 1 == failingBound) {
					return probe;
				}
				passingBound = probe;
				probe = failingBound ? midPoint(passingBound, failingBound) : probe * 2;
			} else {
				failingBound = probe;
				probe = midPoint(passingBound, failingBound);
			}
		}
	}



	/**
	 * Title of an archive subpage in the "<page>/Archive <number>" naming scheme.
	 *
	 * @param {string} pageName - Title of the page being archived.
	 * @param {number|string} archiveNumber - Number (or other label) of the archive.
	 * @returns {string}
	 */
	function archiveSpacedSubpageName(pageName, archiveNumber) {
		return `${pageName}/Archive ${archiveNumber}`;
	}



	/**
	 * Title of an archive subpage in the "<page>/Archive/<number>" naming scheme.
	 *
	 * @param {string} pageName - Title of the page being archived.
	 * @param {number|string} archiveNumber - Number (or other label) of the archive.
	 * @returns {string}
	 */
	function archiveSlashedSubpageName(pageName, archiveNumber) {
		return `${pageName}/Archive/${archiveNumber}`;
	}



	/*

	 * Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing

	 */

	/**
	 * Ask the MediaWiki API whether a page exists.
	 *
	 * @param {string} title - Title of the page to look up.
	 * @returns {Promise<boolean>} false when the first page entry of the response
	 *   carries a "missing" key, true otherwise.
	 */
	async function pageExists(title) {
		const queryParameters = {
			"action": "query",
			"format": "json",
			"titles": title
		};
		const { query } = await new mw.Api().get(queryParameters);
		const [firstPage] = Object.values(query.pages);
		return !("missing" in firstPage);
	}



	/*

	 * Find the subpage of this page, which will be used as destination/target of archiving.

	 * It is just "Archive 1" by default, but can be increased by exponentialSearch.

	 */

	/**
	 * Work out which archive subpage to suggest as the target of archiving.
	 *
	 * @returns {Promise<string>} Resolves with the title of the suggested subpage.
	 *   Rejects with the original error, after logging it and notifying the user,
	 *   when any of the lookups fails.
	 */
	function findHighestArchiveSubpage() {
		info("findHighestArchiveSubpage: start");

		const locateSubpage = async () => {
			const currentPageName = mw.config.get("wgPageName");
			const currentYear = new Date().getUTCFullYear();

			/*
			 * Check if "current year" subpage is a good candidate for
			 * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
			 * TODO: maybe implement checking if {{Archived annually}} is transcluded.
			 */
			const previousYearExists = await pageExists(archiveSpacedSubpageName(currentPageName, currentYear - 1));
			if (previousYearExists && !await pageExists(archiveSpacedSubpageName(currentPageName, currentYear + 1))) {
				return archiveSpacedSubpageName(currentPageName, currentYear);
			}

			// Detect the naming scheme by looking for archive number 1: spaced first, then slashed.
			let subpageFunc = null;
			for (const namingScheme of [archiveSpacedSubpageName, archiveSlashedSubpageName]) {
				if (await pageExists(namingScheme(currentPageName, 1))) {
					subpageFunc = namingScheme;
					break;
				}
			}
			if (subpageFunc === null) {
				notifyUser("Cannot find the first archive subpage");
				info('Assuming zero archive subpages.');
				return archiveSpacedSubpageName(currentPageName, 1);
			}

			// see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
			const checkArchiveSubpageExists = (archiveNumber) => pageExists(subpageFunc(currentPageName, archiveNumber));
			const highestNumber = await exponentialSearch(checkArchiveSubpageExists, 10, 1, null);
			return subpageFunc(currentPageName, highestNumber);
		};

		return locateSubpage().catch((e) => {
			const msg = "Cannot find archive subpage with the highest number";
			error(msg, e);
			notifyUser(msg);
			throw e;
		});
	}



	/**
	 * Move the selected sections from the current page to an archive page.
	 *
	 * Asks the user for the archive page title, removes the sections from the
	 * current page with one edit and appends them to the archive page with a
	 * second edit. The page is reloaded after the edits have been attempted.
	 *
	 * Reads the shared variables `wikiText`, `revStamp` and `sectionCodepointOffsets`.
	 *
	 * @param {Array<string|number>} selectedSections - Section indices (keys of
	 *   `sectionCodepointOffsets`), expected in page order.
	 * @param {string} highestArchiveSubpage - Title pre-filled in the prompt.
	 * @returns {undefined}
	 */
	function doArchive(selectedSections, highestArchiveSubpage) {
		// returns `s` without the substring starting at `start` and ending at `end`
		function cut(s, start, end) {
			return s.slice(0, start) + s.slice(end);
		}

		// Without the wikitext every offset would resolve to nothing useful and
		// the "remove" edit below would save an empty page.
		if (!wikiText || !revStamp) {
			return alert("The wikitext of this page has not been loaded yet, aborting. Please try again in a moment.");
		}

		const archivePageName = prompt("Archiving " + selectedSections.length + " threads: where should we move them to? The latest archive number seems to be:", highestArchiveSubpage);
		if (!archivePageName || archivePageName == mw.config.get("wgPageName")) {
			return alert("No archive target selected, aborting");
		}

		const selectedOffsets = selectedSections.map(function(n) {
			return sectionCodepointOffsets[n];
		});
		if (selectedOffsets.some(function(offsets) { return offsets == null; })) {
			return alert("Cannot find the offsets of the selected threads, aborting");
		}

		// codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
		const codepointToUtf16Idx = Object.create(null);

		// Register the "important" (= either a section start or end) offsets; null means "not resolved yet"
		selectedOffsets.forEach(function(offsets) {
			codepointToUtf16Idx[offsets.start] = null;
			codepointToUtf16Idx[offsets.end] = null;
		});
		codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value

		// fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
		// yes, this loops through every character in the wikitext. very unfortunate.
		let codepointPos = 0;
		for (let utf16Pos = 0; utf16Pos < wikiText.length; utf16Pos++, codepointPos++) {
			if (codepointPos in codepointToUtf16Idx) {
				codepointToUtf16Idx[codepointPos] = utf16Pos;
			}

			const codeUnit = wikiText.charCodeAt(utf16Pos);
			if (0xD800 <= codeUnit && codeUnit <= 0xDBFF) {
				// high surrogate! utf16Pos goes up by 2, but codepointPos goes up by only 1.
				utf16Pos++; // skip the low surrogate
			}
		}

		// An offset which was never reached does not belong to this wikitext.
		// Cutting with a made-up index would destroy unrelated text, so stop here.
		if (Object.values(codepointToUtf16Idx).some(function(idx) { return idx === null; })) {
			return alert("The section offsets do not match the wikitext of this page, aborting");
		}

		// [start, end] of section `n` as indices into wikiText
		function utf16Range(n) {
			const offsets = sectionCodepointOffsets[n];
			return [codepointToUtf16Idx[offsets.start], codepointToUtf16Idx[offsets.end]];
		}

		const newTextForArchivePage = selectedSections.map(function(n) {
			const range = utf16Range(n);
			return wikiText.substring(range[0], range[1]);
		}).join("");

		// Cut from the last section to the first, so that we don't invalidate the offsets of earlier sections.
		// A sorted copy is used: the caller's array stays untouched.
		const sectionsLastToFirst = selectedSections.slice().sort(function(a, b) {
			return sectionCodepointOffsets[b].start - sectionCodepointOffsets[a].start;
		});
		let newWikiText = wikiText;
		sectionsLastToFirst.forEach(function(n) {
			const range = utf16Range(n);
			newWikiText = cut(newWikiText, range[0], range[1]);
		});

		info("archive this:" + newTextForArchivePage);
		info("revised page:" + newWikiText);
		const pluralizedThreads = selectedSections.length + ' thread' + ((selectedSections.length === 1) ? '' : 's');
		const api = new mw.Api();
		api.postWithToken("csrf", {
			action: 'edit',
			title: mw.config.get("wgPageName"),
			text: newWikiText,
			summary: constructEditSummary(`Removing ${pluralizedThreads}, will be on [[${archivePageName}]]`),
			// both timestamps let the API detect edit conflicts
			basetimestamp: revStamp,
			starttimestamp: revStamp
		})
		.done(function(res1) {
			alert("Successfully removed threads from talk page");
			info(res1);
			// The threads are appended to the archive only after they were removed from this page.
			api.postWithToken("csrf", {
				action: 'edit',
				title: archivePageName,
				appendtext: "\n" + newTextForArchivePage,
				summary: constructEditSummary(`Adding ${pluralizedThreads} from [[${mw.config.get("wgPageName")}]]`)
			})
				.done(() => alert("Successfully added threads to archive page"))
				.fail(() => alert("Failed to add threads to archive page. Manual inspection needed."))
				.always(function(res2) {
					info(res2);
					window.location.reload();
				});
		})
		.fail(function(res1) {
			alert("Failed to remove threads from talk page. Aborting archive process.");
			error(res1);
			window.location.reload();
		});
	} // end of doArchive()



	/*

	 * Filters the result of the API query.

	 * Plus, importantly, populates the global variable `sectionCodepointOffsets`.

	 */

	/**
	 * Keep only the sections whose index is a plain number and record their
	 * offsets in the shared `sectionCodepointOffsets` object.
	 *
	 * @param {Array<Object>} apiResultSections - The `sections` array of an
	 *   action=parse API result. A missing value is treated as an empty list.
	 * @returns {Object} The retained section objects, keyed by their index.
	 */
	function extractValidSections(apiResultSections) {
		const validSections = {};

		for (const section of apiResultSections || []) {
			// For sections transcluded from other pages, section.index will look
			// like T-1 instead of just 1. Remove those.
			if (Number.parseInt(section.index, 10) === Number(section.index)) {
				validSections[section.index] = section;
			}
		}

		// record the offsets in the shared variable
		for (const key of Object.keys(validSections)) {
			const sectionIndex = Number.parseInt(key, 10);
			const hasNextSection = Object.prototype.hasOwnProperty.call(validSections, sectionIndex + 1);
			// What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
			sectionCodepointOffsets[sectionIndex] = {
				start: validSections[key].byteoffset,
				// a section ends where the next one starts; the last one runs to the end of the page
				end: hasNextSection ? validSections[sectionIndex + 1].byteoffset : Infinity
			};
		}

		return validSections;
	}



	/*

	 * The convoluted way of "header" vs "headerContainer" is needed, because

	 * there are different HTML layouts for "headings" in different skins.

	 * In Vector 2022, layout of ==Second level== versus ===Third level===

	 * headings is different even for a _single_ skin.

	 *

	 * The HTML layout is either

	 *    <div><h*></h*><edit section link /></div>

	 * or

	 *    <h*><edit section link /></h*>

	 *

	 * "headerContainer" is always the outer of the tags, it always contains the <edit section link /> tags.

	 * "header" is always one of the <h*> tags.

	 * Meaning that in some cases "header" and "headerContainer" are the same HTML element.

	 *

	 * arky-span, aka archiveSpans are put inside the "<edit section link />".

	 *

	 * For details, see:

	 *   - https://www.mediawiki.org/?title=Heading_HTML_changes&oldid=6538029

	 */



	// Returns a plain HTMLElement

	/**
	 * Locate the ".mw-editsection" element which belongs to a heading tag.
	 *
	 * @param {HTMLElement} header - One of the <h*> tags.
	 * @returns {?HTMLElement} The element, or null when neither the heading nor
	 *   its ".mw-heading" parent contains one.
	 */
	function findEditSectionForHeader(header) {
		const editSectionSelector = '.mw-editsection';

		// in Vector, the bracketed [edit] section link is a direct child element/node
		const insideHeader = header.querySelector(editSectionSelector);
		if (insideHeader) {
			return insideHeader;
		}

		// in other skins, the bracketed [edit] section link is a sibling of <h2> <h3> etc
		const wrapper = header.parentElement;
		if (!wrapper.classList.contains('mw-heading')) {
			return null;
		}
		return wrapper.querySelector(editSectionSelector) || null;
	}



	// Returns a jQuery object

	/**
	 * Find the header container around an archive span: ".mw-heading" ancestors
	 * are preferred, heading tag ancestors are the fallback.
	 *
	 * @param {HTMLElement} archiveSpan - A tag with the .arky-span class.
	 * @returns {?jQuery} The matching ancestors, or null (after notifying the
	 *   user and logging the tags) when there are none.
	 */
	function findHeaderContainerForArchiveSpan(archiveSpan) {
		const wrappedSpan = $(archiveSpan);
		for (const ancestorSelector of ['.mw-heading', ':header']) {
			const ancestors = wrappedSpan.parents(ancestorSelector);
			if (ancestors.length > 0) {
				return ancestors;
			}
		}
		notifyUser("findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.");
		error("findHeaderContainerForArchiveSpan: Tags for bug report:", archiveSpan, archiveSpan.parentElement);
		return null;
	}



	/*

	 * We need to get the top-level element of the whole header.

	 * In some cases it's a <div class="mw-heading ...">

	 * In other cases it's just a <h2>, <h3>, etc tag.

	 *

	 * Returns a plain HTML element.

	 */

	/**
	 * Pick the outermost element of a heading.
	 *
	 * @param {HTMLElement} header - One of the <h*> tags.
	 * @returns {HTMLElement} The parent element when it has the "mw-heading"
	 *   class, otherwise `header` itself.
	 */
	function getHeaderContainer(header) {
		const wrapper = header.parentElement;
		const isWrappedInHeadingElement = wrapper.classList.contains('mw-heading');
		return isWrappedInHeadingElement ? wrapper : header;
	}



	/*

	 * Create the bracketed [archive] links next to the [edit] section links.

	 * These [archive] links are used by a user to select sections for archival.

	 */

	/**
	 * Download the section list and the wikitext of the current page, then put a
	 * hidden "[archive]" link (an archive span) next to the edit section link of
	 * every heading which corresponds to a valid section.
	 *
	 * Fills the shared variables `wikiText` and `revStamp`; `sectionCodepointOffsets`
	 * is filled through extractValidSections().
	 */
	function addArchiveLinks() {
		const api = new mw.Api();
		const pageName = mw.config.get("wgPageName");
		const pageId = mw.config.get("wgArticleId");

		// grab page sections and wikitext so we can add the "archive" links to appropriate sections
		// Only the section list is read from the parse result, so nothing else is requested.
		api.get({action: 'parse', page: pageName, prop: 'sections'}).done(function(parseApiResult) {
			api.get({action: 'query', pageids: pageId, prop: ['revisions'], rvprop: ['content', 'timestamp']})
				.done(function(revisionsApiResult) {
					const rv = revisionsApiResult.query.pages[pageId].revisions[0];
					wikiText = rv["*"];
					revStamp = rv['timestamp'];
				})
				.fail(() => warn('addArchiveLinks: Cannot download the wikitext of the current page.'));

			const validSections = extractValidSections(parseApiResult.parse.sections);

			/*
			 * The search for all section headings starts with
			 * finding all <h*> tags, which aren't for the table of contents.
			 * From the <h*> tags, we find the "[edit] section links" and
			 * "header containers".
			 */
			const allHeaders = $("#mw-content-text .mw-parser-output").find(":header").filter(':not(#mw-toc-heading)');
			if (allHeaders.length == 0) {
				warn('Nothing to archive here. The script should have aborted earlier. Aborting.');
				return;
			}
			allHeaders.each(function(_, header) {
				// "H2" -> 2, "H3" -> 3, etc
				const headerLevel = Number(header.tagName.slice(1));
				const editSection = findEditSectionForHeader(header);
				if (!editSection) {
					// we're either in an archived page ([edit] links are hidden with magic word __NOEDITSECTION__)
					return;
				}

				// The section number is taken from the URL of the first link in the edit section.
				// Note: href may not be set.
				const editSectionLink = editSection.querySelector('a');
				const sectionNumberMatch = editSectionLink && editSectionLink.href && editSectionLink.href.match(/[?&]section=(\d+)/);
				const sectionNumber = sectionNumberMatch ? sectionNumberMatch[1] : undefined;

				// if the check fails, it might be something like <h2>not a real section</h2>
				if (!Object.prototype.hasOwnProperty.call(validSections, sectionNumber)) {
					return;
				}

				const arkySpan = $("<span>", { "class": "arky-span" })
					.css({'display':'none'})
					.data({'header-level': headerLevel, 'section': sectionNumber});

				const archiveLink = $('<a>')
					.text('archive')
					.click(function() {
						const correspondingHeaderContainer = $(getHeaderContainer(header));
						correspondingHeaderContainer.toggleClass('arky-selected-section');

						// now, click all sub-sections of this section
						// i.e. mark all needed header containers with our CSS class .arky-selected-section
						const isThisSectionSelected = correspondingHeaderContainer.hasClass('arky-selected-section');

						// starting from the current section, loop through each section
						const allArchiveSpans = $('.arky-span');
						const currSectionIdx = allArchiveSpans.index(arkySpan);
						for (let j = currSectionIdx + 1; j < allArchiveSpans.length; j++) {
							const otherArchiveSpan = allArchiveSpans[j];
							if ($(otherArchiveSpan).data('header-level') <= headerLevel) {
								// if this isn't a subsection, quit
								break;
							}
							const closestHeaderContainer = findHeaderContainerForArchiveSpan(otherArchiveSpan);
							if (!closestHeaderContainer) {
								// the helper has already notified the user and logged the tags
								break;
							}
							// bring the subsection to the same state as this section
							closestHeaderContainer.toggleClass('arky-selected-section', isThisSectionSelected);
						}

						// finally, update button
						const selectedSectionCount = $('.arky-selected-section').length;
						archiveButton
							.prop('disabled', selectedSectionCount === 0)
							.text('archive ' + selectedSectionCount + ' selected thread' +
								  ((selectedSectionCount === 1) ? '' : 's'));
					});

				arkySpan.append(
					$('<span>', { 'class': 'mw-editsection-bracket' }).text('['),
					archiveLink,
					$('<span>', { 'class': 'mw-editsection-bracket' }).text(']')
				);

				$(editSection).append("&nbsp;", arkySpan);
			});
		})
		.fail(() => warn('addArchiveLinks: Cannot download current page. Aborting.'));
	}

}); // </nowiki>