// User:Xover/unwrap.js

/* global $, mw */ "use strict";

// Wait for the page to be parsed (new-style $(document).ready()).
//
// On Page:-namespace pages that are being edited this registers two ways of
// running doUnwrapText(): a portlet link in 'p-tb' and a WikiEditor toolbar
// button in the 'proofreadpage-tools' section.
$(() => {
	/*
	 * First check that this is a context we should be active in.
	 */
	// Only active on Page:-namespace pages.
	if (mw.config.get('wgCanonicalNamespace') !== 'Page') {
		return;
	}
	// Only active on pages with content model 'proofread-page'.
	if (mw.config.get('wgPageContentModel') !== 'proofread-page') {
		return;
	}
	// Only active when in edit/preview/diff mode.
	if (['edit', 'submit'].indexOf(mw.config.get('wgAction')) === -1) {
		return;
	}

	/*
	 * Add portlets for the various commands.
	 */
	// NOTE(review): mw.util is used without an explicit mw.loader.using()
	// call; presumably 'mediawiki.util' is already loaded here -- confirm.
	const unwrapPortlet = mw.util.addPortletLink(
		'p-tb',
		'#',
		'↲ Remove hard line breaks',
		'ca-unwrap',
		'Remove hard line breaks from OCR text.'
	);
	$(unwrapPortlet).on('click', (event) => {
		// Both of these must be *called*; a bare property reference is a no-op.
		event.preventDefault();
		doUnwrapText();
	});

	/*
	 * Add a toolbar button for the same command.
	 */
	const unwrapButtonDetails = {
		type: 'button',
		icon: 'https://upload.wikimedia.org/wikipedia/commons/0/0e/U%2B21B2.svg',
		label: "Remove hard linebreaks",
		action: { type: 'callback', execute: doUnwrapText }
	};
	const unwrapButton = {
		section: 'proofreadpage-tools',
		group: 'text',
		tools: { 'unwrap': unwrapButtonDetails }
	};

	mw.hook('ext.proofreadpage.osd-controller-available').add(() => {
		const $textbox = $("#wpTextbox1");
		// Create the 'text' group first, then put the button into it.
		$textbox.wikiEditor('addToToolbar', {
			section: 'proofreadpage-tools',
			groups: {
				text: { label: 'Text' }
			}
		});
		$textbox.wikiEditor('addToToolbar', unwrapButton);
	});
}); // END: $(document).ready()

/**
 * Apply the OCR clean-up rules to a piece of text.
 *
 * The rules run in a fixed order and later rules rely on earlier ones
 * (e.g. the hyphen rule relies on newlines having become spaces).
 *
 * @param {string} text Raw text from the edit box.
 * @return {string} The text with hard line breaks removed and the
 *  punctuation fixes below applied.
 */
function unwrapOcrText(text) {
	let OCR = text;

	OCR = OCR.replace(/^\s+$/mg, ''); // Nuke lines consisting of only whitespace
	// A newline that follows a non-newline character and is not itself
	// followed by another newline becomes a space; blank-line paragraph
	// breaks are left alone.
	OCR = OCR.replace(/([^\n])\n(?!\n)/gm, '$1 ');
	OCR = OCR.replace(/ +/g, ' '); // Collapse runs of spaces

	// Stash these here for now
	// (blanket single -> double curly quote conversion, intentionally disabled)
	// OCR = OCR.replace(/‘/g, '“');
	// OCR = OCR.replace(/’/g, '”');
	OCR = OCR.replace(/”(s|d|t)/g, '’$1'); // ”s ”d ”t -> ’s ’d ’t
	OCR = OCR.replace(/s” /g, 's’ '); // s” -> s’ when followed by a space
	OCR = OCR.replace(/&mdash;/g, '—'); // HTML entity -> literal em dash
	OCR = OCR.replace(/\s*—\s*/g, '—'); // No whitespace around em dashes
	// Drop every "hyphen + space"; this rejoins words that were hyphenated
	// across a line break (the newline is a space by now). Note that it
	// applies to any "- " sequence, not only former line ends.
	OCR = OCR.replace(/- /g, '');
	OCR = OCR.replace(/ (’|”|;|!|\?|:)/g, '$1'); // No space before these marks
	OCR = OCR.replace(/(‘|“) /g, '$1'); // No space after an opening quote
	OCR = OCR.replace(/^ +(\S)/mg, '$1'); // Strip leading spaces on each line
	OCR = OCR.replace(/(\S) +$/mg, '$1'); // Strip trailing spaces on each line
	OCR = OCR.replace(/(\S)\s+$/g, '$1'); // Strip whitespace at the very end
	// Remove runs of two or more dots (optionally spaced out), keeping the
	// whitespace captured on either side.
	// NOTE(review): the dots are replaced with nothing; confirm that no
	// replacement character (e.g. an ellipsis) was intended here.
	OCR = OCR.replace(/(\s*)\.\s*\.[ .]*(\s*)/g, '$1$2');
	OCR = OCR.replace(/‘‘/g, '“'); // two single curly quotes is prolly a double quote
	OCR = OCR.replace(/’’/g, '”'); // two single curly quotes is prolly a double quote
	OCR = OCR.replace(/^(["'“‘])T /mg, '$1I '); // “T -> “I
	OCR = OCR.replace(/(\S)’em/mg, '$1 ’em'); // '’em' is generally spaced

	return OCR;
}

/**
 * Clean up the text in the #wpTextbox1 edit box in place: read its value,
 * run it through unwrapOcrText() and write the result back.
 *
 * Takes no parameters (anything passed by a caller is ignored) and returns
 * nothing. Does nothing when the edit box has no string value.
 */
function doUnwrapText() {
	const $textbox = $('#wpTextbox1');
	const text = $textbox.val(); // .val must be called; the bare property is the method itself
	if (typeof text !== 'string') {
		// No usable edit box value, so there is nothing to clean up.
		return;
	}
	$textbox.val(unwrapOcrText(text));
} // END: doUnwrapText