MediaWiki:Gadget-ocr.js

/*jshint boss:true*/ /*global $, mw*/

/*
 * Query an OCR service for a given Page:. First try to get the hOCR text layer, as it is
 * available for most books, fast and of better quality. If that fails, fall back to the
 * older and slower OCR method. hOCR fails for around 1 in 5000 books; OCR should never
 * fail as it uses the image visible on the Page:.
 */

// Language code sent to the OCR services; initialised from the wiki's content language
// (fraktur_ocr temporarily switches it to 'de-f').
var lang = mw.config.get( 'wgContentLanguage' );

/**
 * Lock or unlock the editing UI while an OCR request is in flight.
 *
 * When locking, the click handlers of both OCR buttons are removed, the edit
 * textbox is disabled and ESC is wired to unlock again. When unlocking, the
 * buttons are rebound to their OCR actions and the textbox is re-enabled.
 *
 * @param {boolean} set true to lock the UI, false to unlock it.
 */
function disable_input(set) {
	var $doc = $(document),
		$normalButton = $('#wsOcr1'),
		$frakturButton = $('#wsOcr2');

	// The ESC handler lives in its own event namespace so it can be dropped
	// again: it must not pile up on every request nor outlive the request.
	$doc.off('keyup.wsOcr');
	if (set) {
		$doc.on('keyup.wsOcr', function (e) {
			if (e.which === 27) {
				disable_input(false);
			}
		});
	}

	// Always clear first so that unlocking twice (ESC followed by a late
	// server reply) does not stack duplicate click handlers.
	$normalButton.off('click');
	$frakturButton.off('click');
	if (!set) {
		$normalButton.on('click', do_hocr);
		// The fraktur action belongs on #wsOcr2, not on #wsOcr1.
		$frakturButton.on('click', fraktur_ocr);
	}

	$('#wpTextbox1').prop('disabled', set);
}

/**
 * Handle the JSON reply of the slow OCR service.
 *
 * On error the message carried in data.text is shown to the user; otherwise
 * the recognised text is written into the edit textbox. The UI is unlocked
 * in both cases.
 *
 * @param {Object} data Reply object; reads data.error and data.text.
 */
function ocr_callback(data) {
	if (data.error) {
		alert(data.text);
	} else {
		// Checking if tb is disabled is required with Chrome as ESC doesn't kill
		// the query: an enabled textbox means the user cancelled meanwhile.
		var tb = document.getElementById("wpTextbox1");
		if (tb && tb.disabled) {
			tb.value = data.text;
		}
	}

	disable_input(false);
}

/**
 * Handle the JSON reply of the hOCR service.
 *
 * On error the request is retried through the slow OCR path (do_ocr).
 * Otherwise the raw reply is stored in localStorage.ws_hOCR, its text content
 * is extracted and reflowed, and the result is written into the edit textbox
 * before the UI is unlocked.
 *
 * @param {Object} data Reply object; reads data.error and data.text.
 */
function hocr_callback(data) {
	if (data.error) {
		// Fallback to the slow way. do_ocr locks the UI again by itself.
		disable_input(false);
		do_ocr();
		return;
	}

	// Checking if tb is disabled is required with Chrome as ESC doesn't kill
	// the query: an enabled textbox means the user cancelled meanwhile.
	var tb = document.getElementById("wpTextbox1");
	if (tb && tb.disabled) {
		try {
			localStorage.ws_hOCR = data.text;
		} catch (e) {
			// Storing the raw reply is best effort (storage may be full or
			// unavailable); it must not prevent inserting the text.
		}

		var text = $(data.text).text();

		// Ugly as hell: drop leading spaces, then map runs of 4+ newlines to a
		// blank line, runs of 2-3 newlines to a single newline, and remaining
		// single newlines to a space. Placeholders keep the steps from
		// interfering with each other.
		text = text.replace(/^ +/mg, '')
			.replace(/\n{4,}/g, '@_@_@_@')
			.replace(/\n{2,}/g, '____SPACE____')
			.replace(/\n/g, ' ')
			.replace(/____SPACE____/g, '\n')
			.replace(/@_@_@_@/g, '\n\n');
		tb.value = $.trim(text);
	}

	disable_input(false);
}

/**
 * Request the hOCR text layer of the current page.
 *
 * Locks the UI, then queries the hOCR service with the page title, the
 * current OCR language and the user name. The reply goes to hocr_callback;
 * a failed request falls back to the slow OCR path (do_ocr).
 */
function do_hocr() {
	disable_input(true);

	// Every value is percent-encoded: titles and user names may contain
	// characters such as '&', '#' or '+' that would corrupt the query string.
	var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book=' +
		encodeURIComponent(mw.config.get('wgTitle')) +
		'&lang=' + encodeURIComponent(lang) +
		'&user=' + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON(request_url).done(hocr_callback).fail(do_ocr);
}

/**
 * Run the slow OCR on the page image currently displayed.
 *
 * Does nothing when no page image (or no image source) is present. Otherwise
 * locks the UI and sends the image URL, the current OCR language and the user
 * name to the OCR service. The reply goes to ocr_callback; a failed request
 * unlocks the UI again.
 */
function do_ocr() {
	var $image = $( '.prp-page-image img' ),
		url_image = $image.length ? $image.attr('src') : null;

	if (!url_image) {
		return;
	}

	disable_input(true);

	// server side can't use protocol relative url, request it as https:
	// (only prefix when the source really is protocol relative).
	if (/^\/\//.test(url_image)) {
		url_image = 'https:' + url_image;
	}

	// The image URL is itself a URL and must be percent-encoded, otherwise
	// any '&' or '#' in it would truncate the url parameter.
	var request_url = "//tools.wmflabs.org/phetools/ocr.php?cmd=ocr&url=" +
		encodeURIComponent(url_image) +
		"&lang=" + encodeURIComponent(lang) +
		"&user=" + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON( request_url ).done( ocr_callback ).fail( function () {
		// Without this the textbox would stay disabled after a network error.
		disable_input(false);
	} );
}

/**
 * Run the OCR with the fraktur language model.
 *
 * Temporarily switches the shared OCR language to 'de-f' for the duration of
 * the do_ocr call, then resets it to the wiki's content language.
 */
function fraktur_ocr() {
	lang = 'de-f';
	try {
		// For fraktur we need to use the slow way, all hocr for 'de'
		// are done with non-fraktur.
		do_ocr();
	} finally {
		// Reset even if do_ocr throws, so later requests do not silently
		// keep using the fraktur model.
		lang = mw.config.get( 'wgContentLanguage' );
	}
}

/**
 * Add a button to the WikiEditor toolbar ('main' section, 'insert' group)
 * and set the width of the element whose rel attribute equals the button id.
 *
 * @param {Object} b Button description; reads imageId, speedTip, imageFile
 *                   and onClick.
 */
function addButtonToWikiEditorToolbar( b ){
	var buttonDefinition = {
		label: b.speedTip,
		filters: [ 'body.ns-104' ],
		type: 'button',
		icon: b.imageFile,
		action: {
			type: 'callback',
			execute: b.onClick
		}
	};

	// The tool is keyed by the button id.
	var toolSet = {};
	toolSet[ b.imageId ] = buttonDefinition;

	var toolbarEntry = {
		section: 'main',
		group: 'insert',
		tools: toolSet
	};
	$( '#wpTextbox1' ).wikiEditor( 'addToToolbar', toolbarEntry );

	var $button = $( '[rel="' + b.imageId + '"]' );
	$button.width( 42 );
}

/**
 * Add a button to the classic toolbar and route its click to b.onClick.
 *
 * Any click handler already bound to the new button is removed and replaced
 * by one that invokes b.onClick and returns false; the button width is set
 * to 46.
 *
 * @param {Object} b Button description; reads imageFile, speedTip, imageId
 *                   and onClick.
 */
function addButtonToClassicToolbar( b ){
	mw.toolbar.addButton( {
		imageFile: b.imageFile,
		speedTip: b.speedTip,
		imageId: b.imageId
	} );

	$( '#' + b.imageId ).off( 'click' ).on( 'click', function () {
		// The callback has to be invoked, not merely referenced.
		b.onClick();
		return false;
	} ).width( 46 );
}

/**
 * Add the OCR button(s) to whichever toolbar the user has enabled.
 *
 * Picks the WikiEditor toolbar when the 'usebetatoolbar' option is on, else
 * the classic toolbar when 'showtoolbar' is on, else does nothing. Once the
 * toolbar module is loaded and the DOM is ready, one OCR button is added; on
 * wikis whose content language is 'de' a second, fraktur button is added too.
 */
function customiseToolbar() {
	var modules, add, img;

	// This can be the string "0" if the user disabled the preference (T54542),
	// so the options are normalised to numbers before being compared.
	if ( Number( mw.user.options.get( 'usebetatoolbar' ) ) === 1 ) {
		modules = [ 'ext.wikiEditor' ];
		img = '//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png';
		add = addButtonToWikiEditorToolbar;
	} else if ( Number( mw.user.options.get( 'showtoolbar' ) ) === 1 ) {
		modules = [ 'mediawiki.toolbar' ];
		img = '//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png';
		add = addButtonToClassicToolbar;
	} else {
		return;
	}

	$.when(
		mw.loader.using( modules ),
		$.ready
	).then( function () {
		if ( mw.config.get( 'wgContentLanguage' ) === 'de' ) {
			add( { imageFile: img, speedTip: 'Normale OCR', imageId: 'wsOcr1', onClick: do_hocr } );
			add( { imageFile: '//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png', speedTip: 'Fraktur OCR', imageId: 'wsOcr2', onClick: fraktur_ocr } );
		} else {
			add( { imageFile: img, speedTip: 'Get the text by OCR', imageId: 'wsOcr1', onClick: do_hocr } );
		}
	} );
}

// Entry point: once the user options are available, decide whether the gadget
// applies (editing a page in the Page namespace, OCR not disabled, a toolbar
// enabled), then load the needed modules and add the OCR button(s).
mw.loader.using( 'user.options', function () {
	// User options may be stored as numbers or as strings ("0"/"1", see
	// T54542), so they are normalised to numbers before being compared.
	var isPage = mw.config.get( 'wgCanonicalNamespace' ) === 'Page',
		editing = $.inArray( mw.config.get( 'wgAction' ), [ 'edit', 'submit' ] ) !== -1,
		disableOcr = self.proofreadpage_disable_ocr,
		dependencies = [ 'jquery.textSelection', 'ext.proofreadpage.page.edit' ],
		usingBetaToolbar = Number( mw.user.options.get( 'usebetatoolbar' ) ) === 1,
		usingOldToolbar = Number( mw.user.options.get( 'showtoolbar' ) ) === 1,
		usingCodeMirror = Number( mw.user.options.get( 'codemirror-syntax-highlight' ) ) === 1;

	if ( isPage && editing && !disableOcr && ( usingBetaToolbar || usingOldToolbar ) ) {
		// Set CodeMirror dependency if required.
		if ( usingCodeMirror ) {
			dependencies.push( 'ext.CodeMirror.lib' );
		}

		// Set dependency for the relevant toolbar module.
		if ( usingBetaToolbar ) {
			dependencies.push( 'ext.wikiEditor' );
		} else if ( usingOldToolbar ) {
			dependencies.push( 'mediawiki.toolbar' );
		}

		// Now run the toolbar customisation.
		mw.loader.using( dependencies, customiseToolbar );
	}
} );