// User:CES1596/OCR2.js

//OCR tool based on en:MediaWiki:OCR.js

// Language code sent as the `lang` query parameter to the web OCR services.
var lang = 'en';

// Language code passed to the Tesseract worker's recognize() call.
var language = 'eng';

/**
 * On DOM ready, add four OCR buttons (HOCR, OCR, TOCR, GOCR) to the
 * "#p-namespaces ul" list. Each button runs its OCR routine when clicked
 * and then turns black to show it has been used.
 */
$(function () {
    // The buttons are only offered in namespace 104
    // (presumably the ProofreadPage "Page" namespace - confirm for this wiki).
    if (mw.config.get('wgNamespaceNumber') !== 104) {
        return;
    }

    /**
     * Append one button and wire its click handler.
     *
     * @param {string} name   Short tool name; yields the id "<name>0_btn"
     *                        and the log line "do_<name> done".
     * @param {string} label  Text shown on the button.
     * @param {string} title  Tooltip text.
     * @param {Function} action  OCR routine to run on click.
     */
    function addOcrButton(name, label, title, action) {
        var buttonId = name + '0_btn';
        var selector = '#' + buttonId;

        // The element must be real markup: a bare string such as " HOCR " is
        // treated by jQuery as a selector and matches nothing. The
        // li > span > a nesting mirrors the "#<id> span a" selector that the
        // click handler recolours.
        $('#p-namespaces ul').append(
            $('<li><span><a> ' + label + ' </a></span></li>')
                .attr({ id: buttonId, title: title })
                .css({ color: '#0000ff', cursor: 'pointer' })
        );

        $(selector).click(function () {
            action();
            console.log('do_' + name + ' done');
            $(selector + ', ' + selector + ' span a').css({ color: '#000000' });
        });
    }

    addOcrButton('hocr', 'HOCR', 'do HOCR', do_hocr);
    addOcrButton('ocr', 'OCR', 'do OCR', do_ocr);
    addOcrButton('tocr', 'TOCR', 'do Tesseract OCR', do_tocr);
    addOcrButton('gocr', 'GOCR', 'do Google OCR', do_gocr);
});

/**
 * Handle the JSON answer of the hOCR request.
 *
 * If the answer reports an error, fall back to do_ocr(). Otherwise
 * data.text is parsed as markup and its text content is written into the
 * edit box (#wpTextbox1).
 *
 * @param {Object} data  Parsed JSON response; `error` and `text` are read.
 */
function hocr_callback(data) {
    if (data.error) {
        console.log('data error');
        // No usable hOCR answer: try the plain OCR service instead.
        do_ocr();
        return;
    }

    var textbox = document.getElementById('wpTextbox1');
    if (!textbox) {
        // Nothing to fill in when the page is not open in the editor.
        console.log('hocr callback: wpTextbox1 not found');
        return;
    }

    // Keep only the text content of the returned markup.
    textbox.value = $(data.text).text();
    console.log('hocr callback done');
}

/**
 * Handle the JSON answer of the plain OCR request.
 *
 * On error the returned text is shown in an alert; otherwise it is written
 * verbatim into the edit box (#wpTextbox1).
 *
 * @param {Object} data  Parsed JSON response; `error` and `text` are read.
 */
function ocr_callback(data) {
    if (!data.error) {
        var editBox = document.getElementById("wpTextbox1");
        editBox.value = data.text;
        console.log('ocr callback done');
        return;
    }

    // In the error case `text` is shown to the user as-is.
    alert(data.text);
    console.log('data error');
}

/**
 * Ask the phetools hocr_cgi.py service for the hOCR of the current page
 * (identified by wgTitle) and hand the JSON answer to hocr_callback.
 */
function do_hocr() {
    // Every dynamic value is percent-encoded so that titles or user names
    // containing "&", "#", spaces, etc. cannot break the query string.
    var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr' +
        '&book=' + encodeURIComponent(mw.config.get('wgTitle')) +
        '&lang=' + lang +
        '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    console.log('hocr url defined');

    $.getJSON(request_url).done(hocr_callback);
    // Logged once the request has been issued; the answer arrives later.
    console.log('hocr getJSON done');
}

/**
 * Send the page scan (the image inside .prp-page-image) to the phetools
 * ocr.php service and hand the JSON answer to ocr_callback.
 * Does nothing when no page image is present.
 */
function do_ocr() {
    var $image = $('.prp-page-image img');
    if ($image.length === 0) {
        return;
    }

    // Only a protocol-relative src ("//host/...") needs the scheme added;
    // an already absolute URL is passed through unchanged.
    var src = $image.attr('src');
    var url_image = /^\/\//.test(src) ? 'https:' + src : src;

    // Percent-encode the values so characters such as "&" or "?" inside the
    // image URL or user name do not end the parameter early.
    var request_url = '//tools.wmflabs.org/phetools/ocr.php?cmd=ocr' +
        '&url=' + encodeURIComponent(url_image) +
        '&lang=' + lang +
        '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    console.log('ocr url defined');

    $.getJSON(request_url).done(ocr_callback);
    // Logged once the request has been issued; the answer arrives later.
    console.log('ocr getJSON done');
}

/**
 * Run Tesseract.js in the browser on the page scan (the image inside
 * .prp-page-image) and put the recognised text into the edit box.
 * Shows a notice and stops when no page image is present.
 */
function do_tocr() {
    var $image = $('.prp-page-image img');
    if ($image.length === 0) {
        mw.notify('tocr image not found');
        console.log('tocr image not found');
        // Without an image there is nothing to recognise.
        return;
    }

    // Only a protocol-relative src ("//host/...") needs the scheme added.
    var src = $image.attr('src');
    var imageUrl = /^\/\//.test(src) ? 'https:' + src : src;

    $.getScript(
        'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/tesseract.min.js',
        function () {
            // `Tesseract` is the global defined by the script just loaded.
            var { TesseractWorker } = Tesseract;
            var worker = new TesseractWorker({
                workerPath: 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/worker.min.js',
                langPath: 'https://tools.wmflabs.org/tessdata/4.0.0',
                corePath: 'https://tools.wmflabs.org/tessdata/core/tesseract-core.wasm.js',
            });

            worker
                .recognize(imageUrl, language)
                .then(processTesseractResult);
        }
    );
    // Logged once the script load has been started; recognition runs later.
    console.log('tocr getScript done');
}

/**
 * Write the Tesseract result into the edit box (#wpTextbox1).
 *
 * Deliberately not named processOcrResult: the file declares a function of
 * that name for the Google OCR response, and two declarations with the same
 * name would leave only the later one in effect.
 *
 * @param {Object} result  Recognition result; only `text` is read.
 */
function processTesseractResult(result) {
    if (result.text === undefined || result.text.length === 0) {
        mw.notify('tocr no text');
        console.log('tocr no text');
        return;
    }
    $('#wpTextbox1').val(result.text);
    console.log('tocr callback done');
}

/**
 * Send the page scan (the image inside .prp-page-image) to the
 * ws-google-ocr tool and pass the answer to processOcrResult.
 * Shows a notice and stops when no page image is present.
 */
function do_gocr() {
    var $image = $('.prp-page-image img');
    if ($image.length === 0) {
        mw.notify(mw.msg('google-ocr-image-not-found'));
        console.log('gocr image not found');
        // Without an image there is nothing to send.
        return;
    }

    // Only a protocol-relative src ("//host/...") needs the scheme added.
    var src = $image.attr('src');
    var imageUrl = /^\/\//.test(src) ? 'https:' + src : src;

    var toolUrl = '//tools.wmflabs.org/ws-google-ocr/api.php';
    // Percent-encode the image URL so any "&" or "?" inside it stays part of
    // the `image` parameter.
    var requestUrl = toolUrl + '?image=' + encodeURIComponent(imageUrl) + '&lang=' + lang;

    // Same handler for success and failure, for simplicity.
    $.getJSON(requestUrl)
        .done(processOcrResult)
        .fail(processOcrResult);
}

/**
 * Handle the answer of an OCR request.
 *
 * - If `response.responseJSON` is defined and carries an `error`, show a
 *   notice with the error code and message.
 * - If `response.text` is undefined or empty, show the "no text" notice.
 * - Otherwise write the text into the edit box (#wpTextbox1).
 *
 * @param {Object} response  Object whose `responseJSON` and `text` are read.
 */
function processOcrResult(response) {
    var payload = response.responseJSON;
    var failed = payload !== undefined && payload.error;

    if (failed) {
        var details = [mw.msg('error'), payload.error.code, payload.error.message];
        mw.notify(details.join(' '));
        console.log('gocr error');
        return;
    }

    var recognised = response.text;
    var hasText = !(recognised === undefined || recognised.length === 0);
    if (!hasText) {
        mw.notify(mw.msg('google-ocr-no-text'));
        console.log('gocr no text');
        return;
    }

    $('#wpTextbox1').val(recognised);
}