User:Pathoschild/standardise-dev.js

/************************
 * This is the cutting-edge development version, and is frequently
 * broken. You should use User:Pathoschild/standardise.js instead.
 ************************/

// NOTE(review): this copy seems to have lost most of its line breaks. The
// function header on the next line sits inside a block comment, the three
// banner lines after it are outside any comment, and every double-slash
// comment swallows the statements that follow it on the same physical line.
// Restore the original line structure before relying on this code.
// regex, regsearch, setreason, editbox and wgNamespaceNumber are not defined
// in this view; presumably they come from the Regex menu framework and
// MediaWiki globals - confirm.
/************* function standardize { /******************	*** Content exceptions ******************/	/* exception pattern */ var pattern = '<(nowiki|poem|pre)[^>]*>[\\s\\S]*?'; // double-escaping needed for RegExp
 * Wikisource standardization extension (development version)
 * for the Regex menu framework 1.2+, http://meta.wikimedia.org/wiki/User:Pathoschild/Script:Regex_menu_framework
 * by m:user:Pathoschild

// Collects every match of pattern in editbox.value (case-insensitive, global)
// so the matched text can be put back after the other rewrites have run.
// NOTE(review): pattern ends in a lazy quantifier with nothing after it, so as
// written it matches only the opening nowiki, poem or pre tag; a closing-tag
// part was probably lost - confirm against the upstream script.
/* store exceptions in an array */ var patternlocal = new RegExp(pattern, 'ig'); var exceptionvalues = editbox.value.match(patternlocal);

// Swaps each stored exception for the literal placeholder ~exception~. The
// regex here is not global, so each loop pass replaces one occurrence.
if(editbox.value.match(pattern)) { /* replace exceptions with placeholders */ var patternlocal = new RegExp(pattern, 'i'); for(var x=0; x<exceptionvalues.length; x++) { editbox.value = editbox.value.replace(patternlocal, '~exception~'); }	}

// Main namespace branch: entered when wgNamespaceNumber loosely equals the
// string 0.
// NOTE(review): the statement starting with editbox.value.match is cut off -
// the call carries replace-style arguments and the if has no closing
// parenthesis or body, so text appears to be missing here.
/*******************	*** Mainspace *******************/	if(wgNamespaceNumber=='0') { /*******************		*** header normalization *******************/		/* prepare template for parsing if present */ if(editbox.value.match(/\s*%%newline%%/ig,'}}\n'); // after

// The following group turns %%newline%% markers back into real newlines around
// images, categories, interwiki links, list items, tables, rules and tags.
// As laid out here each of these lines begins with a line comment, so none of
// the regex calls on them currently execute.
// images, categories, interwiki links regex(/%%newline%%(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])/ig,'\n$1'); regex(/(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])%%newline%%/ig,'$1\n');

// lists regex(/%%newline%%([*#:;])/ig,'\n$1'); // lists regex(/([*#:;][^\n]*)%%newline%%/ig,'$1\n'); // newlines closing list items

// tables regex(/%%newline%%{\|/ig,'\n{|'); regex(/{\|%%newline%%/ig,'{|\n');

// NOTE(review): the two rules patterns below contain a group holding only a
// plus sign, which is not a valid regular expression; characters were
// probably lost from them.
// rules regex(/%%newline%%(+)/g,'\n$1'); regex(/(+)%%newline%%/g,'$1\n'); // tags regex(/(<[^>\n]+>)\s*%%newline%%/ig,'$1\n'); regex(/%%newline%%(<[^>\n]+>)/ig,'\n$1');

// The next long line removes the remaining %%newline%% markers (joining
// hyphenated words), restores %%pipe%% delimiters and closes the main
// namespace branch. It then opens the Author namespace branch
// (wgNamespaceNumber loosely equal to the string 102), which marks author
// template parameters with a tilde delimiter, inserts a standard author
// template, moves matching parameter values into it, fills empty birthyear and
// deathyear from births and deaths categories or from the old dates parameter,
// and fills empty firstname and lastname by splitting the name parameter at
// its last run of whitespace.
// NOTE(review): the %%leftcurlies%% restore and the trailing-whitespace cleanup
// use empty replacement strings, and no %%rightcurlies%% restore is visible;
// replacement text may have been stripped from this copy - confirm.
// NOTE(review): deathyear>birthyear compares two strings produced by replace,
// so the comparison is lexicographic rather than numeric.
// NOTE(review): the deaths category pattern is split across the line break
// between this line and the next one.
/* remove remaining */ regex(/-%%newline%%([^\s])/ig,'-$1'); // hyphenated words regex(/\s*%%newline%%\s*/ig,' '); // all others /*******************		*** Cleanup *******************/		/* restore delimiters */ regex(/%%pipe%%/g,'|'); regex(/%%leftcurlies%%/g,''); }	/*******************	*** Authorspace *******************/	if(wgNamespaceNumber=='102') { /*******************		*** normalization *******************/		/* fix delimiters */ regex(/[\n\s]*\|\s*((?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|name|defaultsort)\s*=\s*/ig,'~$1='); // author parameter delimiters regex(/({{author[\s\S]*?{{[^\|}]*)\|/ig,'$1%%pipe%%',5); // other template pipes regex(/({{author[\s\S]*?){{([^}]+)}}/ig,'$1%%leftcurlies%%$2%%rightcurlies%%',5); // other template delimiters /* cleanup */ regex(/{{(author[^}]+)\s*}}/i,''); // rm trailing whitespace /* place standard template and move like parameters */ regex(/{{author/i,'{{author\n |firstname     =\n |lastname       =\n |last_initial   =\n |birthyear      =\n |deathyear      =\n |description    =\n |image          =\n |wikipedia_link =\n |wikiquote_link =\n |commons_link   =\n}}\n{{author'); regex(/(author[\s\S]*?\|((?:first|last)?name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|defaultsort)\s*)=([\s\S]*?)~?\2=([^~]*)/i,'$1=$4$3',10); /* get dates if necessary */ if(regsearch(/(?:birth|death)year\s*=\s*\n/)) { // cannibalise categories regex(/(birthyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) births\s*[^\]]*\]\]/,'$1=$3$2'); regex(/(deathyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) 
deaths\s*[^\]]*\]\]/,'$1=$3$2'); // if that failed, parse from old template if(regsearch(/(?:birth|death)year\s*=\s*\n/) && regsearch(/~dates=[^~]/)) { /* get dates */ // get raw parameter var olddates = editbox.value.replace(/^[\s\S]*dates=[^\d~}]*([^~}]+)[\s\S]*$/,'$1'); // raw parameter olddates = olddates.replace(/^(\d+)\s*BC?E/ig,'$1 BCE'); // fix eras // extract dates var birthyear = olddates.replace(/^(\d+(?: BCE)?)[\s\S]*$/ig,'$1'); var deathyear = olddates.replace(/^\d+[^\d]+?(\d+(?: BCE)?)$/ig,'$1'); /* fill in empty parameters */ if(regsearch(/birthyear\s*=\s*\n/)) { regex(/(birthyear\s*)=/,'$1='+birthyear); }				if(regsearch(/deathyear\s*=\s*\n/) &&deathyear>birthyear) { regex(/(deathyear\s*)=/,'$1='+deathyear); }			}		}		/* get names */ if(regsearch(/(?:first|last)name\s*=\s*\n/)) { // cannibalise name field if(regsearch(/(?:first|last)name\s*=\s*\n/)) { // extract var name = editbox.value.replace(/^[\s\S]*~name=([^~}]*)[\s\S]*/,'$1'); var firstname = name.replace(/([\s\S]+)\s+[\s\S]*/,'$1'); var lastname = name.replace(/[\s\S]+\s+([\s\S]*)/,'$1'); // fill in empty parameters if(regsearch(/firstname\s*=\s*\n/)) { regex(/(firstname\s*)=/,'$1='+firstname); }				if(regsearch(/lastname\s*=\s*\n/)) { regex(/(lastname\s*)=/,'$1='+lastname); }			}		}		/* cleanup */ // remove old template regex(/({{author[\s\S]*?)[\n\s]*{{author[^}]*}}[\n\s]*/ig,'$1\n\n');

// The next two physical lines restore delimiters, normalize whitespace after
// author parameter names, remove year and era author categories, update
// license templates and date formatting, and close the Author branch. They
// then run the namespace-independent cleanup (templates, headers, categories,
// links, list markers) and pull categories, interlanguage links and PD- or
// GFDL license templates out of the text so they can be re-appended below.
// NOTE(review): both interlanguage patterns contain an unmatched closing
// parenthesis, so part of each pattern appears to be missing.
// NOTE(review): interwikilinks.splice(x,0) has a delete count of zero and
// therefore removes nothing, so the known non-interlanguage prefixes are not
// actually filtered out of the array.
// NOTE(review): the pattern variable built with new RegExp from
// interwikilinks[x] is never used; the regex call after it uses a literal.
// NOTE(review): caseless assigns a.toLowerCase and b.toLowerCase without
// calling them. For string arguments both become the same method reference,
// neither comparison is true, and the comparator always returns 0.
// restore delimiters regex(/%%pipe%%/g,'|'); regex(/%%leftcurlies%%/g,''); // fix whitespace regex(/((?:(?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link)\s*)=\s*/ig,'$1= '); regex(/= \|/g,'= \n |'); regex(/= }}/g,'= \n}}'); /* remove old categories */ regex(/\[\[\s*Category\s*:\s*\d+[^\]]*?(?:births|deaths)[^\]]*\]\]\n?/ig,''); // authors by year regex(/\[\[\s*Category\s*:\s*(?:Ancient|Early modern|Medieval|Modern|Renaissance) authors[^\]]*\]\]\n?/ig,''); // authors by era /*******************		*** Other tweaks *******************/		/* update license templates */ regex(/{{\s*(?:msg:|template:)?(?:author-)?(PD-[^\|\}]+)(?:\|[^}]*)?}}/ig,''); /* normalize dates */ regex(/^([#*:]+ \[\^\+\]\]),\s*(\d+)/mig,'$1 ($2)'); }	/*******************	*** miscellaneous cleanup *******************/	/*  templates */ regex(/{{\s*(?:msg:|template:)?([^}]+)}}/ig,''); /* syntax */ // headers regex(/\n*^(=+)\s*(.*?)\s*\1\s*/mig,'\n\n$1$2$1\n'); // whitespace regex(/=\n+=/ig,'=\n='); // fix consecutive headers // categories regex(/\[\[\s*category\s*:\s*([^\|\]]+)(?:\s*(\|)([^\]]*))?\s*\]\]/ig,''); //links regex(/\[\[\s*([^\|\]]+?)\s*(?:(\|)\s*([^\]]+?)\s*)?\]\]/ig,'$1$2$3'); // redundant starting and ending whitespace regex(/\[\[([^\|\]]+?)\s*\|\s*\1\]\]/ig,'$1'); // redundant link text regex(/\[\[([^\|\]]+?)_/ig,'[[$1 ',5); // underscores // lists regex(/^([*#:]+)\s*/mig,'$1 '); /*******************	*** sort elements *******************/	/* store elements and remove from code */ // categories var categories = regsearch(/\[\[category:[^\]]+\]\]/ig); regex(/\[\[category:[^\]]+\]\]\n?/ig,''); // interlanguage links var interwikilinks = regsearch(/\[\\+)?:[^\]]+\]\]/ig); // get codes if(interwikilinks) { for(var x in interwikilinks) { // filter out known non-interlanguage prefixes if(interwikilinks[x].match(/\[\[(?:c2|cej|dcc|mw|rev|rfc|svn|wqy):/i)) { interwikilinks.splice(x,0); }			else { var pattern = new 
RegExp(interwikilinks[x]+'\n?','ig'); regex(/\[\\+)?:[^\]]+\]\]\n?/ig,''); }		}	}	// license templates var licenses = regsearch(/{{(?:PD-|GFDL)[^}]*}}/ig); regex(/{{(?:PD-|GFDL)[^}]*}}\n?/ig,''); /* sort and re-add */ // compare function for case-insensitivity // courtesy  function caseless(a,b) { var a = a.toLowerCase; var b = b.toLowerCase; if (a < b) return -1; if (a > b) return 1; return 0; }

// Replaces any trailing whitespace at the end of the text with exactly two
// newlines before the stored elements are appended.
// initial whitespace regex(/[\s\n]*$/,'\n\n');

// Appends each stored license template on its own line, followed by one extra
// newline. The arrays here are walked with for-in, which iterates index keys.
// licenses if(licenses && licenses.length>0) { // licenses for(var x in licenses) { editbox.value = editbox.value+licenses[x]+'\n'; }		// whitespace editbox.value = editbox.value+'\n'; }

// Sorts the stored categories with caseless and appends them one per line,
// then does the same for the stored interlanguage links.
// categories if(categories && categories.length>0) { // sort and place categories.sort(caseless); for(var x in categories) { editbox.value = editbox.value+categories[x]+'\n'; }		// whitespace editbox.value = editbox.value+'\n'; }	// interlanguage links if(interwikilinks) { // sort and place interwikilinks.sort(caseless); for(var x in interwikilinks) { editbox.value = editbox.value+interwikilinks[x]+'\n'; }	}

// Puts the stored exception text back: each loop pass replaces the first
// remaining ~exception~ placeholder with the next stored value, in order.
// Finally sets the edit summary through setreason.
// NOTE(review): exceptionvalues[i] is passed to replace as a replacement
// string, so dollar sequences inside the protected text are interpreted as
// replacement patterns instead of being restored verbatim.
/* restore exceptions */ if(editbox.value.match(/~exception~/)) { /* restore placeholders */ for(var i=0; i<exceptionvalues.length; i++) { var pattern = new RegExp('~exception~'); editbox.value = editbox.value.replace(pattern, exceptionvalues[i]); }	}	/* edit summary */ setreason('standardization, updates, and cleanup with regex'); } //