User:TalBot/ceom equivalence.py


#! /usr/bin/env python
# -*- coding: utf8 -*-
#
# Part of the Complete Encyclopedia of Music standardisation:
# Gather equivalence classes of pages by capitalisation, convert all but one
# member of a class to soft redirects
#
# Copyright (C) 2007, GrafZahl (en.wikisource.org user)
#
# run with standard args "-log -putthrottle:xx"

import pagegenerators, wikipedia

# Set the delay of the framework's read throttle.
wikipedia.get_throttle.setDelay(5)

# This script defines no options of its own: every argument that
# handleArgs() hands back is reported and otherwise ignored.
for arg in wikipedia.handleArgs():
    wikipedia.output(u'(WWW) ignoring unrecognised argument: %s' % arg)


# Basic stuff

site = wikipedia.getSite()

# Prompt shown by user_choice(), with its answers, hotkeys and default.
q_text = u'(???) Create what redirect?'
q_answer = [u'1 -> 2', u'2 -> 1', u'None']
q_hotkeys = [u'1', u'2', u'N']
q_default = u'N'

# Templates for the replacement page text and for the edit summary; both
# are filled in with a link to the redirect target.
# NOTE(review): the redirect creation loop skips pages whose text contains
# u'oft redirect', but this page-text template carries no such marker --
# confirm that no template markup was lost from the literal.
base_redirover = u'"%s"'
base_summ = u'[bot] converting to soft redirect to %s'


# Page generators

# Title prefix handed to the generator; presumably it yields the pages
# whose titles start with this prefix -- verify against pagegenerators.
ceom_prefix = u'Complete Encyclopaedia of Music'
ceom_pages = pagegenerators.PrefixingPageGenerator(ceom_prefix)


# Equivalence classes


# There does not appear to be a multimap type present in Python. I'll use a
# normal mapping (dict) from each key to a set of titles instead

# Maps each lower-cased title to the set of actual titles that share it.
eqc = {}

# assess pages

wikipedia.output(u'(III) Assessing pages')

for page in ceom_pages:
    try:
        title = page.title()
        # Titles that differ only in capitalisation end up under one key.
        eqc.setdefault(title.lower(), set()).add(title)
    except wikipedia.Error:
        wikipedia.output(u'(EEE) Error assessing page %s' % page.title())


# print equivalence classes

wikipedia.output(u'(III) Equivalence classes:')

# One bullet per class key, with one sub-bullet per title in that class.
for key, titles in eqc.items():
    wikipedia.output(u'\n* %s:' % key)
    for title in titles:
        wikipedia.output(u'** %s' % title)


# Interactive redirect creation
# We keep it simple:
# - refuse to change classes with more than two pages that are not redirects
# - ask user which page should be the redirect

def user_choice(page1, page2):
    """Show a diff of two pages and let the user turn one into a redirect.

    The texts of page1 and page2 are diffed, then the user is asked whether
    page1 should become a redirect to page2 or vice versa.  The chosen page
    is overwritten with the text built from base_redirover; any other
    answer leaves both pages untouched.

    Arguments:
    page1, page2 -- page objects providing title(), get(), aslink() and put()

    Returns None.
    """
    wikipedia.output(u'     (III) Difference between texts 1 = %s and 2 = %s'
                     % (page1.title(), page2.title()))
    wikipedia.showDiff(page1.get(), page2.get())
    # A copy of the answer list is passed, so whatever inputChoice does with
    # its argument cannot affect the module-level list.
    choice = wikipedia.inputChoice(q_text, q_answer[:], q_hotkeys, q_default)
    # Map the answer to (page to overwrite, page to point at).
    if choice == u'1':
        source, target = page1, page2
    elif choice == u'2':
        source, target = page2, page1
    else:
        return
    textlink = target.aslink(textlink = True)
    source.put(base_redirover % textlink, base_summ % textlink,
               minorEdit = False)

wikipedia.output(u'\n(III) redirect creation:')

for key in eqc:
    try:
        wikipedia.output(u'  (III) processing equivalence class %s' % key)
        # Collect the pages of this class whose text does not contain the
        # soft-redirect marker.  The marker omits its first letter,
        # presumably to match both capitalisations -- TODO confirm.
        contentful = []
        for title in eqc[key]:
            page = wikipedia.Page(site, title)
            if u'oft redirect' not in page.get():
                contentful.append(page)
        if not contentful:
            wikipedia.output(u'     (EEE) no content for this key')
        elif len(contentful) == 1:
            wikipedia.output(u'     (III) singleton, doing nothing')
        elif len(contentful) == 2:
            # offer choice to user
            user_choice(*contentful)
        else:
            wikipedia.output(u'     (WWW) complicated situation for this key, doing nothing')
    except wikipedia.Error:
        wikipedia.output(u'(EEE) Error processing class %s' % key)