THC Science

#!/usr/bin/python
"""
This is a script that will add {{oldprodfull}} to the talk pages
of all pages in a certain category that were PRODded in the past,
and do not currently have one.

Written by The Earwig: <http://en.wikipedia.org/wiki/User:The_Earwig>
With assistance from MacMed: <http://en.wikipedia.org/wiki/User:MacMed>
"""
# Imports
import sys, urllib, re, datetime, codecs # System imports
import wikipedia, pagegenerators, catlib # Pywikipedia imports
import simplejson as json # JSON import

# Module-wide site object returned by wikipedia.getSite(); main(), check(),
# process() and putTemplate() pass it when building Category/Page objects.
# (A ``global`` statement is a no-op at module scope, so none is needed here.)
site = wikipedia.getSite()

def main():
    """Run the bot over every page of the category named on the command line.

    ``sys.argv[1]`` is either ``--version`` (print the version string and
    exit) or a category name, with or without the ``Category:`` prefix.
    Every page yielded by the recursive category generator is passed, by
    title, to check().

    Exits with status 1 when no argument is given.
    """
    # Validate the argument count *before* reading sys.argv[1]; previously the
    # '--version' comparison ran first and raised an uncaught IndexError when
    # the script was started without arguments.
    if len(sys.argv) < 2:
        print("Sorry; please specify a category to use.")
        sys.exit(1)

    if sys.argv[1] == '--version':
        print('MacMedBot is currently in alpha testing, version 0.5.7')
        sys.exit()

    category = sys.argv[1]  # Get the category from the user's input.
    if "Category:" not in category:
        category = "Category:%s" % category
    cat = catlib.Category(site, category)
    # Walk the category and all of its subcategories.
    generator = pagegenerators.CategorizedPageGenerator(cat, recurse=True)
    for page in generator:
        check(page.title())  # Process the page.
            
def check(pagename):
    """Hand *pagename* to process() unless its talk page already has {{oldprodfull}}.

    Pages outside namespace 0 are skipped.  The talk page is looked up as
    ``'Talk:' + pagename``:

    * talk page missing        -> process(pagename) is called once;
    * talk page is a redirect  -> skipped;
    * talk page text contains ``{{oldprodfull`` (case-insensitive) -> skipped;
    * otherwise                -> process(pagename) is called.
    """
    article = wikipedia.Page(site, pagename)
    if article.namespace() != 0:
        wikipedia.output(u"Not an article, skipping...")
        return

    talk = wikipedia.Page(site, 'Talk:' + pagename)
    wikipedia.output(u"\nChecking page %s for {{oldprodfull}}." % talk.aslink())
    try:
        talktext = talk.get()  # To check for {{oldprodfull}}
    except wikipedia.NoPage:
        wikipedia.output(u"Talk page does not exist, processing...")
        process(pagename)  # Process the page (no template)
        # Stop here: falling through would run the template test on empty
        # text and call process() a second time for the same page.
        return
    except wikipedia.IsRedirectPage:
        wikipedia.output(u"Talk page is a redirect, skipping...")
        return  # Do not process

    # Compare in lowercase so {{Oldprodfull}} and {{oldprodfull}} both match.
    if "{{oldprodfull" in talktext.lower():
        wikipedia.output(u"{{oldprodfull}} found on talk page, skipping...")
        return  # Template already present; nothing to do.

    wikipedia.output(u"{{oldprodfull}} not found on talk page, processing...")
    process(pagename)  # Template missing; look for an old PROD.
    

    
def process(page, max_revisions=50):
    """Search the history of *page* for a past PROD and tag its talk page.

    Args:
        page: title of the article to examine.
        max_revisions: maximum number of older revisions to fetch through
            getRevision() after the first one (default 50, the former
            hard-coded limit).

    The page is skipped if ifChecked() reports it as already logged or if
    it cannot be loaded.  Otherwise it is logged with checked().  If the
    first revision returned by getRevision() contains ``{{dated prod``
    the PROD is treated as still active and nothing is added.  Older
    revisions are then fetched one at a time; on the first one containing
    the template, putTemplate() is called for the talk page and the
    search stops.
    """
    page = wikipedia.Page(site, page)
    title = page.title()
    if ifChecked(title):
        wikipedia.output(u"This page has already been checked, skipping...")
        return
    try:
        page.get()
    except Exception:
        # Deliberately broad, as before: any failure to load the page is
        # reported as "does not exist" and the page is skipped.
        wikipedia.output(u"\nPage [[%s]] does not exist." % title)
        return
    wikipedia.output(u"\nChecking page %s." % page.aslink())
    checked(title)

    content, nextid = getRevision(title)
    # Lowercase before matching, consistent with check(), so that
    # "{{Dated prod" is recognised as well.
    if "{{dated prod" in content.lower():
        wikipedia.output(u"There is PROD on the page but it has not yet expired.")
        return

    for _ in range(max_revisions):
        if not nextid:
            # No continuation id: there is nothing further to fetch.
            break
        content, nextid = getRevision(title, nextid)
        if "{{dated prod" in content.lower():
            # The template belongs on the talk page (see module docstring and
            # check()), not on the article itself.  Return immediately so the
            # template is added once, not once per matching revision.
            putTemplate(u"Talk:" + title, content)
            return

    wikipedia.output(u"No PROD template found, moving on...")

# Log of pages already handled, one "[[Title]]" entry per line, UTF-8 encoded.
_CHECKED_LOG = "edited_pages.txt"


def checked(page):
    """Append *page* (a page title) to the log of already-handled pages."""
    wikipedia.output(u"Logging page...\nProcessing...")
    # An explicit encoding lets non-ASCII titles be written; the context
    # manager closes the file even if the write fails.
    with codecs.open(_CHECKED_LOG, "a", encoding="utf-8") as log:
        log.write(u"\n[[%s]]" % page)


def ifChecked(page=""):
    """Return True if *page* is already listed in the log written by checked().

    A missing log file means nothing has been handled yet, so the result is
    False rather than an IOError (which previously aborted the first run).
    Any other I/O error is re-raised.
    """
    import errno

    try:
        with codecs.open(_CHECKED_LOG, "r", encoding="utf-8") as log:
            entries = log.read()
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        return False
    # The surrounding brackets keep "Foo" from matching "[[Foobar]]".
    return u"[[%s]]" % page in entries


def getRevision(title, nextid=None):
    """Fetch the text of a single revision of *title* via the Wikipedia API.

    Args:
        title: page title to query.
        nextid: revision id sent as ``rvstartid``; when falsy the parameter is
            omitted (presumably the API then starts at the newest revision --
            confirm against the API documentation).

    Returns:
        A ``(content, nextid)`` tuple: the revision's wikitext and the
        ``rvstartid`` taken from the response's ``query-continue`` section,
        or None when the response has no such section.

    Raises:
        KeyError: if the response lacks the expected keys (for example when
            the page has no ``revisions`` entry).
    """
    # NOTE(review): this relies on the legacy 'query-continue' response
    # format; confirm that the API endpoint still returns it.
    params = {
        'action': 'query',
        'prop': 'revisions',
        'rvprop': 'content',
        'rvlimit': 1,
        'format': 'json',
        # urlencode() cannot handle non-ASCII unicode objects on Python 2, so
        # anything that is not already a plain str is encoded as UTF-8 first.
        'titles': title if isinstance(title, str) else title.encode('utf-8'),
    }
    if nextid:
        params['rvstartid'] = nextid

    raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", urllib.urlencode(params))
    try:
        res = json.loads(raw.read())
    finally:
        raw.close()  # Do not leak the HTTP connection.

    pages = res['query']['pages']
    pageid = next(iter(pages))  # Single-title query: take the first entry.

    # Reset before inspecting the response so the caller can tell when the
    # history is exhausted (this used to be the typo ``nextif = None``, which
    # left the caller's id in place forever).
    nextid = None
    if 'query-continue' in res:
        nextid = res['query-continue']['revisions']['rvstartid']
    content = pages[pageid]['revisions'][0]['*']
    return content, nextid

def _extract_prod_reason(content):
    """Return the ``concern`` value of the first {{dated prod}} in *content*.

    Returns an empty string when the template or its ``concern`` parameter
    cannot be found.
    """
    template = re.search(r"\{\{\s*dated prod\b(.*?)\}\}", content,
                         re.IGNORECASE | re.DOTALL)
    if template is None:
        return u""
    # Tolerate any spacing around '=' ("concern=", "concern = ", ...); the
    # value ends at the next parameter separator.
    concern = re.search(r"\|\s*concern\s*=([^|]*)", template.group(1))
    if concern is None:
        return u""
    return concern.group(1).strip()


def putTemplate(pagename, content):
    """Build the {{oldprodfull}} text for the page *pagename*.

    Args:
        pagename: title of the page the template is meant for.
        content: wikitext of a revision containing ``{{dated prod``; its
            ``concern`` parameter becomes the template's ``nomreason``.

    The new text is the template on its own line followed by the page's
    current text (empty if the page does not exist).  The actual save is
    commented out below, so at present nothing is written to the wiki.
    """
    page = wikipedia.Page(site, pagename)  # Load the page.
    try:
        oldtext = page.get()  # Get the old text, so we don't erase it.
    except wikipedia.NoPage:
        oldtext = ""  # Page doesn't exist yet: start from nothing.
    wikipedia.output(u"Saving...")

    # Locate the PROD template itself instead of assuming it is the first
    # template in the revision, and do not crash when "concern" is missing
    # or spaced differently.
    nomreason = _extract_prod_reason(content)
    # Template first, then a newline, so it is not glued to the first line
    # of the existing text.
    newtext = u"{{oldprodfull|nomreason=%s}}\n%s" % (nomreason, oldtext)
    # Saving is disabled; uncomment to write the page.
    # page.put(newtext, comment="([[WP:BOT|Bot]]): Adding {{oldprodfull}} to PRODded page.") # Use whatever edit summary you want here.
        

# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Runs on every exit path, including exceptions and the SystemExit
        # raised by sys.exit() inside main().
        wikipedia.stopme()
THC Science

Bringing Science to the Cannabis Conversation!

Cannabis Ruderalis

Leave a Reply