# Jump to content
#
# User:Lemmey/TheDaily
#
# From Wikipedia, the free encyclopedia
from BeautifulSoup import BeautifulStoneSoup
import datetime
import urllib
import wikipedia ##replace with whoipedia for IP usage
import time
##import sentance_case
import re
 
 
# Separator text that AddtoPage places after each news item it inserts.
rtnln = u'''
 
'''
 
def AddtoPage(PageName,newtext,checklink,message):
    """Insert a news item into a wiki page unless the page already has it.

    PageName  -- title of the page to load through the wikipedia module.
    newtext   -- wiki markup of the item to insert.
    checklink -- text (the story link) used as the duplicate check: if the
                 page already contains it, nothing is changed.
    message   -- edit summary handed on to save_page.

    The item is placed directly in front of the
    '<!-- All news items above this line -->|}' marker, followed by rtnln.
    If the marker is not on the page the page is left alone.
    """
    end_marker = '<!-- All news items above this line -->|}'

    Page = wikipedia.Page(wikipedia.getSite(), PageName)
    Pagetext = Page.get()

    # Already listed: nothing to do.
    if checklink in Pagetext:
        return

    position = Pagetext.find(end_marker)
    if position == -1:
        # find() returned -1; slicing with it would splice the item in
        # front of the last character of the page instead of the marker.
        wikipedia.output(u'Skipping %s: end-of-news marker not found' % PageName)
        return

    # newtext is deliberately not encoded to UTF-8 bytes here: mixing an
    # encoded byte string with the page text fails on non-ASCII characters.
    updated = Pagetext[:position] + newtext + rtnln + Pagetext[position:]
    save_page(Page, Pagetext, updated, message)
 
def save_page(page,oldtext,newtext,message):
    """Show a diff, ask for confirmation and, if accepted, save the page.

    page    -- page object whose put() method stores the new text.
    oldtext -- text currently on the page (used for the diff only).
    newtext -- text to store.
    message -- edit summary.

    Nothing is saved unless the operator answers 'y'. After a successful
    save the function sleeps 69 seconds before returning. Edit conflicts
    and other save errors are reported and the page is skipped; they are
    not re-raised.
    """
    # Two spaces reproduce the output of the former
    # `print "Message: ",message` statement.
    print("Message:  %s" % (message,))
    wikipedia.showDiff(oldtext, newtext)
    choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
    if choice != 'y':
        return

    try:
        # Save the page
        page.put(newtext,minorEdit=False,comment=message)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % page.title())
    except Exception as error:
        # Best effort: report the failure instead of silently hiding it.
        wikipedia.output(u'Could not save %s: %r' % (page.title(), error))
    else:
        # Pause between edits; only needed when something was written.
        time.sleep(69)
 
def sortstory(a,b):
    """Three-way compare two stories, ignoring case and non-alphanumerics.

    Both strings are upper-cased and stripped of every character that is
    not a letter or digit before being compared, so wiki markup, bullets
    and punctuation do not influence the order.

    Returns -1, 0 or 1 like the classic cmp() contract. Written without
    the cmp() builtin and without relying on filter() returning a string,
    so it behaves the same on Python 2 and Python 3.
    """
    def normalise(story):
        return ''.join(ch for ch in story.upper() if ch.isalnum())

    a = normalise(a)
    b = normalise(b)
    return (a > b) - (a < b)
 
 
 
def AlphaBeta(Pagetext):
    """Return Pagetext with its news items sorted alphabetically.

    The news section is the text between the markers
    '<!-- All news items below this line -->' and
    '<!-- All news items above this line -->|}'. Every top-level bullet
    ('*') in that section is one story. Stories are ordered by their
    upper-cased alphanumeric characters only, so markup and punctuation
    are ignored. Second-level bullets ('**') stay attached to the story
    they follow. Fragments of four characters or fewer are dropped, as
    before.

    If either marker is missing, or they are in the wrong order, the text
    is returned unchanged.

    Known limitation kept from the original: deeper bullet nesting
    ('***') is not handled.
    """
    start_marker = '<!-- All news items below this line -->'
    end_marker = '<!-- All news items above this line -->|}'

    marker_at = Pagetext.find(start_marker)
    end = Pagetext.find(end_marker)
    if marker_at == -1 or end == -1:
        # Without both markers the slice positions would be meaningless.
        return Pagetext
    start = marker_at + len(start_marker)
    if end < start:
        return Pagetext

    stories = Pagetext[start:end]
    stories = stories.replace("* ","*")
    # Hide sub-bullets so that only top-level bullets split the text.
    stories = stories.replace("**","%%")

    def sort_key(story):
        return ''.join(ch for ch in story.upper() if ch.isalnum())

    # Split on the bullet character only. The former pattern '[\b~+]' was
    # a character class, so it also split on '+' and removed it from text.
    story_list = sorted(stories.split("*"), key=sort_key)
    entries = ['*'+story+'\n' for story in story_list if len(story) > 4]

    stories = ''.join(entries)
    stories = stories.replace("\n\n","\n")
    stories = stories.replace("%%","**")

    return Pagetext[:start]+"\n"+stories+Pagetext[end:]
 
 
 
def MakePageName(date):
    """Build the daily Current events page title for a date string.

    date -- year, month and day separated by '-' (or spaces), for example
            "2008-05-30".

    Returns "Portal:Current_events/<year> <month name> <day>", with the
    day written without a leading zero, e.g.
    "Portal:Current_events/2008 May 30".

    Raises ValueError when the string does not consist of exactly three
    integer fields or does not describe a real calendar date.
    """
    # Split into fields instead of slicing fixed columns: the old month
    # slice was off by one and mis-read October, November and December.
    y, m, d = [int(field) for field in date.replace("-"," ").split()]

    now = datetime.datetime(y,m,d).strftime('%Y %B ')
    PageName = "Portal:Current_events/" + str(now)+str(d)

    return PageName
 
def SentanceCase(oldtext):
    """Sentence-case a headline while keeping its all-caps words intact.

    The text is passed to sentance_case.sentence_caser, then every word
    that was fully upper-case in the input (split on single spaces) is
    restored to its original spelling in the result.

    NOTE(review): the module-level `import sentance_case` is commented out
    in this file, so this function raises NameError until that import is
    restored. What sentence_caser returns is not visible here; the code
    assumes it lower-cases such words -- confirm.
    """
    newtext=sentance_case.sentence_caser(oldtext)
    sentance = oldtext.split(" ")

    for word in sentance:
        if word.isupper():
            # Restore only whole space-delimited words. A plain substring
            # replace also rewrote word beginnings (" us" in " used").
            pattern = r'(?<= )' + re.escape(word.lower()) + r'(?!\S)'
            newtext = re.sub(pattern, lambda match, original=word: original, newtext)
    if sentance[0].isupper():
        # The first word has no leading space and comes back capitalised.
        newtext=newtext.replace(sentance[0].capitalize(),sentance[0],1)
    return newtext
 
def WikiLink(oldtext):
    """Wrap likely proper nouns in [[wiki link]] markup.

    The text is split on single spaces and processed word by word:

    * a run of consecutive title-case words (anything for which
      str.istitle() is true, except the exact word "The") becomes one
      link, e.g. "Serbian President" -> "[[Serbian President]]";
    * any other word that is entirely upper-case gets its own link,
      e.g. "NATO" -> "[[NATO]]";
    * everything else is left as it is.

    A '.' or ',' that ends up directly before a closing ']]' is moved
    outside the link. The result always ends with one trailing space,
    which callers rely on when appending the source link.

    Each word is handled once, in place. The earlier implementation
    replaced words globally, which wrapped repeated words twice and hit
    substrings, and it never closed a link at the very end of the text.
    """
    pieces = []
    run = []  # consecutive title-case words waiting to share one link

    def close_run():
        if run:
            pieces.append('[[' + ' '.join(run) + ']]')
            del run[:]

    for word in oldtext.split(" "):
        if word.istitle() and word != "The":
            run.append(word)
            continue
        close_run()
        if word.isupper():
            word = '[[' + word + ']]'
        pieces.append(word)
    close_run()

    ###Put the words back together
    nt = ' '.join(pieces) + ' '

    ###Replace punct links
    nt=nt.replace(".]]","]].")
    nt=nt.replace(",]]","]],")
    return nt
 
 
def setimes():
    """Fetch the Southeast European Times RSS feed and build news entries.

    Returns a list of [PageName, text, link, date] lists, one per feed
    item whose description does not contain '...'. PageName comes from
    MakePageName(date); text is a wiki bullet made of the description run
    through WikiLink, followed by an external link to the story.
    """
    array = []
    f = urllib.urlopen("http://www.setimes.com/cocoon/setimes/rss/en_GB/setimes.rss")
    try:
        xml = f.read()
    finally:
        # Release the connection even when the download fails part-way.
        f.close()

    soup = BeautifulStoneSoup(xml)

    for item in soup('item'):
        i = BeautifulStoneSoup(str(item))
        # Items whose description contains '...' are skipped
        # (presumably truncated summaries -- confirm).
        if '...' in i.description.string:
            continue

        date = str(i('dc:date')).strip('[]')
        # Drops a fixed 9-character prefix and 10-character suffix;
        # presumably the <dc:date> and </dc:date> tags -- confirm.
        date = date[9:-10]
        PageName = MakePageName(date)
        link = i.link.string.strip()
        text = "*"+WikiLink(i.description.string)+ "["+link+" (Southeast European Times)]"

        array.append([PageName,text,link,date])
    return array
 
def VOA():
    """Fetch the VOA top-stories RSS feed and build news entries.

    Returns a list of [PageName, text, link, date] lists, one per feed
    item. The text is a wiki bullet: the title run through SentanceCase,
    then "; ", the description and ".", followed by an external link. The
    date is taken from the ten characters of the story link that follow
    "http://www.voanews.com/english/".

    NOTE(review): SentanceCase uses the sentance_case module, whose import
    is commented out in this file -- confirm it is available before
    enabling this source.
    """
    array = []
    f = urllib.urlopen("http://www.voanews.com/english/customCF/RecentStoriesRSS.cfm?keyword=TopStories ")
    try:
        xml = f.read()
    finally:
        # Release the connection even when the download fails part-way.
        f.close()

    soup = BeautifulStoneSoup(xml)
    for item in soup('item'):
        text = SentanceCase(item.title.string)+"; "+item.description.string+"."
        link = item.link.string.replace("?rss=topstories","")
        # The story URL carries the date right after the site prefix.
        date = link[len("http://www.voanews.com/english/"):]
        date = date[:len("2008-05-30")]
        PageName = MakePageName(date)
        text = "*" + text + "["+link+" (VOA)]"

        array.append([PageName,text,link,date])
    return array
 
##articlearray=[]
##articlearray=setimes()
##articlearray=articlearray+VOA()
##print articlearray
 
##articlearray=sorted(articlearray)
##for a in articlearray:
##    PageName=a[0]
##    text=a[1]
##    link=a[2]
##    date=a[3]
##    ##print date,text
##    if date!="2008-05-30":
##        ##print text
##        AddtoPage(PageName,text,link,date)


####AddtoPage(PageName,text,link,date)
# Script body: runs whenever this module is executed or imported.
# Loads one hard-coded daily Current events page, reorders its news items
# with AlphaBeta, and hands the result to save_page, which shows the diff
# and asks for confirmation before saving.
Page = wikipedia.Page(wikipedia.getSite(), "Portal:Current events/2008 June 3")
Pagetext = Page.get()
# Keep the unsorted text so save_page can show a diff against it.
oldPageText=Pagetext
Pagetext=AlphaBeta(Pagetext)
##print Pagetext
save_page(Page,oldPageText,Pagetext,"Sorted Alphabukenly")