# User:Hillgentleman/ifexistslog.py

#ifexistslog.py
# Fetches http://noc.wikimedia.org/~tstarling/ifexist.log and stores a local copy.
# The eventual goal is to then find all strings in it like:
#          2007-12-03 06:27:16 zh_yuewiki: 131 http://zh-yue.wikipedia.org/wiki/%E6%B4%9B%E7%A3%AF%E5%B1%B1%E8%84%88
# (the search step is not implemented yet; see the commented-out regex below).

# Prompt-state flag: its only visible reader is the disabled chunked-download
# loop kept in the string literal at the end of this file, where an empty
# value means "keep asking the user before each chunk". The live download
# code visible here never reads it.
a = ''

import urllib
import codecs # WE MAY NOT NEED THIS, BUT ANYHOW
#import re
#import time

#urlX = re.compile(r'http\://zh-yue.+\b',flags=re.U)

# Download the whole ifexist.log in one request and store a local copy.
#
# Notes on the shape of this code:
#  * urllib.urlopen is the Python 2 API (this script targets Python 2).
#  * The response is read completely *before* the output file is opened, so a
#    failed download no longer truncates a previously saved copy.
#  * The payload is written in binary mode on purpose. read() returns raw
#    bytes; pushing a byte string through a utf-8 codecs writer makes Python 2
#    implicitly decode it as ASCII first, which raises UnicodeDecodeError as
#    soon as the log contains a single non-ASCII byte. Writing the bytes
#    verbatim produces the same file for ASCII input and never fails on
#    anything else.
#  * try/finally guarantees both the HTTP response and the file are closed
#    even when reading or writing raises.
LOG_URL = 'http://noc.wikimedia.org/~tstarling/ifexist.log'
SAVE_PATH = 'ifexists.log.1'

file = urllib.urlopen(LOG_URL)
try:
    x = file.read()
finally:
    file.close()

saveFile = open(SAVE_PATH, 'wb')
try:
    saveFile.write(x)
finally:
    saveFile.close()



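### Disabled alternative, kept inside the string literal below: download the
### log in 100000-byte chunks with a pause between chunks. Not worth using
### unless you have very little memory.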
"""

try:
  while True:
    x = file.read(100000)
    print x

    if a=='': a=raw_input('press Return to continue to wait, or press some other key to automatise')

    saveFile.write(x)
    
    currentTime=time.clock()
    while time.clock()< currentTime+10:
      print'..'

finally:
    saveFile.close()

"""