Tuesday, May 26, 2009

Access Kaixin001.com via Jython

Some notes:
  1. Use cookies in the request
  2. Use a tool such as WebScarab to capture the request URLs and data sent by the Flash client
  3. Parse the HTML/XML

Here is the updated Proxy.py (based on the proxyHTTP.py from the last post), which builds the proxy under Jython:
from java.net import Authenticator
from java.net import PasswordAuthentication
from java.lang import System
from java.net import URL
from urllib2 import ProxyHandler, build_opener, HTTPCookieProcessor

# Proxy connection settings shared by every helper in this module.
# The '*...*' and '***' values are placeholders to be filled in before use.
proxy_info = {
    'user': '*domain*\\*user*',  # login written as domain\user
    'pass': '*password*',
    'host': '***',
    # Kept as a string: it is formatted into the proxy URL and stored as a
    # Java system property.
    'port': '***',
}

class auth(Authenticator):
    """java.net.Authenticator that answers with the credentials in proxy_info."""

    def getPasswordAuthentication(self):
        """Return a PasswordAuthentication built from proxy_info's user and pass."""
        user = proxy_info['user']
        password = proxy_info['pass']
        return PasswordAuthentication(user, password)

def getHTTPS(url):
    """Fetch url over HTTPS through the proxy and return the response body.

    Sets the https.proxyHost / https.proxyPort Java system properties from
    proxy_info and installs an ``auth`` instance as the default
    java.net.Authenticator, so both settings remain in effect after the call.

    url -- absolute URL to open with java.net.URL

    Returns the body as a str with one character per byte read.
    Any Java I/O exception raised while connecting or reading propagates.
    """
    systemSettings = System.getProperties()
    systemSettings.put("https.proxyHost", proxy_info['host'])
    systemSettings.put("https.proxyPort", proxy_info['port'])
    Authenticator.setDefault(auth())

    stream = URL(url).openConnection().getInputStream()
    try:
        chunks = []
        # InputStream.read() returns one byte (0-255), or -1 at end of stream.
        byte = stream.read()
        while byte != -1:
            chunks.append(chr(byte))
            byte = stream.read()
    finally:
        # Release the connection's stream even when a read fails part-way.
        stream.close()

    return ''.join(chunks)

def HTTPProxyHandler():
    """Return a urllib2 ProxyHandler that routes 'http' requests via the proxy.

    The proxy URL embeds the user, password, host and port from proxy_info.
    """
    proxy_url = 'http://%(user)s:%(pass)s@%(host)s:%(port)s' % proxy_info
    return ProxyHandler({'http': proxy_url})

def getOpener(cj):
    """Build a urllib2 opener that goes through the proxy.

    cj -- when true, an HTTPCookieProcessor is added so cookies are handled
          across requests; when false only the proxy handler is used.
    """
    handlers = [HTTPProxyHandler()]
    if cj:
        handlers.append(HTTPCookieProcessor())
    return build_opener(*handlers)
    
def main():
    """Manual check: fetch one HTTPS page through the proxy and print it."""
    body = getHTTPS(r'https://www.google.com/reader')
    print(body)


if __name__ == '__main__':
    main()
And this is the main script:

import Proxy
import urllib2
#import cookielib
import re
from urllib import urlencode
from xml.dom import minidom
from time import sleep
from random import random


# Request paths for each garden / ranch action; kx.getpage appends them to
# the site URL.
_GARDEN_DIR = '/!house/!garden'
_RANCH_DIR = '/!house/!ranch'

urlparm = {
    'garden': _GARDEN_DIR + '//getconf.php',
    'water': _GARDEN_DIR + '/water.php',
    'antigrass': _GARDEN_DIR + '/antigrass.php',
    'antivermin': _GARDEN_DIR + '/antivermin.php',
    'havest': _GARDEN_DIR + '/havest.php',
    'plough': _GARDEN_DIR + '/plough.php',
    'ranch': _RANCH_DIR + '//getconf.php',
    'r_havest': _RANCH_DIR + '/havest.php',
}

# Template applied to every response body before it is handed to minidom.
# NOTE(review): as written this passes the body through unchanged -- confirm
# whether an XML wrapper was meant to surround the %s.
xmlparm = '%s'

class kx:
    """Small Kaixin001 client built on a process-wide urllib2 opener.

    Creating an instance installs the opener returned by
    Proxy.getOpener(True) as urllib2's default opener, so every later
    urllib2.urlopen call in the process uses it.
    """

    def __init__(self):
        self.buildProxy()
        self.url = 'http://www.kaixin001.com'
        # Base delay in seconds for request throttling.
        # NOTE: nothing in this class currently sleeps; getpage sends
        # requests back to back.
        self.DELAY = 3

    def buildProxy(self):
        """Install the opener from Proxy.getOpener(True) as urllib2's default."""
        opener = Proxy.getOpener(True)
        urllib2.install_opener(opener)

    def login(self, usr, pwd):
        """Submit the login form for the given account.

        usr -- value sent as the 'email' form field
        pwd -- value sent as the 'password' form field

        The response is not read or checked; errors raised by
        urllib2.urlopen propagate to the caller.
        """
        path = self.url + '/login/login.php'

        body = {
            'url': '/home/',
            'email': usr,
            'password': pwd
        }

        req = urllib2.Request(path, urlencode(body))
        urllib2.urlopen(req)

    def getpage(self, path, data=None):
        """Request self.url + path and return the raw response body.

        path -- path appended to the site URL
        data -- optional dict of form fields; omitted or None means no fields

        A url-encoded body is always passed to urllib2.Request (an empty
        string when there are no fields), and urllib2 sends a POST whenever
        a data argument is supplied.
        """
        # None sentinel instead of a shared mutable ``{}`` default.
        if data is None:
            data = {}

        req = urllib2.Request(self.url + path, urlencode(data))

        return urllib2.urlopen(req).read()

def get_friend_info(str1, friend_info):
    """Collect friend uids and titles found in a page of HTML.

    str1        -- page source to scan
    friend_info -- dict updated in place, mapping each 6-8 digit uid string
                   to its title attribute decoded from UTF-8
    """
    pattern = '\?uid\=(\d{6,8})\"\stitle\=\"([^\"]+)\"'
    for uid, title in re.findall(pattern, str1):
        friend_info[uid] = unicode(title, 'utf-8')

def get_verify(str1):
    """Return the quoted value of the first g_verify assignment in str1.

    Raises IndexError when str1 contains no such assignment.
    """
    pattern = 'g_verify\s=\s\"([^\"]+)\"'
    tokens = re.findall(pattern, str1)
    return tokens[0]

def showMsg(fuid, t):
    """Print a section header for label t.

    For fuid '0' the header reads '<t>: My'; for any other uid it is
    indented by one space and shows the uid itself.
    """
    if fuid != '0':
        print(' %s: %s' % (t, fuid))
        return
    print('%s: %s' % (t, 'My'))

def pxml(t, c):
    """Parse an action response and print its outcome under label t.

    t -- label printed in front of the result
    c -- response body, expected to be XML with a <ret> element and, on
         failure, a <reason> element
    """
    try:
        doc = minidom.parseString(xmlparm % c)
    except:
        # Any parse problem is reported and the response is skipped.
        print('! xml parse failed.')
        return

    status = doc.getElementsByTagName('ret')[0].firstChild.data
    if status != 'fail':
        print('  %s: %s' % (t, 'succ'))
        return

    reason = doc.getElementsByTagName('reason')[0].firstChild.data
    print('  %s: %s' % (t, reason))

def showFriend(o, v):
    """Process the garden and ranch of every friend listed on /friend/.

    o -- client object providing getpage()
    v -- verify token passed through to showGarden / showRanch
    """
    page = o.getpage('/friend/')
    friends = {}
    get_friend_info(page, friends)

    # uids in this list are skipped; the single entry is a placeholder.
    skipped = ['*ignore list*']
    for uid in friends:
        if uid in skipped:
            continue
        showGarden(o, v, uid)
        showRanch(o, v, uid)
    
def showGarden(o, v, fuid):
    """Fetch a garden's configuration and act on each planted plot.

    o    -- client object providing getpage(path, data)
    v    -- verify token sent with every request
    fuid -- owner's uid as a string; '0' means the logged-in user's own garden

    For every <item> that has a <name> element, at most one branch is taken,
    checked in this order: water (water < 5), weed (grass == '1'), remove
    vermin (vermin == '1'), then for the own garden harvest fully grown crops
    (and plough when not shared), or for a friend's garden plough a shared
    withered plot / harvest a ripe unshared one unless its <crops> text
    contains 'font '.

    If the configuration cannot be parsed, a message is printed and the
    function returns without acting.
    """

    def text_of(item, tag):
        # Text content of the first <tag> element below item.
        return item.getElementsByTagName(tag)[0].firstChild.data

    def crops_marked(item):
        # True when any child of <crops> contains 'font '.
        # NOTE(review): presumably markup flagging a plot that must not be
        # harvested -- confirm against real responses.
        for child in item.getElementsByTagName('crops')[0].childNodes:
            if child.data.find('font ') != -1:
                return True
        return False

    c = o.getpage(urlparm['garden'], {'fuid': fuid, 'verify': v})

    try:
        xmldoc = minidom.parseString(xmlparm % c)
    except:
        # Kept broad so any parser failure is reported and skipped.
        print('! Failed to parse %s xml file.' % fuid)
        return

    showMsg(fuid, 'Garden')

    for garden in xmldoc.getElementsByTagName('garden'):
        for node in garden.getElementsByTagName('item'):
            # Items without a <name> are skipped.
            if not node.getElementsByTagName('name'):
                continue

            # All values are the elements' text, i.e. strings.
            farmnum = text_of(node, 'farmnum')
            water = text_of(node, 'water')
            grass = text_of(node, 'grass')
            vermin = text_of(node, 'vermin')
            cropsstatus = text_of(node, 'cropsstatus')
            shared = text_of(node, 'shared')
            grow = text_of(node, 'grow')
            totalgrow = text_of(node, 'totalgrow')

            pars = {
                'fuid': fuid,
                'farmnum': farmnum,
                'seedid': 0,
                'verify': v,
                'r': random()
            }

            if int(water) < 5:
                pxml('Water', o.getpage(urlparm['water'], pars))

            # Compare against the string '1': the value is XML text, so the
            # previous comparison with the integer 1 could never be true.
            elif grass == '1':
                pxml('Grass', o.getpage(urlparm['antigrass'], pars))

            elif vermin == '1':
                pxml('Vermin', o.getpage(urlparm['antivermin'], pars))

            # Own garden: harvest fully grown crops.
            elif fuid == '0' and grow != '' and grow == totalgrow and \
                    cropsstatus in ('1', '2'):
                pxml('Havest', o.getpage(urlparm['havest'], pars))

                if shared == '0':
                    pxml('Plough', o.getpage(urlparm['plough'], pars))

            # Friends' gardens.
            elif fuid != '0':
                # Plough the shared item only.
                if cropsstatus == '3' and shared == '1':
                    pxml('Plough', o.getpage(urlparm['plough'], pars))

                # Steal.
                elif cropsstatus == '2' and shared == '0':
                    if not crops_marked(node):
                        pxml('Steal', o.getpage(urlparm['havest'], pars))

def showRanch(o, v, fuid):
    """Fetch a ranch's configuration and collect each listed product.

    o    -- client object providing getpage(path, data)
    v    -- verify token sent with every request
    fuid -- owner's uid as a string; '0' means the logged-in user's own ranch

    Every <item> under <product2> is harvested for the own ranch. For a
    friend's ranch it is taken only when no child of its <tips> element
    contains 'font '. If the configuration cannot be parsed, a message is
    printed and the function returns.
    """
    query = {
        'fuid': fuid,
        'verify': v
    }
    c = o.getpage(urlparm['ranch'], query)

    try:
        xmldoc = minidom.parseString(xmlparm % c)
    except:
        print('! Failed to parse %s xml file.' % fuid)
        return

    showMsg(fuid, 'Ranch')

    for product in xmldoc.getElementsByTagName('product2'):
        for item in product.getElementsByTagName('item'):
            skey = item.getElementsByTagName('skey')[0].firstChild.data
            typenum = item.getElementsByTagName('type')[0].firstChild.data

            pars = {
                'skey': skey,
                'fuid': fuid,
                'foodnum': 1,
                'type': typenum,
                'seedid': 0,
                'verify': v,
                'r': random()
            }

            # Own ranch: always harvest.
            if fuid == '0':
                pxml('Havest %s' % skey, o.getpage(urlparm['r_havest'], pars))
                continue

            # Friend's ranch: skip the item when its tips contain 'font '.
            blocked = False
            for tip in item.getElementsByTagName('tips')[0].childNodes:
                if tip.data.find('font ') != -1:
                    blocked = True
                    break

            if not blocked:
                pxml('Steal %s' % skey, o.getpage(urlparm['r_havest'], pars))
                    

def main():
    """Log in as each configured account and process its garden, ranch and friends."""
    # (email, password) pairs; the single entry is a placeholder.
    accounts = [
        ('*email*', '*password*'),
    ]

    for email, password in accounts:
        print('%s %s %s' % ('-' * 18, email, '-' * 20))

        client = kx()
        client.login(email, password)

        # The garden index page carries the g_verify token needed by the
        # action requests.
        page = client.getpage('/!house/garden/index.php')
        verify = get_verify(page)

        # '0' addresses the logged-in user's own garden and ranch.
        showGarden(client, verify, '0')
        showRanch(client, verify, '0')

        showFriend(client, verify)


if __name__ == '__main__':
    main()