Unable to load ASP.NET page using Python urllib2

I am trying to make a POST request to https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/WellDetails/WellDetails.aspx in order to scrape data.

Here is my current code:

from urllib import urlencode
import urllib2

# Configuration
uri = 'https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/WellDetails/WellDetails.aspx'
headers = {
    'HTTP_USER_AGENT': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.13) Gecko/2009073022 Firefox/3.0.13',
    'HTTP_ACCEPT': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Content-Type': 'application/x-www-form-urlencoded'
}
formFields = [(r'ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber','003-00013'), 
              (r'ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ob_CboSearchPermitNumberTB','003-00013'),
              (r'ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ob_CboSearchPermitNumberSIS','0'),
              (r'ctl00%24MainContent%24WellDetailsCriteria1%24ViewDataButton','View Data'),
              (r'__VIEWSTATE', r'/wEPDwUJOTc2MzI0NTk4D2QWAmYPDxYEHglQYWdlVGl0bGUFDFdlbGwgRGV0YWlscx4SUmVxdWlyZXNKYXZhU2NyaXB0Z2QWAgIDD2QWCGYPFgIeBFRleHQF1hA8ZGl2IHN0eWxlPSJoZWlnaHQ6IDE0OXB4OyB3aWR0aDogOTUycHg7IGJhY2tncm91bmQtcmVwZWF0OiBuby1yZXBlYXQ7IGJhY2tncm91bmQtaW1hZ2U6dXJsKGh0dHBzOi8vd3d3LmFoczIuZGVwLnN0YXRlLnBhLnVzL2ltYWdlcy9kZXBfZXh0ZXJuYWxfb ... YWRlciRIZWFkZXJWaWV3D2dkrp784OTosLLEOFxy/mWBtsit I6kjKRlZ/ 1IBCkZNk='),
              (r'__EVENTVALIDATION', r'/wEWBALn79faCwK+qZJIAqXY04cBAorCkdMKL5VEAnd1IIQ3cnIHRxZAluFo5G5Y5ffyRXRdtmBiGCc='),
              (r'__EVENTTARGET', r'ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber'),
              (r'__EVENTARGUMENT', r'ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber')
             ]

# Load page
encodedFields = urlencode(formFields)
req = urllib2.Request(uri, encodedFields, headers)
r = urllib2.urlopen(req)

# Handle results
print r.read()

The returned page says, “Sorry, we are having technical difficulties. Please try again later,” so I know I must be getting something wrong. I am not sending any cookies, but I was not sure whether that is necessary. If it is, can I simply add "Cookie: ASP.NET_SessionId=whatever" to my headers, or do I need to use cookielib?

Any thoughts on what is going wrong would be most appreciated.

EDIT: Here is my updated code, which now reads __VIEWSTATE and __EVENTVALIDATION from the page itself (and uses a cookie jar):

from urllib import urlencode
import urllib2
from BeautifulSoup import BeautifulSoup
import cookielib

# Configuration
uri = 'https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/WellDetails/WellDetails.aspx'
# Create headers
headers = {
    'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'www.paoilandgasreporting.state.pa.us',
    'Origin': 'https://www.paoilandgasreporting.state.pa.us',
    'Referer': 'https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/WellDetails/WellDetails.aspx',
    'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16',
}

# Set up cookie jar
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPSHandler(debuglevel=1))

# Grab information that we need to pass along with our requests
#r = urllib2.urlopen(uri)
req = urllib2.Request(uri,urlencode([]),headers)
cj.add_cookie_header(req)
r = opener.open(req)
print cj


soup = BeautifulSoup(r.read())
eventvalidation = soup.find('input', id='__EVENTVALIDATION')['value']
viewstate = soup.find('input', id='__VIEWSTATE')['value']
formFields = [  ('__EVENTVALIDATION',eventvalidation),
                ('__VIEWSTATE',viewstate),
                ('__EVENTTARGET', ''),
                ('__EVENTARGUMENT', ''),
                ('ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber', '003-00013'),
                ('ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ob_CboSearchPermitNumberTB','003-00013'),
                ('ctl00$MainContent$WellDetailsCriteria1$SearchPermitNumber$ob_CboSearchPermitNumberSIS','0'), # TODO what value to pass?
                ('ctl00$MainContent$WellDetailsCriteria1$ViewDataButton','View Data'), # do we need this?
             ]


# Load page
encodedFields = urlencode(formFields)
req = urllib2.Request(uri, encodedFields, headers)
cj.add_cookie_header(req)
r = opener.open(req)

# Handle results
print r.read()
0
2

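Load the page once first and collect the hidden form fields from it (view state, event validation, etc.) - in my script this happens inside a "while first_visit is True:" loop.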

        # Collect the hidden form fields from the parsed page so they can be
        # sent back with the next request.
        # (the_page is presumably a BeautifulSoup document -- it is created
        # outside this snippet; confirm.)
        # Each loop keeps the value of the LAST matching <input>; when nothing
        # matches, the loop body never runs and the variable is not assigned here.
        for result in the_page.findAll('input', attrs={'name' : '__VIEWSTATE'}):
            view_state =  result['value']

        for result_1 in the_page.findAll('input', attrs={'name' : '__EVENTVALIDATION'}):
            event_validation =  result_1['value']

        for result_2 in the_page.findAll('input', attrs={'name' : '__PREVIOUSPAGE'}):
            previous_page =  result_2['value']

        for result in the_page.findAll('input', attrs={'name' : '__EVENTTARGET'}):
            event_target =  result['value']

:

        # Build the form data from the previously collected hidden fields, send
        # it to the search page and parse the response for the next round.
        # `page` and `Country` are defined outside this snippet.
        url = 'http://bandscore.ielts.org/search.aspx'
        values = {
                                # NOTE(review): target/argument look like a paging
                                # postback on the results grid -- confirm against
                                # the page's own __doPostBack calls.
                                '__EVENTTARGET' : 'gdvSearchResults',
                                '__EVENTARGUMENT' : page,
                            # Hidden fields echoed back unchanged from the last response.
                            '__VIEWSTATE' : view_state,
                            '__PREVIOUSPAGE' : previous_page,
                            '__EVENTVALIDATION' : event_validation,
                            'DropDownList1'  : Country,
                            #'txtSearchInstitution'  : '',
                            #'hdnSearchText'  : '',
                            #'rdoFilter': '%25',
        }
        user_agent = 'Mozilla/5 (Solaris 10) Gecko'
        headers = { 'User-Agent' : user_agent }
        # urlencode() percent-encodes names and values; passing the result as
        # the data argument makes urllib2 send the request as a POST.
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data, headers)
        response = urllib2.urlopen(req)
        thePage = response.read()
        # `soup` is presumably BeautifulSoup imported under that name -- confirm.
        # Re-parsing into the_page lets the hidden fields be extracted again.
        the_page = soup(thePage)

n00b Python, ... ...!

+1

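Compare the request your script sends with the one a browser sends. In Firefox you can capture the browser's request with, for example, TamperData, Firebug or LiveHTTPHeaders.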

If that does not help, consider using mechanize.

0

All Articles