Edit: I did not realize that you were just looking for the problem with your script. Here's what I think the problem is, followed by my original answer, which takes a different approach to the bigger problem you are trying to solve.
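Your script is a good example of the danger of a bare `except:` clause: it catches everything - including, in this case, the `SystemExit` raised by your `sys.exit(0)`.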
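I assume the `try` block is there to handle the case where `D:\Download\htmlString.p` does not exist yet. That error is an `IOError`, and you can catch it specifically with `except IOError:`.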
Here is your script, plus a bit of code before it to make it run, with the `except:` problem fixed:
import sys
import pickle
import urllib2
# Fetch the page and compare it with the copy pickled by the previous run.
# Exits with status 0 when nothing changed; otherwise (re)writes the cache.
request = urllib2.Request('http://www.iana.org/domains/example/')
response = urllib2.urlopen(request)
try:
    htmlString = response.read()
finally:
    # Release the connection even if the read fails.
    response.close()

cache_path = 'D:\\Download\\htmlString.p'

try:
    # Only the read is guarded: an IOError here means the cache file does
    # not exist (or cannot be read) yet. Write failures further down are
    # allowed to propagate instead of being misreported as a first run.
    with open(cache_path, 'rb') as cache_file:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the cache file is written by this script alone.
        previous = pickle.load(cache_file)
except IOError:
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(htmlString, cache_file)
    print('Created new file.')
else:
    if previous == htmlString:
        print("Values haven't changed!")
        # SystemExit is not an IOError, so it is not swallowed by the
        # handler above.
        sys.exit(0)
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(htmlString, cache_file)
    print('Saving')
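You may also want to look at `os.path` for building your file paths - it helps anyone who later wants to run your script on another platform, and it spares you the ugly double backslashes.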
Edit 2: adapted for your specific URL.
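That page contains a dynamically generated ad value that changes on every load. It sits near the end, after all the content, so we can split the HTML string at that point and keep only the first part, discarding the dynamic portion.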
import sys
import pickle
import urllib2
# Fetch the calendar page, drop the trailing ad markup that differs on every
# load, and compare the remainder with the copy pickled by the previous run.
# Exits with status 0 when nothing changed; otherwise (re)writes the cache.
request = urllib2.Request('http://ecal.forexpros.com/e_cal.php?duration=weekly')
response = urllib2.urlopen(request)
try:
    # Keep only the text before the doubleclick iframe; everything from that
    # marker onwards is discarded before comparing.
    htmlString = response.read().split('<iframe src="http://fls.doubleclick')[0]
finally:
    # Release the connection even if the read fails.
    response.close()

cache_path = 'D:\\Download\\htmlString.p'

try:
    # The cache is loaded once (the original unpickled it twice). Only the
    # read is guarded: an IOError here means there is no readable cache yet.
    # NOTE(review): text mode ('r'/'w') is kept on purpose so cache files
    # written by earlier runs stay readable; this presumably relies on the
    # ASCII-based default pickle protocol of Python 2 - confirm before
    # switching to binary mode.
    with open(cache_path, 'r') as cache_file:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the cache file is written by this script alone.
        previous = pickle.load(cache_file)
except IOError:
    with open(cache_path, 'w') as cache_file:
        pickle.dump(htmlString, cache_file)
    print('Created new file.')
else:
    if previous == htmlString:
        print("Values haven't changed!")
        # SystemExit is not an IOError, so it is not swallowed by the
        # handler above.
        sys.exit(0)
    with open(cache_path, 'w') as cache_file:
        pickle.dump(htmlString, cache_file)
    print('Saving')
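Note that the resulting string is no longer a valid HTML document, if that matters to you. There is probably a more elegant way to do this - for example, removing only the changing value with a regular expression - but this at least answers your question.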
Original answer - a different approach to your problem.
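What do the response headers from the web server look like? HTTP defines a `Last-Modified` header, which you can use to check whether the content has changed (assuming the server reports it honestly). You can fetch it with a `HEAD` request, which saves bandwidth and is kinder to the server you are polling.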
There is also the `If-Modified-Since` request header, which sounds like what you are looking for.
Combining the two, you might end up with something like this:
import sys
import os.path
import urllib2
# Conditional fetch: send the Last-Modified value saved by the previous run
# as If-Modified-Since, exit with status 0 on "304 Not Modified", and
# otherwise remember the new Last-Modified value for the next run.
url = 'http://www.iana.org/domains/example/'
saved_time_file = 'last time check.txt'

request = urllib2.Request(url)
if os.path.exists(saved_time_file):
    # If we've previously stored a time, get it and add it to the request.
    with open(saved_time_file, 'r') as time_file:
        # Strip stray whitespace/newlines so the header value stays on a
        # single line.
        last_time = time_file.read().strip()
    if last_time:
        request.add_header("If-Modified-Since", last_time)

try:
    response = urllib2.urlopen(request)
except urllib2.HTTPError as err:
    # urllib2 reports 304 as an HTTPError; any other HTTP error is re-raised.
    if err.code == 304:
        print("Nothing new.")
        sys.exit(0)
    raise

last_modified = response.info().get('Last-Modified', False)
if last_modified:
    # The context manager makes sure the timestamp is flushed to disk.
    with open(saved_time_file, 'w') as time_file:
        time_file.write(last_modified)
else:
    print("Server did not provide a last-modified property. Continuing...")
    # Alternately, you could save the current time in HTTP-date format here:
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3
    # This might work for some servers that don't provide Last-Modified, but
    # do respect If-Modified-Since.

# You should get here if the server won't confirm the content is old.
# Hopefully, that means it is new.
# HTML should be in response.read().
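Also take a look at the blog post by Stii, which may provide some inspiration. I do not know enough about ETags to include them in my example, but his code checks for them as well.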