Quote:
Originally Posted by somnia
[You must be logged in to view images. Log in or Register.]
Win. Can you include source code?
|
You need these two dependencies:
http://python-requests.org
http://lxml.de/
Python Code:
import requests
import lxml.html
def find_images(url):
    """Return the image URLs found inside the posts of one thread page.

    The page at *url* is fetched and parsed with lxml. Only ``<img>``
    elements nested in a ``<div>`` whose ``id`` contains ``post_message``
    are considered.

    Args:
        url: Address of the thread page to scan.

    Returns:
        A list of absolute image URLs, in document order. Duplicates are
        kept; the caller is expected to de-duplicate.
    """
    root = lxml.html.parse(url).getroot()
    # Rewrite relative links in place so every src below is a full URL.
    root.make_links_absolute()
    imgs = []
    for img in root.xpath('//div[contains(@id, "post_message")]//img'):
        src = img.attrib.get('src', '')
        # An <img> without a src used to be queued as an empty URL, which
        # could only ever fail to download; skip it here instead.
        # The 'project1999' filter presumably drops images hosted by the
        # forum itself (smilies, icons) -- confirm against the site markup.
        if src and 'project1999' not in src:
            imgs.append(src)
    return imgs
def main(pages=52, out_dir='imgs'):
    """Download every image posted in the forum thread.

    Each thread page is scanned with ``find_images``; the combined URL list
    is de-duplicated and every image is saved as ``<out_dir>/<index>``.
    A failed download is reported and skipped so that one bad link does not
    abort the whole run.

    Args:
        pages: Number of thread pages to scan, starting at page 1.
        out_dir: Directory the images are written to; created if missing.
    """
    import os

    page_url = 'http://www.project1999.org/forums/showthread.php?t=37779&page=%d'
    urls = [page_url % num for num in range(1, pages + 1)]

    found = set()
    for url in urls:
        found.update(find_images(url))
    # Sorting the de-duplicated URLs keeps the numeric file names stable
    # from one run to the next (plain set ordering is arbitrary).
    todownload = sorted(found)

    # Without the target directory every single write would fail.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print("downloading %d images" % len(todownload))
    for count, img_url in enumerate(todownload):
        print("%d downloading %s" % (count, img_url))
        try:
            # Fetch before opening the file so a failed request does not
            # leave an empty file behind; the timeout stops a dead host
            # from hanging the run.
            response = requests.get(img_url, timeout=30)
            # Treat HTTP error statuses as failures rather than saving the
            # error page as if it were an image.
            response.raise_for_status()
            # Images are binary: write in 'wb' and close the handle promptly.
            with open(os.path.join(out_dir, '%d' % count), 'wb') as out:
                out.write(response.content)
        except (requests.RequestException, IOError, OSError) as exc:
            # Best effort: report the reason and move on to the next image.
            # Ctrl-C is deliberately no longer swallowed here.
            print("couldn't download %s: %s" % (img_url, exc))
# Start the scrape only when this file is executed as a script, so that
# importing it for its functions has no side effects.
if __name__ == "__main__":
    main()
http://wutdo.com/catimgs.zip