"""Download every .jpg image referenced by a Baidu Tieba thread page.

The page HTML is fetched, every ``src="..."`` attribute value is extracted
with a regular expression, and each value containing ``.jpg`` is downloaded
and saved in the current directory as ``0.jpg``, ``1.jpg``, ...
"""
import re
import urllib
from urllib import request

# Thread page whose images are downloaded.
PAGE_URL = 'http://tieba.baidu.com/p/4425486638'

# Captures the value of every src="..." attribute in the page source.
imgre = re.compile(r'src=\"(.*?)\"')


def fetch_page(url):
    """Return the body of *url* decoded as UTF-8 text."""
    req = request.Request(url)
    with request.urlopen(req) as resp:
        return resp.read().decode('utf-8')


def find_jpg_links(html):
    """Return every ``src`` attribute value in *html* that contains '.jpg'.

    Order and duplicates are preserved, matching ``re.findall`` output.
    """
    return [link for link in imgre.findall(html) if '.jpg' in link]


def download_images(links):
    """Download each link and save it as ``<n>.jpg`` in the current directory.

    The counter only advances after a successful write, so a failed write
    does not leave a gap in the numbering. Download errors are not caught
    and propagate to the caller; only file-writing errors are reported and
    skipped.

    Returns the number of images written.
    """
    picnum = 0
    for link in links:
        # Close the HTTP response as soon as the payload has been read.
        with request.urlopen(link) as resp:
            img = resp.read()
        filename = str(picnum) + '.jpg'
        try:
            # The context manager guarantees the file is closed (the original
            # wrote `f.close` without parentheses, which never closed it).
            with open(filename, 'wb') as out:
                out.write(img)
        except OSError:
            # Report the link that failed (the original referenced an
            # undefined name here and raised NameError instead).
            print('无法将图片%s写入%s' % (link, filename))
        else:
            picnum += 1
    return picnum


def main():
    """Fetch the thread page and save all of its .jpg images."""
    data = fetch_page(PAGE_URL)  # raw HTML of the thread page
    results = find_jpg_links(data)  # list of .jpg image links
    download_images(results)


if __name__ == '__main__':
    main()
求大神用 Pillow 剔除掉无用的广告图片(大小在 100px × 100px 以下),准备用 BeautifulSoup 升级一下这个简单的爬虫。