# Download every image referenced by the <li> nodes in li_list.
# li_list, headers and dir_name are expected to be defined earlier in the script.
for li in li_list:
    # The src attribute is site-relative, so prepend the host.
    img_src = 'http://pic.netbian.com' + li.xpath('./a/img/@src')[0]

    # The alt text comes back garbled (mojibake). General-purpose repair:
    # re-encode as iso-8859-1 to recover the raw bytes, then decode as gbk.
    img_title = li.xpath('./a/img/@alt')[0]
    img_title = img_title.encode('iso-8859-1').decode('gbk')
    # Use a real file extension ('.jpg', not a bare 'jpg' suffix).
    img_name = img_title + '.jpg'

    # Fetch the image bytes.
    img_data = requests.get(url=img_src, headers=headers).content

    img_path = dir_name + '/' + img_name
    with open(img_path, 'wb') as f:
        f.write(img_data)
    print(img_name, '下载成功')
实例3: (全国城市获取:热门城市+全部城市)
分开解析
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
tree = etree.HTML(page_text)

# Hot cities: the <li> items directly under the <ul> of the "bottom" div.
# The step must be the element test `//div[...]`; `//@div[...]` selects
# attribute nodes named "div", which have no children, so it matches nothing.
host_li_list = tree.xpath('//div[@class="bottom"]/ul/li')
all_hot_city_names = []
for li in host_li_list:
    # Each <li> wraps one <a> whose text is the city name.
    hot_city_name = li.xpath('./a/text()')[0]
    all_hot_city_names.append(hot_city_name)
# All cities: the <li> items inside the second <div> nested in the
# "bottom" div's <ul>. The city name is the text of the first <a> in each <li>.
city_names_list = tree.xpath('//div[@class="bottom"]/ul/div[2]/li')
all_city_names = [
    city_li.xpath('./a/text()')[0]
    for city_li in city_names_list
]
同时解析,两者xpath不同
1 2 3 4 5 6 7 8 9
# Parse hot cities and all cities in a single query. The two XPath
# expressions differ:
#   hot cities: //div[@class="bottom"]/ul/li/a
#   all cities: //div[@class="bottom"]/ul/div[2]/li/a
# Joining them with `|` returns the union of both node sets.
a_list = tree.xpath(
    '//div[@class="bottom"]/ul/li/a | //div[@class="bottom"]/ul/div[2]/li/a'
)
all_city_names = []
for a in a_list:
    # Each node is already an <a> element, so read its own text
    # (`./text()`), and take the first text node rather than the list.
    city_name = a.xpath('./text()')[0]
    all_city_names.append(city_name)
print(all_city_names)