def findall_in_page(page, start_part, end_part):
    """Return every substring of ``page`` enclosed by the given markers.

    The page is scanned left to right. Each match begins right after an
    occurrence of ``start_part`` and runs up to (not including) the next
    occurrence of ``end_part``. The search for the following ``start_part``
    resumes at the position of that ``end_part``.

    Args:
        page: Text to scan.
        start_part: Marker that precedes each wanted substring; must be
            non-empty.
        end_part: Marker that terminates each wanted substring.

    Returns:
        A list of the enclosed substrings in order of appearance. A trailing
        ``start_part`` with no ``end_part`` after it is ignored.

    Raises:
        ValueError: If ``start_part`` is empty (the scan could never advance).
    """
    if not start_part:
        raise ValueError("start_part must be a non-empty string")

    all_strings = []
    end = 0
    while True:
        found = page.find(start_part, end)
        if found == -1:
            break
        start = found + len(start_part)
        end = page.find(end_part, start)
        if end == -1:
            # Unterminated marker: stop here. Using -1 as a slice bound or as
            # the next search offset would yield a bogus match and can make
            # the scan loop forever.
            break
        all_strings.append(page[start:end])
    return all_strings
def pic_urls_find_part(pages):
    """Collect every picture URL found in the given response pages.

    Args:
        pages: Iterable of page texts; each is scanned for ``"path":"..."``
            fields.

    Returns:
        A flat list of the extracted ``path`` values, in page order.
    """
    pic_pics = []
    for page in pages:
        pic_pics.extend(findall_in_page(page, '"path":"', '"'))
    return pic_pics


def pages_usr(label):
    """Fetch the search-result pages for ``label``.

    Args:
        label: Search keyword; it is percent-encoded before being placed in
            the query string.

    Returns:
        A list with one ``get_page`` result per start offset
        (0, 100, ..., 3500).
    """
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw={}&start={}&limit=1000'
    # Percent-encode the keyword so non-ASCII text is safe inside the URL.
    label = urllib.parse.quote(label)
    # Per the original author's note, inspecting the response shows a maximum
    # of 3600, hence the upper bound of the offset range.
    return [get_page(url.format(label, index)) for index in range(0, 3600, 100)]


def download_pic(url, name):
    """Download one picture and save it under ``test/``.

    Intended to run in a worker thread started by ``main``, which acquires
    ``thread_lock`` before starting the thread; this function releases it
    when done.

    Args:
        url: Address of the picture to download.
        name: Value appended to the file-name prefix (``main`` passes the
            1-based download counter).
    """
    import os

    try:
        req = requests.get(url)
        path = 'test/杨幂' + str(name) + ".jpg"
        # The target directory may not exist yet; exist_ok keeps concurrent
        # workers from failing when they race to create it.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as file:
            file.write(req.content)
    finally:
        # Always give the slot back, even when the download or write fails;
        # otherwise main() would eventually block forever on acquire().
        thread_lock.release()


def main(label):
    """Search for ``label`` and download every picture found, one thread each.

    Args:
        label: Search keyword passed to ``pages_usr``.
    """
    pages = pages_usr(label)
    pic_urls = pic_urls_find_part(pages)
    for numbers, url in enumerate(pic_urls, start=1):
        print("Now Downloading: {}".format(numbers))
        # Take a slot before starting the worker; the worker releases it.
        # NOTE(review): thread_lock is defined outside this chunk, presumably
        # a semaphore bounding concurrent downloads; confirm.
        thread_lock.acquire()
        t = threading.Thread(target=download_pic, args=(url, numbers))
        t.start()


main('杨幂')
Now Downloading: 1
Now Downloading: 2
Now Downloading: 3
Now Downloading: 4
Now Downloading: 5
Now Downloading: 6
Now Downloading: 7
Now Downloading: 8
Now Downloading: 9
Now Downloading: 10