比较好的网站建设_深圳软件开发定制_中企动力网站_seo建站 = require('express');">

江西网站优化

天津网站优化、南通seo、永久使用、不限域名、商城系统平台开发

邓州网站建设?

比较好的网站建设_深圳软件开发定制_中企动力网站_seo建站

# NOTE(review): this block was recovered from a whitespace-mangled source.
# It depends on names imported/defined elsewhere in the file: requests,
# BeautifulSoup, Client (presumably pymongo.MongoClient), trange (tqdm),
# and the module globals `header`, `city`, `e_city`, `eshouse` — confirm.

proxies = []


def write_to_mongo(ips, city):
    """Write scraped house records for *city* into MongoDB.

    Stores into db 'fs_db', collection '<city>_good'.  Each record is a
    5-item sequence: (name, price, address, area, equipment).
    """
    client = Client(host='localhost', port=27017)
    try:
        coll = client['fs_db'][city + '_good']
        for ip in ips:
            coll.insert_one({'name': ip[0],
                             'price': ip[1],
                             'addresses': ip[2],
                             'areas': ip[3],
                             'eq': ip[4]})
    finally:
        # Close even if an insert raises (original leaked the client).
        client.close()


def read_from_mongo(city):
    """Return all stored records for *city* as a list.

    Bug fix: the original returned a live cursor after closing the
    client, which makes the cursor unusable; materialize it first.
    """
    client = Client(host='localhost', port=27017)
    try:
        return list(client['fs_db'][city + '_good'].find())
    finally:
        client.close()


class Consumer(threading.Thread):
    """Worker thread: scrape one listing page and persist its houses."""

    def __init__(self, args):
        # args = (url_demo, page_number, city_id, lock); threading.Thread
        # stores it as self._args, which run() unpacks.
        threading.Thread.__init__(self, args=args)

    def run(self):
        url_demo, i, city_id, lock = self._args
        print("{}, 第{}页".format(city[city_id], i))
        soup = get_real(url_demo.format(i))

        names = [n.text.strip() for n in soup.select('.tit_shop')]

        addresses = []
        for item in soup.find_all('p', attrs={'class': 'add_shop'}):
            address = item.a.text + " " + item.span.text
            addresses.append(address.replace('\t', '').replace('\n', ''))

        es = [item.text.replace('\t', '').replace('\n', '')
              for item in soup.find_all('p', attrs={'class': 'tel_shop'})]

        moneys = [m.text.strip()
                  for m in soup.find_all("span", attrs={"class": 'red'})]

        areas = [a.find_all('span')[-1].text
                 for a in soup.find_all('dd', attrs={'class': 'price_right'})]

        houses = []
        for idx in range(len(names)):
            # The five lists can differ in length when a listing is missing
            # a field; skip incomplete rows instead of crashing the thread.
            try:
                item = [names[idx], moneys[idx], addresses[idx],
                        areas[idx], es[idx]]
            except IndexError as exc:
                print(exc)
            else:
                print(item)
                houses.append(item)

        # Serialize MongoDB writes across worker threads; `with` releases
        # the lock even if write_to_mongo raises (original used bare
        # acquire()/release()).
        with lock:
            write_to_mongo(houses, e_city[city_id])
        print("线程结束{}".format(i))


def dict2proxy(dic):
    """Convert a proxy record {'type', 'ip', 'port'} into the
    {'http': ..., 'https': ...} mapping that requests expects."""
    s = dic['type'] + '://' + dic['ip'] + ':' + str(dic['port'])
    return {'http': s, 'https': s}


def get_real(url):
    """GET *url* and return its parsed soup, following the site's
    JavaScript anti-scraping redirect page ('跳转...') when present.

    The real destination is assembled client-side from the t4/t3 script
    variables, so we extract them with regexes and fetch t4?t3 directly.
    """
    resp = requests.get(url, headers=header)
    soup = BeautifulSoup(resp.content, 'html.parser', from_encoding='gb18030')
    title = soup.find('title').text.strip()
    if title == '跳转...':
        pattern = re.compile(r"var t4='(.*?)';")
        script = soup.find("script", text=pattern)
        t4 = pattern.search(str(script)).group(1)
        pattern = re.compile(r"var t3='(.*?)';")
        script = soup.find("script", text=pattern)
        # The page defines t3 several times; the original took the
        # second-to-last match — preserved here.
        t3 = re.findall(pattern, str(script))[-2]
        real = requests.get(t4 + '?' + t3, headers=header)
        soup = BeautifulSoup(real.content, 'html.parser',
                             from_encoding='gb18030')
    elif title == '访问验证-房天下':
        # Captcha/verification page: nothing to do automatically; the
        # caller receives the page as-is.
        pass
    return soup


def read_proxies():
    """Return every stored proxy document from MongoDB as a list.

    (Original comment: check before writing to avoid duplicates.)
    """
    client = Client(host='localhost', port=27017)
    try:
        return list(client['proxies_db']['proxies'].find())
    finally:
        client.close()


def craw():
    """Crawl every city's listing pages, up to 10 Consumer threads per batch.

    Page count is read from the pagination widget; falls back to 100
    pages when the widget is missing or unparseable.
    """
    lock = threading.Lock()
    for idx in trange(len(e_city)):
        url = eshouse[idx]
        soup = get_real(url.format(2))
        try:
            page_number = int(soup.find('div', attrs={'class': 'page_al'})
                              .find_all('span')[-1].text[1:-1])
            pages = list(range(1, page_number + 1))
        except (AttributeError, IndexError, ValueError):
            # Original used a bare `except:`; narrowed to the failures the
            # parse above can actually produce.
            pages = list(range(1, 101))
        url_demo = url
        ts = []
        while pages:
            for _ in range(10):
                t = Consumer((url_demo, pages.pop(), idx, lock))
                t.start()
                ts.append(t)
                if not pages:
                    break
            # Bug fix: the original removed items from `ts` while iterating
            # it (`for t in ts: t.join(); ts.remove(t)`), so only every
            # other thread was joined.  Join all, then reset the batch.
            for t in ts:
                t.join()
            ts.clear()


if __name__ == '__main__':
    craw()

猜你喜欢