1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
import os

import requests
from parsel import Selector
# Cookies sent with the request issued further down in this script.
# The token is taken from the TIANYANCHA_AUTH_TOKEN environment variable when
# it is set and non-empty, so a fresh token can be used without editing the
# source; otherwise the original literal is used as a fallback.
# NOTE(review): the fallback is a credential committed to source -- rotate it
# and drop the literal once every caller sets the environment variable.
cookies = {
    'auth_token': os.environ.get('TIANYANCHA_AUTH_TOKEN') or (
        'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODgyNzYwMzk2MiIsImlhdCI6MTY1MzM1NjQ0MSwiZXhwIjoxNjU1OTQ4NDQxfQ._mF-UJeQsLBiSbKQnneoOw-yjGQ0qkYuDatYz-gB6-oNaEGXKkU4pcrU8Uvr_EdQzKKv5uS8slHip0jI45SCFw'
    ),
}
# HTTP request headers passed to requests.get() below.
# NOTE(review): the values look like they were copied from a desktop Chrome
# session -- confirm which of them the endpoint actually requires.
headers = {
    'authority': 'www.tianyancha.com',
    'accept': '*/*',
    'accept-language': 'zh,zh-CN;q=0.9',
    'dnt': '1',
    # Company page the request claims to originate from.
    'referer': 'https://www.tianyancha.com/company/11684584',
    # Client-hint headers.
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    # Fetch-metadata headers.
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/101.0.0.0 Safari/537.36'
    ),
    'x-requested-with': 'XMLHttpRequest',
}
# Query-string parameters for the findNewsCount pagination endpoint.
params = {
    'TABLE_DIM_NAME': 'findNewsCount',
    # Presumably page size and page number -- confirm against the endpoint.
    'ps': '10',
    'pn': '10',
    # Company identifier; the same number appears in the referer header URL.
    'id': '11684584',
    'name': '中航重机股份有限公司',
    'companyBizType': '8',
    # Looks like a millisecond-timestamp cache-buster -- TODO confirm.
    '_': '1653380498166',
}
# Fetch one page of the company's news listing.
# A timeout is set because requests waits indefinitely by default, and the
# status is checked so an HTTP error response is reported instead of being
# silently parsed as if it were a news listing.
response = requests.get(
    'https://www.tianyancha.com/pagination/findNewsCount.xhtml',
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=10,
)
response.raise_for_status()
# Parse the returned HTML fragment and walk every news entry in it.
selector = Selector(text=response.text)
news_contents = selector.xpath('//div[@class="company-news-content"]')

for news_item in news_contents:
    # Headline link and text come from the anchor in the first nested div.
    link = news_item.xpath('.//div[1]/a/@href').extract()
    title = news_item.xpath('.//div[1]/a/text()').extract()
    tags = news_item.xpath('./div[@class="news-tags"]//span/text()').extract()

    # Two abstract class variants are queried; the "abstracts " one is used
    # only when the "abstracts -new" one yields no text.
    abstract = news_item.xpath('./div[@class="abstracts "]//text()').extract()
    abstract_news = (
        news_item.xpath('./div[@class="abstracts -new"]//text()').extract()
        or abstract
    )

    # The "infos" row: first, second and third span are read as source,
    # time and company respectively.
    source = news_item.xpath('./div[@class="infos"]/span[1]/text()').extract()
    time_ = news_item.xpath('./div[@class="infos"]/span[2]/text()').extract()
    company = news_item.xpath('./div[@class="infos"]/span[3]/a//text()').extract()

    print(company)
|