1839吧 关注:27贴子:46
  • 4回复贴,共1
import csv
import re

import requests
from lxml import etree

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    ),
}

# Matches whole <script>/<style> elements including their contents;
# re.S lets "." span newlines so multi-line blocks are removed too.
SCRIPT_STYLE_RE = re.compile(
    r'<script.*?>.*?</script>|<style.*?>.*?</style>', re.S
)

# CSV file that scraped rows are appended to.
OUTPUT_PATH = '/home/output/crawler_result.csv'

# Seconds to wait for the server; without a timeout requests.get can
# block forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def parser(url):
    """Fetch *url*, print its markup without scripts/styles, and save titles.

    The page is downloaded and decoded as UTF-8. Its markup with
    ``<script>`` and ``<style>`` elements removed is printed. Then, for
    every node selected by the XPath below, the first text node of its
    ``<h3>`` child is appended to the CSV file via :func:`save` and printed.

    Any exception raised along the way is caught and reported on stdout;
    nothing is re-raised and nothing is returned.

    Args:
        url: Address of the page to scrape.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        # Force UTF-8 decoding instead of relying on the encoding that
        # requests guesses from the response headers.
        resp.encoding = 'utf-8'
        html = resp.text

        sr_sc_txt = SCRIPT_STYLE_RE.sub('', html)
        print(sr_sc_txt)

        # The tree is built from the unmodified markup, not the stripped
        # text printed above.
        tree = etree.HTML(html)
        # NOTE(review): the "[表情]" predicate looks like a forum placeholder
        # that replaced part of the original expression - confirm the
        # intended XPath against the target page before relying on it.
        lis = tree.xpath("//*[@class='fl'][表情]")
        for li in lis:
            texts = li.xpath('./h3/text()')
            if not texts:
                # A node without <h3> text must not abort the remaining
                # nodes, so skip it instead of indexing an empty list.
                continue
            a_txt = texts[0]
            save([a_txt])
            print(a_txt)
    except Exception as e:
        print(f"出现错误,错误代码如下:{e}")


def save(data, path=OUTPUT_PATH):
    """Append one row to a CSV file.

    Args:
        data: Sequence of cell values forming a single CSV row.
        path: Destination file; defaults to ``OUTPUT_PATH``. The file is
            opened in append mode and encoded as UTF-8.
    """
    # newline="" lets the csv module control line endings itself.
    with open(path, 'a', encoding='utf-8', newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(data)


if __name__ == '__main__':
    url = 'http://117.73.11.244:9090/'
    parser(url)


IP属地:山东来自iPhone客户端1楼2023-11-14 10:19回复



    IP属地:山东2楼2023-11-15 08:10
    回复
      浪潮1+X证书第一期线上师资培训开课啦
      教育部“1+X”证书:《数据采集职业技能等级证...
      浪潮出席新技术革命背景下产教融合对话活动


      IP属地:山东3楼2023-11-15 08:12
      回复
        127.0.0.1 304 22/Oct/2022:16:56:52 +0800


        IP属地:山东4楼2023-11-15 08:13
        回复



          IP属地:山东5楼2023-11-15 09:29
          回复