'''
1.
shell对象和selector对象
scrapy shell就是一个交互式的终端
作用:可以很好地调试(不用运行整个爬虫,就能直接对某个页面试验选择器表达式)
启动:scrapy shell url
如果url带有参数(含有 ? 或 & 等特殊字符),要用引号把url包起来,否则终端会把 & 当作后台执行符,url会被截断
2.选择器
selector
xpath
extract():返回由unicode字符串组成的列表(即使只匹配到一个结果,得到的也是列表)
css
re
'''
废话不多说,直接上代码,然后解释。
spider代码
# -*- coding: utf-8 -*-
import scrapy
from ..items import TencentItem
class TencentSpider(scrapy.Spider):
name = 'tencent'
# allowed_domains = ['https://hr.tencent.com/position.php?&start=0#a']
start_urls = ['https://hr.tencent.com/position.php?&start=0#a']
base_url = 'https://hr.tencent.com/'
def parse(self, response):
names = response.xpath('//tr[@class="even"]/td[1]/a/text() | //tr[@class="odd"]/td[1]/a/text()').extract()
types = response.xpath('//tr[@cl