requests:获取页面的工具;BeautifulSoup:页面解析器;content():去标签;strip(x):当 x 为空时,默认删除首尾的空白符(包括 '\n'、'\r'、'\t'、' ')
- 抓取最新更新美剧的demo
import requests
from bs4 import BeautifulSoup
import re
url = "http://bbs.ncar.cc/thread-28724-1-1.html"
res = requests.get(url)
soup = BeautifulSoup(res.text,'html.parser')
sel_episode = soup.select('#postlist #thread_subject ')[0]
num = (sel_episode.text).encode('utf-8')
m = re.search('[1][7]',num)
episode = int(m.group(0))
sel_magnet = soup.select('#postlist .blockcode ')[episode-1].text[:-4]
print '最新 第',str(episode),'集','\n',sel_magnet
- 抓取爆炒江湖数据
#-*- coding:utf-8 -*-
import requests
import os
import urllib
import sys
# Python 2-only workaround: make implicit unicode -> str conversions (such as
# writing res.text to a file opened in text mode below) use UTF-8 rather than
# ASCII. Neither reload() as a builtin nor sys.setdefaultencoding exists on
# Python 3.
reload(sys)
sys.setdefaultencoding('utf8')

# Download the game data JSON and store it verbatim in food.txt.
url = "https://foodgame.github.io/data/data.json?_=1515818661701"
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=30)
# Do not save an HTTP error page as if it were the data file.
res.raise_for_status()
# The with-block guarantees the file is flushed and closed, even on error.
with open('food.txt','w') as f:
    f.write(res.text)