发一个网易公开课的爬虫
阅读 20 作者 傻瓜
import requests
# Table of lecture pages on open.163.com that the script scrapes.
# Each entry is stored under the key "_movies<N>" (N counts from 0 in the
# order listed here) and holds the page's movie id and its full page URL.
a = {
    "_movies%d" % position: {"movieid": movie_id, "href": page_url}
    for position, (movie_id, page_url) in enumerate((
        ('M6SGHFBMC', 'http://open.163.com/movie/2008/1/M/C/M6SGF6VB4_M6SGHFBMC.html'),
        ('M6SGHJ9BO', 'http://open.163.com/movie/2008/1/B/O/M6SGF6VB4_M6SGHJ9BO.html'),
        ('M6SGHM4EB', 'http://open.163.com/movie/2008/1/E/B/M6SGF6VB4_M6SGHM4EB.html'),
        ('M6SGHKAED', 'http://open.163.com/movie/2008/1/E/D/M6SGF6VB4_M6SGHKAED.html'),
        ('M6SGHMFAR', 'http://open.163.com/movie/2008/1/A/R/M6SGF6VB4_M6SGHMFAR.html'),
        ('M6SGJVV7H', 'http://open.163.com/movie/2008/1/7/H/M6SGF6VB4_M6SGJVV7H.html'),
        ('M6SGJVMC6', 'http://open.163.com/movie/2008/1/C/6/M6SGF6VB4_M6SGJVMC6.html'),
        ('M6SGJVA93', 'http://open.163.com/movie/2008/1/9/3/M6SGF6VB4_M6SGJVA93.html'),
        ('M6SGJV3FH', 'http://open.163.com/movie/2008/1/F/H/M6SGF6VB4_M6SGJV3FH.html'),
        ('M6SGJURUO', 'http://open.163.com/movie/2008/1/U/O/M6SGF6VB4_M6SGJURUO.html'),
        ('M6SGKG5LM', 'http://open.163.com/movie/2008/1/L/M/M6SGF6VB4_M6SGKG5LM.html'),
        ('M6SGKGMOT', 'http://open.163.com/movie/2008/1/O/T/M6SGF6VB4_M6SGKGMOT.html'),
        ('M6SGKK6L3', 'http://open.163.com/movie/2008/1/L/3/M6SGF6VB4_M6SGKK6L3.html'),
        ('M6SGKIEME', 'http://open.163.com/movie/2008/1/M/E/M6SGF6VB4_M6SGKIEME.html'),
        ('M6SGKINJV', 'http://open.163.com/movie/2008/1/J/V/M6SGF6VB4_M6SGKINJV.html'),
        ('M6SGKSC2N', 'http://open.163.com/movie/2008/1/2/N/M6SGF6VB4_M6SGKSC2N.html'),
        ('M6SGKVGN6', 'http://open.163.com/movie/2008/1/N/6/M6SGF6VB4_M6SGKVGN6.html'),
        ('M6SGL3P1H', 'http://open.163.com/movie/2008/1/1/H/M6SGF6VB4_M6SGL3P1H.html'),
        ('M6SGL2R35', 'http://open.163.com/movie/2008/1/3/5/M6SGF6VB4_M6SGL2R35.html'),
        ('M6SGL3CE4', 'http://open.163.com/movie/2008/1/E/4/M6SGF6VB4_M6SGL3CE4.html'),
    ))
}
# HTTP header set kept as a module-level dict, in the order listed.
# NOTE(review): none of the requests.get() calls visible in this script pass
# this dict, and its Host entry names mars.sharedaka.com rather than the
# open.163.com pages being fetched -- confirm whether it is still needed.
# NOTE(review): the 'token' entry is a literal credential-like value checked
# into source; confirm it is safe to publish.
headers = dict((
    ('charset', 'utf-8'),
    ('Accept-Encoding', 'gzip'),
    ('referer', 'https://servicewechat.com/wx855c5d7718f218c9/414/page-frame.html'),
    ('xdk-version', 'V0.11.27.0'),
    ('xdk-versioncode', '221'),
    ('xdk-env', 'v2'),
    ('content-type', 'application/json'),
    ('token', 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJvY2h2cTBCR3NwTEhhT0FmLWRaeTlzdWxjR2lvIiwiYXVkaWVuY2UiOiJtb2JpbGUiLCJjcmVhdGVkIjoxNTQwMjg3MDcwNDI5LCJleHAiOjE1NDYzMzUxMjZ9.wl_4ZPbAxhV9pBhrcNQrfrOo1HGlhJZmjBUZstf4QNg'),
    ('User-Agent', 'Mozilla/5.0 (Linux; Android 8.0.0; VTR-AL00 Build/HUAWEIVTR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.126 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070336) NetType/WIFI Language/zh_CN Process/appbrand0'),
    ('Host', 'mars.sharedaka.com'),
    ('Connection', 'Keep-Alive'),
))
import re
import os
# Directory the downloaded .mp4 files are written to.
download_path = './rc'

# Maps "<position><title>" -> direct video URL; filled by the loop below.
rc_dict = {}

# Compiled once, outside the loop. Both patterns are applied to page text from
# which every newline and space has been removed, so they contain no whitespace.
appsrc_pattern = re.compile(r"appsrc:'(.+?)'", re.DOTALL)
title_pattern = re.compile(r"title:'(.+?)'", re.DOTALL)

# Fetch each lecture page listed in `a` and pull the stream URL and the title
# out of the page text. `position` is the 1-based place of the page in `a`
# and is prefixed to the title to form the dictionary key.
for position, movie_key in enumerate(a, 1):
    page_url = a[movie_key]["href"]
    # The timeout keeps a single stalled connection from hanging the whole run.
    page_text = requests.get(page_url, timeout=30).text
    page_text = page_text.replace('\n', '').replace(' ', '')

    appsrc_match = appsrc_pattern.search(page_text)
    title_match = title_pattern.search(page_text)
    if appsrc_match is None or title_match is None:
        # A page without both fields is reported and skipped instead of
        # aborting the batch with an IndexError; its position number is left
        # unused so the numbering of the other pages does not shift.
        print('skipped, no appsrc/title found:', page_url)
        continue

    # The page advertises an m3u8 address; the script requests the mp4 variant.
    video_url = appsrc_match.group(1).replace('m3u8', 'mp4')
    rc_dict[str(position) + title_match.group(1)] = video_url

print(rc_dict)
# Download every video collected in rc_dict into download_path as
# "<key>.mp4", skipping files that are already present.

# Create the target directory up front; otherwise the first open() below
# fails on a fresh checkout where ./rc does not exist yet.
if not os.path.isdir(download_path):
    os.makedirs(download_path)

for mk in rc_dict:
    # The key contains a title scraped from a web page. Replace the platform's
    # path separators so it stays a single file name inside download_path.
    file_stem = mk
    for separator in (os.sep, os.altsep):
        if separator:
            file_stem = file_stem.replace(separator, '_')

    path = os.path.join(download_path, file_stem + '.mp4')
    print('--------->', path)
    if os.path.exists(path):
        print('existed')
        continue

    # Stream into a ".part" file and rename only after the last chunk is
    # written, so an interrupted run never leaves a truncated file that the
    # exists-check above would later mistake for a finished download.
    partial_path = path + '.part'
    # The timeout bounds connect/read stalls, not the total transfer time.
    r = requests.get(rc_dict[mk], stream=True, timeout=30)
    try:
        if r.status_code != 200:
            # Do not store an error page under a .mp4 name.
            print('download failed (HTTP %d):' % r.status_code, rc_dict[mk])
            continue
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection open until closed.
        r.close()
    os.rename(partial_path, path)
#result = json.loads(content)["data"]