"""Download course material from symfonycasts.com.

The script runs top to bottom when executed:

1. Fetch the "all tracks" listing and collect every track (name + URL).
2. Visit each track page and collect the courses it links to.
3. For every course, walk its chapter list and, for each chapter page,
   download the files offered in the page's download dropdown (course
   code, video, course script) into
   ``casts/<track>/<course>/<NN>_<chapter title>/``.

Errors for a single chapter or course are printed and the script moves on
to the next one.
"""
import re
from pathlib import Path

from bs4 import BeautifulSoup
from requests import Session
from tqdm import tqdm

BASE_URL = "https://symfonycasts.com"

# Anything outside this set is stripped when building directory names.
_UNSAFE_CHARS = re.compile(r'[^a-zA-Z0-9_]')

# (text to look for in the download link label, file name to save as).
# Checked in order; the first matching entry wins.
_DOWNLOAD_TARGETS = (
    ("Course Code", "code.zip"),
    ("This Video", "video.mp4"),
    ("Course Script", "course.pdf"),
)

casts = []
# Number of courses to skip at the start of the download loop.
offset = 0

s = Session()
# Both cookies are set to empty strings here.
# NOTE(review): presumably these must be filled in with values from a
# logged-in browser session before running -- confirm.
s.cookies.set('PHPSESSID', '')
s.cookies.set('REMEMBERME', '')


def _safe_name(text):
    """Return *text* with spaces turned into ``_`` and every character
    other than ASCII letters, digits and ``_`` removed."""
    return _UNSAFE_CHARS.sub('', text.replace(' ', '_'))


def download_file(url, session, filename):
    """Stream the response body of *url* into the file *filename*.

    Args:
        url: Absolute URL to fetch.
        session: Object whose ``get(url, stream=True)`` returns a
            response usable as a context manager (here: the shared
            ``requests`` session, so its cookies are sent).
        filename: Destination path; opened in binary write mode, so an
            existing file is overwritten.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response;
        nothing is written in that case.
    """
    with session.get(url, stream=True) as r:
        r.raise_for_status()
        with open(filename, 'wb') as f:
            # NOTE: for a chunk-encoded response, set chunk_size to None
            # and skip empty chunks before writing.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


# --- 1. Collect all tracks -------------------------------------------------
r = s.get(BASE_URL + '/api/courses/all-tracks')
soup = BeautifulSoup(r.text, 'html.parser')

for div in soup.find_all('div', {'class': 'js-tut-container'}):
    casts.append({
        "url": div.find('a').get('href'),
        "name": div.find('h4').text,
    })

# --- 2. Collect the courses of every track ---------------------------------
subcasts = []

for cast in casts:
    r = s.get(BASE_URL + cast["url"])
    soup = BeautifulSoup(r.text, 'html.parser')

    for subcast in soup.find_all('a', {'class': 'course-list-item-title'}):
        subcasts.append({
            "castName": cast["name"],
            "name": subcast.find('p').text,
            # Drop the "/resume" part so the URL points at the course page.
            "url": subcast.get('href').replace('/resume', ''),
        })

# --- 3. Download every chapter of every course -----------------------------
for cast in tqdm(subcasts[offset:]):
    # Directory for this course; the same for all of its chapters.
    course_dir = (
        Path("casts") / _safe_name(cast["castName"]) / _safe_name(cast["name"])
    )

    try:
        r = s.get(BASE_URL + cast["url"])
        soup = BeautifulSoup(r.text, 'html.parser')
        chapter_list = soup.find('ul', {'class': 'chapter-list'})

        # Numbers the chapter folders; only advanced after a chapter was
        # processed without an error.
        counter = 1

        for link in chapter_list.find_all('a', {'class': 'd-block'}):
            href = link.get('href')
            # Skip anchors that lead nowhere. Compare by value: identity
            # comparison against a string literal is not reliable.
            if href is None or href == "#":
                continue

            try:
                chapter_response = s.get(BASE_URL + href)
                chapter_soup = BeautifulSoup(
                    chapter_response.text, 'html.parser'
                )

                video_name = _safe_name(chapter_soup.find("h1").text)
                chapter_dir = course_dir / f"{counter:02d}_{video_name}"
                chapter_dir.mkdir(parents=True, exist_ok=True)

                dropdown = chapter_soup.find(
                    'div', {'aria-labelledby': 'downloadDropdown'}
                )

                for span in dropdown.find_all('span'):
                    anchor = span.find('a')
                    file_link = anchor.get('href')
                    label = anchor.text

                    for marker, target_name in _DOWNLOAD_TARGETS:
                        if marker in label:
                            download_file(
                                BASE_URL + file_link,
                                s,
                                chapter_dir / target_name,
                            )
                            break

                counter += 1
            except Exception as e:
                # Best effort: report the failing chapter and carry on
                # with the next one.
                print(e)
                print(f"Error on {cast['name']} - {href}")
    except Exception as e:
        # Best effort: report the failing course and carry on.
        print(e)
        print("Error on " + cast["name"])