"""Download every PDF linked from the session pages of an Infovaya event.

Credentials are read from ``config.ini`` and the downloaded files are written
to the ``pdfs`` directory; both paths are relative to the current working
directory.
"""

import argparse
import configparser
import os
import re
import shutil
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

INFOVAYA_ROOT = "https://events.infovaya.com/"

# Directory (relative to the working directory) that receives the PDFs.
_PDF_DIR = "pdfs"

# Characters that are path separators or are rejected by common file systems.
_UNSAFE_FILE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _load_credentials(path="config.ini"):
    """Return ``(username, password)`` from the DEFAULT section of *path*.

    Prints a message and exits with a non-zero status when the file is
    missing or either value is absent or empty.
    """
    if not os.path.isfile(path):
        print("config.ini is missing!")
        sys.exit(1)

    # Interpolation is disabled so a literal '%' in a password is kept as-is.
    config = configparser.ConfigParser(interpolation=None)
    config.read(path)
    defaults = config["DEFAULT"]
    username = defaults.get("username", fallback="")
    password = defaults.get("password", fallback="")
    if not username or not password:
        print("username and password must be present in config.ini.")
        sys.exit(1)
    return username, password


def _safe_file_name(page_title, pdf_name):
    """Build ``<title>-<pdf_name>`` with spaces and unsafe characters as '_'."""
    raw_name = "{}-{}".format(page_title.replace(" ", "_"), pdf_name)
    return _UNSAFE_FILE_CHARS.sub("_", raw_name)


def main(args):
    """Log in, walk every session page of the event and save its PDFs.

    Args:
        args: Parsed command-line namespace. It is accepted for interface
            compatibility; nothing in this function reads it.

    Raises:
        requests.HTTPError: If any request returns an error status.
        SystemExit: If ``config.ini`` or the credentials are missing.
    """
    print("Fetching PDFs.")
    if not os.path.isdir(_PDF_DIR):
        print("First-time setup...")
        os.mkdir(_PDF_DIR)

    username, password = _load_credentials()
    login_form = {
        "username": username,
        "password": password,
        "action": "login",
    }

    session_href = re.compile(r"session\?id")
    pdf_href = re.compile("pdfviewer")

    # The Session keeps the login cookies for all following requests and is
    # closed when the block exits.
    with requests.Session() as s:
        login_response = s.post(urljoin(INFOVAYA_ROOT, "login"), data=login_form)
        login_response.raise_for_status()

        sessions_request = s.get(
            urljoin(INFOVAYA_ROOT, "event?id=88&actionMenu=sessions")
        )
        sessions_request.raise_for_status()
        sessions_page_soup = BeautifulSoup(sessions_request.text, "html.parser")

        print("Starting to search for PDFs. This may take a while...")
        for session_link in sessions_page_soup.find_all("a", href=session_href):
            time.sleep(0.05)  # short pause between page requests
            page_request = s.get(urljoin(INFOVAYA_ROOT, session_link["href"]))
            page_request.raise_for_status()
            page_soup = BeautifulSoup(page_request.text, "html.parser")

            # get_text() also works when the heading contains nested tags,
            # where .string would be None; fall back when there is no <h1>.
            heading = page_soup.h1
            page_title = heading.get_text(strip=True) if heading is not None else ""
            if not page_title:
                page_title = "untitled_session"

            for pdf_tag in page_soup.find_all("a", href=pdf_href):
                pdf_name = pdf_tag["href"].split("/")[-1]
                file_name = _safe_file_name(page_title, pdf_name)
                target_path = os.path.join(_PDF_DIR, file_name)

                # Stream the body so the PDF is never held fully in memory;
                # the context manager releases the connection afterwards.
                pdf_url = urljoin(INFOVAYA_ROOT, pdf_tag["href"])
                with s.get(pdf_url, stream=True) as pdf_request:
                    pdf_request.raise_for_status()
                    print("Fetching PDF for '{}'...".format(page_title))
                    # Have urllib3 undo any Content-Encoding while copying.
                    pdf_request.raw.decode_content = True
                    with open(target_path, "wb") as file:
                        shutil.copyfileobj(pdf_request.raw, file)
                print("Saved as pdfs/{}".format(file_name))

    print("Finished fetching thread.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # NOTE: main() does not read either argument. "thread" is optional so the
    # script can be started without a placeholder value, while invocations
    # that still pass one keep working.
    parser.add_argument(
        "thread",
        nargs="?",
        default=None,
        action="store",
        help="The threadid from the thread's URL",
    )
    parser.add_argument(
        "-i",
        "--images",
        action="store_true",
        help="Set this flag to download images as well as HTML.\nNOTE: This may be VERY bandwidth and disk intensive!",
    )
    args = parser.parse_args()
    main(args)