```diff
@@ -32,6 +32,17 @@ def main(args: dict) -> None:
     infovaya_creds_request.raise_for_status()
 
+    root_event = 88 if not args.event else args.event
+    caching = not args.no_cache
+    cache_list = []
+    if caching:
+        if not os.path.isfile(f"{root_event}_cache.txt"):
+            cache_file = open(f"{root_event}_cache.txt", "w")
+        else:
+            cache_file = open(f"{root_event}_cache.txt", "a")
+            cache_list = cache_file.readlines()
 
+    skip_until = False if not args.start_with else True
+    start_with = args.start_with
 
     sessions_request = infovaya_session.get(f"{INFOVAYA_ROOT}event?id={root_event}&actionMenu=sessions")
     sessions_request.raise_for_status()
```
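
Two details of the cache setup above are worth flagging. `open(path, "a")` returns a write-only handle, so the `cache_list = cache_file.readlines()` branch raises `io.UnsupportedOperation: not readable` the first time it runs. And `readlines()` keeps each line's trailing newline, while the IDs written later are bare strings plus `"\n"`, so checks like `page_id not in cache_list` will never see a match. A minimal sketch of one way to handle both; the `open_cache` helper is hypothetical, not part of the diff:

```python
def open_cache(root_event, caching):
    """Open the per-event cache for appending and reading.

    Returns (file_or_None, set_of_cached_ids). Hypothetical helper.
    """
    if not caching:
        return None, set()
    # "a+" creates the file if it is missing and keeps it readable; plain "a"
    # is write-only, so .readlines() on it raises io.UnsupportedOperation.
    cache_file = open(f"{root_event}_cache.txt", "a+")
    cache_file.seek(0)  # append mode starts positioned at end-of-file
    # Strip the trailing "\n" so membership tests against bare IDs can match
    # the f"{page_id}\n" lines written elsewhere in the script.
    cached_ids = {line.strip() for line in cache_file if line.strip()}
    return cache_file, cached_ids
```

With something like this, `cache_list` becomes a set of clean ID strings and the later `not in` checks behave as intended.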
```diff
@@ -42,8 +53,14 @@ def main(args: dict) -> None:
     print("Starting to search for PDFs. This may take a while...")
     for session_link in sessions_page_soup.find_all("a", href=re.compile("session\?id")):
         session_href = session_link["href"][1:]
-        page_name = f"session_{session_href.split('=')[-1]}.html"
-        if not os.path.isfile(f"html/{page_name}"):
+        page_id = session_href.split("=")[-1]
+        page_name = f"session_{page_id}.html"
+        if caching and page_id not in cache_list:
+            cache_file.write(f"{page_id}\n")
+        if skip_until and page_id != start_with:
+            continue
+        skip_until = False
+        if not caching or page_id not in cache_list:
             time.sleep(1)
             print(f"Fetching {session_href} ... ", end="")
             page_request = infovaya_session.get(f"{INFOVAYA_ROOT}{session_href}")
```
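
The `re.compile("session\?id")` pattern here (and the matching `presentation\?id` one in the next hunk) puts `\?` inside a normal string literal. It still works, because Python passes the unknown escape through unchanged, but newer interpreters warn about invalid escape sequences, so raw strings are the conventional spelling:

```python
import re

# Raw strings hand "\?" to the regex engine untouched without tripping
# Python's invalid-escape warning on the string literal itself.
SESSION_LINK_RE = re.compile(r"session\?id")
PRESENTATION_LINK_RE = re.compile(r"presentation\?id")
```

Separately, `cache_file.write(f"{page_id}\n")` runs as soon as a session ID is seen, before any of its pages or PDFs have been fetched. If a run stops partway (for example on the download-quota exit added in the last hunk), that session is already recorded as done and will be skipped next time. Writing the ID only after the session has been fully processed would make the cache reflect completed work rather than attempted work.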
```diff
@@ -59,8 +76,9 @@ def main(args: dict) -> None:
             # import pdb; pdb.set_trace()
             for presentation_link in page_soup.find_all("a", href=re.compile("presentation\?id")):
                 presentation_href = presentation_link["href"][1:]
-                pres_page_name = f"pres_{presentation_href.split('=')[-1]}.html"
-                if not os.path.isfile(f"html/{pres_page_name}"):
+                presentation_id = presentation_href.split('=')[-1]
+                pres_page_name = f"pres_{presentation_id}.html"
+                if not caching or presentation_id not in cache_list:
                     print(f"  Fetching {presentation_href} ...", end="")
                     pres_request = infovaya_session.get(f"{INFOVAYA_ROOT}{presentation_href}")
                     pres_request.raise_for_status()
```
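
The presentation loop mirrors the session loop above: take the numeric ID off the href, build a page name from it, and test the ID against the cache. If the pattern spreads any further, a pair of small helpers (hypothetical, not in the diff) would keep the two loops in sync:

```python
def link_id(href: str) -> str:
    """Return the trailing ID from an href like 'presentation?id=12345'."""
    return href.split("=")[-1]


def needs_fetch(item_id: str, cached_ids, caching: bool) -> bool:
    """True when caching is off or the ID has not been recorded yet."""
    return not caching or item_id not in cached_ids
```

Both loops could then call `needs_fetch(page_id, cache_list, caching)`, and any change to the cache semantics would only need to happen in one place.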
```diff
@@ -76,17 +94,30 @@ def main(args: dict) -> None:
                         file_name = f"{pres_page_title.replace(' ', '_')}-{pdf_name}"
                         pdf_request = infovaya_session.get(f"{INFOVAYA_ROOT}{pdf_tag['href'][1:]}")
                         pdf_request.raise_for_status()
+                        if b"Download Quota Exceeded" in pdf_request.content:
+                            print("""ATTENTION
+Infovaya is reporting that you've exceeded your download limit.
+If you're caching, you can start over from this point tomorrow.
+If you haven't been caching, you can use the --start-with flag to enter the last SESSION ID above and start from there.""")
+                            sys.exit(0)
+                        if caching and presentation_id not in cache_list:
+                            cache_file.write(f"{presentation_id}\n")
                         print(f"Fetching PDF for '{pres_page_title}'...")
-                        with open(f"pdfs/{file_name}", "wb") as file:
-                            file.write(pdf_request.content)
-                        print(f"Saved as pdfs/{file_name}")
-                        time.sleep(2)
+                        if not os.path.isfile(f"pdfs/{file_name}"):
+                            with open(f"pdfs/{file_name}", "wb") as file:
+                                file.write(pdf_request.content)
+                            print(f"Saved as pdfs/{file_name}")
+                            time.sleep(2)
+    if caching:
+        cache_file.close()
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("-s", "--save-pages", action="store_true", help="Save HTML of the scanned pages")
     parser.add_argument("-e", "--event", action="store_true", help="Choose which event ID to scan")
+    parser.add_argument("-c", "--no-cache", action="store_true", help="Don't cache previously-seen pages; re-download everything")
+    parser.add_argument("-w", "--start-with", action="store", help="Start with the given session ID")
     args = parser.parse_args()
     print(args)
     main(args)
```
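
Two things in this last hunk deserve a second look. First, the new `if not os.path.isfile(f"pdfs/{file_name}")` guard is evaluated only after `pdf_request` has already downloaded the file, so it saves a disk write but not a request against the download quota; hoisting the existence check above the `infovaya_session.get(...)` call would skip the transfer entirely for PDFs that are already on disk. (The `sys.exit(0)` call also assumes `sys` is imported at the top of the script, which this diff doesn't touch, so that's worth a quick check.)

Second, `-e/--event` is declared with `action="store_true"`, but its help text and the `root_event = 88 if not args.event else args.event` line in the first hunk both treat it as a value: passing `-e` sets `args.event` to `True`, so `root_event` ends up as `True` rather than an event ID. A sketch of an argument setup that takes the ID as a value; this is a suggested variant, not what the diff currently does:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-s", "--save-pages", action="store_true",
                    help="Save HTML of the scanned pages")
# Accept the event ID as a value; the default mirrors the fallback of 88 in main().
parser.add_argument("-e", "--event", type=int, default=88,
                    help="Event ID to scan (default: 88)")
parser.add_argument("-c", "--no-cache", action="store_true",
                    help="Don't cache previously-seen pages; re-download everything")
parser.add_argument("-w", "--start-with",
                    help="Start with the given session ID")
args = parser.parse_args()
```

With that in place, `main()` could use `args.event` directly instead of the `88 if not args.event else args.event` fallback.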