1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
import configparser
import os
import re
import sys
import time

import requests
from bs4 import BeautifulSoup

# Scrapes one Something Awful forum thread page by page, looking for posts
# that contain a "gaybie nom..." line.  Pages with at least one match are
# saved under pages/ with every non-matching <tr> removed.  The page reached
# is stored in config.ini ("lastpage") so the next run resumes from there.

CONFIG_PATH = 'config.ini'
PAGES_DIR = 'pages'
LOGIN_URL = "https://forums.somethingawful.com/account.php"
THREAD_URL = "https://forums.somethingawful.com/showthread.php"
THREAD_ID = '3908778'
# Text the forum returns when the requested page is past the end of the thread.
END_OF_THREAD_MARKER = "The page number you requested"
# Pause between page requests, in seconds.
REQUEST_DELAY = 0.1
# Upper bound (seconds) on each HTTP call so a stalled connection cannot hang
# the script forever; requests has no timeout by default.
REQUEST_TIMEOUT = 30

# Compiled once instead of on every loop iteration.  Matches e.g.
# "gaybie nom <something>" / "Gaybies nomination: <something>" and captures
# the rest of the line.
NOMINATION_RE = re.compile(r'[g]aybie[s]? [n]om\S{0,} (.+)$', flags=re.IGNORECASE|re.MULTILINE)


def has_credentials(section):
    """Return True when *section* holds a non-empty username and password.

    *section* is any mapping with ``.get`` (a configparser section or a dict).
    """
    return bool(section.get("username")) and bool(section.get("password"))


def starting_page(section):
    """Return the page number to resume from.

    Uses the ``lastpage`` entry of *section* when present and non-empty,
    otherwise 1.  Raises ValueError if the stored value is not an integer.
    """
    stored = section.get("lastpage", "")
    return int(stored) if stored else 1


def load_config(path=CONFIG_PATH):
    """Read the INI file at *path* and return the parsed ConfigParser.

    Prints a message and exits with status 1 when the file is missing or the
    credentials are absent/empty.  (The exit status used to be 0, which
    reported success to the calling shell on a fatal error.)
    """
    if not os.path.isfile(path):
        print("config.ini is missing!")
        sys.exit(1)
    config = configparser.ConfigParser()
    config.read(path)
    if not has_credentials(config["DEFAULT"]):
        print("username and password must be present in config.ini.")
        sys.exit(1)
    return config


def keep_nomination_rows(soup):
    """Remove from *soup* every <tr> with no descendant matching NOMINATION_RE."""
    for row in soup.find_all('tr'):
        if not any(NOMINATION_RE.search(str(node)) for node in row.descendants):
            row.decompose()


def main():
    """Log in, walk the thread from the saved page, and save matching pages."""
    config = load_config()
    settings = config["DEFAULT"]
    # Parsed before any network traffic so a corrupt value fails fast.
    page = starting_page(settings)

    os.makedirs(PAGES_DIR, mode=0o755, exist_ok=True)

    with requests.Session() as session:
        login = {
            "username": settings["username"],
            "password": settings["password"],
            "action": "login",
        }
        # NOTE(review): the login response is not inspected, so a failed login
        # is not detected here.
        session.post(LOGIN_URL, data=login, timeout=REQUEST_TIMEOUT)

        try:
            while True:
                time.sleep(REQUEST_DELAY)
                query = {'threadid': THREAD_ID, 'pagenumber': str(page)}
                # NOTE(review): the HTTP status code is not checked; an error
                # page without the end-of-thread marker is treated as a
                # regular page.
                response = session.get(THREAD_URL, params=query, timeout=REQUEST_TIMEOUT)
                if END_OF_THREAD_MARKER in response.text:
                    # Step back so the next run re-reads the last real page.
                    page -= 1
                    break
                if NOMINATION_RE.search(response.text) is not None:
                    print("Page {} has a nomination.".format(page))
                    soup = BeautifulSoup(response.text, 'html.parser')
                    keep_nomination_rows(soup)
                    # Explicit UTF-8: the platform default encoding may not be
                    # able to represent every character in the scraped HTML.
                    with open("{}/page{}.html".format(PAGES_DIR, page), "w", encoding="utf-8") as file:
                        file.write(str(soup))
                else:
                    print("Page {} has no nominations.".format(page))
                page += 1
        finally:
            # Persist the resume point even when the run is interrupted
            # (network error, Ctrl-C), not only on a clean finish.
            settings["lastpage"] = str(page)
            with open(CONFIG_PATH, "w") as file:
                config.write(file)


if __name__ == "__main__":
    main()
|