| import requests, time, os, sys | import requests, time, os, sys | ||||
| import pandas, msgpack | |||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||
class DataColl:
    """Build NSDUH crosstab API endpoints and matching output filenames.

    Query parameters are exposed as properties.  Assigning any of them
    regenerates ``endpoint`` and ``filename``.  Both remain ``None`` until
    ``year``, ``row`` and ``col`` have all been set.

    Properties:
        year: First year of the two-year survey range (stored as ``int``).
        row, col: Variable names used for the crosstab row and column.
        control: Optional control variable name.
        cf_eq: Truthy for an equality filter on the control variable; any
            falsy value (including the default ``None``) yields "not equal".
        control_filter: Value the control variable is compared against.
            Only used when ``control`` is also set.
        weight: Optional weight variable name.
        chisq: Truthy to request a chi-squared run.
        fmt: One of "json", "msgpack" or "api"; anything else means "json".
    """

    _URL_TEMPLATE = (
        "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/"
        "?row={}&column={}{}{}{}{}&format={}"
    )
    _FILENAME_TEMPLATE = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}"
    _FORMATS = ("json", "msgpack", "api")

    def __init__(self):
        self._year = None
        self._row = None
        self._col = None
        self._control = None
        self._cf_eq = None
        self._control_filter = None
        self._weight = None
        self._chisq = None
        self._fmt = None
        # Initialised here so the read-only properties never raise
        # AttributeError on a freshly constructed object.
        self._endpoint = None
        self._filename = None

    @property
    def year(self):
        return self._year

    @year.setter
    def year(self, year):
        # NOTE: a lower bound of 2014 used to be enforced here; that check is
        # currently disabled, so any value accepted by int() is stored.
        self._year = int(year)
        self._generate()

    @property
    def row(self):
        return self._row

    @row.setter
    def row(self, r):
        self._row = r
        self._generate()

    @property
    def col(self):
        return self._col

    @col.setter
    def col(self, c):
        self._col = c
        self._generate()

    @property
    def control(self):
        return self._control

    @control.setter
    def control(self, ctl):
        self._control = ctl
        self._generate()

    @property
    def cf_eq(self):
        return self._cf_eq

    @cf_eq.setter
    def cf_eq(self, eq):
        self._cf_eq = eq
        self._generate()

    @property
    def control_filter(self):
        return self._control_filter

    @control_filter.setter
    def control_filter(self, value):
        self._control_filter = value
        self._generate()

    @property
    def weight(self):
        return self._weight

    @weight.setter
    def weight(self, wgt):
        self._weight = wgt
        self._generate()

    @property
    def chisq(self):
        return self._chisq

    @chisq.setter
    def chisq(self, cs):
        self._chisq = cs
        self._generate()

    @property
    def fmt(self):
        return self._fmt

    @fmt.setter
    def fmt(self, f):
        self._fmt = f
        self._generate()

    @property
    def endpoint(self):
        """The generated URL, or ``None`` while required fields are missing."""
        return self._endpoint

    @property
    def filename(self):
        """The generated filename, or ``None`` while required fields are missing."""
        return self._filename

    def _generate(self):
        """Silently rebuild the endpoint and the (csv) filename."""
        self.setEndpoint(False)
        self.setFilename(False)

    def _has_required(self):
        """Return True once year, row and column have all been set."""
        return not (self._year is None or self._row is None or self._col is None)

    def setEndpoint(self, loud=True):
        """Rebuild ``endpoint`` from the current parameters.

        If year, row or column is missing, ``endpoint`` is reset to ``None``
        and, when ``loud`` is true, a message is printed.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build an endpoint.")
            self._endpoint = None
            # Stop here: the formatting below needs a real year.
            return

        control_part = "" if self.control is None else "&control={}".format(self.control)
        negate = "" if self.cf_eq else "!"
        if self.control_filter is None or not control_part:
            filter_part = ""
        else:
            # %3D is the URL-encoded "=", so this yields VAR=value or VAR!=value.
            filter_part = "&filter={}{}%3D{}".format(self.control, negate, self.control_filter)
        weight_part = "" if self.weight is None else "&weight={}".format(self.weight)
        chisq_part = "&run_chisq=true" if self.chisq else "&run_chisq=false"
        fmt_part = self.fmt if self.fmt in self._FORMATS else "json"
        year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)

        self._endpoint = self._URL_TEMPLATE.format(
            year_rng,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            fmt_part,
        )

    def setFilename(self, loud=True, ext=None):
        """Rebuild ``filename`` from the current parameters.

        Args:
            loud: Print a message when year, row or column is missing.
            ext: File extension without the dot; defaults to "csv".

        Note that any later property assignment regenerates the filename
        with the default "csv" extension.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build a filename.")
            self._filename = None
            return

        extension = "csv" if ext is None else ext
        control_part = "" if self.control is None else "_ctl_{}".format(self.control)
        negate = "" if self.cf_eq else "n"
        if self.control_filter is None or not control_part:
            filter_part = ""
        else:
            filter_part = "_{}eq_{}".format(negate, self.control_filter)
        weight_part = "" if self.weight is None else "_weight_{}".format(self.weight)
        chisq_part = "_chisq_true" if self.chisq else "_chisq_false"

        self._filename = self._FILENAME_TEMPLATE.format(
            self.year,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            extension,
        )

    def __str__(self):
        """Return the multi-line human-readable summary printed by toString."""
        if self.control is None:
            control_line = "No control variable"
        elif self.control_filter is None:
            control_line = "Controlling on {}".format(self.control)
        else:
            control_line = "Controlling on {} {}= {}".format(
                self.control, "" if self.cf_eq else "!", self.control_filter
            )
        return "\n".join([
            "Data Collector:",
            "Row: {}, Column: {}".format(self.row, self.col),
            control_line,
            "Weighted by {}".format(self.weight),
            "Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared",
            "Formatting as {}".format(self.fmt),
            "URL: {}".format(self.endpoint),
        ])

    def toString(self):
        """Print a summary of the current configuration to stdout."""
        print(self)
def main():
    """Build one sample NSDUH crosstab URL with endpointBuilder and print it."""
    url = endpointBuilder(
        2017, "STATE", "YOSELL2", "CATAG18", False, "2", "DASWT_1", False, "json"
    )
    print(url)
def endpointBuilder(year, row, col, control=None, control_filter_not=False, control_filter=None, weight=None, chisq=False, fmt=None):
    """Return the NSDUH crosstab API URL for the given query parameters.

    Args:
        year: First year of the two-year survey range; converted with int().
        row: Variable name for the crosstab row.
        col: Variable name for the crosstab column.
        control: Optional control variable name.
        control_filter_not: Truthy to filter on "not equal" instead of "equal".
        control_filter: Value the control variable is compared against.
            Ignored unless ``control`` is also given.
        weight: Optional weight variable name.
        chisq: Truthy to request a chi-squared run.
        fmt: "json", "msgpack" or "api"; anything else falls back to "json".

    Returns:
        The endpoint URL as a string.
    """
    # The control parameter depends on ``control`` itself, not on whether a
    # filter value was supplied.
    control_part = "" if control is None else "&control={}".format(control)
    negate = "!" if control_filter_not else ""
    if control_filter is None or not control_part:
        filter_part = ""
    else:
        # %3D is the URL-encoded "=", giving VAR=value or VAR!=value.
        filter_part = "&filter={}{}%3D{}".format(control, negate, control_filter)
    weight_part = "" if weight is None else "&weight={}".format(weight)
    chisq_part = "&run_chisq=true" if chisq else "&run_chisq=false"
    fmt_part = fmt if fmt in ("json", "msgpack", "api") else "json"
    year_rng = "{}-{}".format(int(year), int(year) + 1)

    return "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
        year_rng,
        row,
        col,
        control_part,
        filter_part,
        weight_part,
        chisq_part,
        fmt_part,
    )
# Demo: configure a DataColl query, fetch it, and list the keys of the
# "results" object in the JSON response.
# NOTE(review): indentation was lost in the reviewed copy of this file, so
# these statements are reproduced at module level; re-indent them if they are
# meant to live inside main().
df = DataColl()
df.year = 2017
df.row = "STATE"
df.col = "YOSELL2"
df.control = "CATAG18"
df.cf_eq = True
df.control_filter = "2"
df.weight = "DASWT_1"
df.chisq = False
df.fmt = "json"
print(df.endpoint)

# Property assignments always regenerate a ".csv" filename, so the extension
# has to be chosen after the last one.
# df.setFilename(False, "txt")
df.setFilename(False, "json")

# A timeout keeps a stalled server from hanging the script forever, and
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
r = requests.get(df.endpoint, timeout=30)
r.raise_for_status()

# Alternative (disabled): decode a msgpack response and dump it to a file.
# t = msgpack.unpackb(r.content)
# with open(df.filename, "w", newline='', encoding="utf-8") as file:
#     file.write(str(t[b'results']))

rs = r.json()["results"]
for k in rs:
    print(k)

# Alternative (disabled): load the response into pandas and write a CSV.
# data = pandas.read_json(r.text, orient="columns")
# with open(df.filename, "w", newline='') as file:
#     data.to_csv(file)
# print(r.json())

if __name__ == "__main__":
    main()