| @@ -1,29 +1,200 @@ | |||
| import requests, time, os, sys | |||
| import pandas, msgpack | |||
| from bs4 import BeautifulSoup | |||
class DataColl:
    """Build the request URL and output filename for an NSDUH crosstab query.

    Every public attribute is a property; assigning to any of them rebuilds
    both ``endpoint`` and ``filename`` (the filename is rebuilt with the
    default ``csv`` extension, so call ``setFilename`` last to override it).
    Until ``year``, ``row`` and ``col`` have all been assigned, ``endpoint``
    and ``filename`` are ``None``.

    Attributes:
        year: First year of the ``year``-``year + 1`` range placed in the
            survey name; stored as ``int``.
        row: Value of the ``row`` query parameter.
        col: Value of the ``column`` query parameter.
        control: Optional value of the ``control`` query parameter.
        cf_eq: ``True`` (or unset) filters for control == control_filter;
            a false value filters for control != control_filter.
        control_filter: Optional value the control variable is compared to.
            Ignored unless ``control`` is also set.
        weight: Optional value of the ``weight`` query parameter.
        chisq: Truthy to request ``run_chisq=true``; otherwise ``false``.
        fmt: One of ``json``, ``msgpack`` or ``api``; anything else falls
            back to ``json``.
    """

    _FORMATS = ("json", "msgpack", "api")

    def __init__(self):
        self._year = None
        self._row = None
        self._col = None
        self._control = None
        self._cf_eq = None
        self._control_filter = None
        self._weight = None
        self._chisq = None
        self._fmt = None
        # Initialised here so the read-only properties never raise
        # AttributeError on a freshly constructed object.
        self._endpoint = None
        self._filename = None

    @property
    def year(self):
        return self._year

    @year.setter
    def year(self, year):
        # NOTE: a lower bound of 2014 was sketched here at some point but is
        # not enforced; any value accepted by int() is stored.
        self._year = int(year)
        self._generate()

    @property
    def row(self):
        return self._row

    @row.setter
    def row(self, r):
        self._row = r
        self._generate()

    @property
    def col(self):
        return self._col

    @col.setter
    def col(self, c):
        self._col = c
        self._generate()

    @property
    def control(self):
        return self._control

    @control.setter
    def control(self, ctl):
        self._control = ctl
        self._generate()

    @property
    def cf_eq(self):
        return self._cf_eq

    @cf_eq.setter
    def cf_eq(self, eq):
        self._cf_eq = eq
        self._generate()

    @property
    def control_filter(self):
        return self._control_filter

    @control_filter.setter
    def control_filter(self, value):
        self._control_filter = value
        self._generate()

    @property
    def weight(self):
        return self._weight

    @weight.setter
    def weight(self, wgt):
        self._weight = wgt
        self._generate()

    @property
    def chisq(self):
        return self._chisq

    @chisq.setter
    def chisq(self, cs):
        self._chisq = cs
        self._generate()

    @property
    def fmt(self):
        return self._fmt

    @fmt.setter
    def fmt(self, f):
        self._fmt = f
        self._generate()

    @property
    def endpoint(self):
        """The last URL built by ``setEndpoint``, or ``None``."""
        return self._endpoint

    @property
    def filename(self):
        """The last name built by ``setFilename``, or ``None``."""
        return self._filename

    def _generate(self):
        """Quietly rebuild the endpoint and filename after any change."""
        self.setEndpoint(False)
        self.setFilename(False)

    def _has_required(self):
        """Return True once year, row and col have all been assigned."""
        return not (self._year is None or self._row is None or self._col is None)

    def _filter_is_negated(self):
        """Return True only when cf_eq was explicitly set to a false value."""
        return self._cf_eq is not None and not self._cf_eq

    def _has_filter(self):
        """Return True when both a control variable and a filter are set."""
        return self._control is not None and self._control_filter is not None

    def setEndpoint(self, loud=True):
        """Rebuild ``endpoint`` from the current settings.

        Args:
            loud: When true, print a message if year, row or col is missing.

        If any of the three required values is missing, ``endpoint`` is set
        to ``None`` and nothing else happens.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build an endpoint.")
            self._endpoint = None
            return

        control_part = "" if self.control is None else "&control={}".format(self.control)
        filter_part = ""
        if self._has_filter():
            # %3D is the percent-encoded "=", giving "<control>=<value>" or
            # "<control>!=<value>" inside the filter parameter.
            filter_part = "&filter={}{}%3D{}".format(
                self.control,
                "!" if self._filter_is_negated() else "",
                self.control_filter,
            )
        weight_part = "" if self.weight is None else "&weight={}".format(self.weight)
        chisq_part = "&run_chisq=true" if self.chisq else "&run_chisq=false"
        fmt = self.fmt if self.fmt in self._FORMATS else "json"
        year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)

        self._endpoint = (
            "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/"
            "?row={}&column={}{}{}{}{}&format={}"
        ).format(
            year_rng,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            fmt,
        )

    def setFilename(self, loud=True, ext=None):
        """Rebuild ``filename`` from the current settings.

        Args:
            loud: When true, print a message if year, row or col is missing.
            ext: File extension without the dot; defaults to ``csv``.

        If any of the three required values is missing, ``filename`` is set
        to ``None`` and nothing else happens.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build a filename.")
            self._filename = None
            return

        extension = "csv" if ext is None else ext
        control_part = "" if self.control is None else "_ctl_{}".format(self.control)
        filter_part = ""
        if self._has_filter():
            # "eq" for an equality filter, "neq" for a negated one.
            filter_part = "_{}eq_{}".format(
                "n" if self._filter_is_negated() else "",
                self.control_filter,
            )
        weight_part = "" if self.weight is None else "_weight_{}".format(self.weight)
        chisq_part = "_chisq_true" if self.chisq else "_chisq_false"

        self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
            self.year,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            extension,
        )

    def toString(self):
        """Print a human-readable summary of the current settings."""
        print("Data Collector:")
        print("Row: {}, Column: {}".format(self.row, self.col))
        if self._has_filter():
            # control_filter is a plain value (it is interpolated as-is into
            # the URL), so the operator comes from cf_eq, not from indexing.
            print("Controlling on {} {}= {}".format(
                self.control,
                "!" if self._filter_is_negated() else "",
                self.control_filter,
            ))
        elif self.control is not None:
            print("Controlling on {}".format(self.control))
        else:
            print("Not controlling on any variable")
        print("Weighted by {}".format(self.weight))
        print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
        print("Formatting as {}".format(self.fmt))
        print("URL: {}".format(self.endpoint))
def main():
    """Print the crosstab URL for a fixed sample query."""
    print(
        endpointBuilder(
            year=2017,
            row="STATE",
            col="YOSELL2",
            control="CATAG18",
            control_filter_not=False,
            control_filter="2",
            weight="DASWT_1",
            chisq=False,
            fmt="json",
        )
    )
def endpointBuilder(year, row, col, control=None, control_filter_not=False, control_filter=None, weight=None, chisq=False, fmt=None):
    """Return the NSDUH crosstab URL for the given query settings.

    Args:
        year: First year of the ``year``-``year + 1`` range placed in the
            survey name; anything accepted by ``int()``.
        row: Value of the ``row`` query parameter.
        col: Value of the ``column`` query parameter.
        control: Optional value of the ``control`` query parameter.
        control_filter_not: Truthy to filter for control != control_filter
            instead of control == control_filter.
        control_filter: Optional value the control variable is compared to.
            Ignored unless ``control`` is also given.
        weight: Optional value of the ``weight`` query parameter.
        chisq: Truthy to request ``run_chisq=true``; otherwise ``false``.
        fmt: One of ``json``, ``msgpack`` or ``api``; anything else falls
            back to ``json``.

    Returns:
        The assembled URL as a string.
    """
    # The control parameter depends on ``control`` itself; keying it off
    # ``control_filter`` dropped an unfiltered control variable and emitted
    # "control=None" when only a filter was supplied.
    control_part = "" if control is None else "&control={}".format(control)
    filter_part = ""
    if control is not None and control_filter is not None:
        # %3D is the percent-encoded "=", giving "<control>=<value>" or
        # "<control>!=<value>" inside the filter parameter.
        filter_part = "&filter={}{}%3D{}".format(
            control,
            "!" if control_filter_not else "",
            control_filter,
        )
    weight_part = "" if weight is None else "&weight={}".format(weight)
    chisq_part = "&run_chisq=true" if chisq else "&run_chisq=false"
    out_fmt = fmt if fmt in ("json", "msgpack", "api") else "json"
    year_rng = "{}-{}".format(int(year), int(year) + 1)

    return (
        "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/"
        "?row={}&column={}{}{}{}{}&format={}"
    ).format(
        year_rng,
        row,
        col,
        control_part,
        filter_part,
        weight_part,
        chisq_part,
        out_fmt,
    )
# Sample run: configure a collector, fetch the crosstab and list the keys of
# the "results" object in the JSON reply.
# NOTE: these statements sit at module level, so they execute on import as
# well as when the file is run as a script.
df = DataColl()
df.year = 2017
df.row = "STATE"
df.col = "YOSELL2"
df.control = "CATAG18"
df.cf_eq = True
df.control_filter = "2"
df.weight = "DASWT_1"
df.chisq = False
df.fmt = "json"
print(df.endpoint)

# Every setter above regenerates the filename with the default extension, so
# the extension override has to come after the last assignment.
# df.setFilename(False, "txt")
df.setFilename(False, "json")

# Without a timeout requests waits indefinitely on an unresponsive server;
# raise_for_status() surfaces HTTP errors before the body is parsed.
r = requests.get(df.endpoint, timeout=30)
r.raise_for_status()

# Alternative for fmt == "msgpack" (kept for reference, not executed):
# t = msgpack.unpackb(r.content)
# with open(df.filename, "w", newline='', encoding="utf-8") as file:
#     file.write(str(t[b'results']))

rs = r.json()["results"]
for k in rs:
    print(k)

# Alternative that dumps the reply to CSV via pandas (not executed):
# data = pandas.read_json(r.text, orient="columns")
# with open(df.filename, "w", newline='') as file:
#     data.to_csv(file)
# print(r.json())

if __name__ == "__main__":
    main()