|
|
|
|
|
|
|
|
import requests, time, os, sys |
|
|
import requests, time, os, sys |
|
|
|
|
|
import pandas, msgpack |
|
|
from bs4 import BeautifulSoup |
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
|
|
|
|
|
|
class DataColl:
    """Build RDAS/NSDUH crosstab endpoint URLs and matching output filenames.

    Each query parameter is exposed as a property.  Assigning to any of them
    regenerates both ``endpoint`` and ``filename``.  Both remain ``None``
    until ``year``, ``row`` and ``col`` have all been set.
    """

    # Response formats accepted in the URL; anything else falls back to "json".
    _FORMATS = ("json", "msgpack", "api")

    def __init__(self):
        self._year = None
        self._row = None
        self._col = None
        self._control = None
        self._cf_eq = None
        self._control_filter = None
        self._weight = None
        self._chisq = None
        self._fmt = None
        # Derived values; initialised so the read-only properties never raise
        # AttributeError on a freshly constructed object.
        self._endpoint = None
        self._filename = None

    @property
    def year(self):
        """First year of the two-year survey range (stored as ``int``)."""
        return self._year

    @year.setter
    def year(self, year):
        # NOTE: a clamp to 2014 ("NSDUH does not have data from before 2014")
        # existed here as commented-out code; it is intentionally not applied.
        self._year = int(year)
        self._generate()

    @property
    def row(self):
        """Variable name used for the crosstab rows."""
        return self._row

    @row.setter
    def row(self, r):
        self._row = r
        self._generate()

    @property
    def col(self):
        """Variable name used for the crosstab columns."""
        return self._col

    @col.setter
    def col(self, c):
        self._col = c
        self._generate()

    @property
    def control(self):
        """Optional control variable name."""
        return self._control

    @control.setter
    def control(self, ctl):
        self._control = ctl
        self._generate()

    @property
    def cf_eq(self):
        """Whether the control filter tests equality.

        Only a value equal to ``True`` selects equality; ``False`` and the
        unset default ``None`` both produce a negated filter.
        """
        return self._cf_eq

    @cf_eq.setter
    def cf_eq(self, eq):
        self._cf_eq = eq
        self._generate()

    @property
    def control_filter(self):
        """Optional value the control variable is compared against."""
        return self._control_filter

    @control_filter.setter
    def control_filter(self, filter):
        self._control_filter = filter
        self._generate()

    @property
    def weight(self):
        """Optional weight variable name."""
        return self._weight

    @weight.setter
    def weight(self, wgt):
        self._weight = wgt
        self._generate()

    @property
    def chisq(self):
        """Whether a chi-squared run is requested (``None`` counts as no)."""
        return self._chisq

    @chisq.setter
    def chisq(self, cs):
        self._chisq = cs
        self._generate()

    @property
    def fmt(self):
        """Requested response format; unknown values fall back to "json"."""
        return self._fmt

    @fmt.setter
    def fmt(self, f):
        self._fmt = f
        self._generate()

    @property
    def endpoint(self):
        """Generated URL, or ``None`` while year/row/col are incomplete."""
        return self._endpoint

    @property
    def filename(self):
        """Generated filename, or ``None`` while year/row/col are incomplete."""
        return self._filename

    def _generate(self):
        """Quietly rebuild the endpoint and the (default ``.csv``) filename."""
        self.setEndpoint(False)
        self.setFilename(False)

    def _has_required(self):
        """Return True once year, row and column have all been set."""
        return not (self._year is None or self._row is None or self._col is None)

    def setEndpoint(self, loud=True):
        """Rebuild ``endpoint`` from the current settings.

        Args:
            loud: When true, print a message if year, row or column is
                missing.  The endpoint is reset to ``None`` in that case
                regardless of ``loud``.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build an endpoint.")
            self._endpoint = None
            # Stop here: continuing would call int(None) or embed "None" in the URL.
            return

        lcontrol = "" if self.control is None else "&control={}".format(self.control)
        lcfnot = "" if self.cf_eq == True else "!"
        if self.control_filter is None or lcontrol == "":
            lcontrol_filter = ""
        else:
            # "%3D" is the URL-encoded "=", giving CONTROL=VALUE or CONTROL!=VALUE.
            lcontrol_filter = "&filter={}{}%3D{}".format(self.control, lcfnot, self.control_filter)
        lweight = "" if self.weight is None else "&weight={}".format(self.weight)
        if self.chisq is None or self.chisq == False:
            lchisq = "&run_chisq=false"
        else:
            lchisq = "&run_chisq=true"
        lfmt = self.fmt if self.fmt in self._FORMATS else "json"
        year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)

        self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
            year_rng,
            self.row,
            self.col,
            lcontrol,
            lcontrol_filter,
            lweight,
            lchisq,
            lfmt
        )

    def setFilename(self, loud=True, ext=None):
        """Rebuild ``filename`` from the current settings.

        Args:
            loud: When true, print a message if year, row or column is
                missing.  The filename is reset to ``None`` in that case
                regardless of ``loud``.
            ext: File extension without the dot; defaults to "csv".
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build a filename.")
            self._filename = None
            return

        lext = "csv" if ext is None else ext
        lcontrol = "" if self.control is None else "_ctl_{}".format(self.control)
        lcfnot = "" if self.cf_eq == True else "n"
        if self.control_filter is None or lcontrol == "":
            lcontrol_filter = ""
        else:
            lcontrol_filter = "_{}eq_{}".format(lcfnot, self.control_filter)
        lweight = "" if self.weight is None else "_weight_{}".format(self.weight)
        if self.chisq is None or self.chisq == False:
            lchisq = "_chisq_false"
        else:
            lchisq = "_chisq_true"

        self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
            self.year,
            self.row,
            self.col,
            lcontrol,
            lcontrol_filter,
            lweight,
            lchisq,
            lext
        )

    def toString(self):
        """Print a human-readable summary of the current configuration."""
        print("Data Collector:")
        print("Row: {}, Column: {}".format(self.row, self.col))
        if self.control is None:
            print("No control variable set")
        elif self.control_filter is None:
            print("Controlling on {}".format(self.control))
        else:
            # The comparison operator comes from cf_eq; control_filter is the
            # plain value, not an (operator, value) pair.
            op = "" if self.cf_eq == True else "!"
            print("Controlling on {} {}= {}".format(self.control, op, self.control_filter))
        print("Weighted by {}".format(self.weight))
        print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
        print("Formatting as {}".format(self.fmt))
        print("URL: {}".format(self.endpoint))
|
|
|
|
|
|
|
|
def main():
    """Build one sample crosstab URL with ``endpointBuilder`` and print it."""
    endpoint = endpointBuilder(
        2017,
        "STATE",
        "YOSELL2",
        control="CATAG18",
        control_filter_not=False,
        control_filter="2",
        weight="DASWT_1",
        chisq=False,
        fmt="json",
    )
    print(endpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def endpointBuilder(year, row, col, control=None, control_filter_not=False, control_filter=None, weight=None, chisq=False, fmt=None):
    """Return the RDAS crosstab URL for a two-year NSDUH survey.

    Args:
        year: First year of the two-year range (anything ``int()`` accepts).
        row: Variable name for the crosstab rows.
        col: Variable name for the crosstab columns.
        control: Optional control variable name.
        control_filter_not: When true, the control filter is negated (``!=``).
        control_filter: Optional value the control variable is compared
            against; ignored unless ``control`` is also given.
        weight: Optional weight variable name.
        chisq: Whether to request a chi-squared run; any falsy value
            (including ``None``) requests none.
        fmt: "json", "msgpack" or "api"; anything else falls back to "json".

    Returns:
        The endpoint URL as a string.

    Raises:
        TypeError, ValueError: If ``year`` cannot be converted with ``int()``.
    """
    # The control clause depends on the control variable itself, not on
    # whether a filter value was supplied.
    lcontrol = "" if control is None else "&control={}".format(control)
    lcfnot = "!" if control_filter_not else ""
    if control_filter is None or lcontrol == "":
        lcontrol_filter = ""
    else:
        # "%3D" is the URL-encoded "=", giving CONTROL=VALUE or CONTROL!=VALUE.
        lcontrol_filter = "&filter={}{}%3D{}".format(control, lcfnot, control_filter)
    lweight = "" if weight is None else "&weight={}".format(weight)
    lchisq = "&run_chisq=true" if chisq else "&run_chisq=false"
    lfmt = fmt if fmt in ("json", "msgpack", "api") else "json"
    first_year = int(year)
    year_rng = "{}-{}".format(first_year, first_year + 1)

    return "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
        year_rng,
        row,
        col,
        lcontrol,
        lcontrol_filter,
        lweight,
        lchisq,
        lfmt
    )
|
|
|
|
|
|
|
|
# NOTE(review): indentation was lost in this copy of the file, so these
# statements are reproduced as a flat script -- confirm whether they were
# meant to live inside main().

# Describe the crosstab query; each assignment regenerates df.endpoint.
df = DataColl()
df.year = 2017
df.row = "STATE"
df.col = "YOSELL2"
df.control = "CATAG18"
df.cf_eq = True
df.control_filter = "2"
df.weight = "DASWT_1"
df.chisq = False
df.fmt = "json"
print(df.endpoint)

# Pick the output extension last: every property assignment above rebuilds
# the filename with the default ".csv" extension.
# df.setFilename(False, "txt")
df.setFilename(False, "json")

# A timeout keeps the script from hanging indefinitely on a stalled
# connection, and raise_for_status() surfaces HTTP errors before the body
# is parsed as JSON.
r = requests.get(df.endpoint, timeout=30)
r.raise_for_status()

# msgpack variant, kept for reference:
# t = msgpack.unpackb(r.content)
# with open(df.filename, "w", newline='', encoding="utf-8") as file:
#     file.write(str(t[b'results']))

rs = r.json()["results"]

# Only the top-level keys of the result are listed.
for k in rs:
    print(k)

# pandas/CSV variant, kept for reference:
# data = pandas.read_json(r.text, orient="columns")
# with open(df.filename, "w", newline='') as file:
#     data.to_csv(file)
# print(r.json())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run main() exactly once, and only when the file is executed as a script.
if __name__ == "__main__":
    main()