|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import requests, time, os, sys, json, csv
- from bs4 import BeautifulSoup
-
class DataTable:
    """Row/column table built from a crosstab response object.

    Public attributes:
        table:      cell values (filled by ``populate``; currently left empty).
        col_labels: column labels.
        row_labels: row labels.

    ``_cols`` and ``_rows`` are internal lookup maps and are not part of the
    object returned by ``generate_table``.
    """

    def __init__(self, rdas_obj=None):
        """Create an empty table and, if ``rdas_obj`` is given, populate it."""
        self.table = {}
        self._cols = {}
        self._rows = {}
        self.col_labels = {}
        self.row_labels = {}
        # Identity test: an object with a custom __eq__/__ne__ (or a falsy
        # but valid payload) must still be handed to populate().
        if rdas_obj is not None:
            self.populate(rdas_obj)

    def populate(self, rdas_obj):
        """Fill the table from ``rdas_obj``.  NOT YET IMPLEMENTED (no-op).

        Planned algorithm, as sketched by the original author:

        1. Put values from results:column:options:key into a list.
           a. Sort the list on the int value of the key.
           b. Place the list into self._cols, using the value as the key and
              the index as the value.
        2. Do the same thing for the values from results:row:options:key and
           self._rows.
        3. Using the key-value pairs in self._cols, place
           results:column:options:title into self.col_labels with the
           self._cols VALUE as the key.
           a. REMEMBER that missing values are coded "." in keys but "" in
              cell descriptors!
        4. Do the same thing for results:row:options:title and self._rows.
        5. For each record in results:cells, get a row-column coordinate by
           associating row_option and column_option with a key in each of
           self._rows and self._cols.
           a. Insert the value in results:cells:n:column into self.table at
              that row-column coordinate.
        """
        # TODO: implement the steps described in the docstring.
        pass

    def generate_table(self):
        """Return the public view of the table.

        Returns:
            dict with keys ``"clabels"`` (column labels), ``"rlabels"`` (row
            labels) and ``"values"`` (the cell table).  The dict references
            the live internal containers; it does not copy them.
        """
        return {
            "clabels": self.col_labels,
            "rlabels": self.row_labels,
            "values": self.table,
        }
-
-
class DataPocket:
    """In-memory collection of named rows, keyed by integer row number.

    ``self.rows`` maps ``int`` row number -> ``{'name': ..., 'values': [...]}``.
    Every method accepts the row number as anything ``int()`` can convert
    (e.g. ``3`` or ``"3"``) and normalises it before touching ``self.rows``.
    """

    def __init__(self):
        self.rows = {}

    def _find(self, rownum):
        """Return the row dict for ``rownum``; print a notice and return None if absent."""
        row = self.rows.get(int(rownum))
        if row is None:
            print("No such row.")
        return row

    def addRow(self, rownum, name=None, values=None):
        """Create (or overwrite) a row.

        Args:
            rownum: row number, converted with ``int()``.
            name:   optional row label.
            values: optional iterable of cell values in column order; it is
                    copied so later changes to the caller's list do not leak in.
        """
        self.rows[int(rownum)] = {
            'name': name,
            'values': [] if values is None else list(values),
        }

    def addName(self, rownum, name):
        """Set the label of an existing row; prints "No such row." if absent."""
        row = self._find(rownum)
        if row is not None:
            row['name'] = name

    def addValues(self, rownum, values):
        """Replace the values of an existing row with a copy of ``values``.

        ``values`` should be an iterable in column order.
        """
        row = self._find(rownum)
        if row is not None:
            row['values'] = list(values)

    def setValue(self, rownum, value, col):
        """Set a single cell of an existing row.

        Args:
            rownum: row number, converted with ``int()``.
            value:  value to store.
            col:    zero-indexed column, converted with ``int()``.

        If ``col`` is past the end of the row, the row is padded with ``None``
        up to ``col`` and ``value`` is appended at that position.
        """
        col = int(col)
        row = self._find(rownum)
        if row is None:
            return
        cells = row['values']
        try:
            cells[col] = value
        except IndexError:
            # Pad the gap so that `value` lands exactly at index `col`.
            cells.extend([None] * (col - len(cells)))
            cells.append(value)

    def printRow(self, rownum):
        """Print one row as ``Row N ("name"): [values]``."""
        row = self._find(rownum)
        if row is not None:
            print("Row {} (\"{}\"): {}".format(rownum, row['name'], row['values']))

    def printData(self):
        """Print every row, in the dict's iteration order."""
        for rownum in self.rows:
            self.printRow(rownum)
-
class DataColl:
    """Collects the parameters of one NSDUH crosstab query.

    From those parameters it derives two strings:

    * ``endpoint`` - the crosstab URL on rdas.samhsa.gov
    * ``filename`` - a descriptive output file name

    Assigning to any of the parameter properties (``year``, ``row``, ``col``,
    ``control``, ``cf_eq``, ``control_filter``, ``weight``, ``chisq``, ``fmt``)
    silently regenerates both strings.  Until ``year``, ``row`` and ``col``
    are all set, ``endpoint`` and ``filename`` are ``None``.

    Note: regeneration always uses the default ``csv`` extension, so a custom
    extension passed to ``setFilename`` is lost on the next property assignment.
    """

    # Values accepted for the ``format`` query parameter; anything else
    # (including None) falls back to "json".
    _FORMATS = ("json", "msgpack", "api")

    def __init__(self):
        self._year = None
        self._row = None
        self._col = None
        self._control = None
        self._cf_eq = None
        self._control_filter = None
        self._weight = None
        self._chisq = None
        self._fmt = None
        # Derived values; defined up front so the read-only properties never
        # raise AttributeError on a fresh instance.
        self._endpoint = None
        self._filename = None

    @property
    def year(self):
        """Survey start year as an int (the endpoint covers ``year``-``year+1``)."""
        return self._year

    @year.setter
    def year(self, year):
        # NOTE: the original author sketched clamping years before 2014 up to
        # 2014; that lower bound is not enforced.
        self._year = int(year)
        self._generate()

    @property
    def row(self):
        """Name of the row variable."""
        return self._row

    @row.setter
    def row(self, r):
        self._row = r
        self._generate()

    @property
    def col(self):
        """Name of the column variable."""
        return self._col

    @col.setter
    def col(self, c):
        self._col = c
        self._generate()

    @property
    def control(self):
        """Name of the control variable, or None for no control."""
        return self._control

    @control.setter
    def control(self, ctl):
        self._control = ctl
        self._generate()

    @property
    def cf_eq(self):
        """True -> filter is ``control = value``; anything else -> ``control != value``."""
        return self._cf_eq

    @cf_eq.setter
    def cf_eq(self, eq):
        self._cf_eq = eq
        self._generate()

    @property
    def control_filter(self):
        """Value the control variable is compared against, or None for no filter."""
        return self._control_filter

    @control_filter.setter
    def control_filter(self, value):
        self._control_filter = value
        self._generate()

    @property
    def weight(self):
        """Name of the weight variable, or None to omit the weight parameter."""
        return self._weight

    @weight.setter
    def weight(self, wgt):
        self._weight = wgt
        self._generate()

    @property
    def chisq(self):
        """Whether to request a chi-squared run (None/False -> ``run_chisq=false``)."""
        return self._chisq

    @chisq.setter
    def chisq(self, cs):
        self._chisq = cs
        self._generate()

    @property
    def fmt(self):
        """Requested response format; see ``_FORMATS`` for accepted values."""
        return self._fmt

    @fmt.setter
    def fmt(self, f):
        self._fmt = f
        self._generate()

    @property
    def endpoint(self):
        """Last generated endpoint URL, or None if it cannot be built yet."""
        return self._endpoint

    @property
    def filename(self):
        """Last generated file name, or None if it cannot be built yet."""
        return self._filename

    def _generate(self):
        """Quietly rebuild both derived strings after a parameter change."""
        self.setEndpoint(False)
        self.setFilename(False)

    def _has_required(self):
        """Return True when year, row and column have all been set."""
        return not (self._year is None or self._row is None or self._col is None)

    def _filter_negation(self, marker):
        """Return "" for an equality filter, else ``marker``.

        ``== True`` is deliberate: only True (or a value equal to it) selects
        equality; the default None therefore means "not equal".
        """
        return "" if self.cf_eq == True else marker  # noqa: E712

    def setEndpoint(self, loud=True):
        """Rebuild ``endpoint`` from the current parameters.

        Args:
            loud: when True, print a notice if year/row/column are incomplete.

        If any of year, row or column is unset, ``endpoint`` becomes None and
        nothing else is built.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build an endpoint.")
            self._endpoint = None
            return

        lcontrol = "" if self.control is None else "&control={}".format(self.control)
        # A filter is only meaningful together with a control variable.
        if self.control is None or self.control_filter is None:
            lcontrol_filter = ""
        else:
            lcontrol_filter = "&filter={}{}%3D{}".format(
                self.control, self._filter_negation("!"), self.control_filter
            )
        lweight = "" if self.weight is None else "&weight={}".format(self.weight)
        lchisq = "&run_chisq=false" if self.chisq in (None, False) else "&run_chisq=true"
        lfmt = self.fmt if self.fmt in self._FORMATS else "json"
        year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)
        self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
            year_rng,
            self.row,
            self.col,
            lcontrol,
            lcontrol_filter,
            lweight,
            lchisq,
            lfmt
        )

    def setFilename(self, loud=True, ext=None):
        """Rebuild ``filename`` from the current parameters.

        Args:
            loud: when True, print a notice if year/row/column are incomplete.
            ext:  file extension without the dot; defaults to ``"csv"``.

        If any of year, row or column is unset, ``filename`` becomes None and
        nothing else is built.
        """
        if not self._has_required():
            if loud:
                print("Year, row, and column must be set in order to build a filename.")
            self._filename = None
            return

        lext = "csv" if ext is None else ext
        lcontrol = "" if self.control is None else "_ctl_{}".format(self.control)
        if self.control is None or self.control_filter is None:
            lcontrol_filter = ""
        else:
            lcontrol_filter = "_{}eq_{}".format(self._filter_negation("n"), self.control_filter)
        lweight = "" if self.weight is None else "_weight_{}".format(self.weight)
        lchisq = "_chisq_false" if self.chisq in (None, False) else "_chisq_true"
        self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
            self.year,
            self.row,
            self.col,
            lcontrol,
            lcontrol_filter,
            lweight,
            lchisq,
            lext
        )

    def toString(self):
        """Print a human-readable summary of the query (returns None)."""
        print("Data Collector:")
        print("Row: {}, Column: {}".format(self.row, self.col))
        if self.control is None:
            print("No control variable")
        elif self.control_filter is None:
            print("Controlling on {}".format(self.control))
        else:
            # Renders as "X = v" or "X != v", mirroring the endpoint filter.
            print("Controlling on {} {}= {}".format(
                self.control, self._filter_negation("!"), self.control_filter
            ))
        print("Weighted by {}".format(self.weight))
        print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
        print("Formatting as {}".format(self.fmt))
        print("URL: {}".format(self.endpoint))
-
def main():
    """Build a sample NSDUH crosstab query, print its URL and fetch the result.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-success HTTP status.
    """
    df = DataColl()
    df.year = 2017
    df.row = "STATE"
    df.col = "YOSELL2"
    df.control = "CATAG18"
    df.cf_eq = True
    df.control_filter = "2"
    df.weight = "DASWT_1"
    df.chisq = False
    df.fmt = "json"
    print(df.endpoint)

    df.setFilename(False, "csv")

    # Without a timeout requests can block forever on an unresponsive server.
    r = requests.get(df.endpoint, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()

    rjson = r.json()  # this creates a Python object, not a JSON string
    # in results/cells, column_option and row_option refer to the key field, not the list index
    # in column_option and row_option, missing value is coded as "", but coded as "." in key field
    # TODO: rjson is not consumed yet; the parsed response still has to be
    # turned into a table and written out.


if __name__ == "__main__":
    main()
|