Ver código fonte

lots of stuff

master
Noelle 4 anos atrás
pai
commit
fe80d2e764
1 arquivos alterados com 193 adições e 22 exclusões
  1. 193
    22
      main.py

+ 193
- 22
main.py Ver arquivo

@@ -1,29 +1,200 @@
import requests, time, os, sys
import pandas, msgpack
from bs4 import BeautifulSoup

class DataColl:
    """Build NSDUH crosstab API URLs and matching output filenames.

    Every query parameter is exposed as a property.  Assigning to any of
    them regenerates both ``endpoint`` and ``filename``.  ``year``, ``row``
    and ``col`` are required: until all three are set, ``endpoint`` and
    ``filename`` are ``None``.

    Note that regeneration always uses the default ``csv`` extension, so a
    custom extension passed to ``setFilename`` is lost on the next property
    assignment.
    """

    # Response formats accepted in the URL; anything else falls back to json.
    _FORMATS = ("json", "msgpack", "api")

    def __init__(self):
        self._year = None
        self._row = None
        self._col = None
        self._control = None
        self._cf_eq = None
        self._control_filter = None
        self._weight = None
        self._chisq = None
        self._fmt = None
        # Initialised here so the read-only properties never raise
        # AttributeError on a freshly constructed object.
        self._endpoint = None
        self._filename = None

    @property
    def year(self):
        """First year of the survey span, stored as an int."""
        return self._year

    @year.setter
    def year(self, year):
        # NOTE: a lower-bound check (clamping to 2014) was sketched here but
        # is disabled; any int-convertible value is accepted.
        self._year = int(year)
        self._generate()

    @property
    def row(self):
        """Variable used for the ``row`` query parameter."""
        return self._row

    @row.setter
    def row(self, r):
        self._row = r
        self._generate()

    @property
    def col(self):
        """Variable used for the ``column`` query parameter."""
        return self._col

    @col.setter
    def col(self, c):
        self._col = c
        self._generate()

    @property
    def control(self):
        """Optional variable used for the ``control`` query parameter."""
        return self._control

    @control.setter
    def control(self, ctl):
        self._control = ctl
        self._generate()

    @property
    def cf_eq(self):
        """Truthy: filter is ``control = value``; falsy: ``control != value``."""
        return self._cf_eq

    @cf_eq.setter
    def cf_eq(self, eq):
        self._cf_eq = eq
        self._generate()

    @property
    def control_filter(self):
        """Value the control variable is compared against in the filter."""
        return self._control_filter

    @control_filter.setter
    def control_filter(self, value):
        self._control_filter = value
        self._generate()

    @property
    def weight(self):
        """Optional variable used for the ``weight`` query parameter."""
        return self._weight

    @weight.setter
    def weight(self, wgt):
        self._weight = wgt
        self._generate()

    @property
    def chisq(self):
        """Truthy to request ``run_chisq=true``; falsy/None for ``false``."""
        return self._chisq

    @chisq.setter
    def chisq(self, cs):
        self._chisq = cs
        self._generate()

    @property
    def fmt(self):
        """Requested response format (see ``_FORMATS``)."""
        return self._fmt

    @fmt.setter
    def fmt(self, f):
        self._fmt = f
        self._generate()

    @property
    def endpoint(self):
        """Last generated URL, or ``None`` if required fields are missing."""
        return self._endpoint

    @property
    def filename(self):
        """Last generated filename, or ``None`` if required fields are missing."""
        return self._filename

    def _missing_required(self):
        """Return True while any of year, row or col is still unset."""
        return self._year is None or self._row is None or self._col is None

    def _generate(self):
        """Quietly rebuild the endpoint and filename after a property change."""
        self.setEndpoint(False)
        self.setFilename(False)

    def setEndpoint(self, loud=True):
        """Rebuild ``endpoint`` from the current property values.

        If year, row or col is unset the endpoint becomes ``None``; a
        message is printed only when ``loud`` is true.
        """
        if self._missing_required():
            if loud:
                print("Year, row, and column must be set in order to build an endpoint.")
            self._endpoint = None
            return

        control_part = "" if self.control is None else "&control={}".format(self.control)
        negation = "" if self.cf_eq else "!"
        # A filter is only meaningful when a control variable is present.
        if self.control_filter is None or not control_part:
            filter_part = ""
        else:
            filter_part = "&filter={}{}%3D{}".format(self.control, negation, self.control_filter)
        weight_part = "" if self.weight is None else "&weight={}".format(self.weight)
        chisq_part = "&run_chisq=true" if self.chisq else "&run_chisq=false"
        fmt_part = self.fmt if self.fmt in self._FORMATS else "json"
        year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)
        self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
            year_rng,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            fmt_part,
        )

    def setFilename(self, loud=True, ext=None):
        """Rebuild ``filename`` from the current property values.

        ``ext`` is the file extension without a dot and defaults to ``csv``.
        If year, row or col is unset the filename becomes ``None``; a
        message is printed only when ``loud`` is true.
        """
        if self._missing_required():
            if loud:
                print("Year, row, and column must be set in order to build a filename.")
            self._filename = None
            return

        extension = "csv" if ext is None else ext
        control_part = "" if self.control is None else "_ctl_{}".format(self.control)
        negation = "" if self.cf_eq else "n"
        if self.control_filter is None or not control_part:
            filter_part = ""
        else:
            filter_part = "_{}eq_{}".format(negation, self.control_filter)
        weight_part = "" if self.weight is None else "_weight_{}".format(self.weight)
        chisq_part = "_chisq_true" if self.chisq else "_chisq_false"
        self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
            self.year,
            self.row,
            self.col,
            control_part,
            filter_part,
            weight_part,
            chisq_part,
            extension,
        )

    def toString(self):
        """Print a human-readable summary of the current configuration."""
        print("Data Collector:")
        print("Row: {}, Column: {}".format(self.row, self.col))
        # control_filter is a single value, so the comparison operator is
        # derived from cf_eq instead of indexing into the filter.
        operator = "=" if self.cf_eq else "!"
        print("Controlling on {} {}= {}".format(self.control, operator, self.control_filter))
        print("Weighted by {}".format(self.weight))
        print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
        print("Formatting as {}".format(self.fmt))
        print("URL: {}".format(self.endpoint))

def main():
    """Print a sample crosstab URL built by ``endpointBuilder``."""
    print(
        endpointBuilder(
            2017,
            "STATE",
            "YOSELL2",
            "CATAG18",
            False,
            "2",
            "DASWT_1",
            False,
            "json",
        )
    )


def endpointBuilder(year, row, col, control=None, control_filter_not=False, control_filter=None, weight=None, chisq=False, fmt=None):
    """Return the NSDUH crosstab API URL for the given query.

    Args:
        year: First year of the survey span; the URL uses ``year-(year+1)``.
        row: Variable for the ``row`` query parameter.
        col: Variable for the ``column`` query parameter.
        control: Optional variable for the ``control`` query parameter.
        control_filter_not: Truthy to filter on ``control != value``;
            falsy to filter on ``control = value``.
        control_filter: Value the control variable is compared against.
            Ignored when ``control`` is None.
        weight: Optional variable for the ``weight`` query parameter.
        chisq: Truthy to request ``run_chisq=true``; falsy or None for
            ``run_chisq=false``.
        fmt: One of "json", "msgpack" or "api"; anything else means "json".

    Returns:
        The URL as a string.
    """
    # The control clause depends on ``control`` itself, not on whether a
    # filter value was supplied.
    control_part = "" if control is None else "&control={}".format(control)
    negation = "!" if control_filter_not else ""
    if control_filter is None or not control_part:
        filter_part = ""
    else:
        filter_part = "&filter={}{}%3D{}".format(control, negation, control_filter)
    weight_part = "" if weight is None else "&weight={}".format(weight)
    chisq_part = "&run_chisq=true" if chisq else "&run_chisq=false"
    fmt_part = fmt if fmt in ("json", "msgpack", "api") else "json"
    year_rng = "{}-{}".format(int(year), int(year) + 1)
    return "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
        year_rng,
        row,
        col,
        control_part,
        filter_part,
        weight_part,
        chisq_part,
        fmt_part,
    )
# Sample run: configure a collector, fetch the crosstab and list the keys of
# the "results" object in the JSON response.
df = DataColl()
df.year = 2017
df.row = "STATE"
df.col = "YOSELL2"
df.control = "CATAG18"
df.cf_eq = True
df.control_filter = "2"
df.weight = "DASWT_1"
df.chisq = False
df.fmt = "json"
print(df.endpoint)

# Property assignments regenerate the filename with the default extension,
# so the json extension is applied after all of them.
df.setFilename(False, "json")

# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(df.endpoint, timeout=30)
# Fail on HTTP errors here instead of on a confusing JSON/KeyError below.
r.raise_for_status()

rs = r.json()["results"]

for key in rs:
    print(key)

# TODO: write the results to df.filename; msgpack and pandas/CSV export
# variants were sketched here but are not enabled.

if __name__ == "__main__":
    main()

Carregando…
Cancelar
Salvar