Python scripting to automate fetching data from RDAS.
Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

main.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import requests, time, os, sys
  2. import pandas, msgpack
  3. from bs4 import BeautifulSoup
  4. class DataColl:
  5. def __init__(self):
  6. self._year = None
  7. self._row = None
  8. self._col = None
  9. self._control = None
  10. self._cf_eq = None
  11. self._control_filter = None
  12. self._weight = None
  13. self._chisq = None
  14. self._fmt = None
  15. @property
  16. def year(self):
  17. return self._year
  18. @year.setter
  19. def year(self, year):
  20. # if int(year) < 2014:
  21. # print("NSDUH does not have data from before 2014. Setting to 2014.")
  22. # self._year = 2014
  23. self._year = int(year)
  24. self._generate()
  25. @property
  26. def row(self):
  27. return self._row
  28. @row.setter
  29. def row(self, r):
  30. self._row = r
  31. self._generate()
  32. @property
  33. def col(self):
  34. return self._col
  35. @col.setter
  36. def col(self, c):
  37. self._col = c
  38. self._generate()
  39. @property
  40. def control(self):
  41. return self._control
  42. @control.setter
  43. def control(self, ctl):
  44. self._control = ctl
  45. self._generate()
  46. @property
  47. def cf_eq(self):
  48. return self._cf_eq
  49. @cf_eq.setter
  50. def cf_eq(self, eq):
  51. self._cf_eq = eq
  52. self._generate()
  53. @property
  54. def control_filter(self):
  55. return self._control_filter
  56. @control_filter.setter
  57. def control_filter(self, filter):
  58. self._control_filter = filter
  59. self._generate()
  60. @property
  61. def weight(self):
  62. return self._weight
  63. @weight.setter
  64. def weight(self, wgt):
  65. self._weight = wgt
  66. self._generate()
  67. @property
  68. def chisq(self):
  69. return self._chisq
  70. @chisq.setter
  71. def chisq(self, cs):
  72. self._chisq = cs
  73. self._generate()
  74. @property
  75. def fmt(self):
  76. return self._fmt
  77. @fmt.setter
  78. def fmt(self, f):
  79. self._fmt = f
  80. self._generate()
  81. @property
  82. def endpoint(self):
  83. return self._endpoint
  84. def _generate(self):
  85. self.setEndpoint(False)
  86. self.setFilename(False)
  87. def setEndpoint(self, loud=True):
  88. if loud and self._year == None or self._row == None or self._col == None:
  89. print("Year, row, and column must be set in order to build an endpoint.")
  90. self._endpoint = None
  91. lcontrol = "" if self.control == None else "&control={}".format(self.control)
  92. lcfnot = "" if self.cf_eq == True else "!"
  93. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "&filter={}{}%3D{}".format(self.control, lcfnot, self.control_filter)
  94. lweight = "" if self.weight == None else "&weight={}".format(self.weight)
  95. lchisq = "&run_chisq=false" if self.chisq == False or self.chisq == None else "&run_chisq=true"
  96. lfmt = "json" if (self.fmt == None or self.fmt not in ["json", "msgpack", "api"]) else self.fmt
  97. year_rng = "{}-{}".format(int(self.year), int(self.year)+1)
  98. self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
  99. year_rng,
  100. self.row,
  101. self.col,
  102. lcontrol,
  103. lcontrol_filter,
  104. lweight,
  105. lchisq,
  106. lfmt
  107. )
  108. @property
  109. def filename(self):
  110. return self._filename
  111. def setFilename(self, loud=True, ext=None):
  112. if loud and self._year == None or self._row == None or self._col == None:
  113. print("Year, row, and column must be set in order to build a filename.")
  114. self._filename = None
  115. lext = "csv" if ext == None else ext
  116. lcontrol = "" if self.control == None else "_ctl_{}".format(self.control)
  117. lcfnot = "" if self.cf_eq == True else "n"
  118. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "_{}eq_{}".format(lcfnot, self.control_filter)
  119. lweight = "" if self.weight == None else "_weight_{}".format(self.weight)
  120. lchisq = "_chisq_false" if self.chisq == False or self.chisq == None else "_chisq_true"
  121. self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
  122. self.year,
  123. self.row,
  124. self.col,
  125. lcontrol,
  126. lcontrol_filter,
  127. lweight,
  128. lchisq,
  129. lext
  130. )
  131. def toString(self):
  132. print("Data Collector:")
  133. print("Row: {}, Column: {}".format(self.row, self.col))
  134. print("Controlling on {} {}= {}".format(self.control, self.control_filter[0], self.control_filter[1]))
  135. print("Weighted by {}".format(self.weight))
  136. print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
  137. print("Formatting as {}".format(self.fmt))
  138. print("URL: {}".format(self.endpoint))
  139. def main():
  140. df = DataColl()
  141. df.year = 2017
  142. df.row = "STATE"
  143. df.col = "YOSELL2"
  144. df.control = "CATAG18"
  145. df.cf_eq = True
  146. df.control_filter = "2"
  147. df.weight = "DASWT_1"
  148. df.chisq = False
  149. df.fmt = "json"
  150. print(df.endpoint)
  151. # df.setFilename(False, "txt")
  152. df.setFilename(False, "json")
  153. r = requests.get(df.endpoint)
  154. # t = msgpack.unpackb(r.content)
  155. # with open(df.filename, "w", newline='', encoding="utf-8") as file:
  156. # file.write(str(t[b'results']))
  157. rs = r.json()["results"]
  158. for k,_v in rs.items():
  159. print(k)
  160. #data = pandas.read_json(r.text, orient="columns")
  161. # with open(df.filename, "w", newline='') as file:
  162. # data.to_csv(file)
  163. #print(r.json())
  164. if __name__ == "__main__":
  165. main()