Python scripting to automate fetching data from RDAS.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

main.py 6.7KB

4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
4 jaren geleden
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import requests, time, os, sys, json, csv
  2. from bs4 import BeautifulSoup
  3. class DataPocket:
  4. def __init__(self):
  5. self.rows = {}
  6. def addRow(self, rownum, name=None, values=None):
  7. ### values should be a list in column order
  8. rownum = int(rownum)
  9. d = {'name': name,
  10. 'values': [] if values == None else [x for x in values]
  11. }
  12. self.rows[rownum] = d
  13. def addName(self, rownum, name):
  14. if int(rownum) in self.rows.keys():
  15. self.rows[rownum]['name'] = name
  16. else:
  17. print("No such row.")
  18. def addValues(self, rownum, values):
  19. ### values should be a list in column order
  20. if int(rownum) in self.rows.keys():
  21. self.rows[rownum]['values'] = [x for x in values]
  22. else:
  23. print("No such row.")
  24. def setValue(self, rownum, value, col):
  25. ### col should be a zero-indexed integer
  26. col = int(col)
  27. if int(rownum) in self.rows.keys():
  28. try:
  29. self.rows[rownum]['values'][col] = value
  30. except IndexError:
  31. while len(self.rows[rownum]['values'] < col):
  32. self.rows[rownum]['values'].append(None)
  33. self.rows[rownum]['values'].append(value)
  34. else:
  35. print("No such row.")
  36. def printRow(self, rownum):
  37. if int(rownum) in self.rows.keys():
  38. print("Row {} (\"{}\"): {}".format(rownum, self.rows[rownum]['name'], self.rows[rownum]['values']))
  39. else:
  40. print("No such row.")
  41. def printData(self):
  42. for row in self.rows.keys():
  43. self.printRow(row)
  44. class DataColl:
  45. def __init__(self):
  46. self._year = None
  47. self._row = None
  48. self._col = None
  49. self._control = None
  50. self._cf_eq = None
  51. self._control_filter = None
  52. self._weight = None
  53. self._chisq = None
  54. self._fmt = None
  55. @property
  56. def year(self):
  57. return self._year
  58. @year.setter
  59. def year(self, year):
  60. # if int(year) < 2014:
  61. # print("NSDUH does not have data from before 2014. Setting to 2014.")
  62. # self._year = 2014
  63. self._year = int(year)
  64. self._generate()
  65. @property
  66. def row(self):
  67. return self._row
  68. @row.setter
  69. def row(self, r):
  70. self._row = r
  71. self._generate()
  72. @property
  73. def col(self):
  74. return self._col
  75. @col.setter
  76. def col(self, c):
  77. self._col = c
  78. self._generate()
  79. @property
  80. def control(self):
  81. return self._control
  82. @control.setter
  83. def control(self, ctl):
  84. self._control = ctl
  85. self._generate()
  86. @property
  87. def cf_eq(self):
  88. return self._cf_eq
  89. @cf_eq.setter
  90. def cf_eq(self, eq):
  91. self._cf_eq = eq
  92. self._generate()
  93. @property
  94. def control_filter(self):
  95. return self._control_filter
  96. @control_filter.setter
  97. def control_filter(self, filter):
  98. self._control_filter = filter
  99. self._generate()
  100. @property
  101. def weight(self):
  102. return self._weight
  103. @weight.setter
  104. def weight(self, wgt):
  105. self._weight = wgt
  106. self._generate()
  107. @property
  108. def chisq(self):
  109. return self._chisq
  110. @chisq.setter
  111. def chisq(self, cs):
  112. self._chisq = cs
  113. self._generate()
  114. @property
  115. def fmt(self):
  116. return self._fmt
  117. @fmt.setter
  118. def fmt(self, f):
  119. self._fmt = f
  120. self._generate()
  121. @property
  122. def endpoint(self):
  123. return self._endpoint
  124. def _generate(self):
  125. self.setEndpoint(False)
  126. self.setFilename(False)
  127. def setEndpoint(self, loud=True):
  128. if loud and self._year == None or self._row == None or self._col == None:
  129. print("Year, row, and column must be set in order to build an endpoint.")
  130. self._endpoint = None
  131. lcontrol = "" if self.control == None else "&control={}".format(self.control)
  132. lcfnot = "" if self.cf_eq == True else "!"
  133. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "&filter={}{}%3D{}".format(self.control, lcfnot, self.control_filter)
  134. lweight = "" if self.weight == None else "&weight={}".format(self.weight)
  135. lchisq = "&run_chisq=false" if self.chisq == False or self.chisq == None else "&run_chisq=true"
  136. lfmt = "json" if (self.fmt == None or self.fmt not in ["json", "msgpack", "api"]) else self.fmt
  137. year_rng = "{}-{}".format(int(self.year), int(self.year)+1)
  138. self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
  139. year_rng,
  140. self.row,
  141. self.col,
  142. lcontrol,
  143. lcontrol_filter,
  144. lweight,
  145. lchisq,
  146. lfmt
  147. )
  148. @property
  149. def filename(self):
  150. return self._filename
  151. def setFilename(self, loud=True, ext=None):
  152. if loud and self._year == None or self._row == None or self._col == None:
  153. print("Year, row, and column must be set in order to build a filename.")
  154. self._filename = None
  155. lext = "csv" if ext == None else ext
  156. lcontrol = "" if self.control == None else "_ctl_{}".format(self.control)
  157. lcfnot = "" if self.cf_eq == True else "n"
  158. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "_{}eq_{}".format(lcfnot, self.control_filter)
  159. lweight = "" if self.weight == None else "_weight_{}".format(self.weight)
  160. lchisq = "_chisq_false" if self.chisq == False or self.chisq == None else "_chisq_true"
  161. self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
  162. self.year,
  163. self.row,
  164. self.col,
  165. lcontrol,
  166. lcontrol_filter,
  167. lweight,
  168. lchisq,
  169. lext
  170. )
  171. def toString(self):
  172. print("Data Collector:")
  173. print("Row: {}, Column: {}".format(self.row, self.col))
  174. print("Controlling on {} {}= {}".format(self.control, self.control_filter[0], self.control_filter[1]))
  175. print("Weighted by {}".format(self.weight))
  176. print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
  177. print("Formatting as {}".format(self.fmt))
  178. print("URL: {}".format(self.endpoint))
  179. def main():
  180. df = DataColl()
  181. df.year = 2017
  182. df.row = "STATE"
  183. df.col = "YOSELL2"
  184. df.control = "CATAG18"
  185. df.cf_eq = True
  186. df.control_filter = "2"
  187. df.weight = "DASWT_1"
  188. df.chisq = False
  189. df.fmt = "json"
  190. print(df.endpoint)
  191. df.setFilename(False, "csv")
  192. r = requests.get(df.endpoint)
  193. if __name__ == "__main__":
  194. main()