Python scripting to automate fetching data from RDAS.
Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

main.py 6.7KB

il y a 4 ans
il y a 4 ans
il y a 4 ans
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import requests, time, os, sys, json, csv
  2. from bs4 import BeautifulSoup
  3. class DataPocket:
  4. def __init__(self):
  5. self.rows = {}
  6. def addRow(self, rownum, name=None, values=None):
  7. ### values should be a list in column order
  8. rownum = int(rownum)
  9. d = {'name': name,
  10. 'values': [] if values == None else [x for x in values]
  11. }
  12. self.rows[rownum] = d
  13. def addName(self, rownum, name):
  14. if int(rownum) in self.rows.keys():
  15. self.rows[rownum]['name'] = name
  16. else:
  17. print("No such row.")
  18. def addValues(self, rownum, values):
  19. ### values should be a list in column order
  20. if int(rownum) in self.rows.keys():
  21. self.rows[rownum]['values'] = [x for x in values]
  22. else:
  23. print("No such row.")
  24. def setValue(self, rownum, value, col):
  25. ### col should be a zero-indexed integer
  26. col = int(col)
  27. if int(rownum) in self.rows.keys():
  28. try:
  29. self.rows[rownum]['values'][col] = value
  30. except IndexError:
  31. while len(self.rows[rownum]['values'] < col):
  32. self.rows[rownum]['values'].append(None)
  33. self.rows[rownum]['values'].append(value)
  34. else:
  35. print("No such row.")
  36. def printRow(self, rownum):
  37. if int(rownum) in self.rows.keys():
  38. print("Row {} (\"{}\"): {}".format(rownum, self.rows[rownum]['name'], self.rows[rownum]['values']))
  39. else:
  40. print("No such row.")
  41. def printData(self):
  42. for row in self.rows.keys():
  43. self.printRow(row)
  44. class DataColl:
  45. def __init__(self):
  46. self._year = None
  47. self._row = None
  48. self._col = None
  49. self._control = None
  50. self._cf_eq = None
  51. self._control_filter = None
  52. self._weight = None
  53. self._chisq = None
  54. self._fmt = None
  55. @property
  56. def year(self):
  57. return self._year
  58. @year.setter
  59. def year(self, year):
  60. # if int(year) < 2014:
  61. # print("NSDUH does not have data from before 2014. Setting to 2014.")
  62. # self._year = 2014
  63. self._year = int(year)
  64. self._generate()
  65. @property
  66. def row(self):
  67. return self._row
  68. @row.setter
  69. def row(self, r):
  70. self._row = r
  71. self._generate()
  72. @property
  73. def col(self):
  74. return self._col
  75. @col.setter
  76. def col(self, c):
  77. self._col = c
  78. self._generate()
  79. @property
  80. def control(self):
  81. return self._control
  82. @control.setter
  83. def control(self, ctl):
  84. self._control = ctl
  85. self._generate()
  86. @property
  87. def cf_eq(self):
  88. return self._cf_eq
  89. @cf_eq.setter
  90. def cf_eq(self, eq):
  91. self._cf_eq = eq
  92. self._generate()
  93. @property
  94. def control_filter(self):
  95. return self._control_filter
  96. @control_filter.setter
  97. def control_filter(self, filter):
  98. self._control_filter = filter
  99. self._generate()
  100. @property
  101. def weight(self):
  102. return self._weight
  103. @weight.setter
  104. def weight(self, wgt):
  105. self._weight = wgt
  106. self._generate()
  107. @property
  108. def chisq(self):
  109. return self._chisq
  110. @chisq.setter
  111. def chisq(self, cs):
  112. self._chisq = cs
  113. self._generate()
  114. @property
  115. def fmt(self):
  116. return self._fmt
  117. @fmt.setter
  118. def fmt(self, f):
  119. self._fmt = f
  120. self._generate()
  121. @property
  122. def endpoint(self):
  123. return self._endpoint
  124. def _generate(self):
  125. self.setEndpoint(False)
  126. self.setFilename(False)
  127. def setEndpoint(self, loud=True):
  128. if loud and self._year == None or self._row == None or self._col == None:
  129. print("Year, row, and column must be set in order to build an endpoint.")
  130. self._endpoint = None
  131. lcontrol = "" if self.control == None else "&control={}".format(self.control)
  132. lcfnot = "" if self.cf_eq == True else "!"
  133. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "&filter={}{}%3D{}".format(self.control, lcfnot, self.control_filter)
  134. lweight = "" if self.weight == None else "&weight={}".format(self.weight)
  135. lchisq = "&run_chisq=false" if self.chisq == False or self.chisq == None else "&run_chisq=true"
  136. lfmt = "json" if (self.fmt == None or self.fmt not in ["json", "msgpack", "api"]) else self.fmt
  137. year_rng = "{}-{}".format(int(self.year), int(self.year)+1)
  138. self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
  139. year_rng,
  140. self.row,
  141. self.col,
  142. lcontrol,
  143. lcontrol_filter,
  144. lweight,
  145. lchisq,
  146. lfmt
  147. )
  148. @property
  149. def filename(self):
  150. return self._filename
  151. def setFilename(self, loud=True, ext=None):
  152. if loud and self._year == None or self._row == None or self._col == None:
  153. print("Year, row, and column must be set in order to build a filename.")
  154. self._filename = None
  155. lext = "csv" if ext == None else ext
  156. lcontrol = "" if self.control == None else "_ctl_{}".format(self.control)
  157. lcfnot = "" if self.cf_eq == True else "n"
  158. lcontrol_filter = "" if (self.control_filter == None or lcontrol == "") else "_{}eq_{}".format(lcfnot, self.control_filter)
  159. lweight = "" if self.weight == None else "_weight_{}".format(self.weight)
  160. lchisq = "_chisq_false" if self.chisq == False or self.chisq == None else "_chisq_true"
  161. self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
  162. self.year,
  163. self.row,
  164. self.col,
  165. lcontrol,
  166. lcontrol_filter,
  167. lweight,
  168. lchisq,
  169. lext
  170. )
  171. def toString(self):
  172. print("Data Collector:")
  173. print("Row: {}, Column: {}".format(self.row, self.col))
  174. print("Controlling on {} {}= {}".format(self.control, self.control_filter[0], self.control_filter[1]))
  175. print("Weighted by {}".format(self.weight))
  176. print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
  177. print("Formatting as {}".format(self.fmt))
  178. print("URL: {}".format(self.endpoint))
  179. def main():
  180. df = DataColl()
  181. df.year = 2017
  182. df.row = "STATE"
  183. df.col = "YOSELL2"
  184. df.control = "CATAG18"
  185. df.cf_eq = True
  186. df.control_filter = "2"
  187. df.weight = "DASWT_1"
  188. df.chisq = False
  189. df.fmt = "json"
  190. print(df.endpoint)
  191. df.setFilename(False, "csv")
  192. r = requests.get(df.endpoint)
  193. if __name__ == "__main__":
  194. main()