Python scripting to automate fetching data from RDAS.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

main.py 5.5KB

4 år sedan
4 år sedan
4 år sedan
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import requests, time, os, sys
  2. import pandas, msgpack
  3. from bs4 import BeautifulSoup
  class DataColl:
      """Build an RDAS crosstab endpoint URL and a matching output filename.

      Every query parameter is exposed as a property. Assigning to any of
      them rebuilds both ``endpoint`` and ``filename``. Those two derived
      values are ``None`` until ``year``, ``row`` and ``col`` have all been
      set.

      Parameters (all default to ``None``):
          year: first year of the two-year survey window; stored as ``int``.
          row, col: crosstab row and column variable names.
          control: optional control variable name.
          control_filter: optional value the control variable is compared to;
              ignored unless ``control`` is set.
          cf_eq: ``True`` compares with "equals"; anything else (including
              the unset default) compares with "not equals".
          weight: optional weight variable name.
          chisq: truthy requests the chi-squared run; ``None``/``False`` do not.
          fmt: "json", "msgpack" or "api"; anything else falls back to "json".
      """

      def __init__(self):
          self._year = None
          self._row = None
          self._col = None
          self._control = None
          self._cf_eq = None
          self._control_filter = None
          self._weight = None
          self._chisq = None
          self._fmt = None
          # Derived values; defined up front so the read-only properties never
          # raise AttributeError on a freshly constructed object.
          self._endpoint = None
          self._filename = None

      @property
      def year(self):
          return self._year

      @year.setter
      def year(self, year):
          self._year = int(year)
          self._generate()

      @property
      def row(self):
          return self._row

      @row.setter
      def row(self, r):
          self._row = r
          self._generate()

      @property
      def col(self):
          return self._col

      @col.setter
      def col(self, c):
          self._col = c
          self._generate()

      @property
      def control(self):
          return self._control

      @control.setter
      def control(self, ctl):
          self._control = ctl
          self._generate()

      @property
      def cf_eq(self):
          return self._cf_eq

      @cf_eq.setter
      def cf_eq(self, eq):
          self._cf_eq = eq
          self._generate()

      @property
      def control_filter(self):
          return self._control_filter

      @control_filter.setter
      def control_filter(self, filter):
          self._control_filter = filter
          self._generate()

      @property
      def weight(self):
          return self._weight

      @weight.setter
      def weight(self, wgt):
          self._weight = wgt
          self._generate()

      @property
      def chisq(self):
          return self._chisq

      @chisq.setter
      def chisq(self, cs):
          self._chisq = cs
          self._generate()

      @property
      def fmt(self):
          return self._fmt

      @fmt.setter
      def fmt(self, f):
          self._fmt = f
          self._generate()

      @property
      def endpoint(self):
          """The last built URL, or ``None`` if it could not be built."""
          return self._endpoint

      @property
      def filename(self):
          """The last built filename, or ``None`` if it could not be built."""
          return self._filename

      def _generate(self):
          """Quietly rebuild the derived values after any property change.

          The filename is rebuilt with the default extension, so a custom
          ``ext`` passed to ``setFilename`` earlier is not remembered.
          """
          self.setEndpoint(False)
          self.setFilename(False)

      def _required_missing(self):
          """Return True while year, row or column is still unset."""
          return self._year is None or self._row is None or self._col is None

      def setEndpoint(self, loud=True):
          """Rebuild ``endpoint`` from the current settings.

          If year, row or column is missing the endpoint is reset to ``None``
          and nothing is built; the warning is printed only when ``loud`` is
          true.
          """
          if self._required_missing():
              if loud:
                  print("Year, row, and column must be set in order to build an endpoint.")
              self._endpoint = None
              # Stop here: int(None) below would raise, and a URL containing
              # "None" would be useless anyway.
              return
          lcontrol = "" if self.control is None else "&control={}".format(self.control)
          # Only an explicit True (or a value equal to it) selects "equals".
          lcfnot = "" if self.cf_eq == True else "!"  # noqa: E712
          if self.control_filter is None or lcontrol == "":
              lcontrol_filter = ""
          else:
              # %3D is the URL-encoded "=" inside the filter expression.
              lcontrol_filter = "&filter={}{}%3D{}".format(self.control, lcfnot, self.control_filter)
          lweight = "" if self.weight is None else "&weight={}".format(self.weight)
          lchisq = "&run_chisq=false" if self.chisq in (None, False) else "&run_chisq=true"
          lfmt = self.fmt if self.fmt in ("json", "msgpack", "api") else "json"
          # The survey id in the URL spans two consecutive years.
          year_rng = "{}-{}".format(int(self.year), int(self.year) + 1)
          self._endpoint = "https://rdas.samhsa.gov/api/surveys/NSDUH-{}-RD02YR/crosstab/?row={}&column={}{}{}{}{}&format={}".format(
              year_rng,
              self.row,
              self.col,
              lcontrol,
              lcontrol_filter,
              lweight,
              lchisq,
              lfmt,
          )

      def setFilename(self, loud=True, ext=None):
          """Rebuild ``filename`` from the current settings.

          ``ext`` is the file extension without the dot (default "csv"). If
          year, row or column is missing the filename is reset to ``None``;
          the warning is printed only when ``loud`` is true.
          """
          if self._required_missing():
              if loud:
                  print("Year, row, and column must be set in order to build a filename.")
              self._filename = None
              return
          lext = "csv" if ext is None else ext
          lcontrol = "" if self.control is None else "_ctl_{}".format(self.control)
          lcfnot = "" if self.cf_eq == True else "n"  # noqa: E712
          if self.control_filter is None or lcontrol == "":
              lcontrol_filter = ""
          else:
              lcontrol_filter = "_{}eq_{}".format(lcfnot, self.control_filter)
          lweight = "" if self.weight is None else "_weight_{}".format(self.weight)
          lchisq = "_chisq_false" if self.chisq in (None, False) else "_chisq_true"
          self._filename = "NSDUH_{}_{}_vs_{}{}{}{}{}.{}".format(
              self.year,
              self.row,
              self.col,
              lcontrol,
              lcontrol_filter,
              lweight,
              lchisq,
              lext,
          )

      def toString(self):
          """Print a human-readable summary of the current settings."""
          print("Data Collector:")
          print("Row: {}, Column: {}".format(self.row, self.col))
          if self.control is None:
              print("No control variable")
          elif self.control_filter is None:
              print("Controlling on {}".format(self.control))
          else:
              # Mirror the comparison used in the endpoint: "=" or "!=".
              op = "" if self.cf_eq == True else "!"  # noqa: E712
              print("Controlling on {} {}= {}".format(self.control, op, self.control_filter))
          print("Weighted by {}".format(self.weight))
          print("Generating Chi-Squared" if self.chisq else "Not generating Chi-Squared")
          print("Formatting as {}".format(self.fmt))
          print("URL: {}".format(self.endpoint))
  def main():
      """Fetch one sample crosstab from RDAS and print the keys of its results.

      Raises:
          requests.HTTPError: if the server answers with an error status.
          requests.Timeout: if the server does not answer in time.
      """
      df = DataColl()
      df.year = 2017
      df.row = "STATE"
      df.col = "YOSELL2"
      df.control = "CATAG18"
      df.cf_eq = True
      df.control_filter = "2"
      df.weight = "DASWT_1"
      df.chisq = False
      df.fmt = "json"
      print(df.endpoint)
      # NOTE: the filename is computed here but nothing is written to disk yet.
      df.setFilename(False, "json")
      # Without a timeout requests can wait on the server indefinitely.
      r = requests.get(df.endpoint, timeout=60)
      # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
      r.raise_for_status()
      rs = r.json()["results"]
      for k in rs:
          print(k)
  # Run the sample query only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()