Робот для работы с виртуальным браузером Splash. Требует развернутый инстанс Splash (проще всего через docker). http://blindage.org/?p=9012
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

77 lines
2.7 KiB

  1. '''
  2. This program renders each configured URL through Splash and checks that the page issues the expected script requests. Requires a Splash server: https://github.com/scrapinghub/splash
  3. Code by Vladimir Smagin, 2018
  4. Mail: 21h@blindage.org
  5. '''
  6. import requests
  7. import mailer
  8. from urllib.parse import quote
  9. import json
  10. import config
  11. class C:
  12. HEADER = '\033[95m'
  13. OKBLUE = '\033[94m'
  14. OKGREEN = '\033[92m'
  15. WARNING = '\033[93m'
  16. FAIL = '\033[91m'
  17. ENDC = '\033[0m'
  18. BOLD = '\033[1m'
  19. UNDERLINE = '\033[4m'
  20. def sendmail(subj, body):
  21. message = mailer.Message(From='robot@blindage.org',
  22. To='21h@blindage.org')
  23. message.Subject = subj
  24. message.Html = """<p>Urgent report message<br>
  25. %s""" % body
  26. sender = mailer.Mailer('localhost')
  27. try:
  28. sender.send(message)
  29. except:
  30. print(C.FAIL+"Can't send message to sysadmin!"+C.ENDC)
  31. for pageUrl in config.searchURLs:
  32. try:
  33. print(C.HEADER+"Checking", pageUrl+C.ENDC)
  34. try:
  35. print(requests.get(config.searchServer + "/_ping").text)
  36. print(requests.post(config.searchServer + "/_gc").text)
  37. except:
  38. print(C.FAIL + "Splash server is down. It's time to panic!" + C.ENDC)
  39. break
  40. pageUrlRequest = config.searchServer+"/render.har?url="+ quote(pageUrl) + ""
  41. print(pageUrlRequest)
  42. r = requests.get(pageUrlRequest)
  43. answer = r.json()
  44. if r.status_code is 200:
  45. scriptNotFound = True
  46. for requestUrl in answer['log']['entries']:
  47. url = requestUrl['response']['url']
  48. for uSFR, aSFR in config.searchForRequests.items():
  49. if uSFR in url:
  50. for scriptName in aSFR:
  51. if scriptName in url:
  52. scriptNotFound = False
  53. print(C.OKGREEN + "Found request match", C.OKBLUE + url, uSFR, scriptName + C.ENDC)
  54. if scriptNotFound:
  55. print(C.FAIL + "No matches found" + C.ENDC)
  56. sendmail('URL ' + pageUrl + ' doesn\'t contain needed script request',"<p>URL " + pageUrl +
  57. "</p><p>No matches found with templates</p><p>" + r.text + "</p>")
  58. else:
  59. print(C.WARNING + "Response code is not 200" + C.ENDC)
  60. sendmail('URL ' + pageUrl + ' returned wrong response code', "<p>URL " + pageUrl +
  61. "</p><p>Response code is not 200</p><p>" + r.text + "</p>")
  62. except ValueError:
  63. print(ValueError)
  64. print(C.WARNING + "Couldn't connect to server" + C.ENDC)
  65. sendmail('URL '+ pageUrl+' inacessible', "<p>Status "+str(r.status_code)+"</p><p>Answer<br><br>"+r.text+"</p>")