view host/admin/src/private/lib/monitor.luan @ 2022:969291201e12

ping lucene backups
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 20 Oct 2025 17:25:47 -0600
parents b8e5d53c4fc9
children
line wrap: on
line source

local Luan = require "luan:Luan.luan"
local error = Luan.error
local Io = require "luan:Io.luan"
local uri = Io.uri or error()
local String = require "luan:String.luan"
local trim = String.trim or error()
local Time = require "luan:Time.luan"
local Thread = require "luan:Thread.luan"
local Http = require "luan:http/Http.luan"
local Config = require "site:/private/Config.luan"
local Utils = require "site:/private/lib/Utils.luan"
local send_mail = Utils.send_mail or error()
local Logging = require "luan:logging/Logging.luan"
local logger = Logging.logger "monitor"


-- Maps the domain this instance runs on to the peer domain it watches.
-- A domain with no entry here monitors nothing.
local who_monitors_who = {}
who_monitors_who["admin.s1.luan.software"] = "admin.s2.luan.software"
who_monitors_who["admin.s2.luan.software"] = "admin.s1.luan.software"

-- Delay between two consecutive checks.
local frequency = Time.period{ minutes = 2 }

-- Address of the page to poll; left unset here so that a default can be
-- derived from the monitored domain unless an override assigns it first.
local url

--[[  -- for development
who_monitors_who["admin.me.luan.software"] = "admin.me.luan.software"
url = "http://admin.me.luan.software:8080/hi.txt"
frequency = Time.period{seconds=30}
Config.email_to = "fschmidt@gmail.com"
--]]



-- Peer domain watched by this instance, looked up by our own domain.
local domain = who_monitors_who[Utils.domain]

-- No peer configured for this host: finish loading without scheduling.
if domain == nil then
	logger.info("nothing to monitor")
	return true
end

-- Derive the polled address from the peer domain unless one was already set.
if url == nil then
	url = "https://"..domain.."/hi.txt"
end

-- Options handed to uri() for every poll.
local options = {
	time_out = Time.period{ seconds = 20 }
}

-- Creates the check closure for `domain`.
--
-- The returned function reads the text at `url` (with `options`) and compares
-- it with the text seen by the previous successful check.  An unchanged page
-- or an error while reading counts as a failure; a changed page resets the
-- failure counter.  From the second failure in a row onward, the restart
-- script is run on `domain` through Utils.ssh and the outcome is logged and,
-- depending on the result, mailed.
local function init_check()
	local failure_count = 0   -- failures since the last good check or restart
	local previous_page       -- page text seen by the last good check

	-- Runs the restart script on the monitored domain and reports the result.
	local function restart_host()
		try
			local output = Utils.ssh(domain,"/Users/administrator/luan/host/restart.sh monitoring")
			if trim(output) == "stopped with stop script" then
				-- the script reported a stop via the stop script: log only, no mail
				logger.info("stopped with stop script")
			else
				logger.error("restart successful\n"..output)
				send_mail {
					Subject = domain.." restarted"
					body = output
				}
			end
			-- the restart script ran without raising: start counting afresh
			failure_count = 0
		catch err
			local message = err.get_message()
			logger.error("restart failed: "..message)
			-- the counter is not reset on this path, so mail stops once it reaches 5
			if failure_count < 5 then
				send_mail {
					Subject = domain.." restart failed"
					body = message
				}
			end
		end
	end

	-- Counts one failure; the first one is tolerated, later ones trigger a restart.
	local function record_failure()
		failure_count = failure_count + 1
		if failure_count >= 2 then
			restart_host()
		end
	end

	return function()
		try
			local current_page = uri(url,options).read_text()
			if current_page == previous_page then
				-- same text as last time: treated as the remote scheduler being stuck
				logger.error("Scheduler not working on "..domain)
				record_failure()
			else
				previous_page = current_page
				failure_count = 0
				logger.info(domain.." is okay")
			end
		catch err
			logger.error("Error connecting to "..domain..": "..err.get_message())
			record_failure()
		end
	end
end

-- Hand the check factory to the scheduler, using `frequency` as the
-- repeating delay.
local schedule_options = { repeating_delay = frequency }
Thread.schedule_closure(init_check,schedule_options)

logger.info("monitoring "..domain)

-- Module result.
return true