Changeset f2974d8da963…
Parent 0ad9404448f7…
by Quentin Schroeder <quentin@fogcreek.com>
Changes to one file · Browse files at f2974d8da963 Showing diff from parent 0ad9404448f7 Diff from another changeset...
|
@@ -1,22 +1,135 @@ - #-----------------------------------------------------------------------------
-# This script will check that Kiln is running and healthy by hitting a variety of HTTP endpoints
-#
-# Usage example:
-# ./Monitor-Kiln.ps1 -kilnURL "http://localhost/fogbugz/kiln"
-#
-#-----------------------------------------------------------------------------
+<#
+.SYNOPSIS
+ Checks the vital signs of a Kiln Server
+.DESCRIPTION
+ This script will check a variety of vital signs to see if a Kiln Server is in good health.
+ When errors are found, the script can write them to std-out, email a message, and/or attempt
+ to automatically restart the necessary Kiln Services.
-param([string]$kilnURL = "http://localhost/fogbugz/kiln",
+.EXAMPLE
+ ./Monitor-Kiln.ps1 -kilnURL "http://localhost/fogbugz/kiln"
+ This is the most basic check: it will simply perform checks on all Kiln services at their default locations, write
+ information to std-out, and attempt to automatically restart the Kiln Storage Service and Kiln Queuing
+ Service when errors are found.
+
+.EXAMPLE
+ ./Monitor-Kiln.ps1 -kilnURL "http://localhost/fogbugz/kiln" -smtpServer smtp.myserver.com -from kiln-error-report@company.com -to kiln-admin@company.com -username bob -password pa55w0rd -smtpSsl
+ This will perform checks on all Kiln services at their default locations, and attempt to automatically
+ restart the Kiln Storage Service and Kiln Queuing Service when errors are found. Additionally, any errors
+ found will generate an email to "kiln-admin@company.com" from "kiln-error-report@company.com" which will be
+ sent via the smtp server "smtp.myserver.com" using SSL and the username "bob" with password "pa55w0rd" to
+ authenticate.
+
+.EXAMPLE
+ ./Monitor-Kiln.ps1 -kilnURL "http://localhost/fogbugz/kiln" -iisSiteName Default
+ This will perform checks on all Kiln services at their default location, write information to std-out,
+ and attempt to automatically restart the Kiln Storage Service and Kiln Queuing Service when errors are
+ found. It will also attempt to restart the IIS website named "Default" if the Kiln front page won't load.
+
+.EXAMPLE
+ ./Monitor-Kiln.ps1 -kilnURL "http://localhost/fogbugz/kiln" -noRestart
+ This will perform checks on all Kiln services at their default location, write information to std-out,
+ but will not attempt to restart any services.
+
+.PARAMETER kilnURL
+ The URL to reach the Kiln web page.
+
+.PARAMETER smtpServer
+ An SMTP server that this script can use to send error messages.
+
+.PARAMETER from
+ The from address to appear in emailed error messages.
+
+.PARAMETER to
+ The destination address for emailing error messages.
+
+.PARAMETER username
+ The username to log into the SMTP server.
+
+.PARAMETER password
+ The password to log into the SMTP server.
+
+.PARAMETER smtpSsl
+ A switch to use SSL when connecting to the SMTP server (by default SSL is off).
+
+.PARAMETER smtpPort
+ The port to connect to the SMTP server on (uses 587 by default).
+
+.PARAMETER backendVersionUrl
+ The URL of the Kiln backend. The default value is almost always correct.
+
+.PARAMETER esUrl
+ The URL of the ElasticSearch web front end. The default value is almost always correct.
+
+.PARAMETER queueStatsUrl
+ The URL of the Queue Stats page. The default value is almost always correct.
+
+.PARAMETER iisSiteName
+ The name of the IIS Website that FogBugz and Kiln are running under. If you provide this,
+ the script will attempt to restart IIS if it cannot load the Kiln web page.
+
+.PARAMETER noRestart
+ A flag which when set will force the script to only send alert messages and not
+ attempt to restart any services automatically.
+
+.PARAMETER reenqueuePath
+ The path to the Reenqueue script, which is used to try to resuscitate the Queue when problems are detected.
+
+.NOTES
+ Author: Quentin Schroeder
+ Date: Dec 13, 2012
+#>
+
+
+
+param([Parameter(ParameterSetName="noemail")]
+ [string]$kilnURL = "http://localhost/fogbugz/kiln",
+
+ [Parameter(ParameterSetName="email", Mandatory=$true)]
[string]$smtpServer = "",
+
+ [Parameter(ParameterSetName="email", Mandatory=$true)]
+ [Parameter(ParameterSetName="noemail")]
[string]$from = "",
+
+ [Parameter(ParameterSetName="email", Mandatory=$true)]
+ [Parameter(ParameterSetName="noemail")]
[string]$to = "",
+
+ [Parameter(ParameterSetName="email", Mandatory=$true)]
+ [Parameter(ParameterSetName="noemail")]
+ [string]$username = "",
+
+ [Parameter(ParameterSetName="email", Mandatory=$true)]
+ [Parameter(ParameterSetName="noemail")]
+ [string]$password = "",
+
+ [Parameter(ParameterSetName="email")]
+ [switch]$smtpSsl,
+
+ [Parameter(ParameterSetName="email")]
+ [string]$smtpPort = 587,
+
+ [Parameter(ParameterSetName="noemail")]
[string]$backendVersionUrl = "http://localhost:56783/version",
+
+ [Parameter(ParameterSetName="noemail")]
[string]$esUrl = "http://localhost:9200/",
+
+ [Parameter(ParameterSetName="noemail")]
[string]$queueStatsUrl = "http://localhost:56785/stats.json",
+
+ [Parameter(ParameterSetName="noemail")]
[string]$iisSiteName = "",
+
+ [Parameter(ParameterSetName="noemail")]
[switch]$noRestart,
- [string]$reenqueuePath = ".\reenqueue_2.9_tasks.ps1")
+
+ [Parameter(ParameterSetName="noemail")]
+ [string]$reenqueuePath = ".\reenqueue_2.9_tasks.ps1"
+ )
+
@@ -37,9 +150,24 @@function Handle-Error($msg)
{
Write-WithTime " ERROR!" $true
- if ($smtpServer -and $from -and $to)
+ if ($smtpServer -and $from -and $to -and $username -and $password)
{
- Send-MailMessage -From $from -To $to -SmtpServer $smtpServer -Subject "Kiln Server Error Report!" -Body ""
+ try
+ {
+ # Connect using the -smtpServer and -smtpPort script parameters.
+ # (The port variable was previously misspelled as $smtpPor, which is
+ # undefined, so the configured port was never passed to the client.)
+ $SmtpClient = New-Object Net.Mail.SmtpClient($smtpServer, $smtpPort)
+ $SmtpClient.EnableSsl = $smtpSsl
+ $SmtpClient.Credentials = New-Object System.Net.NetworkCredential($username, $password);
+ $SmtpClient.Send($from, $to, "Kiln Server Error Report!", $msg)
+ }
+ catch [Exception]
+ {
+ # A failure to send mail is reported through Write-WithTime instead of
+ # being rethrown.
+ $exceptionMessage = $_.Exception.Message
+ Write-WithTime " Unable to send email, an error occurred.`n$exceptionMessage" $true
+ }
+ }
+ elseif ($smtpServer -or $from -or $to -or $username -or $password -or $smtpSsl)
+ {
+ Write-WithTime " Unable to send email, some required parameters are missing!" $true
}
else
{
@@ -118,7 +246,13 @@ $runningTaskCount = $stats.root.runningTasks.ChildNodes.count
$oldestTaskAge = [int]$stats.root.oldestRunningTaskTotalSeconds."#text"
-
+
+ Write-WithTime ""
+ Write-WithTime "--Queue Stats Information--"
+ Write-WithTime " Time reported: $timeDiff seconds from now"
+ Write-WithTime " Queue Length: $queueLength"
+ Write-WithTime " Running Task Count: $runningTaskCount"
+ Write-WithTime " Age of oldest task: $oldestTaskAge seconds"
if (($timeDiff -gt 5) -or ($timeDiff -lt 5))
@@ -144,12 +278,16 @@ # Longer than 15 minutes on a task? This is a bad sign, time to give it some help!
if ( $oldestTaskAge -gt (15*60) )
{
- Handle-Error("Oldest task has been running for $oldestTaskAge seconds!")
+ Handle-Error("Oldest task has been running for $oldestTaskAge seconds! Attempting to retry all running tasks.")
if (test-path $reenqueuePath)
{
Start-Process $reenqueuePath "-retryRunning"
}
+ else
+ {
+ Write-WithTime "Reenqueue script not found at ($reenqueuePath), unable to retry running tasks in Kiln Queue."
+ }
}
}
@@ -271,4 +409,4 @@
-
+write-host "`n"
\ No newline at end of file |
Loading...