Changeset 0ca99d0c705a…
Parent e8792002a292…
by Quentin Schroeder <quentin@fogcreek.com>
Changes to one file · Browse files at 0ca99d0c705a Showing diff from parent e8792002a292 Diff from another changeset...
|
@@ -8,38 +8,79 @@
+ # Script parameters:
+ #   kilnURL            - Kiln web URL; its response is checked for "Log on to Kiln".
+ #   smtpServer/from/to - mail settings; Handle-Error sends an e-mail only when all three are non-empty.
+ #   backendVersionUrl  - URL whose response is checked for '"hg_version"'.
+ #   esUrl              - URL whose response is checked for '"status" : 200'.
+ #   queueStatsUrl      - URL of the queue stats JSON; checked for '"queueName":"Kiln"' and parsed by Test-QueueStats.
+ #   iisSiteName        - IIS site stopped/started through appcmd.exe when the Kiln URL check fails; empty disables that restart.
+ #   noRestart          - when set, no automatic restart of any component is attempted.
+ #   reenqueuePath      - script launched with "-retryRunning" when the oldest running task is older than 15 minutes.
param([string]$kilnURL = "http://localhost/fogbugz/kiln",
+ [string]$smtpServer = "",
+ [string]$from = "",
+ [string]$to = "",
[string]$backendVersionUrl = "http://localhost:56783/version",
- [string]$EsUrl = "http://localhost:9200/",
- [string]$QueueStatsUrl = "http://localhost:56785/stats.json")
+ [string]$esUrl = "http://localhost:9200/",
+ [string]$queueStatsUrl = "http://localhost:56785/stats.json",
+ [string]$iisSiteName = "",
+ [switch]$noRestart,
+ [string]$reenqueuePath = ".\reenqueue_2.9_tasks.ps1")
-function Write-WithTime($msg){
- write "$(Get-Date -format o) $msg"
+
+############################# FUNCTION DEFINITIONS #############################
+
+# Writes $msg to the host, prefixed with the current time in round-trip ("o") format.
+# When $error is true the line is printed in red; otherwise the default colour is used.
+function Write-WithTime($msg, $error=$false){
+    $stampedLine = "$(Get-Date -format o) $msg"
+    if ($error)
+    {
+        write-host $stampedLine -foregroundcolor "red"
+    }
+    else
+    {
+        write-host $stampedLine
+    }
}
-function Test-ProcessRunning($processName, $count = 1)
+
+# Reports a failed check.
+#   $msg - human-readable description of the failure.
+# Always prints an " ERROR!" marker in red. When $smtpServer, $from and $to are all
+# set, the description is e-mailed; otherwise (or when the e-mail cannot be sent)
+# the description is printed to the host instead so it is never lost.
+function Handle-Error($msg)
+{
+    Write-WithTime " ERROR!" $true
+
+    $mailSent = $false
+    if ($smtpServer -and $from -and $to)
+    {
+        try
+        {
+            # The failure description is the body of the report; an empty body
+            # would leave the recipient with no idea which check failed.
+            Send-MailMessage -From $from -To $to -SmtpServer $smtpServer -Subject "Kiln Server Error Report!" -Body $msg -ErrorAction Stop
+            $mailSent = $true
+        }
+        catch
+        {
+            Write-WithTime " Could not send error report e-mail: $($_.Exception.Message)" $true
+        }
+    }
+
+    if (!($mailSent))
+    {
+        Write-WithTime " $msg" $true
+    }
+}
+
+
+
+# Checks that at least $count processes named $processName are running.
+#   $processName         - process name passed to Get-Process.
+#   $count               - minimum number of instances required (default 1).
+#   $serviceNeedsRestart - [ref] flag; its value is set to $true when the check fails.
+# On failure the problem is also reported through Handle-Error.
+function Test-ProcessRunning($processName, $count = 1, [ref]$serviceNeedsRestart)
{
Write-WithTime "Checking that $count instance(s) of $processName are running..."
$result = (Get-Process $processName -ea SilentlyContinue | select id | measure).count -ge $count
- if ($result) { Write-WithTime "Success!" }
- else { Write-WithTime "FAILURE!" }
+ if (!($result))
+ {
+ Handle-Error("Critical Kiln Server error. Process `'$processName`' could not be found!")
+ # Write through the reference so the caller's flag is updated.
+ $serviceNeedsRestart.value = $true
+ }
}
-function Test-ElasticSearchRunning()
+# Checks that a java.exe process whose command line mentions "ElasticSearch" is running.
+#   $serviceNeedsRestart - [ref] flag; its value is set to $true when no such process is found.
+# On failure the problem is also reported through Handle-Error.
+function Test-ElasticSearchRunning([ref]$serviceNeedsRestart)
{
Write-WithTime "Checking that 1 instances(s) of ElasticSearch are running..."
- $result = (Get-WmiObject win32_process -Filter "name like 'java.exe'" | select commandline | select-string "ElasticSearch" | measure).count -eq 1
- if ($result) { Write-WithTime "Success!" }
- else { Write-WithTime "FAILURE!" }
+ $result = $false
+ # Inspect each java.exe process individually; any one matching command line is enough.
+ (Get-WmiObject win32_process -Filter "name like 'java.exe'") | ForEach-Object {
+ if (($_.CommandLine | select-string "ElasticSearch" | measure).count -eq 1) {
+ $result = $true
+ }
+ }
+ if (!($result))
+ {
+ Handle-Error("Critical Kiln Server error. Elastic Search is not running!")
+ # Write through the reference so the caller's flag is updated.
+ $serviceNeedsRestart.value = $true
+ }
}
-function Test-HttpResponse($url, $searchString)
+# Downloads $url and checks that the response contains $searchString.
+#   $url                 - address fetched with System.Net.WebClient.
+#   $searchString        - pattern handed to select-string to validate the response.
+#   $serviceNeedsRestart - [ref] flag; its value is set to $true when the pattern is
+#                          missing or the URL cannot be reached (Net.WebException).
+# Every failure is also reported through Handle-Error.
+function Test-HttpResponse($url, $searchString, [ref]$serviceNeedsRestart)
{
Write-WithTime "Checking for expected response from URL ($url)..."
@@ -48,25 +89,186 @@ $response = (New-Object net.webclient).DownloadString($url)
$response = (New-Object net.webclient).DownloadString($url)
$result = $response | select-string $searchString
- if ($result) { Write-WithTime "Success!" }
- else { Write-WithTime "FAILURE!" }
+ if (!($result))
+ {
+ Handle-Error("Critical Kiln Server error. URL [$url] did not contain the expected string `'$searchString`'")
+ $serviceNeedsRestart.value = $true
+ }
}
catch [Net.WebException]
{
- Write-WithTime $_.Exception.ToString()
+ $exceptionMessage = $_.Exception.Message
+ Handle-Error("Critical Kiln Server error. Could not reach URL [$url]`n$exceptionMessage")
+ # Must assign through .value: a plain assignment only rebinds the local
+ # parameter and the caller's restart flag would stay $false.
+ $serviceNeedsRestart.value = $true
}
}
+# Downloads the queue statistics JSON from $queueStatsUrl and evaluates it.
+#   $serviceNeedsRestart - [ref] flag; its value is set to $true when the reported
+#                          time differs from local UTC time by more than 5 seconds,
+#                          or when the stats cannot be fetched or parsed.
+# A long queue (> 50), too many running tasks (> 16) and an old running task
+# (> 15 minutes) are reported through Handle-Error without requesting a restart;
+# for an old task the re-enqueue script at $reenqueuePath is launched if it exists.
+function Test-QueueStats([ref]$serviceNeedsRestart)
+{
+ try
+ {
+ $response = (New-Object System.Net.WebClient).DownloadString($queueStatsUrl)
+
+ $stats = Convert-JsonToXml($response)
+
+ $statsTime = [long]$stats.root.currentTime."#text"
+ # 10e6 ticks (100 ns each) make one second.
+ # NOTE(review): assumes currentTime is seconds on the same epoch as DateTime ticks - confirm against the queue service.
+ $timeDiff = (((Get-Date).ToUniversalTime().Ticks / 10e6) - $statsTime)
+ $queueLength = [int]$stats.root.queueLength."#text"
+ $runningTaskCount = $stats.root.runningTasks.ChildNodes.count
+ $oldestTaskAge = [int]$stats.root.oldestRunningTaskTotalSeconds."#text"
-Test-ProcessRunning "backend"
-Test-ProcessRunning "redis-server" 2
-Test-ProcessRunning "QueueService"
-Test-ElasticSearchRunning
-Test-HttpResponse $kilnURL "Log on to Kiln"
-Test-HttpResponse $backendVersionUrl '"hg_version"'
-Test-HttpResponse $EsUrl '"status" : 200'
-Test-HttpResponse $QueueStatsUrl '"queueName":"Kiln"'
-# TODO: Parse the QueueStats data and alert when poor behavior is detected. Attempt automatic restart?
\ No newline at end of file+
+ # More than 5 seconds of skew in either direction means the stats are stale or wrong.
+ # (The previous test "-gt 5 -or -lt 5" was true for every value except exactly 5.)
+ if ([Math]::Abs($timeDiff) -gt 5)
+ {
+ # Assign through .value so the caller's flag is actually updated.
+ $serviceNeedsRestart.value = $true
+ }
+
+
+ # This is a warning sign, so just alert, but don't attempt to restart the service yet.
+ if ($queueLength -gt 50)
+ {
+ Handle-Error("The Kiln Queue is exceptionally long ($queueLength tasks), this might by a symptom of a recent large change or a problem.")
+ }
+
+
+ # This is a warning sign, so just alert, but don't attempt to restart the service yet.
+ if ($runningTaskCount -gt 16)
+ {
+ Handle-Error("Too many tasks ($runningTaskCount) are currently running in the Kiln Queue.")
+ }
+
+
+ # Longer than 15 minutes on a task? This is a bad sign, time to give it some help!
+ if ( $oldestTaskAge -gt (15*60) )
+ {
+ Handle-Error("Oldest task has been running for $oldestTaskAge seconds!")
+
+ if (test-path $reenqueuePath)
+ {
+ # NOTE(review): Start-Process on a .ps1 uses the file association, which may open an editor instead of running it - confirm.
+ Start-Process $reenqueuePath "-retryRunning"
+ }
+ }
+
+ }
+ catch [Exception]
+ {
+ $exceptionMessage = $_.Exception.Message
+ Handle-Error("Error obtaining or processing Kiln Queue Stats`n$exceptionMessage")
+ # Assign through .value so the caller's flag is actually updated.
+ $serviceNeedsRestart.value = $true
+ }
+}
+
+
+
+
+# This is provided as an alternative to ConvertFrom-JSON, which requires Powershell V3.
+# Powershell V3 is not on most servers by default and requires a reboot to install, so here we are instead...
+# Source: (https://www.cogmotive.com/blog/powershell/parsing-json-in-powershell-xml-the-member-item-is-already-present)
+Add-Type -Assembly System.ServiceModel.Web,System.Runtime.Serialization
+# Parses a JSON string into a System.Xml.XmlDocument using the .NET JSON reader.
+#   $json - JSON text to parse.
+# Returns the XmlDocument; the JSON top-level object becomes the "root" element
+# and scalar values are read through each element's "#text".
+function Convert-JsonToXml([string]$json)
+{
+    # Encode as UTF-8 rather than casting chars to bytes: the cast throws for any
+    # character above U+00FF and produces invalid UTF-8 for U+0080..U+00FF.
+    $bytes = [System.Text.Encoding]::UTF8.GetBytes($json)
+    $quotas = [System.Xml.XmlDictionaryReaderQuotas]::Max
+    $jsonReader = [System.Runtime.Serialization.Json.JsonReaderWriterFactory]::CreateJsonReader($bytes,$quotas)
+    try
+    {
+        $xml = new-object System.Xml.XmlDocument
+        $xml.Load($jsonReader)
+        $xml
+    }
+    finally
+    {
+        # Always release the reader, even when the JSON is malformed.
+        $jsonReader.Close()
+    }
+}
+
+
+
+########################### END FUNCTION DEFINITIONS ###########################
+
+
+
+
+
+# Automatic restarts are attempted unless -noRestart was passed.
+$tryAutoRestart = !($noRestart) # Needed a negative name for the parameter since the default is true. Sanity rename here.
+# One flag per restartable component (Kiln Storage Service, Kiln Queuing Service, IIS site).
+# The checks below receive them as [ref] arguments.
+$kssNeedsRestart = $false
+$kqsNeedsRestart = $false
+$iisNeedsRestart = $false
+
+# Process checks: backend, both redis-server instances and ElasticSearch map to the
+# storage service flag; QueueService maps to the queuing service flag.
+Test-ProcessRunning "backend" 1 -serviceNeedsRestart ([ref]$kssNeedsRestart)
+Test-ProcessRunning "redis-server" 2 -serviceNeedsRestart ([ref]$kssNeedsRestart)
+Test-ElasticSearchRunning -serviceNeedsRestart ([ref]$kssNeedsRestart)
+Test-ProcessRunning "QueueService" -serviceNeedsRestart ([ref]$kqsNeedsRestart)
+
+# HTTP checks: each URL must answer with the expected marker string.
+Test-HttpResponse $kilnURL "Log on to Kiln" -serviceNeedsRestart ([ref]$iisNeedsRestart)
+Test-HttpResponse $backendVersionUrl '"hg_version"' -serviceNeedsRestart ([ref]$kssNeedsRestart)
+Test-HttpResponse $esUrl '"status" : 200' -serviceNeedsRestart ([ref]$kssNeedsRestart)
+Test-HttpResponse $queueStatsUrl '"queueName":"Kiln"' -serviceNeedsRestart ([ref]$kqsNeedsRestart)
+
+# Detailed inspection of the queue statistics.
+Test-QueueStats -serviceNeedsRestart ([ref]$kqsNeedsRestart)
+
+
+
+
+
+
+
+# Blank separator between the check output and any restart output.
+Write-Host "`n"
+
+
+
+# Restart the IIS site when the Kiln URL check failed, a site name was supplied
+# and automatic restarts are enabled.
+if ($iisNeedsRestart -and $iisSiteName -and $tryAutoRestart)
+{
+    Write-WithTime "Attemping to restart site $iisSiteName in IIS...`n"
+
+    $appcmd = $env:SystemRoot + "\system32\inetsrv\appcmd.exe"
+    # Quote the site name so names containing spaces (e.g. "Default Web Site")
+    # reach appcmd as a single argument.
+    $stopArgs = "stop site /site.name:`"$iisSiteName`""
+    $startArgs = "start site /site.name:`"$iisSiteName`""
+
+    # Wait for the stop to finish; otherwise the start can run first and the
+    # site ends up stopped.
+    Start-Process $appcmd $stopArgs -Wait
+    Start-Process $appcmd $startArgs
+}
+
+
+# Restart the Kiln Storage Service when one of its checks failed and automatic
+# restarts are enabled: kill its worker processes, then start the service.
+if ($tryAutoRestart -and $kssNeedsRestart)
+{
+    Write-WithTime "Attempting to restart Kiln Storage Service..."
+
+    # Kill every java.exe whose command line identifies it as Elastic Search.
+    # Errors are suppressed throughout: a process may already be gone.
+    (Get-WmiObject win32_process -Filter "name like 'java.exe'") |
+        Where-Object { ($_.CommandLine | Select-String "ElasticSearch" | Measure-Object).Count -eq 1 } |
+        ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorVariable Err -ErrorAction "SilentlyContinue" }
+
+    # Stopping by name takes down both redis-server.exe instances, since they
+    # share the same process name; backend follows.
+    foreach ($workerName in @("redis-server", "backend"))
+    {
+        Stop-Process -Name $workerName -Force -ErrorVariable Err -ErrorAction "SilentlyContinue"
+    }
+
+    Start-Service -Name KilnStorageService
+}
+
+
+
+# Restart the Kiln Queuing Service when one of its checks failed and automatic
+# restarts are enabled.
+if ($tryAutoRestart -and $kqsNeedsRestart)
+{
+    Write-WithTime "Attempting to restart Kiln Queuing Service..."
+
+    # Kill the worker process quietly (it may already be stopped), pause for a
+    # second, then start the service again.
+    Stop-Process -Name QueueService -Force -ErrorVariable Err -ErrorAction "SilentlyContinue"
+    Start-Sleep -Seconds 1
+    Start-Service -Name "Kiln Queuing Service"
+}
+
+
+
+
+
+
|
Loading...