We see instability when we use Direct Access NLB with ESX E1000 adapters; it stops working randomly. As a temporary workaround, we created a script that monitors and resets failed adapters.
In the long run we want to switch to VMXNET3 adapters, which are more stable.
@echo off
rem ---------------------------------------------------------------------------
rem Direct Access network adapter watchdog.
rem
rem Once per minute, pings the public and the private gateway (one echo request
rem each, sent from the corresponding VIP source address). When a gateway does
rem not answer, the check is repeated after 10 seconds; if it fails again, the
rem matching network adapter is restarted through PowerShell.
rem
rem Application event log entries written with source "DA":
rem   998  INFORMATION  watchdog started
rem   997  WARNING      a gateway missed one ping, re-check pending
rem   999  ERROR        a gateway missed two pings, adapter is being reset
rem
rem The script ends by itself when the current hour is 00.
rem ---------------------------------------------------------------------------
echo This is a DA watchdog, pls do not close. it will stop every day when hour is 00
eventcreate /ID 998 /L APPLICATION /T INFORMATION /SO DA /D "Direct Access network adapter watchdog is started"

:LOOPPING
rem Take the hour from %TIME%; spaces are replaced with 0 first so that a
rem space-padded single-digit hour compares as a two-digit string.
set "HH=%TIME: =0%"
set "HH=%HH:~0,2%"
echo %HH%
if "%HH%" == "00" goto Quit_here

rem Probe both gateways; "if errorlevel 1" is true when ping reported failure.
ping -S vip-public -n 1 public-gw >nul
if errorlevel 1 goto NoServer_public
ping -S vip_private -n 1 private_gw >nul
if errorlevel 1 goto NoServer_private

rem Both gateways answered: wait a minute before the next round.
timeout /t 60 >nul
goto LOOPPING

:NoServer_public
rem First miss on the public side: log a warning and re-check after 10 seconds
rem so that a single lost ping does not trigger an adapter reset.
echo Check again
eventcreate /ID 997 /L APPLICATION /T WARNING /SO DA /D "Direct Access DMZ public gateway stopped responding, we will check again to be sure"
timeout /t 10 >nul
ping -S vip-public -n 1 public-gw >nul
if errorlevel 1 goto RESET_PUB
goto LOOPPING

:RESET_PUB
rem Second miss in a row: log an error, restart the public adapter, then give
rem it 60 seconds before monitoring resumes.
echo we have a problem - refer to eventlog event 999
eventcreate /ID 999 /L APPLICATION /T ERROR /SO DA /D "Direct Access DMZ public gateway stopped responding, we will reset adapter"
powershell -Command "& restart-netadapter 'Public DMZ'"
timeout /t 60 >nul
goto LOOPPING

:NoServer_private
rem First miss on the private side: same warn-and-re-check sequence as for the
rem public gateway.
echo Check again
eventcreate /ID 997 /L APPLICATION /T WARNING /SO DA /D "Direct Access DMZ private gateway stopped responding, we will check again to be sure"
timeout /t 10 >nul
ping -S vip_private -n 1 private_gw >nul
if errorlevel 1 goto RESET_PRIV
rem The gateway answered on the re-check: resume monitoring. The target must be
rem the existing LOOPPING label; jumping to an undefined label ends the script.
goto LOOPPING

:RESET_PRIV
rem Second miss in a row: log an error, restart the private adapter, then give
rem it 60 seconds before monitoring resumes.
echo we have a problem - refer to eventlog event 999
eventcreate /ID 999 /L APPLICATION /T ERROR /SO DA /D "Direct Access DMZ private gateway stopped responding, we will reset adapter"
powershell -Command "& restart-netadapter 'Private DMZ'"
timeout /t 60 >nul
goto LOOPPING

:Quit_here
echo we finish script every day at 00:
rem Leave explicitly so nothing located after this point is ever executed.
exit /b 0
No comments:
Post a Comment