Guys as discussed at my last post oracle-performance-tuning-queries I have promised that I will be back with some DBA monitoring shell scripts , So there they are. Below shell scripts can be used for daily Database monitoring by DBA, Some are created by my fellow DBA .
Please edit the configuration as per your environment
- This scripts checks for tablespace usage. If tablespace is 10 percent free, it will send an alert e-mail.
#####################################################################
## check_tablespace.sh ##
##
#####################################################################
###!/bin/bash
##
####### Start of configuration
##
######## Oracle Enviorment variables ##########
##
export ORACLE_BASE=/u01/app
export ORACLE_HOME=/rac/app/oracle/product/11.2.0
export ORACLE_UNQNAME=TEST01
export PATH=$PATH:$ORACLE_HOME/bin:$ORACLE_HOME/rdbms/admin:$ORACLE_HOME/lib
export alrt=/rac/app/oracle/diag/rdbms/test01/TEST01/trace/alert_TEST01.log
export asmalrt=/rac/app/oracle/diag/asm/+asm/+ASM1/trace/alert_+ASM1.log
export TNS_ADMIN=$ORACLE_HOME/network/admin
export MONITOR_DIR=$ORACLE_HOME/dba-scripts/monitor
export ORACLE_SID=TEST01
##
######## Other variables #################
##
DBA=nimai.karmakar@hotmail.com
DATABASE=TEST01
datevar=$(date)
datevar2=$(date ‘+%Y-%m-%d-%H-%M’)
##
####### End of configuration
##
sqlplus -s “/ as sysdba” << SQL1
set feed off
set linesize 100
set pagesize 200
spool /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.alert
SELECT F.TABLESPACE_NAME,
TO_CHAR ((T.TOTAL_SPACE – F.FREE_SPACE),’999,999′) “USED(MB)”,
TO_CHAR (F.FREE_SPACE, ‘999,999’) “FREE(MB)”,
TO_CHAR (T.TOTAL_SPACE, ‘999,999’) “TOTAL(MB)”,
TO_CHAR ((ROUND ((F.FREE_SPACE/T.TOTAL_SPACE)*100)),’999,99999′)||’ %’ PERCENT_FREE
FROM (
SELECT TABLESPACE_NAME,
ROUND (SUM (BLOCKS*(SELECT VALUE/1024
FROM V\$PARAMETER
WHERE NAME = ‘db_block_size’)/1024)
) FREE_SPACE
FROM DBA_FREE_SPACE WHERE TABLESPACE_NAME NOT IN (‘SYSTEM’,’SYSAUX’,’TEMP’,’USERS’,’UNDOTBS1′,’UNDOTBS2′)
GROUP BY TABLESPACE_NAME
) F,
(
SELECT TABLESPACE_NAME,
ROUND (SUM (BYTES/1048576)) TOTAL_SPACE
FROM DBA_DATA_FILES WHERE TABLESPACE_NAME NOT IN (‘SYSTEM’,’SYSAUX’,’TEMP’,’USERS’,’UNDOTBS1′,’UNDOTBS2′)
GROUP BY TABLESPACE_NAME
) T
WHERE F.TABLESPACE_NAME = T.TABLESPACE_NAME
AND (ROUND ((F.FREE_SPACE/T.TOTAL_SPACE)*100)) < 10;
spool off
exit
SQL1
if [ `cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.alert|wc -l` -gt 0 ]
then
echo Tablespace less than 10% free on ${DATABASE}.Please add space as necassary >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.tmp
cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.alert >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.tmp
mailx -s “Tablespace percent usage for ${DATABASE} at $datevar” $DBA < /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.tmp
#####mv /disk1/tablespace.tmp /disk1/tablespace_$datevar2.alert
rm /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.alert
rm /rac/app/oracle/product/11.2.0/dba-scripts/monitor/tablespace.tmp
fi
- Script to gather SGA stats in timely manner
####################################################
##
##SGA.sh
##
####################################################
export ORACLE_HOME=/rac/app/oracle/product/11.2.0
export ORACLE_SID=TEST01
export ORACLE_UNQNAME=TEST01
export PATH=$PATH:$ORACLE_HOME/bin:$ORACLE_HOME/rdbms/admin:$ORACLE_HOME/lib
export alrt=/rac/app/oracle/diag/rdbms/test01/TEST01/trace/alert_test011.log
export asmalrt=/rac/app/oracle/diag/asm/+asm/+ASM1/trace/alert_+ASM1.log
export TNS_ADMIN=$ORACLE_HOME/network/admin
MAILID=nimai.karmakar@hotmail.com
#
#
#
sqlplus -s “/ as sysdba” << SQL1
set feed off
set linesize 100
set pagesize 200
spool /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/sga_stats.log
col inst_id format 9999999 heading “INSTANCE ID”
col sga_size/1024 format 9999999 heading “SGA SIZE”
col sga_size_factor format 9999999 heading “SGA SIZE FACTOR”
col estd_physical_reads format 9999999 heading “PHYSICAL READ”
select inst_id, sga_size/1024, sga_size_factor, estd_db_time, estd_db_time_factor, estd_physical_reads from gv\$sga_target_advice
order by inst_id, sga_size_factor;
spool off
exit;
SQL1
if [ `cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/sga_stats.log|wc -l` -gt 0 ]; then
cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/sga_stats.log
mailx -s “Statistics at `date +%H+%M` for `hostname`” $MAILID << /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/sga_stats.log
fi
exit
- Script to check Server Process (RAC)
#!/bin/bash
export PATH=$PATH:/grid/app/bin/
SUBJECT=”Server Process failed for – Server `hostname` on `date ‘+%m/%d/%y %X %A ‘`”
REPSUBJECT=”Server `hostname` health check report at `date +%H:%M` hours on `date +%d-%m-%Y`”
ERRLOG=$MONITOR_DIR/logs/server_process_check.log
REPORT=$MONITOR_DIR/logs/server_process_report.txt
BODY=$MONITOR_DIR/server_process_report_email_body.txt
## Delete the errorlog file if found
/usr/bin/find $ERRLOG -type f -exec rm {} \; 2>&1 > /dev/null
##Report recipients
MAILID=’nimai.karmakar@hotmail.com’
chour=`date +%H“date +%M`
if [ `grep -i “TEST01 HEALTH CHECK FOR” $REPORT | wc -l` -eq 0 ]; then
echo “—————————-TEST01 HEALTH CHECK FOR `date +%m_%d_%Y`———————————” > $REPORT
fi
echo ” ” >> $REPORT
echo “—————————-CRS process status `date +%m_%d_%Y_%H:%M`—————————-” >> $REPORT
crsctl check crs | while read outputline
do
if test `echo $outputline | grep ‘online’ | wc -l` -eq 0 ## This will check if the CRS process is online or not
then
echo “Date : “`date ‘+%m/%d/%y %X %A ‘` >> $ERRLOG
echo “Details :”`crsctl check crs` >> $ERRLOG
echo ” ” >> $ERRLOG
echo ” Details :”`crsctl check crs` >> $REPORT
echo ” ” >> $REPORT
echo ” Skiiping other tests ” >> $REPORT
echo ” ” >> $ERRLOG
##mutt -s “$SUBJECT” $MAILTO < $ERRLOG
/bin/mail -s “$SUBJECT” “$MAILID”
exit
else
echo ” Details :”`crsctl check crs` >> $REPORT
echo ” ” >> $REPORT
fi
done
echo “—————————-PMON process Status count on `date +%m_%d_%Y_%H:%M`—————————-” >> $REPORT
if test `ps -ef|grep pmon|grep -v grep |wc -l` -ne 2 ## This will check the no of pmon process for each of the server
then
echo “Date : “`date ‘+%m/%d/%y %X %A ‘` >> $ERRLOG
echo “Details :”`ps -ef|grep pmon|grep -v ‘grep’ |wc -l` >> $ERRLOG
echo ” ” >> $ERRLOG
echo ” PMON process not found. Oracle Instance on `hostname` may be down . Require immediate attention” >> $ERRLOG
echo ” ” >> $REPORT
echo ” Skiiping other tests ” >> $REPORT
echo ” ” >> $ERRLOG
/bin/mail -s “$SUBJECT” “$MAILID”
exit
else
echo ” Details :”`crsctl check crs` >> $REPORT
echo ” ” >> $REPORT
fi
echo “—————————-Listener Status on `date +%m_%d_%Y_%H:%M`—————————-” >> $REPORT
##Check whether listener is running. Output should be 1
if test `ps -ef|grep tnslsnr | grep -v “grep” |wc -l` -ne 1 ##Check the no of listener running.
then
echo “Date : “`date ‘+%m/%d/%y %X %A ‘` >> $ERRLOG
echo “Details :”`ps -ef|grep tnslsnr |grep -v ‘grep’ |wc -l` >> $ERRLOG
echo ” ” >> $ERRLOG
echo ” Listener on `hostname` may be down . Require immediate attention” >> $ERRLOG
echo ” ” >> $REPORT
echo ” ” >> $ERRLOG
/bin/mail -s “$SUBJECT” “$MAILID”
else
echo ” Details :”`crsctl check crs` >> $REPORT
echo ” ” >> $REPORT
fi
echo “—————————-Checking number of oracle processes `date +%m_%d_%Y_%H:%M`—————————-” >> $REPORT
##Check Process count of “oracle” user. Output should be less than or equal to 1500
if test `ps -ef|grep -i oracle |wc -l` -ge 1000
then
echo “Date : “`date ‘+%m/%d/%y %X %A ‘` >> $ERRLOG
echo “Details : “`ps -ef|grep -i oracle|wc -l` >> $ERRLOG
echo ” ”
echo ” Count of processes exceeded 1000. Require immediate attention” >> $ERRLOG
echo ” ” >> $ERRLOG
echo ” ” >> $ERRLOG
/bin/mail -s “$SUBJECT” “$MAILID”
else
echo “Number of oracle processes: ” `ps -ef|grep -i oracle |wc -l` >> $REPORT
echo ” ” >> $REPORT
fi
##Send the report at particular times (e.g 1500 hours or 2300 hours)
if [ $chour -ge 1500 -a $chour -lt 1502 ]; then
mutt -s “$REPSUBJECT” -a $REPORT $MAILID < $BODY
if [ $? -eq 0 ]; then
cp $REPORT $MONITOR_DIR/logs/server_process_report_`date +%d-%m-%Y`.txt
> $REPORT
fi
fi
if [ $chour -ge 2350 -a $chour -lt 2355 ]; then
mutt -s “$REPSUBJECT” -a $REPORT $MAILID < $BODY
if [ $? -eq 0 ]; then
cp $REPORT $MONITOR_DIR/logs/server_process_report_`date +%d-%m-%Y`.txt
> $REPORT
fi
fi
exit
- Script for purging old files
echo ‘Setting your environment’
###ORACLE_SID=#replace with your SID
ORACLE_SID=TEST01
export ORACLE_SID
###
###BDUMP=#replace with your BDUMP path
BDUMP=/rac/app/oracle/diag/rdbms/test01/TEST01/trace
export BDUMP
###
###ADUMP=#replace with your ADUMP path
ADUMP=/rac/app/oracle/admin/TEST01/adump
export ADUMP
###
###UDUMP=#replace with your UDUMP path
UDUMP=/rac/app/oracle/diag/rdbms/test01/TEST01/trace
export UDUMP
DT=`date “+%d%m%y”`
export DT
PID=${$}
export PID
FSEQ=${PID}
export FSEQ
################## Creating Backup Dir if not exist #############################
echo ‘Creating Backup Dir if not exist’
mkdir -p $BDUMP/bdump_oldfiles
mkdir -p $UDUMP/udump_oldfiles
mkdir -p $ADUMP/adump_oldfiles
#### Deleting old Alert log files and trace files################################
echo ‘Deleting old Alert log files and trace files’
cd $BDUMP/bdump_oldfiles
find . -name “*.trc.gz” -mtime +5 -exec rm {} \ ;
find . -name “*.log.gz” -mtime +5 -exec rm {} \ ;
cd $BDUMP
ls -lrt | grep “.trc” | awk ‘{print “mv “$9 ” $BDUMP/bdump_oldfiles “}’ > /tmp/mv$FSEQ.sh
sh /tmp/mv$FSEQ.sh
rm /tmp/mv$FSEQ.sh
#### Backup and Purging of Alert logfile #######################################
echo ‘Backup and Purging of Alert logfile’
cd $BDUMP
cp alert_$ORACLE_SID.log $BDUMP/bdump_oldfiles/alert_$ORACLE_SID.log
cd $BDUMP
> alert_$ORACLE_SID.log
#### Compression of old Alert log files ########################################
gzip -f $BDUMP/bdump_oldfiles/*.log
#### Deleting old user trace files #############################################
cd $UDUMP/udump_oldfiles
find /$UDUMP/udump_oldfiles -name “*.trc.gz” -mtime +5 -exec rm {} \ ;
cd $UDUMP
ls -lrt | grep “.trc” | awk ‘{print “mv “$9 ” $UDUMP/udump_oldfiles “}’ > /tmp/mv$FSEQ.sh
sh /tmp/mv$FSEQ.sh
rm /tmp/mv$FSEQ.sh
cd $UDUMP/udump_oldfiles
ls -lrt | grep “.trc” | grep -v “.gz” | awk ‘{print “gzip -f ” $9 }’ > /tmp/gzip$FSEQ.sh
sh /tmp/gzip$FSEQ.sh
rm /tmp/gzip$FSEQ.sh
#### Deleting old audit files ##################################################
cd $ADUMP/oldfiles
find /$ADUMP/oldfiles -name “*.aud.gz” -mtime +5 -type f -exec rm {} \
cd $ADUMP
ls -lrt | grep “.aud” | awk ‘{print “mv “$9 ” $ADUMP/oldfiles “}’ > /tmp/mv$FSEQ.sh
sh /tmp/mv$FSEQ.sh
rm /tmp/mv$FSEQ.sh
cd $ADUMP/oldfiles
ls -lrt | grep “.aud” | grep -v “.gz” | awk ‘{print “gzip -f ” $9 }’ > /tmp/gzip$FSEQ.sh
sh /tmp/gzip$FSEQ.sh
rm /tmp/gzip$FSEQ.sh
############################# END ######################################
- This scripts checks for database recovery area usage and send an alert e-mail.
#####################################################################
## db_recovery_area_usage.sh ##
##
## Author : Nimai Karmakar
#####################################################################
###!/bin/bash
##
#######
####### Start of configuration
#######
######## Oracle Enviorment variables ##########
##
export ORACLE_BASE=/u01/app
export ORACLE_HOME=/u01/app/oracle/product/11.2.0/test01
export JAVA_HOME=/usr/java/jdk1.6.0_30/
export PATH=${ORACLE_HOME}/bin:${JAVA_HOME}/bin:$PATH:.:
export ORACLE_SID=TEST01
##
######## Other variables #################
##
DBA=nimai.karmakar@hotmail.com
DATABASE=TEST01
datevar=$(date)
datevar2=$(date ‘+%Y-%m-%d-%H-%M’)
##
#######
####### End of configuration
#######
##
sqlplus -s “/ as sysdba” << SQL1
set feed off
set linesize 100
set pagesize 200
spool /disk1/scripts/archive.alert
set lines 100
col Location format a60
select name “Location”
, floor(space_limit / 1024 / 1024/1024) “Size GB”
, ceil(space_used / 1024 / 1024/1024) “Used GB”
from v\$recovery_file_dest
order by name
/
spool off
exit
SQL1
if [ `cat /disk1/scripts/archive.alert|awk ‘{print $3}’|wc -l` -gt 0 ]
then
cat /disk1/scripts/archive.alert > /disk1/scripts/archive.tmp
mail -s “DB Recovery area usage for ${DATABASE} at $datevar” $DBA < /disk1/scripts/archive.tmp
rm /disk1/scripts/archive.tmp
rm /disk1/scripts/archive.alert
fi
- This scripts checks for any ORA errors at alert log file and if found any ORA error send an alert e-mail.
#####################################################################
## check_alert_log.sh ##
##
## Author : Nimai Karmakar
#####################################################################
###!/bin/bash
##
#######
####### Start of configuration
#######
######## Oracle Enviorment variables ##########
##
export ORACLE_BASE=/u01/app
export ORACLE_HOME=/u01/app/oracle/product/11.2.0/test01
export JAVA_HOME=/usr/java/jdk1.6.0_30/
export PATH=${ORACLE_HOME}/bin:${JAVA_HOME}/bin:$PATH:.:
export ORACLE_SID=TEST01
##
######## Other variables #################
##
DBA=nimai.karmakar@hotmail.com
datevar=$(date)
##
#######
#######
#######
####### End of configuration
#######
#######
##
cd $ORACLE_BASE/diag/rdbms/test01/$ORACLE_SID/trace/
if [ -f alert_${ORACLE_SID}.log ]
then
tail -200 alert_${ORACLE_SID}.log > /disk1/scripts/alert_work.log
grep ORA- /disk1/scripts/alert_work.log >> /disk1/scripts/alert.err
grep Shut /disk1/scripts/alert_work.log >> /disk1/scripts/alert.err
fi
export error << `cat /disk1/scripts/alert.err`
if[ `cat /disk1/scripts/alert.err|wc -l` -gt 0 ]
then
mailx -s “${ORACLE_SID} ORACLE ALERT ERRORS $datevar” $DBA < /disk1/scripts/alert_work.log
fi
fi
rm -f /disk1/scripts/alert.err
rm -f /disk1/scripts/alert_work.log
exit
- SCRIPT FOR CHECKING MOUNT POINT SPACE IN UNIX
######################################
#!/bin/ksh
#rm /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
echo “df -k output for `date` `uname -n`” > /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
echo ” ” >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
echo “File system usage exceeded the threshold on `uname -n` server- `date`” >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
echo ” ” >> /tmp/dfk.txt
i=1
while [ $i -le `df -k | grep -v proc | grep -v capacity | wc -l` ] ;do
if [ `df -k | grep -v proc | grep -v capacity | head -n $i | tail -1 | awk ‘{print $5}’ | \sed -e ‘s/%//’` -gt 90 ] ; then
echo “File system usage exceeded the threshold on `uname -n` server- `date`” >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
echo ” ” >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
df -k | grep -v proc | grep -v capacity | head -n $i | tail -1 >> /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
fi
((i=i+1))
done
if [ `cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt | wc -l` -gt 2 ] ; then
#cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt | mailx -s “File system full alert” nimai.karmakar@hotmail.com
cat /rac/app/oracle/product/11.2.0/dba-scripts/monitor/logs/ERROR_LOG.txt
else
exit
fi
- Script to check the node Eviction (RAC)
#!/bin/bash
. ~/.bash_profile
mailid=’nimai.karmakar@hotmail.com’
date=`date +%Y-%m-%d” “%H:`
alertlog=/rac/app/oracle/product/11.2.0/dba-scripts/logs/ioerr.log
errlog=err_`date –date=”0 days ago” +_%d_%m_%y_%H_%M`.txt
err1=`grep -B 2 “$date” $alertlog | grep -A 2 “An I/O error” | wc -l`
err2=`grep -B 2 “$date” $alertlog | grep -A 2 “Network communication with node” | wc -l`
if [ $err1 -ge 1 -o $err2 -ge 1 ]
then
echo “Node eviction condition found in server `hostname`. Immediately check DB alert log for further action” >> $errlog
echo “” >> $errlog
echo `grep -B 2 “$date” $alertlog | grep -A 2 “An I/O error”` >> $errlog
echo “” >> $errlog
echo =`grep -B 2 “$date” $alertlog | grep -A 2 “Network communication with node”` >> $errlog
mutt -s “Node evition type condition found in `hostname`” $mailto < $errLog
rm $errlog
fi
Thats it, Hope it’s helpful.
Thanks & Regards
Nimai Karmakar