#!/bin/sh
#-------------------------------------------------------------------------------
# $Id: refreshdb,v 1.5 2003/09/08 20:05:35 ian Exp $
# $Source: /var/lib/cvs/refreshdb/refreshdb,v $
# $Copyright: Copyright (c) 2001 Ian Jones, IMiJ Ltd. $
#-------------------------------------------------------------------------------
# $Log: refreshdb,v $
# Revision 1.5  2003/09/08 20:05:35  ian
# Fixed bug where one more process is created than requested.
# Also -a seems to have slipped back in!
# BUGZID:49
#
# Revision 1.4  2003/08/25 20:35:56  ian
# Updated with various small bug fixes.
# Moved split of unload file to happen on unload rather than load so that more
# than one database may be loaded after initial unload.
# BUGZID: 37
#
# Revision 1.3.0.1  2003/08/25 20:17:09  ian
# Imported public release version (1.3) into CVS.
# BUGZID: 37
#
# Revision 1.3  2001/12/07 23:37:21  imij
# Added copyright.
#
# Revision 1.2  2001/12/07 22:31:49  imij
# Linux doesn't like -a option on split so removed it.
#
# Revision 1.1  2001/12/07 22:01:55  imij
# Initial revision
#-------------------------------------------------------------------------------

usage ()
{
  echo "Usage: $0 [ -klu ] [ -f conf_file ]"
  echo " [ -d delete_file ] [ -e exclude_file ] [ -i include_file ] [ -r refsql_file ]"
  echo " [ -s sourcedb@server] [ -t targetdb@server ]"
  exit 1;
}

gettables ()
{
  #
  # We need a list of the tables available in both the target and source.
  echo "Getting tables in $SOURCE_DB..."

dbaccess ${SOURCE_DB} - <<!
unload to ${TMP_TABLES_ID}1
select tabname, tabid
from systables
where tabid > 99
and tabtype = "T"
and tabname not matches "sys*"
order by tabname
;
!

  cat ${TMP_TABLES_ID}1 | cut -d"|" -f1 > ${TMP_TABLES}1

  echo "Getting tables in $TARGET_DB..."

dbaccess ${TARGET_DB} - <<!
unload to ${TMP_TABLES_ID}2
select tabname, tabid
from systables
where tabid > 99
and tabtype = "T"
and tabname not matches "sys*"
order by tabname
;
!

  cat ${TMP_TABLES_ID}2 | cut -d"|" -f1 > ${TMP_TABLES}2

  comm -12 ${TMP_TABLES}1 ${TMP_TABLES}2 > ${TMP_TABLES}3
  mv ${TMP_TABLES}3 ${TMP_TABLES}

}

processincludes ()
{
  #
  # If the include option has been used, only use those tables from list.
  if [ -n "$INCLUDE_FILE" ]
  then
    if [ -s "$INCLUDE_FILE" ]
    then
      echo "Processing include file..."
	  cat $INCLUDE_FILE | sort | uniq > $TMP_DIR/$INCLUDE_FILE.$PID
	  INCLUDE_FILE=$TMP_DIR/$INCLUDE_FILE.$PID
      comm -12 $TMP_TABLES $INCLUDE_FILE > ${TMP_TABLES}.tmp
      mv ${TMP_TABLES}.tmp $TMP_TABLES
    else
      echo "The include_file $INCLUDE_FILE is empty or non-existant!"
      usage
    fi
  fi

  if [ ! -s "$TMP_TABLES" ]
  then
    echo "There do not seem to be any tables left to refresh, goodbye!"
    exit 2
  fi
}

processexcludes ()
{
  #
  # If the exclude option has been used, remove those tables from list.
  if [ -n "$EXCLUDE_FILE" ]
  then
    if [ -s "$EXCLUDE_FILE" ]
    then
      echo "Processing exclude file..."
	  cat $EXCLUDE_FILE | sort | uniq > $TMP_DIR/$EXCLUDE_FILE.$PID
	  EXCLUDE_FILE=$TMP_DIR/$EXCLUDE_FILE.$PID
      comm -23 $TMP_TABLES $EXCLUDE_FILE > ${TMP_TABLES}.tmp
      mv ${TMP_TABLES}.tmp $TMP_TABLES
    else
      echo "The exclude_file $EXCLUDE_FILE is empty or non-existant!"
      usage
    fi
  fi

  if [ ! -s "$TMP_TABLES" ]
  then
    echo "There do not seem to be any tables left to refresh, goodbye!"
    exit 2
  fi
}

processdeletes ()
{
  #
  # If the delete option has been used, remove those tables from main list.
  if [ -n "$DELETE_FILE" ]
  then
    if [ -s "$DELETE_FILE" ]
    then
      echo "Processing delete file..."
	  cat $DELETE_FILE | sort | uniq > $TMP_DIR/$DELETE_FILE.$PID
	  DELETE_FILE=$TMP_DIR/$DELETE_FILE.$PID
      # Can only delete from tables also referenced in the include etc.
      comm -12 $TMP_TABLES $DELETE_FILE > ${TMP_TABLES}.del
      comm -23 $TMP_TABLES $DELETE_FILE > ${TMP_TABLES}.tmp
      mv ${TMP_TABLES}.tmp $TMP_TABLES
    else
      echo "The delete_file $DELETE_FILE is empty or non-existant!"
      usage
    fi
  fi
}

processrefsql ()
{
  #
  # If the refsql option has been used, create unload sql scripts for them.
  if [ -n "$REFSQL_FILE" ]
  then
    if [ -s "$REFSQL_FILE" ]
    then
      echo "Processing refsql file..."
      # Can only select from tables also referenced in the include etc.
      cat $REFSQL_FILE | grep -v "#" | sed '/^$/d' | awk -F"|" '{ printf("%s %s\n",$1,$2) }' | while read TABLE SQL
      do
        eval echo "unload to ${UNL_DIR}/${TABLE}.unl ${SQL}\;" >> ${TMP_DIR}/${TABLE}.sql
      done
    else
      echo "The refsql_file $REFSQL_FILE is empty or non-existant!"
      usage
    fi
  fi
}

unloadtables ()
{
  echo "Unloading data from $SOURCE_DB..."
  for TABLE in `cat ${TMP_TABLES}`
  do
    UNL_FILE=${UNL_DIR}/$TABLE.unl
    #
    # If "u" option set, then only unload
    # file does not exist or is empty.
    if [ -n "$uOPT" -a -s $UNL_FILE ]
    then
      echo "Skipping ${SOURCE_DB}:${TABLE}"
    else
      echo "Unloading ${SOURCE_DB}:${TABLE}"
      unloadtable >> ${TMP_DIR}/$TABLE.log 2>&1 &
      maxproccheck
    fi
  done
  echo "Waiting for unloads to finish..."
  wait
}

unloadtable ()
{
touch $TMP_DIR/$TABLE.PROCESSING

if [ -r ${TMP_DIR}/${TABLE}.sql ]
then
  echo ${TMP_DIR}/${TABLE}.sql
  dbaccess ${SOURCE_DB} ${TMP_DIR}/${TABLE}.sql
else

dbaccess ${SOURCE_DB} - <<!
unload to $UNL_FILE
select *
from ${TABLE}
!

fi

#
# Split the file to beat transaction levels.
rm -f ${UNL_FILE}_*
split -${MAXREC} ${UNL_FILE} ${UNL_FILE}_

rm -f $TMP_DIR/$TABLE.PROCESSING
}

deletetables ()
{
  if [ -n "$DELETE_FILE" ]
  then
    echo "Processing 'delete onlys' for $TARGET_DB..."
    for TABLE in `cat ${TMP_TABLES}.del`
    do
      echo "Deleting Data in ${TARGET_DB}:${TABLE}"
      deletetable >> ${TMP_DIR}/$TABLE.log 2>&1
    done
  fi
  echo "Deleting data for refresh tables in $TARGET_DB..."
  for TABLE in `cat ${TMP_TABLES}`
  do
    echo "Deleting Data in ${TARGET_DB}:${TABLE}"
    deletetable >> ${TMP_DIR}/$TABLE.log 2>&1
  done
  echo "Waiting for deletes to finish..."
  wait
}

deletetable ()
{
touch $TMP_DIR/${TABLE}.PROCESSING

#
# Drop the table.
dbschema -d ${TARGET_DB} -t ${TABLE} -p all -ss ${TMP_DIR}/${TABLE}-sch.sql
dbaccess ${TARGET_DB} - <<!
drop table ${TABLE};
!

#
# Re-Create the table and set permissions.
dbaccess ${TARGET_DB} ${TMP_DIR}/${TABLE}-sch.sql

#
# Best fix statistics before data loaded.
dbaccess ${TARGET_DB} <<!
set lock mode to wait;
set isolation to committed read;
update statistics low for table ${TABLE} drop distributions;
update statistics high for table ${TABLE} distributions only;
!

rm -f $TMP_DIR/${TABLE}.PROCESSING
}

loadtables ()
{
  echo "Loading data into $TARGET_DB..."
  for TABLE in `cat ${TMP_TABLES}`
  do
    UNL_FILE=${UNL_DIR}/$TABLE.unl
    echo "Loading data into ${TARGET_DB}:${TABLE}"
    loadtable >> ${TMP_DIR}/$TABLE.log 2>&1 &
    maxproccheck
  done
  echo "Waiting for loads to finish..."
  wait
}

loadtable ()
{
touch $TMP_DIR/${TABLE}.PROCESSING

for LOAD_FILE in `ls ${UNL_FILE}_* 2>/dev/null`
do
#
# Disable Constraints, Triggers And Indexes.
dbaccess ${TARGET_DB} - <<!
set lock mode to wait;
set isolation to committed read;
set constraints,triggers,indexes for ${TABLE} disabled;
!
#
# Lock Table And Load Data Chunk.
dbaccess ${TARGET_DB} - <<!
set lock mode to wait;
set isolation to committed read;
begin work;
lock table ${TABLE} in exclusive mode;
load from $LOAD_FILE
insert into $TABLE
;
commit work;
!
#
# Enable Constraints, Triggers And Indexes.
dbaccess ${TARGET_DB} - <<!
set lock mode to wait;
set isolation to committed read;
set constraints,triggers,indexes for ${TABLE} enabled;
!
done

#
# Best fix statistics now data has been loaded.
dbaccess ${TARGET_DB} <<!
set lock mode to wait;
set isolation to committed read;
update statistics low for table ${TABLE} drop distributions;
update statistics high for table ${TABLE} distributions only;
!

rm -f $TMP_DIR/${TABLE}.PROCESSING
}

maxproccheck ()
{
sleep $MAXPROC_SLEEP
NUM_PROCESSING=`ls $TMP_DIR/*.PROCESSING 2>/dev/null| wc -l`
while [ $NUM_PROCESSING -ge $MAXPROC ]
do
  echo "MAXPROC level ($MAXPROC) hit, waiting a little bit..."
  sleep $MAXPROC_SLEEP_CHECK
  NUM_PROCESSING=`ls $TMP_DIR/*.PROCESSING 2>/dev/null | wc -l`
done
}

############
### MAIN ###
############

PROG=$0
CONF=$PROG.conf
dOPT=
eOPT=
iOPT=
kOPT=
lOPT=
rOPT=
sOPT=
tOPT=
uOPT=

while getopts d:e:f:i:klr:s:t:u OPT
do
  case $OPT in
    \?)
    usage
    ;;
    d)
    dOPT=$OPTARG
    ;;
    e)
    eOPT=$OPTARG
    ;;
    f)
    CONF=$OPTARG
    ;;
    i)
    iOPT=$OPTARG
    ;;
    k)
    kOPT=yes
    ;;
    l)
    lOPT=yes
    ;;
    r)
    rOPT=$OPTARG
    ;;
    s)
    sOPT=$OPTARG
    ;;
    t)
    tOPT=$OPTARG
    ;;
    u)
    uOPT=yes
    ;;
  esac
done
shift `expr $OPTIND - 1`

. $CONF

#
# Check/set internal variables.
if [ -n "$sOPT" ]
then
  SOURCE_DB=$sOPT
fi

if [ -n "$tOPT" ]
then
  TARGET_DB=$tOPT
fi

if [ -z "$SOURCE_DB" -o -z "$TARGET_DB" ]
then
  echo "Error: You must supply a sourcedb and targetdb via options or conf"
  usage
else
  echo "Source database: $SOURCE_DB"
  echo "Target database: $TARGET_DB"
fi

if [ -n "$uOPT" ]
then
  echo "u option: Will not unload previously unloaded tables."
fi

if [ -n "$lOPT" ]
then
  echo "l option: Will not load any tables."
fi

if [ -n "$dOPT" ]
then
  DELETE_FILE=$dOPT
fi

if [ -n "$eOPT" ]
then
  EXCLUDE_FILE=$eOPT
fi

if [ -n "$iOPT" ]
then
  INCLUDE_FILE=$iOPT
fi

if [ -n "$rOPT" ]
then
  REFSQL_FILE=$rOPT
fi

PID=$$
TMP_DIR=${TMP_DIR:-./.refreshdb.tmp.d}
UNL_DIR=${UNL_DIR:-./.refreshdb.unl.d}
UNL_DIR=$UNL_DIR/$SOURCE_DB
MAXPROC=${MAXPROC:-10}
TMP_TABLES=${TMP_TABLES:-refreshdb.tables}
TMP_TABLES=$TMP_DIR/$TMP_TABLES
TMP_TABLES_ID=${TMP_TABLES_ID:-refreshdb.tables.withid}
TMP_TABLES_ID=$TMP_DIR/$TMP_TABLES_ID
REFRESHDB_LOG2=${REFRESHDB_LOG2:-refreshdb.log2}
REFRESHDB_LOG2=$TMP_DIR/$REFRESHDB_LOG2

#
# Make temp directory.
if [ -d $TMP_DIR ]
then
  echo "Removing existing $TMP_DIR..."
  rm -r $TMP_DIR
fi
mkdir $TMP_DIR
chmod 777 $TMP_DIR

#
# Make unload directory.
if [ ! -d $UNL_DIR ]
then
  mkdir -p $UNL_DIR
  chmod 777 $TMP_DIR
fi

#
# First, get a list of tables that we could refresh.
gettables 2>>$REFRESHDB_LOG2

#
# If the include option has been used, only use those tables from list.
processincludes 2>>$REFRESHDB_LOG2

#
# If the exclude option has been used, remove those tables from list.
processexcludes 2>>$REFRESHDB_LOG2

#
# If the delete option has been used, remove those tables from list.
processdeletes 2>>$REFRESHDB_LOG2

#
# Process refsql file and remove relevant tables from list.
processrefsql 2>>$REFRESHDB_LOG2

#
# OK, should now have at least a list of tables to refresh.
unloadtables 2>>$REFRESHDB_LOG2

#
# Maybe have a list of tables just to clean out.
deletetables 2>>$REFRESHDB_LOG2

#
# Well, best actually load some data into the target then...
# Only load tables if "l" option NOT specified.
if [ -z "$lOPT" ]
then
  loadtables 2>>$REFRESHDB_LOG2
fi

#
# Clean up.
echo "Cleaning up..."
if [ -z "$kOPT" ]
then
  rm -r $TMP_DIR
fi

echo "Finished."
exit 0
