@@ -0,0 +1,780 @@
+#!/bin/bash
+##
+# AUTHOR: DANIELE BAILO
+# https://github.com/danielebailo
+# www.danielebailo.it
+#
+# Contributors:
+# * dalgibbard - http://github.com/dalgibbard
+# * epos-eu - http://github.com/epos-eu
+# * maximilianhuber - http://github.com/maximilianhuber
+# * ahodgkinson - http://github.com/ahodgkinson (quiet-mode, timestamp, compress)
+##
+
+## This script allows for the Backup and Restore of a CouchDB Database.
+## Backups are produced in a format that can later be uploaded via the _bulk_docs endpoint (as used by this script).
+
+## USAGE
+## * To Backup:
+## ** example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
+## * To Restore:
+## ** example: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
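+## * Further examples (illustrative only; the flags used are defined in the option parsing below):
+## ** compressed, timestamped backup: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json -z -T
+## ** quiet restore, creating the DB if missing: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json -c -q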
+
+
+###################### CODE STARTS HERE ###################
+scriptversionnumber="1.1.10"
+
+##START: FUNCTIONS
+usage(){
+ echo
+ echo "Usage: $0 [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>] [-c] [-q] [-z] [-T]"
+ echo -e "\t-b Run script in BACKUP mode."
+ echo -e "\t-r Run script in RESTORE mode."
+ echo -e "\t-H CouchDB Hostname or IP. Can be provided with or without 'http(s)://'"
+ echo -e "\t-d CouchDB Database name to backup/restore."
+ echo -e "\t-f File to Backup-to/Restore-from."
+ echo -e "\t-P Provide a port number for CouchDB [Default: 5984]"
+ echo -e "\t-u Provide a username for auth against CouchDB [Default: blank]"
+ echo -e "\t -- can also set with 'COUCHDB_USER' environment var"
+ echo -e "\t-p Provide a password for auth against CouchDB [Default: blank]"
+ echo -e "\t -- can also set with 'COUCHDB_PASS' environment var"
+ echo -e "\t-l Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)"
+ echo -e "\t-t Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)"
+ echo -e "\t-a Number of times to Attempt import before failing [Default: 3] (Restore Only)"
+ echo -e "\t-c Create the DB on demand, if it is not listed."
+ echo -e "\t-q Run in quiet mode. Suppress output, except for errors and warnings."
+ echo -e "\t-z Compress output file (Backup Only)"
+ echo -e "\t-T Add datetime stamp to output file name (Backup Only)"
+ echo -e "\t-V Display version information."
+ echo -e "\t-h Display usage information."
+ echo
+ echo "Example: $0 -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password"
+ echo
+ exit 1
+}
+
+scriptversion(){
+ echo
+ echo -e "\t** couchdb-dump version: $scriptversionnumber **"
+ echo
+ echo -e "\t URL:\thttps://github.com/danielebailo/couchdb-dump"
+ echo
+ echo -e "\t Authors:"
+ echo -e "\t Daniele Bailo (bailo.daniele@gmail.com)"
+ echo -e "\t Darren Gibbard (dalgibbard@gmail.com)"
+ echo -e "\t Maximilian Huber (maximilian.huber@tngtech.com)"
+ echo
+ exit 1
+}
+
+checkdiskspace(){
+## This function checks the available disk space for a given path against the space required.
+## Example call: checkdiskspace /path/to/file/to/create 1024
+ location=$1
+ KBrequired=$2
+ if [ "x$location" = "x" ]||[ "x$KBrequired" = "x" ]; then
+ echo "... ERROR: checkdiskspace() was not passed the correct arguments."
+ exit 1
+ fi
+
+ stripdir=${location%/*}
+ KBavail=$(df -P -k ${stripdir} | tail -n 1 | awk '{print$4}' | $sed_cmd -e 's/K$//')
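+ # df -P -k reports sizes in 1K blocks; field 4 of the last output line is the space available on the filesystem holding $stripdir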
+
+ if [ $KBavail -ge $KBrequired ]; then
+ return 0
+ else
+ echo
+ echo "... ERROR: Insufficient Disk Space Available:"
+ echo " * Full Path: ${location}"
+ echo " * Affected Directory: ${stripdir}"
+ echo " * Space Available: ${KBavail} KB"
+ echo " * Total Space Required: ${KBrequired} KB"
+ echo " * Additional Space Req: $(expr $KBrequired - $KBavail) KB"
+ echo
+ exit 1
+ fi
+}
+## END FUNCTIONS
+
+# Catch no args:
+if [ "x$1" = "x" ]; then
+ usage
+fi
+
+# Default Args
+username=""
+password=""
+backup=false
+restore=false
+port=5984
+OPTIND=1
+lines=5000
+attempts=3
+createDBsOnDemand=false
+verboseMode=true
+compress=false
+timestamp=false
+
+while getopts ":h?H:d:f:u:p:P:l:t:a:c?q?z?T?V?b?B?r?R?" opt; do
+ case "$opt" in
+ h) usage;;
+ b|B) backup=true ;;
+ r|R) restore=true ;;
+ H) url="$OPTARG" ;;
+ d) db_name="$OPTARG" ;;
+ f) file_name="$OPTARG" ;;
+ u) username="${OPTARG}";;
+ p) password="${OPTARG}";;
+ P) port="${OPTARG}";;
+ l) lines="${OPTARG}" ;;
+ t) threads="${OPTARG}" ;;
+ a) attempts="${OPTARG}";;
+ c) createDBsOnDemand=true;;
+ q) verboseMode=false;;
+ z) compress=true;;
+ T) timestamp=true;;
+ V) scriptversion;;
+ :) echo "... ERROR: Option \"-${OPTARG}\" requires an argument"; usage ;;
+ *|\?) echo "... ERROR: Unknown Option \"-${OPTARG}\""; usage;;
+ esac
+done
+
+# If quiet option: Setup echo mode and curl '--silent' opt
+if [ "$verboseMode" = true ]; then
+ curlSilentOpt=""
+ echoVerbose=true
+else
+ curlSilentOpt="--silent"
+ echoVerbose=false
+fi
+
+# Trap unexpected extra args
+shift $((OPTIND-1))
+[ "$1" = "--" ] && shift
+if [ ! "x$@" = "x" ]; then
+ echo "... ERROR: Unknown Option \"$@\""
+ usage
+fi
+
+# Handle invalid backup/restore states:
+if [ $backup = true ]&&[ $restore = true ]; then
+ echo "... ERROR: Cannot pass both '-b' and '-r'"
+ usage
+elif [ $backup = false ]&&[ $restore = false ]; then
+ echo "... ERROR: Missing argument '-b' (Backup), or '-r' (Restore)"
+ usage
+fi
+# Handle empty args
+# url
+if [ "x$url" = "x" ]; then
+ echo "... ERROR: Missing argument '-H <COUCHDB_HOST>'"
+ usage
+fi
+# db_name
+if [ "x$db_name" = "x" ]; then
+ echo "... ERROR: Missing argument '-d <DB_NAME>'"
+ usage
+fi
+# file_name
+if [ "x$file_name" = "x" ]; then
+ echo "... ERROR: Missing argument '-f <FILENAME>'"
+ usage
+fi
+file_name_orig=$file_name
+
+# Get OS TYPE (Linux for Linux, Darwin for macOS)
+os_type=`uname -s`
+
+# Pick sed or gsed
+if [ "$os_type" = "FreeBSD" ]||[ "$os_type" = "Darwin" ]; then
+ sed_cmd="gsed";
+else
+ sed_cmd="sed";
+fi
+## Make sure it's installed
+echo | $sed_cmd 's/a//' >/dev/null 2>&1
+if [ ! $? = 0 ]; then
+ echo "... ERROR: please install $sed_cmd (gnu-sed) and ensure it is in your path"
+ exit 1
+fi
+
+# Validate thread count
+## If we're on a Mac, use sysctl
+if [ "$os_type" = "Darwin" ]; then
+ cores=`sysctl -n hw.ncpu`
+## If we're on FreeBSD, use sysctl
+elif [ "$os_type" = "FreeBSD" ]; then
+ cores=`sysctl kern.smp.cpus | awk -F ": " '{print $2}'`;
+## Check if nproc available- set cores=1 if not
+elif ! type nproc >/dev/null; then
+ cores=1
+## Otherwise use nproc
+else
+ cores=`nproc`
+fi
+if [ ! "x$threads" = "x" ]; then
+ if [ $threads -gt $cores ]; then
+ echo "... WARN: Thread setting of $threads is more than CPU count. Setting to $cores"
+ threads=$cores
+ else
+ $echoVerbose && echo "... INFO: Setting parser threads to $threads"
+ fi
+else
+ threads=`expr $cores - 1`
+fi
+
+# Validate Attempts, set to no-retry if zero/invalid.
+case $attempts in
+ ''|0|*[!0-9]*) echo "... WARN: Retry Attempt value of \"$attempts\" is invalid. Disabling Retry-on-Error."; attempts=1 ;;
+ *) true ;;
+esac
+
+## Manage the passing of http/https for $url:
+# Note: if the user wants to use 'https://' on a non-443 port, they must specify it explicitly in the '-H <HOSTNAME>' arg.
+if [ ! "`echo $url | grep -c http`" = 1 ]; then
+ if [ "$port" == "443" ]; then
+ url="https://$url";
+ else
+ url="http://$url";
+ fi
+fi
+
+# Manage the addition of port
+# If a port isn't already on our URL...
+if [ ! "`echo $url | egrep -c ":[0-9]*$"`" = "1" ]; then
+ # add it.
+ url="$url:$port"
+fi
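+# For illustration: '-H 127.0.0.1' with the default port becomes 'http://127.0.0.1:5984', while '-H https://couch.example.com -P 6984' keeps its scheme and gets ':6984' appended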
+
+# Check for empty user/pass and try reading in from Envvars
+if [ "x$username" = "x" ]; then
+ username="$COUCHDB_USER"
+fi
+if [ "x$password" = "x" ]; then
+ password="$COUCHDB_PASS"
+fi
+
+## Manage the addition of user+pass if needed:
+# Ensure, if one is set, both are set.
+if [ ! "x${username}" = "x" ]; then
+ if [ "x${password}" = "x" ]; then
+ echo "... ERROR: Password cannot be blank, if username is specified."
+ usage
+ fi
+elif [ ! "x${password}" = "x" ]; then
+ if [ "x${username}" = "x" ]; then
+ echo "... ERROR: Username cannot be blank, if password is specified."
+ usage
+ fi
+fi
+
+# Set the sed in-place and extended-regexp options
+sed_edit_in_place='-i.sedtmp'
+if [ "$os_type" = "Darwin" ]; then
+ sed_regexp_option='E'
+else
+ sed_regexp_option='r'
+fi
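+# '-i.sedtmp' edits files in place while leaving a .sedtmp backup (removed after each stage); the extended-regexp flag is '-E' on Darwin and '-r' elsewhere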
+# Allow for self-signed/invalid certs if method is HTTPS:
+if [ "`echo $url | grep -ic "^https://"`" = "1" ]; then
+ curlopt="-k"
+fi
+
+if [ ! "x${username}" = "x" ]&&[ ! "x${password}" = "x" ]; then
+ curlopt="${curlopt} -u ${username}:${password}"
+fi
+
+## Check for curl
+curl --version >/dev/null 2>&1 || { echo "... ERROR: This script requires 'curl' to be present."; exit 1; }
+
+# Check for tr
+echo | tr -d '\r' >/dev/null 2>&1 || { echo "... ERROR: This script requires 'tr' to be present."; exit 1; }
+
+##### SETUP OUR LARGE VARS FOR SPLIT PROCESSING (due to limitations in split on Darwin/BSD)
+AZ2="`echo {a..z}{a..z}`"
+AZ3="`echo {a..z}{a..z}{a..z}`"
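+# AZ2 and AZ3 hold every two-letter (aa..zz) and three-letter (aaa..zzz) suffix that 'split -a 2' / 'split -a 3' generate,
+# so the loops below can walk the split output in order without relying on GNU-only numeric suffixes.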
+
+### If user selected BACKUP, run the following code:
+if [ $backup = true ]&&[ $restore = false ]; then
+ #################################################################
+ ##################### BACKUP START ##############################
+ #################################################################
+
+ # If -T (timestamp) option, append datetime stamp ("-YYYYMMDD-hhmmss") before file extension
+ if [ "$timestamp" = true ]; then
+ datetime=`date "+%Y%m%d-%H%M%S"` # Format: YYYYMMDD-hhmmss
+ # If file_name has an extension, add the timestamp before it
+ if [[ $file_name =~ \.[a-zA-Z0-9][a-zA-Z0-9_]* ]]; then
+ file_name_ext=` echo "$file_name" | $sed_cmd 's/.*\.//'` # Get text after last '.'
+ file_name_base=`echo "$file_name" | $sed_cmd "s/\.${file_name_ext}$//"` # file_name without '.' & extension
+ file_name="$file_name_base-$datetime.$file_name_ext"
+ else # Otherwise add timestamp to the end of file_name
+ file_name="$file_name-$datetime"
+ fi
+ fi
+ $echoVerbose && echo "... INFO: Output file ${file_name}"
+
+ # Check if output already exists:
+ if [ -f ${file_name} ]; then
+ echo "... ERROR: Output file ${file_name} already exists."
+ exit 1
+ fi
+
+ # Grab our data from couchdb
+ curl ${curlSilentOpt} ${curlopt} -X GET "$url/$db_name/_all_docs?include_docs=true&attachments=true" -o ${file_name}
+ # Check for curl errors
+ if [ ! $? = 0 ]; then
+ echo "... ERROR: Curl encountered an issue whilst dumping the database."
+ rm -f ${file_name} 2>/dev/null
+ exit 1
+ fi
+ # Check for export errors
+ ERR_CHECK="`head -n 1 ${file_name} | grep '^{"error'`"
+ if [ ! "x${ERR_CHECK}" = "x" ]; then
+ echo "... ERROR: CouchDB reported: $ERR_CHECK"
+ exit 1
+ fi
+
+ # CouchDB has a tendency to output Windows carriage returns in its output -
+ # This messes up our attempts to sed things at the end of lines!
+ if grep -qU $'\x0d' $file_name; then
+ $echoVerbose && echo "... INFO: File may contain Windows carriage returns- converting..."
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ tr -d '\r' < ${file_name} > ${file_name}.tmp
+ if [ $? = 0 ]; then
+ mv ${file_name}.tmp ${file_name}
+ if [ $? = 0 ]; then
+ $echoVerbose && echo "... INFO: Completed successfully."
+ else
+ echo "... ERROR: Failed to overwrite ${file_name} with ${file_name}.tmp"
+ exit 1
+ fi
+ else
+ echo "... ERROR: Failed to convert file."
+ exit 1
+ fi
+ fi
+
+ ## Now we parse the output file to make it suitable for re-import.
+ $echoVerbose && echo "... INFO: Amending file to make it suitable for Import."
+ $echoVerbose && echo "... INFO: Stage 1 - Document filtering"
+
+ # If the input file is larger than 250MB, multi-thread the parsing:
+ if [ $(du -P -k ${file_name} | awk '{print$1}') -ge 256000 ]&&[ ! $threads -le 1 ]; then
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ KBreduction=$(($((`wc -l ${file_name} | awk '{print$1}'` * 80)) / 1024))
+ filesize=`expr $filesize + $(expr $filesize - $KBreduction)`
+ checkdiskspace "${file_name}" $filesize
+ $echoVerbose && echo "... INFO: Multi-Threaded Parsing Enabled."
+ if [ -f ${file_name}.threadaa ]; then
+ echo "... ERROR: Split files \"${file_name}.thread*\" already present. Please remove before continuing."
+ exit 1
+ elif [ -f ${file_name}.tmp ]; then
+ echo "... ERROR: Tempfile ${file_name}.tmp already present. Please remove before continuing."
+ exit 1
+ fi
+
+ ### SPLIT INTO THREADS
+ split_cal=$(( $((`wc -l ${file_name} | awk '{print$1}'` / $threads)) + $threads ))
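+ # Lines per chunk = (total lines / threads) padded by $threads, so integer rounding can't create more chunks than we have threads/suffixes to process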
+ #split --numeric-suffixes --suffix-length=6 -l ${split_cal} ${file_name} ${file_name}.thread
+ split -a 2 -l ${split_cal} ${file_name} ${file_name}.thread
+ if [ ! "$?" = "0" ]; then
+ echo "... ERROR: Unable to create split files."
+ exit 1
+ fi
+
+ # Catch the case where someone exceeds the defined limit of the AZ2 var. If this happens, we'll need to switch it out for AZ3 ...
+ if [[ $threads -gt 650 ]]; then
+ echo "Whoops- we hit a maximum limit here... \$AZ2 only allows for a maximum of 650 cores..."
+ exit 1
+ fi
+
+ count=0
+ for suffix in ${AZ2}; do
+ (( count++ ))
+ if [[ $count -gt $threads ]]; then
+ break
+ fi
+ PADNAME="${file_name}.thread${suffix}"
+ $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' ${PADNAME} &
+ done
+ wait
+ count=0
+ for suffix in ${AZ2}; do
+ (( count++ ))
+ if [[ $count -gt $threads ]]; then
+ break
+ fi
+ PADNAME="${file_name}.thread${suffix}"
+ cat ${PADNAME} >> ${file_name}.tmp
+ rm -f ${PADNAME} ${PADNAME}.sedtmp
+ (( NUM++ ))
+ done
+ if [ `wc -l ${file_name} | awk '{print$1}'` = `wc -l ${file_name}.tmp | awk '{print$1}'` ]; then
+ mv ${file_name}{.tmp,}
+ if [ ! $? = 0 ]; then
+ echo "... ERROR: Failed to overwrite ${file_name}"
+ exit 1
+ fi
+ else
+ echo "... ERROR: Multi-threaded data parsing encountered an error."
+ exit 1
+ fi
+
+ else
+ # Estimating an 80-byte saving per line... probably a little conservative depending on keysize.
+ KBreduction=$(($((`wc -l ${file_name} | awk '{print$1}'` * 80)) / 1024))
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ filesize=`expr $filesize - $KBreduction`
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' $file_name && rm -f ${file_name}.sedtmp
+ if [ ! $? = 0 ];then
+ echo "Stage failed."
+ exit 1
+ fi
+ fi
+
+ $echoVerbose && echo "... INFO: Stage 2 - Duplicate curly brace removal"
+ # Approx 1 byte per line removed
+ KBreduction=$((`wc -l ${file_name} | awk '{print$1}'` / 1024))
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ filesize=`expr $filesize - $KBreduction`
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} 's/}},$/},/g' ${file_name} && rm -f ${file_name}.sedtmp
+ if [ ! $? = 0 ];then
+ echo "Stage failed."
+ exit 1
+ fi
+ $echoVerbose && echo "... INFO: Stage 3 - Header Correction"
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} '1s/^.*/{"new_edits":false,"docs":[/' ${file_name} && rm -f ${file_name}.sedtmp
+ if [ ! $? = 0 ];then
+ echo "Stage failed."
+ exit 1
+ fi
+ $echoVerbose && echo "... INFO: Stage 4 - Final document line correction"
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} 's/}}$/}/g' ${file_name} && rm -f ${file_name}.sedtmp
+ if [ ! $? = 0 ];then
+ echo "Stage failed."
+ exit 1
+ fi
+
+ # If -z (compress) option then compress output file
+ if [ "$compress" = true ]; then
+ $echoVerbose && echo "... INFO: Stage 5 - File compression"
+ gzip $file_name
+ file_name="$file_name.gz"
+ fi
+
+ $echoVerbose && echo "... INFO: Export completed successfully. File available at: ${file_name}"
+ exit 0
+
+### Else if user selected Restore:
+elif [ $restore = true ]&&[ $backup = false ]; then
+ #################################################################
+ ##################### RESTORE START #############################
+ #################################################################
+ # Check if input exists:
+ if [ ! -f ${file_name} ]; then
+ echo "... ERROR: Input file ${file_name} not found."
+ exit 1
+ fi
+
+ #### VALIDATION END
+
+ $echoVerbose && echo "... INFO: Checking for database"
+ attemptcount=0
+ A=0
+ until [ $A = 1 ]; do
+ (( attemptcount++ ))
+ existing_dbs=$(curl $curlSilentOpt $curlopt -X GET "${url}/_all_dbs")
+ if [ ! $? = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: Curl failed to get the list of databases - Stopping"
+ exit 1
+ else
+ echo "... WARN: Curl failed to get the list of databases - Attempt ${attemptcount}/${attempts}. Retrying..."
+ sleep 1
+ fi
+ else
+ A=1
+ fi
+ done
+ if [[ ! "$existing_dbs" = "["*"]" ]]; then
+ echo "... WARN: Curl failed to get the list of databases - Continuing"
+ if [ ! "x$existing_dbs" = "x" ]; then
+ echo "... WARN: Curl just returned: $existing_dbs"
+ fi
+ elif [[ ! "$existing_dbs" = *"\"${db_name}\""* ]]; then
+ # database was not listed as an existing database
+ if [ $createDBsOnDemand = true ]; then
+ attemptcount=0
+ A=0
+ until [ $A = 1 ]; do
+ (( attemptcount++ ))
+ curl $curlSilentOpt $curlopt -X PUT "${url}/${db_name}" -o tmp.out
+ # If curl threw an error:
+ if [ ! $? = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: Curl failed to create the database ${db_name} - Stopping"
+ if [ -f tmp.out ]; then
+ echo -n "... ERROR: Error message was: "
+ cat tmp.out
+ else
+ echo "... ERROR: See above for any errors"
+ fi
+ exit 1
+ else
+ echo "... WARN: Curl failed to create the database ${db_name} - Attempt ${attemptcount}/${attempts}. Retrying..."
+ sleep 1
+ fi
+ # If curl was happy, but CouchDB returned an error in the return JSON:
+ elif [ ! "`head -n 1 tmp.out | grep -c '^{"error":'`" = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: CouchDB Reported: `head -n 1 tmp.out`"
+ exit 1
+ else
+ echo "... WARN: CouchDB Reported an error during db creation - Attempt ${attemptcount}/${attempts} - Retrying..."
+ sleep 1
+ fi
+ # Otherwise, if everything went well, delete our temp files.
+ else
+ rm tmp.out
+ A=1
+ fi
+ done
+ else
+ echo "... ERROR: Database ${db_name} has not been created yet - Stopping"
+ $echoVerbose && echo "... HINT: you could add the -c flag to create the database automatically"
+ exit 1
+ fi
+ fi
+
+ ## Stop bash from mangling wildcards in the document data...
+ set -o noglob
+ # Manage Design Documents as a priority, and remove them from the main import job
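+ # (Each design doc found below is stripped of its _id/_rev and PUT individually to ${url}/${db_name}/_design/<name>; the remaining documents go through _bulk_docs further down)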
+ $echoVerbose && echo "... INFO: Checking for Design documents"
+ # Find all _design docs, put them into another file
+ design_file_name=${file_name}-design
+ grep '^{"_id":"_design' ${file_name} > ${design_file_name}
+
+ # Count the design file (if it even exists)
+ DESIGNS="`wc -l ${design_file_name} 2>/dev/null | awk '{print$1}'`"
+ # If there are no design docs for import...
+ if [ "x$DESIGNS" = "x" ]||[ "$DESIGNS" = "0" ]; then
+ # Cleanup any null files
+ rm -f ${design_file_name} 2>/dev/null
+ $echoVerbose && echo "... INFO: No Design Documents found for import."
+ else
+ $echoVerbose && echo "... INFO: Duplicating original file for alteration"
+ # Duplicate the original DB file, so we don't mangle the user's input file:
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ cp -f ${file_name}{,-nodesign}
+ # Re-set file_name to be our new file.
+ file_name=${file_name}-nodesign
+ # Remove these design docs from (our new) main file.
+ $echoVerbose && echo "... INFO: Stripping _design elements from regular documents"
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} '/^{"_id":"_design/d' ${file_name} && rm -f ${file_name}.sedtmp
+ # Remove the final document's trailing comma
+ $echoVerbose && echo "... INFO: Fixing end document"
+ line=$(expr `wc -l ${file_name} | awk '{print$1}'` - 1)
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ $sed_cmd ${sed_edit_in_place} "${line}s/,$//" ${file_name} && rm -f ${file_name}.sedtmp
+
+ $echoVerbose && echo "... INFO: Inserting Design documents"
+ designcount=0
+ # For each design doc...
+ while IFS="" read -r; do
+ line="${REPLY}"
+ # Split the ID out for use as the import URL path
+ URLPATH=$(echo $line | awk -F'"' '{print$4}')
+ # Scrap the ID and Rev from the main data, as well as any trailing ','
+ echo "${line}" | $sed_cmd -${sed_regexp_option}e "s@^\{\"_id\":\"${URLPATH}\",\"_rev\":\"[0-9]*-[0-9a-zA-Z_\-]*\",@\{@" | $sed_cmd -e 's/,$//' > ${design_file_name}.${designcount}
+ # Fix Windows CRLF
+ if grep -qU $'\x0d' ${design_file_name}.${designcount}; then
+ $echoVerbose && echo "... INFO: File contains Windows carriage returns- converting..."
+ filesize=$(du -P -k ${design_file_name}.${designcount} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ tr -d '\r' < ${design_file_name}.${designcount} > ${design_file_name}.${designcount}.tmp
+ if [ $? = 0 ]; then
+ mv ${design_file_name}.${designcount}.tmp ${design_file_name}.${designcount}
+ if [ $? = 0 ]; then
+ $echoVerbose && echo "... INFO: Completed successfully."
+ else
+ echo "... ERROR: Failed to overwrite ${design_file_name}.${designcount} with ${design_file_name}.${designcount}.tmp"
+ exit 1
+ fi
+ else
+ echo "... ERROR: Failed to convert file."
+ exit 1
+ fi
+ fi
+
+ # Insert this file into the DB
+ A=0
+ attemptcount=0
+ until [ $A = 1 ]; do
+ (( attemptcount++ ))
+ curl $curlSilentOpt ${curlopt} -T ${design_file_name}.${designcount} -X PUT "${url}/${db_name}/${URLPATH}" -H 'Content-Type: application/json' -o ${design_file_name}.out.${designcount}
+ # If curl threw an error:
+ if [ ! $? = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: Curl failed trying to restore ${design_file_name}.${designcount} - Stopping"
+ exit 1
+ else
+ echo "... WARN: Import of ${design_file_name}.${designcount} failed - Attempt ${attemptcount}/${attempts}. Retrying..."
+ sleep 1
+ fi
+ # If curl was happy, but CouchDB returned an error in the return JSON:
+ elif [ ! "`head -n 1 ${design_file_name}.out.${designcount} | grep -c '^{"error":'`" = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: CouchDB Reported: `head -n 1 ${design_file_name}.out.${designcount}`"
+ exit 1
+ else
+ echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
+ sleep 1
+ fi
+ # Otherwise, if everything went well, delete our temp files.
+ else
+ A=1
+ rm -f ${design_file_name}.out.${designcount}
+ rm -f ${design_file_name}.${designcount}
+ fi
+ done
+ # Increase design count - mainly used for the INFO at the end.
+ (( designcount++ ))
+ # NOTE: The loop reads the design lines extracted from the main file above
+ done < <(cat ${design_file_name})
+ $echoVerbose && echo "... INFO: Successfully imported ${designcount} Design Documents"
+ fi
+ set +o noglob
+
+ # If the size of the file to import is less than our $lines size, don't worry about splitting
+ if [ `wc -l $file_name | awk '{print$1}'` -lt $lines ]; then
+ $echoVerbose && echo "... INFO: Small dataset. Importing as a single file."
+ A=0
+ attemptcount=0
+ until [ $A = 1 ]; do
+ (( attemptcount++ ))
+ curl $curlSilentOpt $curlopt -T $file_name -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
+ if [ "`head -n 1 tmp.out | grep -c '^{"error":'`" -eq 0 ]; then
+ $echoVerbose && echo "... INFO: Imported ${file_name_orig} Successfully."
+ rm -f tmp.out
+ rm -f ${file_name_orig}-design
+ rm -f ${file_name_orig}-nodesign
+ exit 0
+ else
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: Import of ${file_name_orig} failed."
+ if [ -f tmp.out ]; then
+ echo -n "... ERROR: Error message was: "
+ cat tmp.out
+ else
+ echo "... ERROR: See above for any errors"
+ fi
+ rm -f tmp.out
+ exit 1
+ else
+ echo "... WARN: Import of ${file_name_orig} failed - Attempt ${attemptcount}/${attempts} - Retrying..."
+ sleep 1
+ fi
+ fi
+ done
+ # Otherwise, it's a large import that needs to be split into blocks for bulk insertion.
+ else
+ $echoVerbose && echo "... INFO: Block import set to ${lines} lines."
+ if [ -f ${file_name}.splitaaa ]; then
+ echo "... ERROR: Split files \"${file_name}.split*\" already present. Please remove before continuing."
+ exit 1
+ fi
+ importlines=`grep -c . ${file_name}`
+
+ # Due to the file limit imposed by the pre-calculated AZ3 variable, max split files is 15600 (alpha x 3 positions)
+ if [[ `expr ${importlines} / ${lines}` -gt 15600 ]]; then
+ echo "... ERROR: Pre-processed split variable limit of 15600 files reached."
+ echo " Please increase the '-l' parameter (Currently: $lines) and try again."
+ exit 1
+ fi
+
+ $echoVerbose && echo "... INFO: Generating files to import"
+ filesize=$(du -P -k ${file_name} | awk '{print$1}')
+ checkdiskspace "${file_name}" $filesize
+ ### Split the file into many
+ split -a 3 -l ${lines} ${file_name} ${file_name}.split
+ if [ ! "$?" = "0" ]; then
+ echo "... ERROR: Unable to create split files."
+ exit 1
+ fi
+ HEADER="`head -n 1 $file_name`"
+ FOOTER="`tail -n 1 $file_name`"
+
+ count=0
+ for PADNUM in $AZ3; do
+ PADNAME="${file_name}.split${PADNUM}"
+ if [ ! -f ${PADNAME} ]; then
+ echo "... INFO: Import Cycle Completed."
+ break
+ fi
+
+ if [ ! "`head -n 1 ${PADNAME}`" = "${HEADER}" ]; then
+ $echoVerbose && echo "... INFO: Adding header to ${PADNAME}"
+ filesize=$(du -P -k ${PADNAME} | awk '{print$1}')
+ checkdiskspace "${PADNAME}" $filesize
+ $sed_cmd ${sed_edit_in_place} "1i${HEADER}" ${PADNAME} && rm -f ${PADNAME}.sedtmp
+ else
+ $echoVerbose && echo "... INFO: Header already applied to ${PADNAME}"
+ fi
+ if [ ! "`tail -n 1 ${PADNAME}`" = "${FOOTER}" ]; then
+ $echoVerbose && echo "... INFO: Adding footer to ${PADNAME}"
+ filesize=$(du -P -k ${PADNAME} | awk '{print$1}')
+ checkdiskspace "${PADNAME}" $filesize
+ $sed_cmd ${sed_edit_in_place} '$s/,$//g' ${PADNAME} && rm -f ${PADNAME}.sedtmp
+ echo "${FOOTER}" >> ${PADNAME}
+ else
+ $echoVerbose && echo "... INFO: Footer already applied to ${PADNAME}"
+ fi
+
+ $echoVerbose && echo "... INFO: Inserting ${PADNAME}"
+ A=0
+ attemptcount=0
+ until [ $A = 1 ]; do
+ (( attemptcount++ ))
+ curl $curlSilentOpt $curlopt -T ${PADNAME} -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
+ if [ ! $? = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: Curl failed trying to restore ${PADNAME} - Stopping"
+ exit 1
+ else
+ echo "... WARN: Failed to import ${PADNAME} - Attempt ${attemptcount}/${attempts} - Retrying..."
+ sleep 1
+ fi
+ elif [ ! "`head -n 1 tmp.out | grep -c '^{"error":'`" = 0 ]; then
+ if [ $attemptcount = $attempts ]; then
+ echo "... ERROR: CouchDB Reported: `head -n 1 tmp.out`"
+ exit 1
+ else
+ echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
+ sleep 1
+ fi
+ else
+ A=1
+ rm -f ${PADNAME}
+ rm -f tmp.out
+ (( count++ ))
+ fi
+ done
+
+ $echoVerbose && echo "... INFO: Successfully Imported ${count} Files"
+ A=1
+ rm -f ${file_name_orig}-design
+ rm -f ${file_name_orig}-nodesign
+ done
+ fi
+fi