| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780 |
- #!/bin/bash
- ##
- # AUTHOR: DANIELE BAILO
- # https://github.com/danielebailo
- # www.danielebailo.it
- #
- # Contributors:
- # * dalgibbard - http://github.com/dalgibbard
- # * epos-eu - http://github.com/epos-eu
- # * maximilianhuber - http://github.com/maximilianhuber
- # * ahodgkinson - http://github.com/ahodgkinson (quiet-mode, timestamp, compress)
- ##
- ## This script allows for the Backup and Restore of a CouchDB Database.
- ## Backups are produced in a format that can be later uploaded with the bulk docs directive (as used by this script)
- ## USAGE
- ## * To Backup:
- ## ** example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
- ## * To Restore:
- ## ** example: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
- ###################### CODE STARTS HERE ###################
# Release number reported by the -V option.
scriptversionnumber='1.1.10'
- ##START: FUNCTIONS
#######################################
# Print the command-line help text and terminate the script.
# Arguments:
#   $1 - optional exit status; defaults to 1, which is what every
#        argument-error call site relies on.
# Outputs:
#   The help text on stdout.
# Returns:
#   Never returns: exits with the requested status.
#######################################
usage(){
    local exit_status=${1:-1}
    # Anything that is not a plain number falls back to the failure status.
    case "${exit_status}" in
        ''|*[!0-9]*) exit_status=1 ;;
    esac

    # printf is used instead of `echo -e`, whose escape handling is not portable.
    printf '\nUsage: %s [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>] [-c] [-q] [-z] [-T]\n' "$0"
    printf '\t%s\n' \
        "-b Run script in BACKUP mode." \
        "-r Run script in RESTORE mode." \
        "-H CouchDB Hostname or IP. Can be provided with or without 'http(s)://'" \
        "-d CouchDB Database name to backup/restore." \
        "-f File to Backup-to/Restore-from." \
        "-P Provide a port number for CouchDB [Default: 5984]" \
        "-u Provide a username for auth against CouchDB [Default: blank]" \
        " -- can also set with 'COUCHDB_USER' environment var" \
        "-p Provide a password for auth against CouchDB [Default: blank]" \
        " -- can also set with 'COUCHDB_PASS' environment var" \
        "-l Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)" \
        "-t Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)" \
        "-a Number of times to Attempt import before failing [Default: 3] (Restore Only)" \
        "-c Create DB on demand, if they are not listed." \
        "-q Run in quiet mode. Suppress output, except for errors and warnings." \
        "-z Compress output file (Backup Only)" \
        "-T Add datetime stamp to output file name (Backup Only)" \
        "-V Display version information." \
        "-h Display usage information."
    printf '\nExample: %s -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password\n\n' "$0"
    exit "${exit_status}"
}
#######################################
# Print the script version, project URL and author list, then terminate.
# Globals:
#   scriptversionnumber (read)
# Outputs:
#   Version banner on stdout.
# Returns:
#   Never returns: exits with status 0, because showing the version on
#   request is a successful outcome.
#######################################
scriptversion(){
    # printf keeps the tab layout without relying on `echo -e`.
    printf '\n\t** couchdb-dump version: %s **\n\n' "${scriptversionnumber}"
    printf '\t URL:\thttps://github.com/danielebailo/couchdb-dump\n\n'
    printf '\t Authors:\n'
    printf '\t %s\n' \
        "Daniele Bailo (bailo.daniele@gmail.com)" \
        "Darren Gibbard (dalgibbard@gmail.com)" \
        "Maximilian Huber (maximilian.huber@tngtech.com)"
    printf '\n'
    exit 0
}
#######################################
# Check that the filesystem holding a file has enough free space.
# Arguments:
#   $1 - path of the file that is about to be (re)written; the free
#        space of its containing directory is what gets measured.
#   $2 - space required, in KB (may be negative when a shrink is expected).
# Outputs:
#   An error report on stdout when the check cannot be satisfied.
# Returns:
#   0 when enough space is available; otherwise the script exits with 1.
# Example:
#   checkdiskspace /path/to/file/to/create 1024
#######################################
checkdiskspace(){
    local location=$1
    local KBrequired=$2
    local stripdir KBavail

    if [ -z "${location}" ] || [[ ! ${KBrequired} =~ ^-?[0-9]+$ ]]; then
        echo "... ERROR: checkdiskspace() was not passed the correct arguments."
        exit 1
    fi

    # Work out the containing directory.  A bare file name lives in the
    # current directory and "/file" lives in the root; a plain "%/*" strip
    # gets both of those wrong.
    case "${location}" in
        */*)
            stripdir=${location%/*}
            [ -n "${stripdir}" ] || stripdir=/
            ;;
        *)
            stripdir=.
            ;;
    esac

    # Column 4 of the POSIX df format is the available space; some
    # implementations append a "K" unit that has to be dropped.
    KBavail=$(df -P -k -- "${stripdir}" | tail -n 1 | awk '{print $4}')
    KBavail=${KBavail%K}
    case "${KBavail}" in
        ''|*[!0-9]*)
            echo "... ERROR: Unable to determine the free disk space of ${stripdir}"
            exit 1
            ;;
    esac

    if [ "${KBavail}" -ge "${KBrequired}" ]; then
        return 0
    fi

    echo
    echo "... ERROR: Insufficient Disk Space Available:"
    echo " * Full Path: ${location}"
    echo " * Affected Directory: ${stripdir}"
    echo " * Space Available: ${KBavail} KB"
    echo " * Total Space Required: ${KBrequired} KB"
    echo " * Additional Space Req: $(( KBrequired - KBavail )) KB"
    echo
    exit 1
}
- ## END FUNCTIONS
# Catch no args:
if [ -z "$1" ]; then
    usage
fi

# Default Args
username=""
password=""
backup=false
restore=false
port=5984
OPTIND=1
lines=5000
attempts=3
createDBsOnDemand=false
verboseMode=true
compress=false
timestamp=false

# Parse the command line.  The leading ':' selects getopts' silent mode so
# that missing arguments (':') and unknown options ('?') are reported here.
# Only real option letters are listed; '?' is not a legal option character.
while getopts ":hH:d:f:u:p:P:l:t:a:cqzTVbBrR" opt; do
    case "$opt" in
        # Explicit help is a success; a usage() that ignores its arguments
        # simply keeps exiting the way it always did.
        h) usage 0 ;;
        b|B) backup=true ;;
        r|R) restore=true ;;
        H) url="${OPTARG}" ;;
        d) db_name="${OPTARG}" ;;
        f) file_name="${OPTARG}" ;;
        u) username="${OPTARG}" ;;
        p) password="${OPTARG}" ;;
        P) port="${OPTARG}" ;;
        l) lines="${OPTARG}" ;;
        t) threads="${OPTARG}" ;;
        a) attempts="${OPTARG}" ;;
        c) createDBsOnDemand=true ;;
        q) verboseMode=false ;;
        z) compress=true ;;
        T) timestamp=true ;;
        V) scriptversion ;;
        :) echo "... ERROR: Option \"-${OPTARG}\" requires an argument"; usage ;;
        *) echo "... ERROR: Unknown Option \"-${OPTARG}\""; usage ;;
    esac
done

# If quiet option: Setup echo mode and curl '--silent' opt
if [ "$verboseMode" = true ]; then
    curlSilentOpt=""
    echoVerbose=true
else
    curlSilentOpt="--silent"
    echoVerbose=false
fi

# Report any operands left over once the options have been consumed.
# Counting the arguments (instead of comparing "x$@") keeps the test valid
# when more than one stray word is present.
reject_extra_args(){
    if [ $# -gt 0 ]; then
        echo "... ERROR: Unknown Option \"$*\""
        usage
    fi
}

# Trap unexpected extra args
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
reject_extra_args "$@"
# Exactly one of backup/restore must have been requested.
case "${backup}/${restore}" in
    true/true)
        echo "... ERROR: Cannot pass both '-b' and '-r'"
        usage
        ;;
    false/false)
        echo "... ERROR: Missing argument '-b' (Backup), or '-r' (Restore)"
        usage
        ;;
esac

# Every mandatory option must carry a value: host, database, file.
[ -n "${url}" ] || {
    echo "... ERROR: Missing argument '-H <COUCHDB_HOST>'"
    usage
}
[ -n "${db_name}" ] || {
    echo "... ERROR: Missing argument '-d <DB_NAME>'"
    usage
}
[ -n "${file_name}" ] || {
    echo "... ERROR: Missing argument '-f <FILENAME>'"
    usage
}
# Remember the name the user gave us; later stages may re-point file_name.
file_name_orig=$file_name

# Get OS TYPE (Linux for Linux, Darwin for MacOSX)
os_type=$(uname -s)

# Pick sed or gsed: the BSD userlands need GNU sed installed as 'gsed'.
case "${os_type}" in
    FreeBSD|Darwin) sed_cmd="gsed" ;;
    *) sed_cmd="sed" ;;
esac

## Make sure it's installed
if ! echo | "${sed_cmd}" 's/a//' >/dev/null 2>&1; then
    echo "... ERROR: please install $sed_cmd (gnu-sed) and ensure it is in your path"
    exit 1
fi

# Work out how many CPU cores are available.
case "${os_type}" in
    Darwin) cores=$(sysctl -n hw.ncpu) ;;
    FreeBSD) cores=$(sysctl -n kern.smp.cpus) ;;
    *)
        # nproc is not available everywhere; assume a single core without it.
        if command -v nproc >/dev/null 2>&1; then
            cores=$(nproc)
        else
            cores=1
        fi
        ;;
esac
# Never trust the probe blindly: anything that is not a positive number
# is treated as a single core.
case "${cores}" in
    ''|0|*[!0-9]*) cores=1 ;;
esac

# Validate thread count.  The default leaves one core free, but never drops
# below one thread.
default_threads=$(( cores > 1 ? cores - 1 : 1 ))
case "${threads}" in
    '')
        threads=${default_threads}
        ;;
    *[!0-9]*)
        echo "... WARN: Thread setting of \"${threads}\" is invalid. Setting to ${default_threads}"
        threads=${default_threads}
        ;;
    *)
        if [ "${threads}" -gt "${cores}" ]; then
            echo "... WARN: Thread setting of $threads is more than CPU count. Setting to $cores"
            threads=$cores
        else
            $echoVerbose && echo "... INFO: Setting parser threads to $threads"
        fi
        ;;
esac
# Validate Attempts, set to no-retry if zero/invalid.
# The value is normalised to base 10 because the retry loops compare it with
# a counter: "03" would otherwise never equal "3" and "00" would pass as valid.
attempts_valid=true
case "${attempts}" in
    ''|*[!0-9]*)
        attempts_valid=false
        ;;
    *)
        attempts_checked=$(( 10#${attempts} ))
        if [ "${attempts_checked}" -eq 0 ]; then
            attempts_valid=false
        fi
        ;;
esac
if [ "${attempts_valid}" = true ]; then
    attempts=${attempts_checked}
else
    echo "... WARN: Retry Attempt value of \"$attempts\" is invalid. Disabling Retry-on-Error."
    attempts=1
fi

#######################################
# Turn the -H argument into a base URL with scheme and port.
# Arguments:
#   $1 - host, with or without a leading http:// or https://
#   $2 - port to append when the host does not already end in one
# Outputs:
#   The normalised URL on stdout.
# Note: to use 'https://' on a non-443 port the scheme must be given
#       explicitly in the host argument.
#######################################
build_couch_url(){
    local target=$1
    local target_port=$2

    # A trailing slash (or dangling colon) would end up in front of the port.
    while [[ ${target} == */ ]]; do
        target=${target%/}
    done
    target=${target%:}

    # Only a real scheme prefix counts, in any letter case; "http" appearing
    # somewhere inside a host name does not.
    if [[ ! ${target} =~ ^[Hh][Tt][Tt][Pp][Ss]?:// ]]; then
        if [ "${target_port}" = "443" ]; then
            target="https://${target}"
        else
            target="http://${target}"
        fi
    fi

    # Manage the addition of port: if a port isn't already on our URL, add it.
    if [[ ! ${target} =~ :[0-9]+$ ]]; then
        target="${target}:${target_port}"
    fi

    printf '%s\n' "${target}"
}

## Manage the passing of http/https and of the port for $url:
url=$(build_couch_url "${url}" "${port}")
# Anything not supplied on the command line is taken from the environment.
username=${username:-$COUCHDB_USER}
password=${password:-$COUCHDB_PASS}

## Credentials only make sense as a pair: reject a lone username or password.
if [ -n "${username}" ] && [ -z "${password}" ]; then
    echo "... ERROR: Password cannot be blank, if username is specified."
    usage
elif [ -z "${username}" ] && [ -n "${password}" ]; then
    echo "... ERROR: Username cannot be blank, if password is specified."
    usage
fi
# Check for sed option
sed_edit_in_place='-i.sedtmp'
case "${os_type}" in
    Darwin) sed_regexp_option='E' ;;
    *) sed_regexp_option='r' ;;
esac

# Build the shared curl options from a known-empty state instead of whatever
# a 'curlopt' variable in the caller's environment might contain.
curlopt=""
# Allow for self-signed/invalid certs if method is HTTPS:
if [[ ${url} =~ ^[Hh][Tt][Tt][Pp][Ss]:// ]]; then
    curlopt="-k"
fi
if [ -n "${username}" ] && [ -n "${password}" ]; then
    curlopt="${curlopt} -u ${username}:${password}"
fi

## Check for curl
# The checks run in the current shell: inside '( ... )' the 'exit' would only
# leave the subshell and the script would carry on without the tool.
if ! curl --version >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'curl' to be present."
    exit 1
fi
# Check for tr
if ! echo | tr -d "" >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'tr' to be present."
    exit 1
fi

##### SETUP OUR LARGE VARS FOR SPLIT PROCESSING (due to limitations in split on Darwin/BSD)
# Space-separated lists of every two- and three-letter suffix (aa..zz, aaa..zzz).
AZ2=$(echo {a..z}{a..z})
AZ3=$(echo {a..z}{a..z}{a..z})
- ### If user selected BACKUP, run the following code:
- if [ $backup = true ]&&[ $restore = false ]; then
- #################################################################
- ##################### BACKUP START ##############################
- #################################################################
#######################################
# Insert a timestamp into a file name, in front of its extension if it has one.
# Only the last path component is inspected, so dots in directory names
# ("./dump", "/var/backups.d/dump") are never mistaken for an extension.
# Arguments:
#   $1 - file name, optionally with a directory part
#   $2 - timestamp text to insert
# Outputs:
#   "<dir>/<base>-<stamp>.<ext>" or, without an extension, "<name>-<stamp>".
#######################################
timestamped_file_name(){
    local name=$1
    local stamp=$2
    local dir=""
    local leaf=${name}

    if [[ ${name} == */* ]]; then
        dir=${name%/*}/
        leaf=${name##*/}
    fi

    if [[ ${leaf} =~ ^(.+)\.([a-zA-Z0-9][a-zA-Z0-9_]*)$ ]]; then
        printf '%s%s-%s.%s\n' "${dir}" "${BASH_REMATCH[1]}" "${stamp}" "${BASH_REMATCH[2]}"
    else
        printf '%s-%s\n' "${name}" "${stamp}"
    fi
}

# If -T (timestamp) option, append datetime stamp ("-YYYYMMDD-hhmmss") before file extension
if [ "$timestamp" = true ]; then
    datetime=$(date "+%Y%m%d-%H%M%S") # Format: YYYYMMDD-hhmmss
    file_name=$(timestamped_file_name "${file_name}" "${datetime}")
fi
$echoVerbose && echo "... INFO: Output file ${file_name}"

# Check if output already exists:
if [ -f "${file_name}" ]; then
    echo "... ERROR: Output file ${file_name} already exists."
    exit 1
fi
# Grab our data from couchdb
# curlSilentOpt and curlopt hold zero or more separate words, so they are
# expanded unquoted on purpose.
# shellcheck disable=SC2086
if ! curl ${curlSilentOpt} ${curlopt} -X GET "$url/$db_name/_all_docs?include_docs=true&attachments=true" -o "${file_name}"; then
    # Check for curl errors
    echo "... ERROR: Curl encountered an issue whilst dumping the database."
    rm -f "${file_name}" 2>/dev/null
    exit 1
fi

# Check for export errors: CouchDB reports failures as a JSON error object.
ERR_CHECK=$(head -n 1 "${file_name}" | grep '^{"error')
if [ -n "${ERR_CHECK}" ]; then
    echo "... ERROR: CouchDB reported: $ERR_CHECK"
    # The file only holds the error body that was just printed; leaving it
    # behind would make the next run stop with "already exists".
    rm -f "${file_name}" 2>/dev/null
    exit 1
fi
#######################################
# Remove every carriage return from a file, in place.
# Arguments:
#   $1 - file to convert
# Globals:
#   echoVerbose (read)
# Returns:
#   0 on success; exits the script with 1 when the conversion or the
#   final rename fails.
#######################################
strip_carriage_returns(){
    local target=$1
    local kb_needed

    # The conversion writes a full second copy next to the original.
    kb_needed=$(du -P -k "${target}" | awk '{print $1}')
    checkdiskspace "${target}" "${kb_needed}"

    if tr -d '\r' < "${target}" > "${target}.tmp"; then
        if mv "${target}.tmp" "${target}"; then
            $echoVerbose && echo "... INFO: Completed successfully."
        else
            echo "... ERROR: Failed to overwrite ${target} with ${target}.tmp"
            exit 1
        fi
    else
        # Do not leave a partial copy behind.
        rm -f "${target}.tmp"
        echo ".. ERROR: Failed to convert file."
        exit 1
    fi
    return 0
}

# CouchDB has a tendency to emit Windows carriage returns in its output -
# this breaks the sed expressions that anchor on the end of a line.
if grep -qU $'\r' "${file_name}"; then
    $echoVerbose && echo "... INFO: File may contain Windows carriage returns- converting..."
    strip_carriage_returns "${file_name}"
fi
## Now we parse the output file to make it suitable for re-import.
$echoVerbose && echo "... INFO: Amending file to make it suitable for Import."
$echoVerbose && echo "... INFO: Stage 1 - Document filtering"

# Strips the '{"id":...,"key":...,"value":...,"doc":' wrapper that _all_docs
# puts in front of each document.
stage1_expr='s/{"id".*,"doc"://g'

filesize=$(du -P -k "${file_name}" | awk '{print $1}')
doc_lines=$(wc -l < "${file_name}" | awk '{print $1}')
# Estimating 80byte saving per line... probably a little conservative depending on keysize.
KBreduction=$(( ${doc_lines:-0} * 80 / 1024 ))

# If the input file is larger than 250MB, multi-thread the parsing:
if [ "${filesize:-0}" -ge 256000 ] && [ "${threads:-1}" -gt 1 ]; then
    # Two-letter split suffixes limit the number of pieces; check this before
    # anything is written so that no stray split files are left behind.
    if [ "${threads}" -gt 650 ]; then
        echo "Whoops- we hit a maximum limit here... \$AZ2 only allows for a maximum of 650 cores..."
        exit 1
    fi

    # Room is needed for the split pieces plus the re-assembled result.
    filesize=$(( filesize + filesize - KBreduction ))
    checkdiskspace "${file_name}" "${filesize}"
    $echoVerbose && echo "... INFO: Multi-Threaded Parsing Enabled."

    # Refuse to run on top of leftovers from an earlier attempt.
    for stale_part in "${file_name}".thread??; do
        if [ -e "${stale_part}" ]; then
            echo "... ERROR: Split files \"${file_name}.thread*\" already present. Please remove before continuing."
            exit 1
        fi
    done
    if [ -f "${file_name}.tmp" ]; then
        echo "... ERROR: Tempfile ${file_name}.tmp already present. Please remove before continuing."
        exit 1
    fi

    ### SPLIT INTO THREADS
    # The "+ threads" padding guarantees at most $threads pieces.
    split_cal=$(( doc_lines / threads + threads ))
    if ! split -a 2 -l "${split_cal}" "${file_name}" "${file_name}.thread"; then
        rm -f "${file_name}".thread??
        echo "... ERROR: Unable to create split files."
        exit 1
    fi

    # Work on the pieces that actually exist; the glob returns them in the
    # same aa, ab, ... order in which split wrote them.
    parts=( "${file_name}".thread?? )
    if [ ! -e "${parts[0]}" ]; then
        echo "... ERROR: Unable to create split files."
        exit 1
    fi

    # One background sed per piece, then wait for each and note failures.
    pids=()
    for part in "${parts[@]}"; do
        "${sed_cmd}" "${sed_edit_in_place}" "${stage1_expr}" "${part}" &
        pids+=( "$!" )
    done
    parse_failed=false
    for pid in "${pids[@]}"; do
        wait "${pid}" || parse_failed=true
    done

    # Re-assemble the pieces in order, then drop them and sed's backups.
    if [ "${parse_failed}" = false ] && ! cat "${parts[@]}" > "${file_name}.tmp"; then
        parse_failed=true
    fi
    for part in "${parts[@]}"; do
        rm -f "${part}" "${part}.sedtmp"
    done

    # The filter never adds or removes lines, so the counts must agree.
    tmp_lines=$(wc -l < "${file_name}.tmp" | awk '{print $1}')
    if [ "${parse_failed}" = false ] && [ "${doc_lines}" = "${tmp_lines}" ]; then
        if ! mv "${file_name}.tmp" "${file_name}"; then
            echo "... ERROR: Failed to overwrite ${file_name}"
            exit 1
        fi
    else
        echo "... ERROR: Multi-threaded data parsing encountered an error."
        exit 1
    fi
else
    filesize=$(( filesize - KBreduction ))
    checkdiskspace "${file_name}" "${filesize}"
    if ! { "${sed_cmd}" "${sed_edit_in_place}" "${stage1_expr}" "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
        echo "Stage failed."
        exit 1
    fi
fi
# Run one sed expression over a file in place and remove sed's ".sedtmp"
# backup copy once the edit has succeeded.
#   $1 - sed expression, $2 - file to edit
apply_sed_stage(){
    local expression=$1
    local target=$2
    "${sed_cmd}" "${sed_edit_in_place}" "${expression}" "${target}" && rm -f "${target}.sedtmp"
}

$echoVerbose && echo "... INFO: Stage 2 - Duplicate curly brace removal"
# Approx 1Byte per line removed
KBreduction=$(( $(wc -l < "${file_name}") / 1024 ))
filesize=$(du -P -k "${file_name}" | awk '{print $1}')
filesize=$(( filesize - KBreduction ))
checkdiskspace "${file_name}" "${filesize}"
# Every document line but the last ends in "}},": drop the wrapper's brace.
if ! apply_sed_stage 's/}},$/},/g' "${file_name}"; then
    echo "Stage failed."
    exit 1
fi

$echoVerbose && echo "... INFO: Stage 3 - Header Correction"
filesize=$(du -P -k "${file_name}" | awk '{print $1}')
checkdiskspace "${file_name}" "${filesize}"
# Replace the first line with the opening of a _bulk_docs request body.
if ! apply_sed_stage '1s/^.*/{"new_edits":false,"docs":[/' "${file_name}"; then
    echo "Stage failed."
    exit 1
fi

$echoVerbose && echo "... INFO: Stage 4 - Final document line correction"
filesize=$(du -P -k "${file_name}" | awk '{print $1}')
checkdiskspace "${file_name}" "${filesize}"
# The last document has no trailing comma, so it still carries the extra brace.
if ! apply_sed_stage 's/}}$/}/g' "${file_name}"; then
    echo "Stage failed."
    exit 1
fi
# If -z (compress) option then compress output file
if [ "$compress" = true ]; then
    $echoVerbose && echo "... INFO: Stage 5 - File compression"
    # Only report the .gz name when gzip really produced it.
    if ! gzip -- "${file_name}"; then
        echo "... ERROR: Failed to compress ${file_name}"
        exit 1
    fi
    file_name="${file_name}.gz"
fi
$echoVerbose && echo "... INFO: Export completed successfully. File available at: ${file_name}"
exit 0
- ### Else if user selected Restore:
- elif [ $restore = true ]&&[ $backup = false ]; then
- #################################################################
- ##################### RESTORE START #############################
- #################################################################
# Check if input exists (quoted so that names containing spaces are tested
# as a single path):
if [ ! -f "${file_name}" ]; then
    echo "... ERROR: Input file ${file_name} not found."
    exit 1
fi
- #### VALIDATION END
$echoVerbose && echo "... INFO: Checking for database"
# Ask the server for its database list, retrying up to $attempts times.
# curlSilentOpt and curlopt hold zero or more words and stay unquoted.
attemptcount=0
while true; do
    attemptcount=$(( attemptcount + 1 ))
    # shellcheck disable=SC2086
    if existing_dbs=$(curl $curlSilentOpt $curlopt -X GET "${url}/_all_dbs"); then
        break
    fi
    if [ "${attemptcount}" -ge "${attempts}" ]; then
        echo "... ERROR: Curl failed to get the list of databases - Stopping"
        exit 1
    fi
    echo "... WARN: Curl failed to get the list of databases - Attempt ${attemptcount}/${attempts}. Retrying..."
    sleep 1
done

if [[ "${existing_dbs}" != "["*"]" ]]; then
    # Not a JSON array: carry on, but show whatever the server did send.
    echo "... WARN: Curl failed to get the list of databases - Continuing"
    if [ -n "${existing_dbs}" ]; then
        echo "... WARN: Curl just returned: $existing_dbs"
    fi
elif [[ "${existing_dbs}" != *"\"${db_name}\""* ]]; then
    # database was not listed as existing database
    if [ "${createDBsOnDemand}" = true ]; then
        # Private scratch file for the server reply, instead of a fixed name
        # in the current directory.
        create_out=$(mktemp "${TMPDIR:-/tmp}/couchdb-dump.XXXXXX") || {
            echo "... ERROR: Unable to create a temporary file."
            exit 1
        }
        attemptcount=0
        while true; do
            attemptcount=$(( attemptcount + 1 ))
            # shellcheck disable=SC2086
            if ! curl $curlSilentOpt $curlopt -X PUT "${url}/${db_name}" -o "${create_out}"; then
                # If curl threw an error:
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: Curl failed to create the database ${db_name} - Stopping"
                    if [ -s "${create_out}" ]; then
                        echo -n "... ERROR: Error message was: "
                        cat "${create_out}"
                    else
                        echo ".. ERROR: See above for any errors"
                    fi
                    rm -f "${create_out}"
                    exit 1
                fi
                echo "... WARN: Curl failed to create the database ${db_name} - Attempt ${attemptcount}/${attempts}. Retrying..."
                sleep 1
            elif head -n 1 "${create_out}" | grep -q '^{"error":'; then
                # If curl was happy, but CouchDB returned an error in the return JSON:
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 "${create_out}")"
                    rm -f "${create_out}"
                    exit 1
                fi
                echo "... WARN: CouchDB Reported an error during db creation - Attempt ${attemptcount}/${attempts} - Retrying..."
                sleep 1
            else
                # Otherwise, if everything went well, delete our temp files.
                rm -f "${create_out}"
                break
            fi
        done
    else
        echo "... ERROR: corresponding database ${db_name} not yet created - Stopping"
        $echoVerbose && echo "... HINT: you could add the -c flag to create the database automatically"
        exit 1
    fi
fi
## Stop bash mangling wildcard...
set -o noglob
# Manage Design Documents as a priority, and remove them from the main import job
$echoVerbose && echo "... INFO: Checking for Design documents"
# Find all _design docs, put them into another file
design_file_name="${file_name}-design"
grep '^{"_id":"_design' "${file_name}" > "${design_file_name}"
# Count the design file (if it even exists)
DESIGNS=$(wc -l "${design_file_name}" 2>/dev/null | awk '{print $1}')
# If there's no design docs for import...
if [ -z "${DESIGNS}" ] || [ "${DESIGNS}" = "0" ]; then
    # Cleanup any null files
    rm -f "${design_file_name}" 2>/dev/null
    $echoVerbose && echo "... INFO: No Design Documents found for import."
else
    $echoVerbose && echo "... INFO: Duplicating original file for alteration"
    # Duplicate the original DB file, so we don't mangle the user's input file:
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "${filesize}"
    if ! cp -f "${file_name}" "${file_name}-nodesign"; then
        echo "... ERROR: Failed to duplicate ${file_name}"
        exit 1
    fi
    # Re-set file_name to be our new file.
    file_name="${file_name}-nodesign"
    # Remove these design docs from (our new) main file.
    $echoVerbose && echo "... INFO: Stripping _design elements from regular documents"
    checkdiskspace "${file_name}" "${filesize}"
    "${sed_cmd}" "${sed_edit_in_place}" '/^{"_id":"_design/d' "${file_name}" && rm -f "${file_name}.sedtmp"
    # Remove the final document's trailing comma: the last document sits on
    # the line before the closing "]}" line.
    $echoVerbose && echo "... INFO: Fixing end document"
    last_doc_line=$(( $(wc -l < "${file_name}") - 1 ))
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "${filesize}"
    "${sed_cmd}" "${sed_edit_in_place}" "${last_doc_line}s/,$//" "${file_name}" && rm -f "${file_name}.sedtmp"

    $echoVerbose && echo "... INFO: Inserting Design documents"
    designcount=0
    # For each design doc...
    while IFS="" read -r design_line; do
        design_doc="${design_file_name}.${designcount}"
        design_out="${design_file_name}.out.${designcount}"
        # Split the ID out for use as the import URL path
        URLPATH=$(printf '%s\n' "${design_line}" | awk -F'"' '{print $4}')
        # Scrap the ID and Rev from the main data, as well as any trailing ','.
        # The id is matched generically ([^"]*) rather than pasted into the
        # expression, so ids with regex metacharacters or '@' cannot break it.
        printf '%s\n' "${design_line}" \
            | "${sed_cmd}" "-${sed_regexp_option}e" 's@^\{"_id":"[^"]*","_rev":"[0-9]*-[0-9a-zA-Z_\-]*",@{@' \
            | "${sed_cmd}" -e 's/,$//' > "${design_doc}"
        # Fix Windows CRLF
        if grep -qU $'\r' "${design_doc}"; then
            $echoVerbose && echo "... INFO: File contains Windows carriage returns- converting..."
            filesize=$(du -P -k "${design_doc}" | awk '{print $1}')
            checkdiskspace "${file_name}" "${filesize}"
            if tr -d '\r' < "${design_doc}" > "${design_doc}.tmp"; then
                if mv "${design_doc}.tmp" "${design_doc}"; then
                    $echoVerbose && echo "... INFO: Completed successfully."
                else
                    echo "... ERROR: Failed to overwrite ${design_doc} with ${design_doc}.tmp"
                    exit 1
                fi
            else
                echo ".. ERROR: Failed to convert file."
                exit 1
            fi
        fi
        # Insert this file into the DB, retrying up to $attempts times.
        attemptcount=0
        while true; do
            attemptcount=$(( attemptcount + 1 ))
            # curlSilentOpt and curlopt hold zero or more words: keep unquoted.
            # shellcheck disable=SC2086
            if ! curl $curlSilentOpt ${curlopt} -T "${design_doc}" -X PUT "${url}/${db_name}/${URLPATH}" -H 'Content-Type: application/json' -o "${design_out}"; then
                # If curl threw an error:
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: Curl failed trying to restore ${design_doc} - Stopping"
                    exit 1
                fi
                echo "... WARN: Import of ${design_doc} failed - Attempt ${attemptcount}/${attempts}. Retrying..."
                sleep 1
            elif head -n 1 "${design_out}" | grep -q '^{"error":'; then
                # If curl was happy, but CouchDB returned an error in the return JSON:
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 "${design_out}")"
                    exit 1
                fi
                echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
                sleep 1
            else
                # Otherwise, if everything went well, delete our temp files.
                rm -f "${design_out}" "${design_doc}"
                break
            fi
        done
        # Increase design count - mainly used for the INFO at the end.
        designcount=$(( designcount + 1 ))
    # NOTE: This is where we insert the design lines exported from the main block
    done < "${design_file_name}"
    $echoVerbose && echo "... INFO: Successfully imported ${designcount} Design Documents"
fi
set +o noglob
# The block size drives a division and several numeric tests below, so it has
# to be a positive whole number.
case "${lines}" in
    ''|*[!0-9]*) lines_valid=false ;;
    *) lines=$(( 10#${lines} )); lines_valid=true ;;
esac
if [ "${lines_valid}" = false ] || [ "${lines}" -eq 0 ]; then
    echo "... ERROR: Invalid value \"${lines}\" for '-l': a positive number of lines is required."
    exit 1
fi

# Private scratch file for the server replies, instead of a fixed name in the
# current directory.
import_out=$(mktemp "${TMPDIR:-/tmp}/couchdb-dump.XXXXXX") || {
    echo "... ERROR: Unable to create a temporary file."
    exit 1
}

# If the size of the file to import is less than our $lines size, don't worry about splitting
if [ "$(wc -l < "${file_name}" | awk '{print $1}')" -lt "${lines}" ]; then
    $echoVerbose && echo "... INFO: Small dataset. Importing as a single file."
    attemptcount=0
    while true; do
        attemptcount=$(( attemptcount + 1 ))
        # Start every attempt with an empty reply file so that a stale
        # answer can never be mistaken for the current one.
        : > "${import_out}"
        # Success needs both a clean curl exit and a reply that is not a
        # CouchDB error object.
        # shellcheck disable=SC2086
        if curl $curlSilentOpt $curlopt -T "${file_name}" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o "${import_out}" \
            && ! head -n 1 "${import_out}" | grep -q '^{"error":'; then
            $echoVerbose && echo "... INFO: Imported ${file_name_orig} Successfully."
            rm -f "${import_out}"
            rm -f "${file_name_orig}-design"
            rm -f "${file_name_orig}-nodesign"
            exit 0
        fi
        if [ "${attemptcount}" -ge "${attempts}" ]; then
            echo "... ERROR: Import of ${file_name_orig} failed."
            if [ -s "${import_out}" ]; then
                echo -n "... ERROR: Error message was: "
                cat "${import_out}"
            else
                echo ".. ERROR: See above for any errors"
            fi
            rm -f "${import_out}"
            exit 1
        fi
        echo "... WARN: Import of ${file_name_orig} failed - Attempt ${attemptcount}/${attempts} - Retrying..."
        sleep 1
    done
# Otherwise, it's a large import that requires bulk insertion.
else
    $echoVerbose && echo "... INFO: Block import set to ${lines} lines."
    if [ -f "${file_name}.splitaaa" ]; then
        echo "... ERROR: Split files \"${file_name}.split*\" already present. Please remove before continuing."
        rm -f "${import_out}"
        exit 1
    fi
    importlines=$(grep -c . "${file_name}")
    # Due to the file limit imposed by the pre-calculated AZ3 variable, max split files is 15600 (alpha x 3positions)
    if [ "$(( importlines / lines ))" -gt 15600 ]; then
        echo "... ERROR: Pre-processed split variable limit of 15600 files reached."
        echo " Please increase the '-l' parameter (Currently: $lines) and try again."
        rm -f "${import_out}"
        exit 1
    fi
    $echoVerbose && echo "... INFO: Generating files to import"
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "${filesize}"
    ### Split the file into many
    if ! split -a 3 -l "${lines}" "${file_name}" "${file_name}.split"; then
        echo "... ERROR: Unable to create split files."
        rm -f "${import_out}"
        exit 1
    fi
    # Every piece must become a complete _bulk_docs body, i.e. start with the
    # file's first line and end with its last.
    HEADER=$(head -n 1 "${file_name}")
    FOOTER=$(tail -n 1 "${file_name}")
    count=0
    # AZ3 is a space-separated suffix list; word splitting is intended here.
    for PADNUM in $AZ3; do
        PADNAME="${file_name}.split${PADNUM}"
        if [ ! -f "${PADNAME}" ]; then
            echo "... INFO: Import Cycle Completed."
            break
        fi
        if [ "$(head -n 1 "${PADNAME}")" != "${HEADER}" ]; then
            $echoVerbose && echo "... INFO: Adding header to ${PADNAME}"
            filesize=$(du -P -k "${PADNAME}" | awk '{print $1}')
            checkdiskspace "${PADNAME}" "${filesize}"
            "${sed_cmd}" "${sed_edit_in_place}" "1i${HEADER}" "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
        else
            $echoVerbose && echo "... INFO: Header already applied to ${PADNAME}"
        fi
        if [ "$(tail -n 1 "${PADNAME}")" != "${FOOTER}" ]; then
            $echoVerbose && echo "... INFO: Adding footer to ${PADNAME}"
            filesize=$(du -P -k "${PADNAME}" | awk '{print $1}')
            checkdiskspace "${PADNAME}" "${filesize}"
            # The piece's last document must lose its trailing comma before
            # the closing line is appended.
            "${sed_cmd}" "${sed_edit_in_place}" '$s/,$//g' "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
            echo "${FOOTER}" >> "${PADNAME}"
        else
            $echoVerbose && echo "... INFO: Footer already applied to ${PADNAME}"
        fi
        $echoVerbose && echo "... INFO: Inserting ${PADNAME}"
        attemptcount=0
        while true; do
            attemptcount=$(( attemptcount + 1 ))
            : > "${import_out}"
            # shellcheck disable=SC2086
            if ! curl $curlSilentOpt $curlopt -T "${PADNAME}" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o "${import_out}"; then
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: Curl failed trying to restore ${PADNAME} - Stopping"
                    rm -f "${import_out}"
                    exit 1
                fi
                echo "... WARN: Failed to import ${PADNAME} - Attempt ${attemptcount}/${attempts} - Retrying..."
                sleep 1
            elif head -n 1 "${import_out}" | grep -q '^{"error":'; then
                if [ "${attemptcount}" -ge "${attempts}" ]; then
                    echo "... ERROR: CouchDB Reported: $(head -n 1 "${import_out}")"
                    rm -f "${import_out}"
                    exit 1
                fi
                echo "... WARN: CouchDB Reported and error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
                sleep 1
            else
                rm -f "${PADNAME}"
                count=$(( count + 1 ))
                break
            fi
        done
        $echoVerbose && echo "... INFO: Successfully Imported ${count} Files"
    done
    # Working copies are removed once, after the whole cycle has finished.
    rm -f "${import_out}"
    rm -f "${file_name_orig}-design"
    rm -f "${file_name_orig}-nodesign"
fi
- fi
|