couchdb-dump.sh 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. #!/bin/bash
  2. ##
  3. # AUTHOR: DANIELE BAILO
  4. # https://github.com/danielebailo
  5. # www.danielebailo.it
  6. #
  7. # Contributors:
  8. # * dalgibbard - http://github.com/dalgibbard
  9. # * epos-eu - http://github.com/epos-eu
  10. # * maximilianhuber - http://github.com/maximilianhuber
  11. # * ahodgkinson - http://github.com/ahodgkinson (quiet-mode, timestamp, compress)
  12. ##
  13. ## This script allows for the Backup and Restore of a CouchDB Database.
  14. ## Backups are produced in a format that can be later uploaded with the bulk docs directive (as used by this script)
  15. ## USAGE
  16. ## * To Backup:
  17. ## ** example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
  18. ## * To Restore:
  19. ## ** example: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
  20. ###################### CODE STARTS HERE ###################
  # Release identifier printed by scriptversion() when -V is passed.
  scriptversionnumber='1.1.10'
  22. ##START: FUNCTIONS
  #######################################
  # Print the command-line help and terminate the script.
  # Arguments: none ($0 is used as the program name).
  # Outputs:   help text on stdout.
  # Returns:   does not return; always exits with status 1.
  #######################################
  usage(){
    echo
    # The synopsis lists every flag handled by the getopts loop, including the
    # argument-less switches.
    echo "Usage: $0 [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>] [-c] [-q] [-z] [-T] [-V] [-h]"
    # printf reuses its format for every argument, so each string below is
    # emitted as one tab-indented line.
    printf '\t%s\n' \
      "-b Run script in BACKUP mode." \
      "-r Run script in RESTORE mode." \
      "-H CouchDB Hostname or IP. Can be provided with or without 'http(s)://'" \
      "-d CouchDB Database name to backup/restore." \
      "-f File to Backup-to/Restore-from." \
      "-P Provide a port number for CouchDB [Default: 5984]" \
      "-u Provide a username for auth against CouchDB [Default: blank]" \
      " -- can also set with 'COUCHDB_USER' environment var" \
      "-p Provide a password for auth against CouchDB [Default: blank]" \
      " -- can also set with 'COUCHDB_PASS' environment var" \
      "-l Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)" \
      "-t Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)" \
      "-a Number of times to Attempt import before failing [Default: 3] (Restore Only)" \
      "-c Create DB on demand, if they are not listed." \
      "-q Run in quiet mode. Suppress output, except for errors and warnings." \
      "-z Compress output file (Backup Only)" \
      "-T Add datetime stamp to output file name (Backup Only)" \
      "-V Display version information." \
      "-h Display usage information."
    echo
    echo "Example: $0 -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password"
    echo
    exit 1
  }
  # Show the version banner (-V): release number, project URL and authors.
  # Reads the global scriptversionnumber, writes to stdout, then exits with
  # status 1 (it never returns to the caller).
  scriptversion(){
    printf '\n\t** couchdb-dump version: %s **\n\n' "$scriptversionnumber"
    printf '\t URL:\thttps://github.com/danielebailo/couchdb-dump\n\n'
    printf '\t Authors:\n'
    # One tab-indented line per author.
    printf '\t %s\n' \
      "Daniele Bailo (bailo.daniele@gmail.com)" \
      "Darren Gibbard (dalgibbard@gmail.com)" \
      "Maximilian Huber (maximilian.huber@tngtech.com)"
    printf '\n'
    exit 1
  }
  #######################################
  # Verify that the filesystem holding a path has enough free space.
  # Example call: checkdiskspace /path/to/file/to/create 1024
  # Arguments: $1 - path of the file about to be created or rewritten
  #            $2 - space required, in KB
  # Outputs:   an error report on stdout when the check fails
  # Returns:   0 when enough space is available; otherwise the script is
  #            terminated with exit status 1
  #######################################
  checkdiskspace(){
    local location=$1
    local KBrequired=$2
    local stripdir KBavail
    if [ -z "$location" ] || [ -z "$KBrequired" ]; then
      echo "... ERROR: checkdiskspace() was not passed the correct arguments."
      exit 1
    fi
    # Work out the directory to measure. A bare file name has no directory
    # part, so the current directory is used; "/file" means the root directory.
    case "$location" in
      */*) stripdir=${location%/*} ;;
      *)   stripdir=. ;;
    esac
    [ -n "$stripdir" ] || stripdir=/
    # Column 4 of the last line of "df -P -k" is the available space; some
    # platforms append a trailing "K", which is stripped.
    KBavail=$(df -P -k -- "$stripdir" | awk '{ avail = $4 } END { sub(/K$/, "", avail); print avail }')
    case "$KBavail" in
      ''|*[!0-9]*)
        # df failed or printed something unexpected: say so instead of
        # reporting a bogus "insufficient space" figure.
        echo "... ERROR: Unable to determine the free disk space for ${stripdir}"
        exit 1
        ;;
    esac
    if [ "$KBavail" -ge "$KBrequired" ]; then
      return 0
    fi
    echo
    echo "... ERROR: Insufficient Disk Space Available:"
    echo " * Full Path: ${location}"
    echo " * Affected Directory: ${stripdir}"
    echo " * Space Available: ${KBavail} KB"
    echo " * Total Space Required: ${KBrequired} KB"
    echo " * Additional Space Req: $((KBrequired - KBavail)) KB"
    echo
    exit 1
  }
  88. ## END FUNCTIONS
  # Catch no args: show the help when the first argument is missing or empty.
  if [ -z "$1" ]; then
    usage
  fi
  # Default Args
  # Everything that the option loop may set is initialised here so that
  # same-named variables inherited from the environment cannot leak in.
  username=""
  password=""
  url=""
  db_name=""
  file_name=""
  threads=""
  backup=false
  restore=false
  port=5984
  OPTIND=1
  lines=5000
  attempts=3
  createDBsOnDemand=false
  verboseMode=true
  compress=false
  timestamp=false
  # Leading ':' selects silent error reporting: a missing argument yields ':'
  # and an unknown option yields '?', both with the option letter in OPTARG.
  while getopts ":hH:d:f:u:p:P:l:t:a:cqzTVbBrR" opt; do
    case "$opt" in
      h) usage ;;
      b|B) backup=true ;;
      r|R) restore=true ;;
      H) url="$OPTARG" ;;
      d) db_name="$OPTARG" ;;
      f) file_name="$OPTARG" ;;
      u) username="${OPTARG}" ;;
      p) password="${OPTARG}" ;;
      P) port="${OPTARG}" ;;
      l) lines="${OPTARG}" ;;
      t) threads="${OPTARG}" ;;
      a) attempts="${OPTARG}" ;;
      c) createDBsOnDemand=true ;;
      q) verboseMode=false ;;
      z) compress=true ;;
      T) timestamp=true ;;
      V) scriptversion ;;
      :) echo "... ERROR: Option \"-${OPTARG}\" requires an argument"; usage ;;
      *|\?) echo "... ERROR: Unknown Option \"-${OPTARG}\""; usage ;;
    esac
  done
  # Quiet mode (-q): start from the quiet settings (curl runs with --silent
  # and informational messages are switched off) and restore the verbose
  # ones when verbose mode is active.
  curlSilentOpt="--silent"
  echoVerbose=false
  case "$verboseMode" in
    true)
      curlSilentOpt=""
      echoVerbose=true
      ;;
  esac
  # Trap unexpected extra args
  # reject_extra_args ARG...
  # Reports any leftover positional arguments and ends the script through
  # usage(). The argument COUNT is tested: comparing "x$@" inside [ ] breaks
  # with "too many arguments" as soon as two or more stray words are given,
  # which let them slip through unnoticed.
  reject_extra_args(){
    if [ "$#" -gt 0 ]; then
      echo "... ERROR: Unknown Option \"$*\""
      usage
    fi
  }
  # Drop the options consumed by getopts, plus an optional "--" separator.
  shift $((OPTIND-1))
  [ "$1" = "--" ] && shift
  reject_extra_args "$@"
  # Exactly one of backup (-b) / restore (-r) must have been selected.
  case "${backup}:${restore}" in
    true:true)
      echo "... ERROR: Cannot pass both '-b' and '-r'"
      usage
      ;;
    false:false)
      echo "... ERROR: Missing argument '-b' (Backup), or '-r' (Restore)"
      usage
      ;;
  esac
  # The host, database name and file name are all mandatory.
  if [ -z "$url" ]; then
    echo "... ERROR: Missing argument '-H <COUCHDB_HOST>'"
    usage
  fi
  if [ -z "$db_name" ]; then
    echo "... ERROR: Missing argument '-d <DB_NAME>'"
    usage
  fi
  if [ -z "$file_name" ]; then
    echo "... ERROR: Missing argument '-f <FILENAME>'"
    usage
  fi
  # Keep the name exactly as given; file_name itself is rewritten later on.
  file_name_orig=$file_name
  # Kernel name as reported by uname (Linux for Linux, Darwin for MacOSX).
  os_type=$(uname -s)
  # FreeBSD and macOS are expected to provide GNU sed as "gsed".
  case "$os_type" in
    FreeBSD|Darwin) sed_cmd="gsed" ;;
    *) sed_cmd="sed" ;;
  esac
  ## Make sure the chosen sed can actually be run
  if ! echo | $sed_cmd 's/a//' >/dev/null 2>&1; then
    echo "... ERROR: please install $sed_cmd (gnu-sed) and ensure it is in your path"
    exit 1
  fi
  # Validate thread count
  # Detect the number of CPU cores: sysctl on macOS and FreeBSD, nproc
  # elsewhere, and a single core when nproc is not installed.
  case "$os_type" in
    Darwin)
      cores=$(sysctl -n hw.ncpu)
      ;;
    FreeBSD)
      cores=$(sysctl kern.smp.cpus | awk -F ": " '{print $2}')
      ;;
    *)
      if command -v nproc >/dev/null 2>&1; then
        cores=$(nproc)
      else
        cores=1
      fi
      ;;
  esac
  # Never continue with an empty or non-numeric core count.
  case "$cores" in
    ''|0|*[!0-9]*) cores=1 ;;
  esac
  # A -t value that is not a plain number cannot be compared; warn and fall
  # back to the default instead of letting the numeric test fail.
  case "$threads" in
    *[!0-9]*)
      echo "... WARN: Thread setting of \"$threads\" is not a number. Using the default."
      threads=""
      ;;
  esac
  if [ -n "$threads" ]; then
    if [ "$threads" -gt "$cores" ]; then
      echo "... WARN: Thread setting of $threads is more than CPU count. Setting to $cores"
      threads=$cores
    else
      $echoVerbose && echo "... INFO: Setting parser threads to $threads"
    fi
  else
    # Default: leave one core free, but never go below one thread.
    threads=$(( cores > 1 ? cores - 1 : 1 ))
  fi
  # An empty, zero or non-numeric retry count disables retrying: the
  # operation is then attempted exactly once.
  if [[ -z "$attempts" || "$attempts" == 0 || "$attempts" == *[!0-9]* ]]; then
    echo "... WARN: Retry Attempt value of \"$attempts\" is invalid. Disabling Retry-on-Error."
    attempts=1
  fi
  ## Manage the passing of http/https and of the port for $url.
  # Note: to use 'https://' on a non-443 port, the scheme must be given
  # explicitly in the '-H <HOSTNAME>' argument.
  #
  # build_couch_url HOST DEFAULT_PORT
  # Prints HOST as a base URL:
  #  * a scheme is added only when HOST does not START with http:// or
  #    https:// in any letter case (a host such as "httpd.example.com" merely
  #    contains "http" and still needs one); https is chosen for port 443;
  #  * trailing slashes are dropped so the port is not appended after them;
  #  * ":DEFAULT_PORT" is appended unless the value already ends in
  #    ":<digits>".
  build_couch_url(){
    local target=$1
    local default_port=$2
    case "$target" in
      [Hh][Tt][Tt][Pp]://*|[Hh][Tt][Tt][Pp][Ss]://*) ;;
      *)
        if [ "$default_port" = "443" ]; then
          target="https://$target"
        else
          target="http://$target"
        fi
        ;;
    esac
    while [[ "$target" == *://?*/ ]]; do
      target=${target%/}
    done
    if [[ ! "$target" =~ :[0-9]*$ ]]; then
      target="$target:$default_port"
    fi
    printf '%s\n' "$target"
  }
  url=$(build_couch_url "$url" "$port")
  # Credentials not given on the command line fall back to the COUCHDB_USER
  # and COUCHDB_PASS environment variables.
  username=${username:-$COUCHDB_USER}
  password=${password:-$COUCHDB_PASS}
  ## A username and a password must be supplied together or not at all.
  if [ -n "${username}" ] && [ -z "${password}" ]; then
    echo "... ERROR: Password cannot be blank, if username is specified."
    usage
  elif [ -z "${username}" ] && [ -n "${password}" ]; then
    echo "... ERROR: Username cannot be blank, if password is specified."
    usage
  fi
  # sed switches used by the later processing steps: in-place editing that
  # keeps a ".sedtmp" backup, and the extended-regexp flag letter
  # ('E' on Darwin, 'r' everywhere else).
  sed_edit_in_place='-i.sedtmp'
  case "$os_type" in
    Darwin) sed_regexp_option='E' ;;
    *) sed_regexp_option='r' ;;
  esac
  # HTTPS targets get curl's -k so self-signed/invalid certificates are
  # accepted.
  case "$url" in
    [Hh][Tt][Tt][Pp][Ss]://*) curlopt="-k" ;;
  esac
  # Basic-auth credentials are appended only when both parts are present.
  if [ -n "${username}" ] && [ -n "${password}" ]; then
    curlopt="${curlopt} -u ${username}:${password}"
  fi
  ## Check for curl
  # The failure branch must run in the current shell: "exit" inside a
  # ( ... ) subshell would only leave the subshell and the script would carry
  # on without the tool.
  if ! curl --version >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'curl' to be present."
    exit 1
  fi
  # Check for tr
  if ! echo | tr -d "" >/dev/null 2>&1; then
    echo "... ERROR: This script requires 'tr' to be present."
    exit 1
  fi
  ##### Suffix tables used to walk the files written by split (needed because
  ##### of limitations in split on Darwin/BSD).
  # AZ2 holds "aa ab ... zz" and AZ3 holds "aaa aab ... zzz", both separated
  # by single spaces with no trailing space.
  printf -v AZ2 '%s ' {a..z}{a..z}
  AZ2=${AZ2% }
  printf -v AZ3 '%s ' {a..z}{a..z}{a..z}
  AZ3=${AZ3% }
  268. ### If user selected BACKUP, run the following code:
  269. if [ $backup = true ]&&[ $restore = false ]; then
  270. #################################################################
  271. ##################### BACKUP START ##############################
  272. #################################################################
  # add_timestamp_to_name PATH STAMP
  # Prints PATH with "-STAMP" inserted before the file extension, or appended
  # when there is none. Only the final path component is inspected, so a dot
  # inside a directory name (e.g. "/data.d/mydb") is never mistaken for the
  # start of an extension. An extension is a dot followed by an alphanumeric
  # character and then any number of alphanumerics or underscores.
  add_timestamp_to_name(){
    local path=$1
    local stamp=$2
    local dir=""
    local base=$path
    local ext_re='^(.*)\.([a-zA-Z0-9][a-zA-Z0-9_]*)$'
    if [[ "$path" == */* ]]; then
      dir="${path%/*}/"
      base=${path##*/}
    fi
    if [[ "$base" =~ $ext_re ]]; then
      printf '%s\n' "${dir}${BASH_REMATCH[1]}-${stamp}.${BASH_REMATCH[2]}"
    else
      printf '%s\n' "${path}-${stamp}"
    fi
  }
  # If -T (timestamp) option, add the datetime stamp ("-YYYYMMDD-hhmmss")
  if [ "$timestamp" = true ]; then
    datetime=$(date "+%Y%m%d-%H%M%S") # Format: YYYYMMDD-hhmmss
    file_name=$(add_timestamp_to_name "$file_name" "$datetime")
  fi
  $echoVerbose && echo "... INFO: Output file ${file_name}"
  # Refuse to overwrite an existing output file:
  if [ -f "${file_name}" ]; then
    echo "... ERROR: Output file ${file_name} already exists."
    exit 1
  fi
  # Grab our data from couchdb
  # $curlSilentOpt and $curlopt are deliberately left unquoted: each holds
  # zero or more separate curl options.
  if ! curl ${curlSilentOpt} ${curlopt} -X GET "$url/$db_name/_all_docs?include_docs=true&attachments=true" -o "${file_name}"; then
    echo "... ERROR: Curl encountered an issue whilst dumping the database."
    rm -f -- "${file_name}" 2>/dev/null
    exit 1
  fi
  # Check for export errors: an error response starts with {"error
  ERR_CHECK=$(head -n 1 < "${file_name}" | grep '^{"error')
  if [ -n "${ERR_CHECK}" ]; then
    echo "... ERROR: CouchDB reported: $ERR_CHECK"
    # Do not leave the error response behind; the next run would otherwise
    # stop at the "already exists" check.
    rm -f -- "${file_name}"
    exit 1
  fi
  # CouchDB has a tendency to emit Windows carriage returns in its output -
  # this interferes with the sed expressions that match at the end of lines.
  #
  # strip_carriage_returns FILE
  # Removes every carriage return from FILE when it contains any, going
  # through "FILE.tmp" after a free-space check. Returns 0 when nothing had
  # to be done or the conversion worked; ends the script with status 1 on
  # failure. All paths are quoted so names containing spaces are handled.
  strip_carriage_returns(){
    local target=$1
    local needed
    grep -qU $'\x0d' "$target" || return 0
    $echoVerbose && echo "... INFO: File may contain Windows carriage returns- converting..."
    needed=$(du -P -k "$target" | awk '{print $1}')
    checkdiskspace "$target" "$needed"
    if ! tr -d '\r' < "$target" > "${target}.tmp"; then
      # Do not leave a partial copy behind.
      rm -f -- "${target}.tmp"
      echo ".. ERROR: Failed to convert file."
      exit 1
    fi
    if ! mv -- "${target}.tmp" "$target"; then
      echo "... ERROR: Failed to overwrite ${target} with ${target}.tmp"
      exit 1
    fi
    $echoVerbose && echo "... INFO: Completed successfully."
    return 0
  }
  strip_carriage_returns "${file_name}"
  ## Now we parse the output file to make it suitable for re-import.
  $echoVerbose && echo "... INFO: Amending file to make it suitable for Import."
  $echoVerbose && echo "... INFO: Stage 1 - Document filtering"
  # Stage 1 deletes everything from {"id" up to and including ,"doc": on each
  # line. NOTE(review): the .* in that expression is greedy, so a document
  # that itself contains the text ,"doc": would presumably be cut at the last
  # occurrence - confirm against real data.
  filesize=$(du -P -k "${file_name}" | awk '{print $1}')
  linecount=$(( $(wc -l < "${file_name}") ))
  # Estimating 80byte saving per line... probably a little conservative depending on keysize.
  KBreduction=$(( linecount * 80 / 1024 ))
  # If the input file is larger than 250MB, multi-thread the parsing:
  if [ "$filesize" -ge 256000 ] && [ "$threads" -gt 1 ]; then
    # Room is needed for the original plus the (smaller) filtered copy.
    filesize=$(( filesize + filesize - KBreduction ))
    checkdiskspace "${file_name}" "$filesize"
    $echoVerbose && echo "... INFO: Multi-Threaded Parsing Enabled."
    # split is run with two-letter suffixes, so "aa" is the first chunk.
    if [ -f "${file_name}.threadaa" ]; then
      echo "... ERROR: Split files \"${file_name}.thread*\" already present. Please remove before continuing."
      exit 1
    elif [ -f "${file_name}.tmp" ]; then
      echo "... ERROR: Tempfile ${file_name}.tmp already present. Please remove before continuing."
      exit 1
    fi
    # Capture if someone happens to breach the defined limits of AZ2 var. If this
    # happens, we'll need to switch it out for AZ3. Checked before splitting so
    # that no chunk files are left behind.
    if [[ $threads -gt 650 ]]; then
      echo "Whoops- we hit a maximum limit here... \$AZ2 only allows for a maximum of 650 cores..."
      exit 1
    fi
    ### SPLIT INTO THREADS
    # With this chunk size split never writes more than $threads files.
    split_cal=$(( linecount / threads + threads ))
    if ! split -a 2 -l "${split_cal}" "${file_name}" "${file_name}.thread"; then
      echo "... ERROR: Unable to create split files."
      exit 1
    fi
    # Filter every chunk in the background, remembering the job ids.
    sed_pids=()
    count=0
    for suffix in ${AZ2}; do
      count=$(( count + 1 ))
      if [[ $count -gt $threads ]]; then
        break
      fi
      PADNAME="${file_name}.thread${suffix}"
      # Fewer chunks than threads may exist; stop at the first gap.
      [ -f "${PADNAME}" ] || break
      "$sed_cmd" "${sed_edit_in_place}" 's/{"id".*,"doc"://g' "${PADNAME}" &
      sed_pids+=("$!")
    done
    # Wait for each job individually so that a failing sed is noticed.
    sed_failed=false
    for pid in "${sed_pids[@]}"; do
      wait "$pid" || sed_failed=true
    done
    # Stitch the filtered chunks back together in order.
    count=0
    for suffix in ${AZ2}; do
      count=$(( count + 1 ))
      if [[ $count -gt $threads ]]; then
        break
      fi
      PADNAME="${file_name}.thread${suffix}"
      [ -f "${PADNAME}" ] || break
      cat "${PADNAME}" >> "${file_name}.tmp"
      rm -f "${PADNAME}" "${PADNAME}.sedtmp"
    done
    # The filtered copy must contain exactly as many lines as the original.
    if [ "$sed_failed" = false ] && [ "$(( $(wc -l < "${file_name}") ))" -eq "$(( $(wc -l < "${file_name}.tmp") ))" ]; then
      if ! mv "${file_name}.tmp" "${file_name}"; then
        echo "... ERROR: Failed to overwrite ${file_name}"
        exit 1
      fi
    else
      echo "... ERROR: Multi-threaded data parsing encountered an error."
      exit 1
    fi
  else
    filesize=$(( filesize - KBreduction ))
    checkdiskspace "${file_name}" "$filesize"
    if ! { "$sed_cmd" "${sed_edit_in_place}" 's/{"id".*,"doc"://g' "${file_name}" && rm -f "${file_name}.sedtmp"; }; then
      echo "Stage failed."
      exit 1
    fi
  fi
  # run_sed_stage EXPRESSION FILE
  # Applies one in-place sed EXPRESSION to FILE and removes the ".sedtmp"
  # backup that sed leaves behind. Ends the script with status 1 when either
  # step fails. FILE is quoted so names containing spaces are handled.
  run_sed_stage(){
    local expression=$1
    local target=$2
    if ! { "$sed_cmd" "${sed_edit_in_place}" "$expression" "$target" && rm -f "${target}.sedtmp"; }; then
      echo "Stage failed."
      exit 1
    fi
  }
  $echoVerbose && echo "... INFO: Stage 2 - Duplicate curly brace removal"
  # Approx 1Byte per line removed
  KBreduction=$(( $(wc -l < "${file_name}") / 1024 ))
  filesize=$(du -P -k "${file_name}" | awk '{print $1}')
  filesize=$(( filesize - KBreduction ))
  checkdiskspace "${file_name}" "$filesize"
  # Turn a trailing "}}," into "},".
  run_sed_stage 's/}},$/},/g' "${file_name}"
  $echoVerbose && echo "... INFO: Stage 3 - Header Correction"
  filesize=$(du -P -k "${file_name}" | awk '{print $1}')
  checkdiskspace "${file_name}" "$filesize"
  # Replace the whole first line with the bulk-docs header.
  run_sed_stage '1s/^.*/{"new_edits":false,"docs":[/' "${file_name}"
  $echoVerbose && echo "... INFO: Stage 4 - Final document line correction"
  filesize=$(du -P -k "${file_name}" | awk '{print $1}')
  checkdiskspace "${file_name}" "$filesize"
  # Turn a trailing "}}" into "}".
  run_sed_stage 's/}}$/}/g' "${file_name}"
  # If -z (compress) option then compress output file
  if [ "$compress" = true ]; then
    $echoVerbose && echo "... INFO: Stage 5 - File compression"
    # Only report success (and the .gz name) when gzip really succeeded.
    if ! gzip -- "$file_name"; then
      echo "... ERROR: Failed to compress ${file_name}"
      exit 1
    fi
    file_name="$file_name.gz"
  fi
  $echoVerbose && echo "... INFO: Export completed successfully. File available at: ${file_name}"
  exit 0
  433. ### Else if user selected Restore:
  434. elif [ $restore = true ]&&[ $backup = false ]; then
  435. #################################################################
  436. ##################### RESTORE START #############################
  437. #################################################################
  # Check if input exists:
  if [ ! -f "${file_name}" ]; then
    echo "... ERROR: Input file ${file_name} not found."
    exit 1
  fi
  #### VALIDATION END
  $echoVerbose && echo "... INFO: Checking for database"
  # Fetch the list of databases, retrying up to $attempts times. The counters
  # are compared numerically so that a value such as "03" still ends the loop.
  # $curlSilentOpt and $curlopt stay unquoted: each holds zero or more
  # separate curl options.
  attemptcount=0
  A=0
  until [ $A = 1 ]; do
    attemptcount=$(( attemptcount + 1 ))
    if existing_dbs=$(curl $curlSilentOpt $curlopt -X GET "${url}/_all_dbs"); then
      A=1
    elif [ "$attemptcount" -ge "$attempts" ]; then
      echo "... ERROR: Curl failed to get the list of databases - Stopping"
      exit 1
    else
      echo "... WARN: Curl failed to get the list of databases - Attempt ${attemptcount}/${attempts}. Retrying..."
      sleep 1
    fi
  done
  # Decide what to do based on the database list fetched earlier.
  if [[ ! "$existing_dbs" = "["*"]" ]]; then
    # The response is not a JSON array; carry on and let the import itself
    # report any problem.
    echo "... WARN: Curl failed to get the list of databases - Continuing"
    # Show the unexpected response when there is one to show.
    if [ -n "$existing_dbs" ]; then
      echo "... WARN: Curl just returned: $existing_dbs"
    fi
  elif [[ ! "$existing_dbs" = *"\"${db_name}\""* ]]; then
    # database was not listed as an existing database
    if [ "$createDBsOnDemand" = true ]; then
      attemptcount=0
      A=0
      until [ $A = 1 ]; do
        attemptcount=$(( attemptcount + 1 ))
        # Never read a response left over from an earlier attempt or run.
        rm -f tmp.out
        # If curl threw an error:
        if ! curl $curlSilentOpt $curlopt -X PUT "${url}/${db_name}" -o tmp.out; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: Curl failed to create the database ${db_name} - Stopping"
            if [ -f tmp.out ]; then
              echo -n "... ERROR: Error message was: "
              cat tmp.out
            else
              echo ".. ERROR: See above for any errors"
            fi
            rm -f tmp.out
            exit 1
          else
            echo "... WARN: Curl failed to create the database ${db_name} - Attempt ${attemptcount}/${attempts}. Retrying..."
            sleep 1
          fi
        # If curl was happy, but CouchDB returned an error in the return JSON:
        elif head -n 1 tmp.out | grep -q '^{"error":'; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: CouchDB Reported: $(head -n 1 tmp.out)"
            rm -f tmp.out
            exit 1
          else
            echo "... WARN: CouchDB Reported an error during db creation - Attempt ${attemptcount}/${attempts} - Retrying..."
            sleep 1
          fi
        # Otherwise, if everything went well, delete our temp files.
        else
          rm -f tmp.out
          A=1
        fi
      done
    else
      echo "... ERROR: corresponding database ${db_name} not yet created - Stopping"
      $echoVerbose && echo "... HINT: you could add the -c flag to create the database automatically"
      exit 1
    fi
  fi
  ## Stop bash mangling wildcard...
  set -o noglob
  # Manage Design Documents as a priority, and remove them from the main import job
  $echoVerbose && echo "... INFO: Checking for Design documents"
  # Find all _design docs, put them into another file
  design_file_name="${file_name}-design"
  grep '^{"_id":"_design' "${file_name}" > "${design_file_name}"
  # Count the design file (if it even exists)
  DESIGNS=$(wc -l "${design_file_name}" 2>/dev/null | awk '{print $1}')
  # If there's no design docs for import...
  if [ -z "$DESIGNS" ] || [ "$DESIGNS" = "0" ]; then
    # Cleanup any null files
    rm -f "${design_file_name}" 2>/dev/null
    $echoVerbose && echo "... INFO: No Design Documents found for import."
  else
    $echoVerbose && echo "... INFO: Duplicating original file for alteration"
    # Duplicate the original DB file, so we don't mangle the user's input file:
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "$filesize"
    cp -f "${file_name}" "${file_name}-nodesign"
    # Re-set file_name to be our new file.
    file_name="${file_name}-nodesign"
    # Remove these design docs from (our new) main file.
    $echoVerbose && echo "... INFO: Stripping _design elements from regular documents"
    checkdiskspace "${file_name}" "$filesize"
    "$sed_cmd" "${sed_edit_in_place}" '/^{"_id":"_design/d' "${file_name}" && rm -f "${file_name}.sedtmp"
    # Remove the trailing comma on the second-to-last line (the last document)
    $echoVerbose && echo "... INFO: Fixing end document"
    line=$(( $(wc -l < "${file_name}") - 1 ))
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "$filesize"
    "$sed_cmd" "${sed_edit_in_place}" "${line}s/,$//" "${file_name}" && rm -f "${file_name}.sedtmp"
    $echoVerbose && echo "... INFO: Inserting Design documents"
    designcount=0
    # For each design doc...
    while IFS="" read -r line; do
      design_doc="${design_file_name}.${designcount}"
      design_out="${design_file_name}.out.${designcount}"
      # Split the ID out for use as the import URL path. printf with a quoted
      # argument keeps the line intact (no word splitting of the JSON).
      URLPATH=$(printf '%s\n' "$line" | awk -F'"' '{print $4}')
      # Scrap the ID and Rev from the main data, as well as any trailing ','
      printf '%s\n' "${line}" | "$sed_cmd" -${sed_regexp_option}e "s@^\{\"_id\":\"${URLPATH}\",\"_rev\":\"[0-9]*-[0-9a-zA-Z_\-]*\",@\{@" | "$sed_cmd" -e 's/,$//' > "${design_doc}"
      # Fix Windows CRLF
      if grep -qU $'\x0d' "${design_doc}"; then
        $echoVerbose && echo "... INFO: File contains Windows carriage returns- converting..."
        filesize=$(du -P -k "${design_doc}" | awk '{print $1}')
        checkdiskspace "${file_name}" "$filesize"
        if ! tr -d '\r' < "${design_doc}" > "${design_doc}.tmp"; then
          echo ".. ERROR: Failed to convert file."
          exit 1
        fi
        if ! mv "${design_doc}.tmp" "${design_doc}"; then
          echo "... ERROR: Failed to overwrite ${design_doc} with ${design_doc}.tmp"
          exit 1
        fi
        $echoVerbose && echo "... INFO: Completed successfully."
      fi
      # Insert this file into the DB, retrying up to $attempts times. The
      # counters are compared numerically so the loop always terminates.
      A=0
      attemptcount=0
      until [ $A = 1 ]; do
        attemptcount=$(( attemptcount + 1 ))
        # If curl threw an error:
        if ! curl $curlSilentOpt ${curlopt} -T "${design_doc}" -X PUT "${url}/${db_name}/${URLPATH}" -H 'Content-Type: application/json' -o "${design_out}"; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: Curl failed trying to restore ${design_doc} - Stopping"
            exit 1
          else
            echo "... WARN: Import of ${design_doc} failed - Attempt ${attemptcount}/${attempts}. Retrying..."
            sleep 1
          fi
        # If curl was happy, but CouchDB returned an error in the return JSON:
        elif head -n 1 "${design_out}" | grep -q '^{"error":'; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: CouchDB Reported: $(head -n 1 "${design_out}")"
            exit 1
          else
            echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
            sleep 1
          fi
        # Otherwise, if everything went well, delete our temp files.
        else
          A=1
          rm -f "${design_out}"
          rm -f "${design_doc}"
        fi
      done
      # Increase design count - mainly used for the INFO at the end.
      designcount=$(( designcount + 1 ))
    # NOTE: This is where we insert the design lines exported from the main block
    done < "${design_file_name}"
    $echoVerbose && echo "... INFO: Successfully imported ${designcount} Design Documents"
  fi
  set +o noglob
  # If the file to import has fewer lines than $lines, don't worry about splitting
  if [ "$(( $(wc -l < "$file_name") ))" -lt "$lines" ]; then
    $echoVerbose && echo "... INFO: Small dataset. Importing as a single file."
    A=0
    attemptcount=0
    until [ $A = 1 ]; do
      attemptcount=$(( attemptcount + 1 ))
      # Never judge this attempt by a response left over from an earlier one.
      rm -f tmp.out
      # The import only counts as successful when curl itself succeeded AND
      # the response does not start with an error object.
      import_ok=false
      if curl $curlSilentOpt $curlopt -T "$file_name" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out; then
        if ! head -n 1 tmp.out | grep -q '^{"error":'; then
          import_ok=true
        fi
      fi
      if [ "$import_ok" = true ]; then
        $echoVerbose && echo "... INFO: Imported ${file_name_orig} Successfully."
        rm -f tmp.out
        rm -f "${file_name_orig}-design"
        rm -f "${file_name_orig}-nodesign"
        exit 0
      elif [ "$attemptcount" -ge "$attempts" ]; then
        echo "... ERROR: Import of ${file_name_orig} failed."
        if [ -f tmp.out ]; then
          echo -n "... ERROR: Error message was: "
          cat tmp.out
        else
          echo ".. ERROR: See above for any errors"
        fi
        rm -f tmp.out
        exit 1
      else
        echo "... WARN: Import of ${file_name_orig} failed - Attempt ${attemptcount}/${attempts} - Retrying..."
        sleep 1
      fi
    done
  # Otherwise, it's a large import that requires bulk insertion.
  else
    $echoVerbose && echo "... INFO: Block import set to ${lines} lines."
    if [ -f "${file_name}.splitaaa" ]; then
      echo "... ERROR: Split files \"${file_name}.split*\" already present. Please remove before continuing."
      exit 1
    fi
    importlines=$(grep -c . "${file_name}")
    # Due to the file limit imposed by the pre-calculated AZ3 variable, max split files is 15600 (alpha x 3positions)
    if [[ $(( importlines / lines )) -gt 15600 ]]; then
      echo "... ERROR: Pre-processed split variable limit of 15600 files reached."
      echo " Please increase the '-l' parameter (Currently: $lines) and try again."
      exit 1
    fi
    $echoVerbose && echo "... INFO: Generating files to import"
    filesize=$(du -P -k "${file_name}" | awk '{print $1}')
    checkdiskspace "${file_name}" "$filesize"
    ### Split the file into many
    if ! split -a 3 -l "${lines}" "${file_name}" "${file_name}.split"; then
      echo "... ERROR: Unable to create split files."
      exit 1
    fi
    # Every chunk has to start with the first line and end with the last
    # line of the complete file.
    HEADER=$(head -n 1 "$file_name")
    FOOTER=$(tail -n 1 "$file_name")
    count=0
    for PADNUM in $AZ3; do
      PADNAME="${file_name}.split${PADNUM}"
      # The first missing chunk marks the end of the import.
      if [ ! -f "${PADNAME}" ]; then
        echo "... INFO: Import Cycle Completed."
        break
      fi
      if [ ! "$(head -n 1 "${PADNAME}")" = "${HEADER}" ]; then
        $echoVerbose && echo "... INFO: Adding header to ${PADNAME}"
        filesize=$(du -P -k "${PADNAME}" | awk '{print $1}')
        checkdiskspace "${PADNAME}" "$filesize"
        "$sed_cmd" "${sed_edit_in_place}" "1i${HEADER}" "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
      else
        $echoVerbose && echo "... INFO: Header already applied to ${PADNAME}"
      fi
      if [ ! "$(tail -n 1 "${PADNAME}")" = "${FOOTER}" ]; then
        $echoVerbose && echo "... INFO: Adding footer to ${PADNAME}"
        filesize=$(du -P -k "${PADNAME}" | awk '{print $1}')
        checkdiskspace "${PADNAME}" "$filesize"
        # Drop the comma after the chunk's last document, then close the array.
        "$sed_cmd" "${sed_edit_in_place}" '$s/,$//g' "${PADNAME}" && rm -f "${PADNAME}.sedtmp"
        echo "${FOOTER}" >> "${PADNAME}"
      else
        $echoVerbose && echo "... INFO: Footer already applied to ${PADNAME}"
      fi
      $echoVerbose && echo "... INFO: Inserting ${PADNAME}"
      A=0
      attemptcount=0
      until [ $A = 1 ]; do
        attemptcount=$(( attemptcount + 1 ))
        if ! curl $curlSilentOpt $curlopt -T "${PADNAME}" -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: Curl failed trying to restore ${PADNAME} - Stopping"
            exit 1
          else
            echo "... WARN: Failed to import ${PADNAME} - Attempt ${attemptcount}/${attempts} - Retrying..."
            sleep 1
          fi
        elif head -n 1 tmp.out | grep -q '^{"error":'; then
          if [ "$attemptcount" -ge "$attempts" ]; then
            echo "... ERROR: CouchDB Reported: $(head -n 1 tmp.out)"
            exit 1
          else
            echo "... WARN: CouchDB Reported and error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
            sleep 1
          fi
        else
          A=1
          rm -f "${PADNAME}"
          rm -f tmp.out
          count=$(( count + 1 ))
        fi
      done
      $echoVerbose && echo "... INFO: Successfully Imported ${count} Files"
    done
    # The helper files are only removed once the whole import cycle is over.
    rm -f "${file_name_orig}-design"
    rm -f "${file_name_orig}-nodesign"
  fi
  724. fi