Browse source

export/import couchdb 1st try

Chris Vogel 1 year ago
parent
commit
01a864eef1

+ 47 - 0
scripts/_common.sh

@@ -98,6 +98,53 @@ flohmarkt_ynh_up_inst_couchdb() {
           --package="couchdb"
 }
 
+# dump this instance's couchdb database to a JSON file in the backup directory
+flohmarkt_ynh_dump_couchdb() {
+  ../settings/scripts/couchdb-dump/couchdb-dump.sh -b -H 127.0.0.1 -d "${app}" \
+    -u admin -p "${password_couchdb_admin}" -f "${YNH_CWD}/${app}.json"
+}
+
+flohmarkt_ynh_delete_couchdb_user() {
+  # https://codeberg.org/flohmarkt/flohmarkt_ynh/issues/46 - more than one revision?
+  local couchdb_user_revision=$( curl -sX GET "http://127.0.0.1:5984/_users/org.couchdb.user%3A${app}" \
+    --user "admin:${password_couchdb_admin}" | jq -r ._rev )
+  curl -s -X DELETE "http://127.0.0.1:5984/_users/org.couchdb.user%3A${app}?rev=${couchdb_user_revision}" \
+    --user "admin:${password_couchdb_admin}"
+}
+
+flohmarkt_ynh_delete_couchdb_db() {
+  curl -s -X DELETE "http://127.0.0.1:5984/${app}" --user "admin:${password_couchdb_admin}"
+}
+
+# restore this instance's couchdb database from the JSON dump ('-c' creates the db if missing)
+flohmarkt_ynh_import_couchdb() {
+  ../settings/scripts/couchdb-dump/couchdb-dump.sh -r -c -H 127.0.0.1 -d "${app}" \
+    -u admin -p "${password_couchdb_admin}" -f "${YNH_CWD}/${app}.json"
+}
+
+flohmarkt_ynh_create_couchdb_user() {
+  curl -X PUT "http://127.0.0.1:5984/_users/org.couchdb.user:${app}" --user "admin:${password_couchdb_admin}" \
+    -H "Accept: application/json" -H "Content-Type: application/json" \
+    -d "{\"name\": \"${app}\", \"password\": \"${password_couchdb_flohmarkt}\", \"roles\": [], \"type\": \"user\"}"
+}
+
+# grant the instance's couchdb user member access to its database
+flohmarkt_ynh_couchdb_user_permissions() {
+  curl -s -X PUT "http://127.0.0.1:5984/${app}/_security" --user "admin:${password_couchdb_admin}" \
+    -H "Accept: application/json" -H "Content-Type: application/json" \
+    -d "{\"members\":{\"names\": [\"${app}\"],\"roles\": [\"editor\"]}}"
+}
+
+flohmarkt_ynh_restore_couchdb() {
+  # TODO: for now, make sure the db user and the db do not exist before importing
+  flohmarkt_ynh_delete_couchdb_user || true
+  flohmarkt_ynh_delete_couchdb_db || true
+
+  flohmarkt_ynh_import_couchdb
+  flohmarkt_ynh_create_couchdb_user
+  flohmarkt_ynh_couchdb_user_permissions
+}
+
 # create venv
 flohmarkt_ynh_create_venv() {
   python3 -m venv --without-pip "$flohmarkt_venv_dir"

+ 10 - 7
scripts/backup

@@ -16,8 +16,8 @@ ynh_backup --src_path="${flohmarkt_log_dir}"
 # for the following backups we'll want to stop flohmarkt and couchdb
# to guarantee a consistent state
 ynh_print_info --message="Stopping flohmarkt and couchdb to backup data..."
-yunohost service stop $flohmarkt_filename
-systemctl stop couchdb
+flohmarkt_ynh_stop_service
+flohmarkt_ynh_stop_couchdb
 
 # https://codeberg.org/ChriChri/flohmarkt_ynh/issues/24
 # since this might be re-installed as a dependency during 'remove' and 
@@ -28,12 +28,15 @@ systemctl stop couchdb
 # 
# if this becomes a pain we'll need to stop deleting these directories on 'remove'
 # ynh_backup --src_path="$data_dir" --is_big
-# ynh_backup --src_path="/var/lib/couchdb" --is_big
 ynh_backup --src_path="$flohmarkt_data_dir"
-ynh_backup --src_path="/var/lib/couchdb"
 
-ynh_print_info --message="...done. Starting couchdb and flohmarkt."
-systemctl start couchdb
+ynh_print_info --message="Starting couchdb..."
+flohmarkt_ynh_start_couchdb
+
+ynh_print_info --message="Dumping couchdb..."
+flohmarkt_ynh_dump_couchdb
+
+ynh_print_info --message="Starting flohmarkt..."
 flohmarkt_ynh_start_service
 
-ynh_print_info --message="Backup script completed for $app. (YunoHost will then actually copy those files to the archive)."
+ynh_print_info --message="Backup script completed for $app."

+ 23 - 0
scripts/couchdb-dump/LICENSE

@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015  	Daniele Bailo - daniele@danielebailo.it - www.danielebailo.it
+			Darren Gibbard - dalgibbard@gmail.com - dgunix.com
+
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 70 - 0
scripts/couchdb-dump/README.md

@@ -0,0 +1,70 @@
+Couchdb-dump (& restore)
+============
+
+Works on Linux/UNIX Bash-based systems (including macOS).
+
+**Bash command line script to EASILY Backup & Restore a CouchDB database**
+
+ * Needs bash (plus curl, tr, file, split, awk, sed)
+ * Dumped database is output to a file (configurable).
+
+## Quickstart (& quickend)
+* Backup:
+
+```bash couchdb-dump.sh -b -H 127.0.0.1 -d my-db -f dumpedDB.json -u admin -p password```
+
+* Restore:
+
+```bash couchdb-dump.sh -r -H 127.0.0.1 -d my-db -f dumpedDB.json -u admin -p password```
+
+## Why do you need it?
+Surprisingly, there is no straightforward way to dump a CouchDB database. You are often advised to replicate it, or to dump it with the CouchDB `_all_docs` directive.
+
+**But using the `_all_docs` directive provides you with JSON which cannot be directly re-imported back into CouchDB**.
+
+Hence, the goal of this script is to give you a simple way to Dump & Restore your CouchDB database.
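+
+For illustration (shapes abbreviated, not verbatim CouchDB output): `_all_docs?include_docs=true` wraps each document in a row object, while the `_bulk_docs` endpoint expects a bare `docs` array; converting the former into the latter is exactly the rewrite this script performs:
+
+```json
+{"total_rows": 1, "offset": 0, "rows": [
+  {"id": "doc1", "key": "doc1", "value": {"rev": "1-abc"}, "doc": {"_id": "doc1", "_rev": "1-abc", "item": "bike"}}
+]}
+```
+
+becomes
+
+```json
+{"new_edits": false, "docs": [
+  {"_id": "doc1", "_rev": "1-abc", "item": "bike"}
+]}
+```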
+
+## NOTE
+
+Attachments in database documents are only supported in CouchDB 1.6+.
+
+## Usage
+```
+Usage: ./couchdb-dump.sh [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>]
+	-b   Run script in BACKUP mode.
+	-r   Run script in RESTORE mode.
+	-H   CouchDB Hostname or IP. Can be provided with or without 'http(s)://'
+	-d   CouchDB Database name to backup/restore.
+	-f   File to Backup-to/Restore-from.
+	-P   Provide a port number for CouchDB [Default: 5984]
+	-u   Provide a username for auth against CouchDB [Default: blank]
+	       -- can also be set with the 'COUCHDB_USER' environment var
+	-p   Provide a password for auth against CouchDB [Default: blank]
+	       -- can also be set with the 'COUCHDB_PASS' environment var
+	-l   Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)
+	-t   Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)
+	-a   Number of times to Attempt import before failing [Default: 3] (Restore Only)
+	-c   Create DB on demand, if it is not listed.
+	-q   Run in quiet mode. Suppress output, except for errors and warnings.
+	-z   Compress output file (Backup Only)
+	-T   Add datetime stamp to output file name (Backup Only)
+	-V   Display version information.
+	-h   Display usage information.
+
+Example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password
+```
+
+### Bonus 1! Full Database Compaction
+In the past, we've used this script to greatly compress a bloated database.
+In our use case, we had non-sequential IDs which caused CouchDB's B-Tree to balloon out of control, even with daily compactions.
+
+**How does this fix work?**
+When running the export, all of the documents are pulled out in ID order. When re-importing these (now sorted) documents, the B-Tree can be created in a much more efficient manner. We've seen a 15GB database file, containing only 2.1GB of raw JSON, reduced to 2.5GB on disk after import!
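+
+A minimal round-trip sketch (the names are hypothetical; assumes admin credentials and that the database may be dropped and recreated):
+
+```bash
+# dump in ID order, drop the bloated database, then re-import into a freshly created one ('-c')
+bash couchdb-dump.sh -b -H 127.0.0.1 -d bloated-db -f compacted.json -u admin -p password
+curl -u admin:password -X DELETE "http://127.0.0.1:5984/bloated-db"
+bash couchdb-dump.sh -r -c -H 127.0.0.1 -d bloated-db -f compacted.json -u admin -p password
+```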
+
+### Bonus 2! Purge Historic and Deleted Data
+CouchDB is an append-only database. When you delete records, the metadata is maintained for future reference, and is never fully deleted. All documents also retain a historic revision count.
+With the above points in mind: the export and import do not include deleted documents or old revisions; therefore, using this script, you can export and re-import your data, cleansing it of any previously (logically) deleted data!
+
+If you pair this with deletion and re-creation of replication rules (using the 'update_seq' parameter to avoid re-pulling the entire DB/deleted documents from a remote node), you can manually compact and clean an entire cluster of waste, node by node.
+Note, though: after creating all the rules with a fixed update_seq, once the entire cluster is done you will need to destroy and recreate all replication rules without the fixed update_seq; otherwise, when restarting a node etc., replication will restart from the old sequence.
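+
+As a sketch of the replication-rule step (an assumption to verify against your CouchDB version: replicator documents accept a `since_seq` option, which is the sequence the 'update_seq' note above refers to; all names here are hypothetical):
+
+```bash
+# recreate a pull replication that skips already-purged history up to a known sequence
+curl -u admin:password -X PUT "http://127.0.0.1:5984/_replicator/pull-mydb" \
+  -H 'Content-Type: application/json' \
+  -d '{"source": "http://remote:5984/mydb", "target": "http://127.0.0.1:5984/mydb", "continuous": true, "since_seq": "1234-g1AAAA"}'
+```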
+

+ 780 - 0
scripts/couchdb-dump/couchdb-dump.sh

@@ -0,0 +1,780 @@
+#!/bin/bash
+##
+#    AUTHOR: DANIELE BAILO
+#    https://github.com/danielebailo
+#    www.danielebailo.it
+#
+#    Contributors:
+#     * dalgibbard      - http://github.com/dalgibbard
+#     * epos-eu         - http://github.com/epos-eu
+#     * maximilianhuber - http://github.com/maximilianhuber
+#     * ahodgkinson     - http://github.com/ahodgkinson (quiet-mode, timestamp, compress)
+##
+
+## This script allows for the Backup and Restore of a CouchDB Database.
+## Backups are produced in a format that can be later uploaded with the bulk docs directive (as used by this script)
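+## e.g. a file produced with '-b' could also be pushed back manually in one shot, which is what
+## '-r' mode does in chunks:  curl -u user:pass -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -T mydb.json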
+
+## USAGE
+## * To Backup:
+## ** example: ./couchdb-dump.sh -b -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
+## * To Restore:
+## ** example: ./couchdb-dump.sh -r -H 127.0.0.1 -d mydb -u admin -p password -f mydb.json
+
+
+###################### CODE STARTS HERE ###################
+scriptversionnumber="1.1.10"
+
+##START: FUNCTIONS
+usage(){
+    echo
+    echo "Usage: $0 [-b|-r] -H <COUCHDB_HOST> -d <DB_NAME> -f <BACKUP_FILE> [-u <username>] [-p <password>] [-P <port>] [-l <lines>] [-t <threads>] [-a <import_attempts>]"
+    echo -e "\t-b   Run script in BACKUP mode."
+    echo -e "\t-r   Run script in RESTORE mode."
+    echo -e "\t-H   CouchDB Hostname or IP. Can be provided with or without 'http(s)://'"
+    echo -e "\t-d   CouchDB Database name to backup/restore."
+    echo -e "\t-f   File to Backup-to/Restore-from."
+    echo -e "\t-P   Provide a port number for CouchDB [Default: 5984]"
+    echo -e "\t-u   Provide a username for auth against CouchDB [Default: blank]"
+    echo -e "\t       -- can also set with 'COUCHDB_USER' environment var"
+    echo -e "\t-p   Provide a password for auth against CouchDB [Default: blank]"
+    echo -e "\t       -- can also set with 'COUCHDB_PASS' environment var"
+    echo -e "\t-l   Number of lines (documents) to Restore at a time. [Default: 5000] (Restore Only)"
+    echo -e "\t-t   Number of CPU threads to use when parsing data [Default: nProcs-1] (Backup Only)"
+    echo -e "\t-a   Number of times to Attempt import before failing [Default: 3] (Restore Only)"
+    echo -e "\t-c   Create DB on demand, if they are not listed."
+    echo -e "\t-q   Run in quiet mode. Suppress output, except for errors and warnings."
+    echo -e "\t-z   Compress output file (Backup Only)"
+    echo -e "\t-T   Add datetime stamp to output file name (Backup Only)"
+    echo -e "\t-V   Display version information."
+    echo -e "\t-h   Display usage information."
+    echo
+    echo "Example: $0 -b -H 127.0.0.1 -d mydb -f dumpedDB.json -u admin -p password"
+    echo
+    exit 1
+}
+
+scriptversion(){
+    echo
+    echo -e "\t** couchdb-dump version: $scriptversionnumber **"
+    echo
+    echo -e "\t URL:\thttps://github.com/danielebailo/couchdb-dump"
+    echo
+    echo -e "\t Authors:"
+    echo -e "\t Daniele Bailo    (bailo.daniele@gmail.com)"
+    echo -e "\t Darren Gibbard   (dalgibbard@gmail.com)"
+    echo -e "\t Maximilian Huber (maximilian.huber@tngtech.com)"
+    echo
+    exit 1
+}
+
+checkdiskspace(){
+## This function checks available diskspace for a required path, vs space required
+## Example call:   checkdiskspace /path/to/file/to/create 1024
+    location=$1
+    KBrequired=$2
+    if [ "x$location" = "x" ]||[ "x$KBrequired" = "x" ]; then
+        echo "... ERROR: checkdiskspace() was not passed the correct arguments."
+        exit 1
+    fi
+
+    stripdir=${location%/*}
+    KBavail=$(df -P -k ${stripdir} | tail -n 1 | awk '{print$4}' | $sed_cmd -e 's/K$//')
+
+    if [ $KBavail -ge $KBrequired ]; then
+        return 0
+    else
+        echo
+        echo "... ERROR: Insufficient Disk Space Available:"
+        echo "        * Full Path:            ${location}"
+        echo "        * Affected Directory:   ${stripdir}"
+        echo "        * Space Available:      ${KBavail} KB"
+        echo "        * Total Space Required: ${KBrequired} KB"
+        echo "        * Additional Space Req: $(expr $KBrequired - $KBavail) KB"
+        echo
+        exit 1
+    fi
+}
+## END FUNCTIONS
+
+# Catch no args:
+if [ "x$1" = "x" ]; then
+    usage
+fi
+
+# Default Args
+username=""
+password=""
+backup=false
+restore=false
+port=5984
+OPTIND=1
+lines=5000
+attempts=3
+createDBsOnDemand=false
+verboseMode=true
+compress=false
+timestamp=false
+
+while getopts ":h?H:d:f:u:p:P:l:t:a:c?q?z?T?V?b?B?r?R?" opt; do
+    case "$opt" in
+        h) usage;;
+        b|B) backup=true ;;
+        r|R) restore=true ;;
+        H) url="$OPTARG" ;;
+        d) db_name="$OPTARG" ;;
+        f) file_name="$OPTARG" ;;
+        u) username="${OPTARG}";;
+        p) password="${OPTARG}";;
+        P) port="${OPTARG}";;
+        l) lines="${OPTARG}" ;;
+        t) threads="${OPTARG}" ;;
+        a) attempts="${OPTARG}";;
+        c) createDBsOnDemand=true;;
+        q) verboseMode=false;;
+        z) compress=true;;
+        T) timestamp=true;;
+        V) scriptversion;;
+        :) echo "... ERROR: Option \"-${OPTARG}\" requires an argument"; usage ;;
+        *|\?) echo "... ERROR: Unknown Option \"-${OPTARG}\""; usage;;
+    esac
+done
+
+# If quiet option: Setup echo mode and curl '--silent' opt
+if [ "$verboseMode" = true ]; then
+  curlSilentOpt=""
+  echoVerbose=true
+else
+  curlSilentOpt="--silent"
+  echoVerbose=false
+fi
+
+# Trap unexpected extra args
+shift $((OPTIND-1))
+[ "$1" = "--" ] && shift
+if [ ! "x$@" = "x" ]; then
+    echo "... ERROR: Unknown Option \"$@\""
+    usage
+fi
+
+# Handle invalid backup/restore states:
+if [ $backup = true ]&&[ $restore = true ]; then
+    echo "... ERROR: Cannot pass both '-b' and '-r'"
+    usage
+elif [ $backup = false ]&&[ $restore = false ]; then
+    echo "... ERROR: Missing argument '-b' (Backup), or '-r' (Restore)"
+    usage
+fi
+# Handle empty args
+# url
+if [ "x$url" = "x" ]; then
+    echo "... ERROR: Missing argument '-H <COUCHDB_HOST>'"
+    usage
+fi
+# db_name
+if [ "x$db_name" = "x" ]; then
+    echo "... ERROR: Missing argument '-d <DB_NAME>'"
+    usage
+fi
+# file_name
+if [ "x$file_name" = "x" ]; then
+    echo "... ERROR: Missing argument '-f <FILENAME>'"
+    usage
+fi
+file_name_orig=$file_name
+
+# Get OS TYPE (Linux for Linux, Darwin for MacOSX)
+os_type=`uname -s`
+
+# Pick sed or gsed
+if [ "$os_type" = "FreeBSD" ]||[ "$os_type" = "Darwin" ]; then
+    sed_cmd="gsed";
+else
+    sed_cmd="sed";
+fi
+## Make sure it's installed
+echo | $sed_cmd 's/a//' >/dev/null 2>&1 
+if [ ! $? = 0 ]; then
+    echo "... ERROR: please install $sed_cmd (gnu-sed) and ensure it is in your path"
+    exit 1
+fi
+
+# Validate thread count
+## If we're on a Mac, use sysctl
+if [ "$os_type" = "Darwin" ]; then
+    cores=`sysctl -n hw.ncpu`
+## If we're on FreeBSD, use sysctl
+elif [ "$os_type" = "FreeBSD" ]; then
+    cores=`sysctl kern.smp.cpus | awk -F ": " '{print $2}'`;
+## Check if nproc available- set cores=1 if not
+elif ! type nproc >/dev/null; then
+    cores=1
+## Otherwise use nproc
+else
+    cores=`nproc`
+fi
+if [ ! "x$threads" = "x" ]; then
+    if [ $threads -gt $cores ]; then
+        echo "... WARN: Thread setting of $threads is more than CPU count. Setting to $cores"
+        threads=$cores
+    else
+        $echoVerbose && echo "... INFO: Setting parser threads to $threads"
+    fi
+else
+    threads=`expr $cores - 1`
+fi
+
+# Validate Attempts, set to no-retry if zero/invalid.
+case $attempts in
+    ''|0|*[!0-9]*) echo "... WARN: Retry Attempt value of \"$attempts\" is invalid. Disabling Retry-on-Error."; attempts=1 ;;
+    *) true ;;
+esac
+
+## Manage the passing of http/https for $url:
+# Note; if the user wants to use 'https://' on a non-443 port they must specify it exclusively in the '-H <HOSTNAME>' arg.
+if [ ! "`echo $url | grep -c http`" = 1 ]; then
+    if [ "$port" == "443" ]; then
+        url="https://$url";
+    else
+        url="http://$url";
+    fi
+fi
+
+# Manage the addition of port
+# If a port isn't already on our URL...
+if [ ! "`echo $url | egrep -c ":[0-9]*$"`" = "1" ]; then
+    # add it.
+    url="$url:$port"
+fi	
+
+# Check for empty user/pass and try reading in from Envvars
+if [ "x$username" = "x" ]; then
+    username="$COUCHDB_USER"
+fi
+if [ "x$password" = "x" ]; then
+    password="$COUCHDB_PASS"
+fi
+
+## Manage the addition of user+pass if needed:
+# Ensure, if one is set, both are set.
+if [ ! "x${username}" = "x" ]; then
+    if [ "x${password}" = "x" ]; then
+        echo "... ERROR: Password cannot be blank, if username is specified."
+        usage
+    fi
+elif [ ! "x${password}" = "x" ]; then
+    if [ "x${username}" = "x" ]; then
+        echo "... ERROR: Username cannot be blank, if password is specified."
+        usage
+    fi
+fi
+
+# Check for sed option
+sed_edit_in_place='-i.sedtmp'
+if [ "$os_type" = "Darwin" ]; then
+    sed_regexp_option='E'
+else
+    sed_regexp_option='r'
+fi
+# Allow for self-signed/invalid certs if method is HTTPS:
+if [ "`echo $url | grep -ic "^https://"`" = "1" ]; then
+	curlopt="-k"
+fi
+
+if [ ! "x${username}" = "x" ]&&[ ! "x${password}" = "x" ]; then
+    curlopt="${curlopt} -u ${username}:${password}"
+fi
+
+## Check for curl
+curl --version >/dev/null 2>&1 || { echo "... ERROR: This script requires 'curl' to be present."; exit 1; }
+
+# Check for tr
+echo | tr -d '\r' >/dev/null 2>&1 || { echo "... ERROR: This script requires 'tr' to be present."; exit 1; }
+
+##### SETUP OUR LARGE VARS FOR SPLIT PROCESSING (due to limitations in split on Darwin/BSD)
+AZ2="`echo {a..z}{a..z}`"
+AZ3="`echo {a..z}{a..z}{a..z}`"
+
+### If user selected BACKUP, run the following code:
+if [ $backup = true ]&&[ $restore = false ]; then
+    #################################################################
+    ##################### BACKUP START ##############################
+    #################################################################
+
+    # If -T (timestamp) option, append datetime stamp ("-YYYYMMDD-hhmmss") before file extension
+    if [ "$timestamp" = true ]; then
+      datetime=`date "+%Y%m%d-%H%M%S"`						# Format: YYYYMMDD-hhmmss
+      # Check for file_name extension, if so add the timestamp before it
+      if [[ $file_name =~ \.[a-zA-Z0-9][a-zA-Z0-9_]* ]]; then
+        file_name_ext=` echo "$file_name" | $sed_cmd 's/.*\.//'`		# Get text after last '.'
+        file_name_base=`echo "$file_name" | $sed_cmd "s/\.${file_name_ext}$//"`	# file_name without '.' & extension
+        file_name="$file_name_base-$datetime.$file_name_ext"
+      else # Otherwise add timestamp to the end of file_name
+        file_name="$file_name-$datetime"
+      fi
+    fi
+    $echoVerbose && echo "... INFO: Output file ${file_name}"
+
+    # Check if output already exists:
+    if [ -f ${file_name} ]; then
+        echo "... ERROR: Output file ${file_name} already exists."
+        exit 1
+    fi
+
+    # Grab our data from couchdb
+    curl ${curlSilentOpt} ${curlopt} -X GET "$url/$db_name/_all_docs?include_docs=true&attachments=true" -o ${file_name}
+    # Check for curl errors
+    if [ ! $? = 0 ]; then
+        echo "... ERROR: Curl encountered an issue whilst dumping the database."
+        rm -f ${file_name} 2>/dev/null
+        exit 1
+    fi
+    # Check for export errors
+    ERR_CHECK="`head -n 1 ${file_name} | grep '^{"error'`"
+    if [ ! "x${ERR_CHECK}" = "x" ]; then
+        echo "... ERROR: CouchDB reported: $ERR_CHECK"
+        exit 1
+    fi
+
+    # CouchDB has a tendency to output Windows carriage returns in its output -
+    # this messes up our attempts to sed things at the ends of lines!
+    if grep -qU $'\x0d' $file_name; then
+        $echoVerbose && echo "... INFO: File may contain Windows carriage returns- converting..."
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        checkdiskspace "${file_name}" $filesize
+        tr -d '\r' < ${file_name} > ${file_name}.tmp
+        if [ $? = 0 ]; then
+            mv ${file_name}.tmp ${file_name}
+            if [ $? = 0 ]; then
+                $echoVerbose && echo "... INFO: Completed successfully."
+            else
+                echo "... ERROR: Failed to overwrite ${file_name} with ${file_name}.tmp"
+                exit 1
+            fi
+        else
+            echo ".. ERROR: Failed to convert file."
+            exit 1
+        fi
+    fi
+
+    ## Now we parse the output file to make it suitable for re-import.
+    $echoVerbose && echo "... INFO: Amending file to make it suitable for Import."
+    $echoVerbose && echo "... INFO: Stage 1 - Document filtering"
+
+    # If the input file is larger than 250MB, multi-thread the parsing:
+    if [ $(du -P -k ${file_name} | awk '{print$1}') -ge 256000 ]&&[ ! $threads -le 1 ]; then
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        KBreduction=$(($((`wc -l ${file_name} | awk '{print$1}'` * 80)) / 1024))
+        filesize=`expr $filesize + $(expr $filesize - $KBreduction)`
+        checkdiskspace "${file_name}" $filesize
+        $echoVerbose && echo "... INFO: Multi-Threaded Parsing Enabled."
+        if [ -f ${file_name}.thread000000 ]; then
+            echo "... ERROR: Split files \"${file_name}.thread*\" already present. Please remove before continuing."
+            exit 1
+        elif [ -f ${file_name}.tmp ]; then
+            echo "... ERROR: Tempfile ${file_name}.tmp already present. Please remove before continuing."
+            exit 1
+        fi
+
+        ### SPLIT INTO THREADS
+        split_cal=$(( $((`wc -l ${file_name} | awk '{print$1}'` / $threads)) + $threads ))
+        #split --numeric-suffixes --suffix-length=6 -l ${split_cal} ${file_name} ${file_name}.thread
+        split -a 2 -l ${split_cal} ${file_name} ${file_name}.thread
+        if [ ! "$?" = "0" ]; then
+            echo "... ERROR: Unable to create split files."
+            exit 1
+        fi
+
+        # Capture if someone happens to breach the defined limits of AZ2 var. If this happens, we'll need to switch it out for AZ3 ...
+        if [[ $threads -gt 650 ]]; then
+            echo "Whoops- we hit a maximum limit here... \$AZ2 only allows for a maximum of 650 cores..."
+            exit 1
+        fi
+
+        count=0
+        for suffix in ${AZ2}; do
+            (( count++ ))
+            if [[ $count -gt $threads ]]; then
+                break
+            fi
+            PADNAME="${file_name}.thread${suffix}"
+            $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' ${PADNAME} &
+        done
+        wait
+        count=0
+        for suffix in ${AZ2}; do
+            (( count++ ))
+            if [[ $count -gt $threads ]]; then
+                break
+            fi
+            PADNAME="${file_name}.thread${suffix}"
+            cat ${PADNAME} >> ${file_name}.tmp
+            rm -f ${PADNAME} ${PADNAME}.sedtmp
+            (( NUM++ ))
+        done
+        if [ `wc -l ${file_name} | awk '{print$1}'` = `wc -l ${file_name}.tmp | awk '{print$1}'` ]; then
+            mv ${file_name}{.tmp,}
+            if [ ! $? = 0 ]; then
+                echo "... ERROR: Failed to overwrite ${file_name}"
+                exit 1
+            fi
+        else
+            echo "... ERROR: Multi-threaded data parsing encountered an error."
+            exit 1
+        fi
+
+    else
+        # Estimating 80byte saving per line... probably a little conservative depending on keysize.
+        KBreduction=$(($((`wc -l ${file_name} | awk '{print$1}'` * 80)) / 1024))
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        filesize=`expr $filesize - $KBreduction`
+        checkdiskspace "${file_name}" $filesize
+        $sed_cmd ${sed_edit_in_place} 's/{"id".*,"doc"://g' $file_name && rm -f ${file_name}.sedtmp
+        if [ ! $? = 0 ];then
+            echo "Stage failed."
+            exit 1
+        fi
+    fi
+
+    $echoVerbose && echo "... INFO: Stage 2 - Duplicate curly brace removal"
+    # Approx 1Byte per line removed
+    KBreduction=$((`wc -l ${file_name} | awk '{print$1}'` / 1024))
+    filesize=$(du -P -k ${file_name} | awk '{print$1}')
+    filesize=`expr $filesize - $KBreduction`
+    checkdiskspace "${file_name}" $filesize
+    $sed_cmd ${sed_edit_in_place} 's/}},$/},/g' ${file_name} && rm -f ${file_name}.sedtmp
+    if [ ! $? = 0 ];then
+        echo "Stage failed."
+        exit 1
+    fi
+    $echoVerbose && echo "... INFO: Stage 3 - Header Correction"
+    filesize=$(du -P -k ${file_name} | awk '{print$1}')
+    checkdiskspace "${file_name}" $filesize
+    $sed_cmd ${sed_edit_in_place} '1s/^.*/{"new_edits":false,"docs":[/' ${file_name} && rm -f ${file_name}.sedtmp
+    if [ ! $? = 0 ];then
+        echo "Stage failed."
+        exit 1
+    fi
+    $echoVerbose && echo "... INFO: Stage 4 - Final document line correction"
+    filesize=$(du -P -k ${file_name} | awk '{print$1}')
+    checkdiskspace "${file_name}" $filesize
+    $sed_cmd ${sed_edit_in_place} 's/}}$/}/g' ${file_name} && rm -f ${file_name}.sedtmp
+    if [ ! $? = 0 ];then
+        echo "Stage failed."
+        exit 1
+    fi
+
+    # If -z (compress) option then compress output file
+    if [ "$compress" = true ]; then
+      $echoVerbose && echo "... INFO: Stage 5 - File compression"
+      gzip $file_name
+      file_name="$file_name.gz"
+    fi
+
+    $echoVerbose && echo "... INFO: Export completed successfully. File available at: ${file_name}"
+    exit 0
+
+### Else if user selected Restore:
+elif [ $restore = true ]&&[ $backup = false ]; then
+    #################################################################
+    ##################### RESTORE START #############################
+    #################################################################
+    # Check if input exists:
+    if [ ! -f ${file_name} ]; then
+        echo "... ERROR: Input file ${file_name} not found."
+        exit 1
+    fi
+
+    #### VALIDATION END
+
+    $echoVerbose && echo "... INFO: Checking for database"
+    attemptcount=0
+    A=0
+    until [ $A = 1 ]; do
+        (( attemptcount++ ))
+        existing_dbs=$(curl $curlSilentOpt $curlopt -X GET "${url}/_all_dbs")
+        if [ ! $? = 0 ]; then
+            if [ $attemptcount = $attempts ]; then
+                echo "... ERROR: Curl failed to get the list of databases - Stopping"
+                exit 1
+            else
+                echo "... WARN: Curl failed to get the list of databases - Attempt ${attemptcount}/${attempts}. Retrying..."
+                sleep 1
+            fi
+        else
+            A=1
+        fi
+    done
+    if [[ ! "$existing_dbs" = "["*"]" ]]; then
+        echo "... WARN: Curl failed to get the list of databases - Continuing"
+        if [ "x$existing_dbs" = "x" ]; then
+            echo "... WARN: Curl just returned: $existing_dbs"
+        fi
+    elif [[ ! "$existing_dbs" = *"\"${db_name}\""* ]]; then
+        # database was not listed as an existing database
+        if [ $createDBsOnDemand = true ]; then
+            attemptcount=0
+            A=0
+            until [ $A = 1 ]; do
+                (( attemptcount++ ))
+                curl $curlSilentOpt $curlopt -X PUT "${url}/${db_name}" -o tmp.out
+                # If curl threw an error:
+                if [ ! $? = 0 ]; then
+                    if [ $attemptcount = $attempts ]; then
+                        echo "... ERROR: Curl failed to create the database ${db_name} - Stopping"
+                        if [ -f tmp.out ]; then
+                            echo -n "... ERROR: Error message was:   "
+                            cat tmp.out
+                        else
+                            echo ".. ERROR: See above for any errors"
+                        fi
+                        exit 1
+                    else
+                        echo "... WARN: Curl failed to create the database ${db_name} - Attempt ${attemptcount}/${attempts}. Retrying..."
+                        sleep 1
+                    fi
+                # If curl was happy, but CouchDB returned an error in the return JSON:
+                elif [ ! "`head -n 1 tmp.out | grep -c '^{"error":'`" = 0 ]; then
+                    if [ $attemptcount = $attempts ]; then
+                        echo "... ERROR: CouchDB Reported: `head -n 1 tmp.out`"
+                        exit 1
+                    else
+                        echo "... WARN: CouchDB Reported an error during db creation - Attempt ${attemptcount}/${attempts} - Retrying..."
+                        sleep 1
+                    fi
+                # Otherwise, if everything went well, delete our temp files.
+                else
+                    rm tmp.out
+                    A=1
+                fi
+            done
+        else
+            echo "... ERROR: corresponding database ${db_name} not yet created - Stopping"
+            $echoVerbose && echo "... HINT: you could add the -c flag to create the database automatically"
+            exit 1
+        fi
+    fi
+
+    ## Stop bash mangling wildcard...
+    set -o noglob
+    # Manage Design Documents as a priority, and remove them from the main import job
+    $echoVerbose && echo "... INFO: Checking for Design documents"
+    # Find all _design docs, put them into another file
+    design_file_name=${file_name}-design
+    grep '^{"_id":"_design' ${file_name} > ${design_file_name}
+
+    # Count the design file (if it even exists)
+    DESIGNS="`wc -l ${design_file_name} 2>/dev/null | awk '{print$1}'`"
+    # If there's no design docs for import...
+    if [ "x$DESIGNS" = "x" ]||[ "$DESIGNS" = "0" ]; then 
+        # Cleanup any null files
+        rm -f ${design_file_name} 2>/dev/null
+        $echoVerbose && echo "... INFO: No Design Documents found for import."
+    else
+        $echoVerbose && echo "... INFO: Duplicating original file for alteration"
+        # Duplicate the original DB file, so we don't mangle the user's input file:
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        checkdiskspace "${file_name}" $filesize
+        cp -f ${file_name}{,-nodesign}
+        # Re-set file_name to be our new file.
+        file_name=${file_name}-nodesign
+        # Remove these design docs from (our new) main file.
+        $echoVerbose && echo "... INFO: Stripping _design elements from regular documents"
+        checkdiskspace "${file_name}" $filesize
+        $sed_cmd ${sed_edit_in_place} '/^{"_id":"_design/d' ${file_name} && rm -f ${file_name}.sedtmp
+        # Remove the final document's trailing comma
+        $echoVerbose && echo "... INFO: Fixing end document"
+        line=$(expr `wc -l ${file_name} | awk '{print$1}'` - 1)
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        checkdiskspace "${file_name}" $filesize
+        $sed_cmd ${sed_edit_in_place} "${line}s/,$//" ${file_name} && rm -f ${file_name}.sedtmp
+
+        $echoVerbose && echo "... INFO: Inserting Design documents"
+        designcount=0
+        # For each design doc...
+        while IFS="" read -r; do
+            line="${REPLY}"
+            # Split the ID out for use as the import URL path
+            URLPATH=$(echo $line | awk -F'"' '{print$4}')
+            # Scrap the ID and Rev from the main data, as well as any trailing ','
+            echo "${line}" | $sed_cmd -${sed_regexp_option}e "s@^\{\"_id\":\"${URLPATH}\",\"_rev\":\"[0-9]*-[0-9a-zA-Z_\-]*\",@\{@" | $sed_cmd -e 's/,$//' > ${design_file_name}.${designcount}
+            # Fix Windows CRLF
+            if grep -qU $'\x0d' ${design_file_name}.${designcount}; then
+                $echoVerbose && echo "... INFO: File contains Windows carriage returns- converting..."
+                filesize=$(du -P -k ${design_file_name}.${designcount} | awk '{print$1}')
+                checkdiskspace "${file_name}" $filesize
+                tr -d '\r' < ${design_file_name}.${designcount} > ${design_file_name}.${designcount}.tmp
+                if [ $? = 0 ]; then
+                    mv ${design_file_name}.${designcount}.tmp ${design_file_name}.${designcount}
+                    if [ $? = 0 ]; then
+                        $echoVerbose && echo "... INFO: Completed successfully."
+                    else
+                        echo "... ERROR: Failed to overwrite ${design_file_name}.${designcount} with ${design_file_name}.${designcount}.tmp"
+                        exit 1
+                    fi
+                else
+                    echo ".. ERROR: Failed to convert file."
+                    exit 1
+                fi
+            fi
+
+            # Insert this file into the DB
+            A=0
+            attemptcount=0
+            until [ $A = 1 ]; do
+                (( attemptcount++ ))
+                curl $curlSilentOpt ${curlopt} -T ${design_file_name}.${designcount} -X PUT "${url}/${db_name}/${URLPATH}" -H 'Content-Type: application/json' -o ${design_file_name}.out.${designcount}
+                # If curl threw an error:
+                if [ ! $? = 0 ]; then
+                     if [ $attemptcount = $attempts ]; then
+                         echo "... ERROR: Curl failed trying to restore ${design_file_name}.${designcount} - Stopping"
+                         exit 1
+                     else
+                         echo "... WARN: Import of ${design_file_name}.${designcount} failed - Attempt ${attemptcount}/${attempts}. Retrying..."
+                         sleep 1
+                     fi
+                # If curl was happy, but CouchDB returned an error in the return JSON:
+                elif [ ! "`head -n 1 ${design_file_name}.out.${designcount} | grep -c '^{"error":'`" = 0 ]; then
+                     if [ $attemptcount = $attempts ]; then
+                         echo "... ERROR: CouchDB Reported: `head -n 1 ${design_file_name}.out.${designcount}`"
+                         exit 1
+                     else
+                         echo "... WARN: CouchDB Reported an error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
+                         sleep 1
+                     fi
+                # Otherwise, if everything went well, delete our temp files.
+                else
+                     A=1
+                     rm -f ${design_file_name}.out.${designcount}
+                     rm -f ${design_file_name}.${designcount}
+                fi
+            done
+            # Increase design count - mainly used for the INFO at the end.
+            (( designcount++ ))
+        # NOTE: This is where we insert the design lines exported from the main block
+        done < <(cat ${design_file_name})
+        $echoVerbose && echo "... INFO: Successfully imported ${designcount} Design Documents"
+    fi
+    set +o noglob
+
+    # If the size of the file to import is less than our $lines size, don't worry about splitting
+    if [ `wc -l $file_name | awk '{print$1}'` -lt $lines ]; then
+        $echoVerbose && echo "... INFO: Small dataset. Importing as a single file."
+        A=0
+        attemptcount=0
+        until [ $A = 1 ]; do
+            (( attemptcount++ ))
+            curl $curlSilentOpt $curlopt -T $file_name -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
+            if [ "`head -n 1 tmp.out | grep -c '^{"error":'`" -eq 0 ]; then
+                $echoVerbose && echo "... INFO: Imported ${file_name_orig} Successfully."
+                rm -f tmp.out
+                rm -f ${file_name_orig}-design
+                rm -f ${file_name_orig}-nodesign
+                exit 0
+            else
+                if [ $attemptcount = $attempts ]; then
+                    echo "... ERROR: Import of ${file_name_orig} failed."
+                    if [ -f tmp.out ]; then
+                        echo -n "... ERROR: Error message was:   "
+                        cat tmp.out
+                    else
+                        echo ".. ERROR: See above for any errors"
+                    fi
+                    rm -f tmp.out
+                    exit 1
+                else
+                    echo "... WARN: Import of ${file_name_orig} failed - Attempt ${attemptcount}/${attempts} - Retrying..."
+                    sleep 1
+                fi
+            fi
+        done
+    # Otherwise, it's a large import that requires bulk insertion.
+    else
+        $echoVerbose && echo "... INFO: Block import set to ${lines} lines."
+        if [ -f ${file_name}.splitaaa ]; then
+            echo "... ERROR: Split files \"${file_name}.split*\" already present. Please remove before continuing."
+            exit 1
+        fi
+        importlines=`cat ${file_name} | grep -c .`
+
+        # Due to the file limit imposed by the pre-calculated AZ3 variable, max split files is 15600 (alpha x 3positions)
+        if [[ `expr ${importlines} / ${lines}` -gt 15600 ]]; then
+            echo "... ERROR: Pre-processed split variable limit of 15600 files reached."
+            echo "           Please increase the '-l' parameter (Currently: $lines) and try again."
+            exit 1
+        fi
+
+        $echoVerbose && echo "... INFO: Generating files to import"
+        filesize=$(du -P -k ${file_name} | awk '{print$1}')
+        checkdiskspace "${file_name}" $filesize
+        ### Split the file into many
+        split -a 3 -l ${lines} ${file_name} ${file_name}.split
+        if [ ! "$?" = "0" ]; then
+            echo "... ERROR: Unable to create split files."
+            exit 1
+        fi
+        HEADER="`head -n 1 $file_name`"
+        FOOTER="`tail -n 1 $file_name`"
+
+        count=0
+        for PADNUM in $AZ3; do
+            PADNAME="${file_name}.split${PADNUM}"
+            if [ ! -f ${PADNAME} ]; then
+                echo "... INFO: Import Cycle Completed."
+                break
+            fi
+
+            if [ ! "`head -n 1 ${PADNAME}`" = "${HEADER}" ]; then
+                $echoVerbose && echo "... INFO: Adding header to ${PADNAME}"
+                filesize=$(du -P -k ${PADNAME} | awk '{print$1}')
+                checkdiskspace "${PADNAME}" $filesize
+                $sed_cmd ${sed_edit_in_place} "1i${HEADER}" ${PADNAME} && rm -f ${PADNAME}.sedtmp
+            else
+                $echoVerbose && echo "... INFO: Header already applied to ${PADNAME}"
+            fi
+            if [ ! "`tail -n 1 ${PADNAME}`" = "${FOOTER}" ]; then
+                $echoVerbose && echo "... INFO: Adding footer to ${PADNAME}"
+                filesize=$(du -P -k ${PADNAME} | awk '{print$1}')
+                checkdiskspace "${PADNAME}" $filesize
+                $sed_cmd ${sed_edit_in_place} '$s/,$//g' ${PADNAME} && rm -f ${PADNAME}.sedtmp
+                echo "${FOOTER}" >> ${PADNAME}
+            else
+                $echoVerbose && echo "... INFO: Footer already applied to ${PADNAME}"
+            fi
+
+            $echoVerbose && echo "... INFO: Inserting ${PADNAME}"
+            A=0
+            attemptcount=0
+            until [ $A = 1 ]; do
+                (( attemptcount++ ))
+                curl $curlSilentOpt $curlopt -T ${PADNAME} -X POST "$url/$db_name/_bulk_docs" -H 'Content-Type: application/json' -o tmp.out
+                if [ ! $? = 0 ]; then
+                    if [ $attemptcount = $attempts ]; then
+                        echo "... ERROR: Curl failed trying to restore ${PADNAME} - Stopping"
+                        exit 1
+                    else
+                        echo "... WARN: Failed to import ${PADNAME} - Attempt ${attemptcount}/${attempts} - Retrying..."
+                        sleep 1
+                    fi
+                elif [ ! "`head -n 1 tmp.out | grep -c '^{"error":'`" = 0 ]; then
+                    if [ $attemptcount = $attempts ]; then
+                        echo "... ERROR: CouchDB Reported: `head -n 1 tmp.out`"
+                        exit 1
+                    else
+                        echo "... WARN: CouchDB Reported and error during import - Attempt ${attemptcount}/${attempts} - Retrying..."
+                        sleep 1
+                    fi
+                else
+                    A=1
+                    rm -f ${PADNAME}
+                    rm -f tmp.out
+                    (( count++ ))
+                fi
+            done
+
+            $echoVerbose && echo "... INFO: Successfully Imported `expr ${count}` Files"
+            A=1
+            rm -f ${file_name_orig}-design
+            rm -f ${file_name_orig}-nodesign
+        done
+    fi
+fi

+ 0 - 5
scripts/install

@@ -1,7 +1,5 @@
 #!/bin/bash
 
-ps axf # debug
-
 # IMPORT GENERIC HELPERS
 source _common.sh
 source /usr/share/yunohost/helpers
@@ -95,13 +93,11 @@ ynh_add_nginx_config
 
 # systemd.service
 ynh_script_progression --message="Configuring a systemd service..." --weight=1
-ps axf # debug
 # Create a dedicated systemd config
 ynh_add_systemd_config --service=$flohmarkt_filename
 # integrate into yunohost
 ynh_script_progression --message="Integrating service in YunoHost..." --weight=1
 yunohost service add $flohmarkt_filename --description="A decentral federated small advertisement platform" --log="$flohmarkt_logfile"
-ps axf # debug
 
 #  logfile contains possibly the secret setup URL
 ynh_script_progression --message="Setting permissions on logfile..." --weight=2
@@ -126,7 +122,6 @@ ln -s "$flohmarkt_data_dir" "$flohmarkt_sym_data_dir"
 
 # start service
 ynh_script_progression --message="Debug before starting flohmarkt..." --weight=1
-ps axuf # debug
 ls -l $flohmarkt_logfile /bin/bash /usr/bin/bash || true
 ynh_script_progression --message="Starting flohmarkt..." --weight=10
 flohmarkt_ynh_start_service

+ 2 - 6
scripts/remove

@@ -31,10 +31,9 @@ fi
 # https://codeberg.org/flohmarkt/flohmarkt_ynh/issues/12
 ynh_script_progression --message="Removing database and database user..." --weight=2
 # remove DB
-curl -s -X DELETE 'http://127.0.0.1:5984/flohmarkt' --user "admin:${password_couchdb_admin}"
+flohmarkt_ynh_delete_couchdb_db
 # remove DB user for this instance:
-#   get rev for the user and then delete user/rev
-curl -s -X DELETE "http://127.0.0.1:5984/_users/org.couchdb.user%3A${app}?rev=$( curl -sX GET "http://127.0.0.1:5984/_users/org.couchdb.user%3A${app}" --user "admin:${password_couchdb_admin}" | jq -r ._rev)" --user "admin:${password_couchdb_admin}"
+flohmarkt_ynh_delete_couchdb_user
 
 # Remove the app-specific logrotate config
 ynh_remove_logrotate
@@ -68,6 +67,3 @@ ynh_secure_remove "$flohmarkt_sym_data_dir"
 #=================================================
 
 ynh_script_progression --message="Removal of $app completed" --last
-
-# debug
-ps -axf

+ 17 - 12
scripts/restore

@@ -3,24 +3,29 @@
 source ../settings/scripts/_common.sh
 source /usr/share/yunohost/helpers
 
-# restore couchdb directory
-ynh_script_progression --message="Restoring couchdb directory..." --weight=1
-# argh, need this for a single database
-ynh_restore_file --origin_path="/var/lib/couchdb"
-
 # reinstall couchdb
 ynh_script_progression --message="Reinstalling couchdb..." --weight=40
 flohmarkt_ynh_up_inst_couchdb
-flohmarkt_ynh_stop_couchdb
 
 # add couchdb configuration
-ynh_script_progression --message="Adding a configuration file..." --weight=2
-ynh_restore_file --origin_path="/opt/couchdb/etc/local.d/05-flohmarkt.ini"
-chown root:couchdb /opt/couchdb/etc/local.d/05-flohmarkt.ini
-chmod 640 /opt/couchdb/etc/local.d/05-flohmarkt.ini
+if ! [[ -e /opt/couchdb/etc/local.d/05-flohmarkt.ini ]]; then
+  ynh_script_progression --message="Adding a configuration file..." --weight=2
+
+  flohmarkt_ynh_stop_couchdb
+
+  ynh_restore_file --origin_path="/opt/couchdb/etc/local.d/05-flohmarkt.ini"
+  chown root:couchdb /opt/couchdb/etc/local.d/05-flohmarkt.ini
+  chmod 640 /opt/couchdb/etc/local.d/05-flohmarkt.ini
+
+  ynh_script_progression --message="Starting couchdb..." --weight=4
+  flohmarkt_ynh_start_couchdb
+else
+  ynh_script_progression --message="CouchDB configuration file already exists." --weight=1
+fi
 
-# start couchdb
-flohmarkt_ynh_start_couchdb
+# restore couchdb from json
+ynh_script_progression --message="Importing couchdb from json backup..." --weight=8
+flohmarkt_ynh_restore_couchdb
 
 # RESTORE THE APP MAIN DIR
 ynh_script_progression --message="Restoring the app main directory..." --weight=10