123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589 |
- #!/bin/bash
- ## inspired by Percona clustercheck.sh
- # https://github.com/percona/proxysql-admin-tool/blob/master/proxysql_galera_checker
- #-------------------------------------------------------------------------------
- #
- # Step 1 : Bash internal configuration
- #
- set -o nounset # no undefined variables
- #-------------------------------------------------------------------------------
- #
- # Step 2 : Global variables
- #
- #
- # Script parameters/constants
- #
- declare -i DEBUG=0
- readonly PROXYSQL_ADMIN_VERSION="1.4.14"
- #Timeout exists for instances where mysqld may be hung
- declare -i TIMEOUT=10
- declare RED=""
- declare NRED=""
- #
- # Global variables used by the script
- #
- declare ERR_FILE="/dev/stderr"
- declare NODE_MONITOR_LOG_FILE=""
- declare CONFIG_FILE="/etc/proxysql-admin.cnf"
- declare HOST_PRIORITY_FILE=""
- declare CHECKER_PIDFILE=""
- # Set to send output here when DEBUG is set
- declare DEBUG_ERR_FILE="/dev/null"
- declare -i HOSTGROUP_WRITER_ID=1
- declare -i HOSTGROUP_READER_ID=-1
- declare -i HOSTGROUP_SLAVEREADER_ID=-1
- declare -i NUMBER_WRITERS=0
- declare WRITER_IS_READER="ondemand"
- declare SLAVE_IS_WRITER="yes"
- declare P_MODE=""
- declare P_PRIORITY=""
- declare MYSQL_USERNAME
- declare MYSQL_PASSWORD
- declare PROXYSQL_USERNAME
- declare PROXYSQL_PASSWORD
- declare PROXYSQL_HOSTNAME
- declare PROXYSQL_PORT
- declare PROXYSQL_DATADIR='/var/lib/proxysql'
- # Set to 1 if slave readers are being used
- declare -i HAVE_SLAVEREADERS=0
- declare -i HAVE_SLAVEWRITERS=0
- # How far behind can a slave be before its put into OFFLINE_SOFT state
- declare SLAVE_SECONDS_BEHIND=3600
- # Some extra text that will be logged
- # (useful for debugging)
- declare LOG_TEXT=""
- # Default value for max_connections in mysql_servers
- declare MAX_CONNECTIONS=1000
- #-------------------------------------------------------------------------------
- #
- # Step 3 : Helper functions
- #
- function log() {
- local lineno=$1
- shift
- if [[ -n $ERR_FILE ]]; then
- if [[ -n $lineno && $DEBUG -ne 0 ]]; then
- echo -e "[$(date +%Y-%m-%d\ %H:%M:%S)] $$ (line $lineno) $*" >> $ERR_FILE
- else
- echo -e "[$(date +%Y-%m-%d\ %H:%M:%S)] $$ $*" >> $ERR_FILE
- fi
- fi
- }
- # Checks the return value of the most recent command
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: the lineno where the error occurred
- # 2: the error code of the most recent command
- # 3: the error message if the error code is non-zero
- #
- function check_cmd() {
- local lineno=$1
- local retcode=$2
- local errmsg=$3
- shift 3
- if [[ ${retcode} -ne 0 ]]; then
- error "$lineno" $errmsg
- if [[ -n "$*" ]]; then
- log "$lineno" $*
- fi
- fi
- return $retcode
- }
- # Checks the return value of the most recent command
- # Exits the program if the command fails (non-zero return codes)
- #
- # This should not be used with commands that can fail for legitimate
- # reasons.
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: the lineno where the error occurred
- # 2: the error code of the most recent command
- # 3: the error message if the error code is non-zero
- #
- function check_cmd_and_exit() {
- check_cmd "$@"
- local retcode=$?
- if [[ $retcode -ne 0 ]]; then
- exit 1
- fi
- return $retcode
- }
- function log_if_success() {
- local lineno=$1
- local rc=$2
- shift 2
- if [[ $rc -eq 0 ]]; then
- log "$lineno" "$*"
- fi
- return $rc
- }
- function error() {
- local lineno=$1
- shift
- log "$lineno" "proxysql_galera_checker : Error ($lineno): $*"
- }
- function warning() {
- local lineno=$1
- shift
- log "$lineno" "Warning: $*"
- }
- function debug() {
- if [[ $DEBUG -eq 0 ]]; then
- return
- fi
- local lineno=$1
- shift
- log "$lineno" "${RED}debug: $*${NRED}"
- }
- function usage() {
- local path=$0
- cat << EOF
- Usage: ${path##*/} --write-hg=10 --read-hg=11 --config-file=/etc/proxysql-admin.cnf --log=/var/lib/proxysql/pxc_test_proxysql_galera_check.log
- Options:
- -w, --write-hg=<NUMBER> Specify ProxySQL write hostgroup.
- -r, --read-hg=<NUMBER> Specify ProxySQL read hostgroup.
- -c, --config-file=PATH Specify ProxySQL-admin configuration file.
- -l, --log=PATH Specify proxysql_galera_checker log file.
- --log-text=TEXT This is text that will be written to the log file
- whenever this script is run (useful for debugging).
- --node-monitor-log=PATH Specify proxysql_node_monitor log file.
- -n, --writer-count=<NUMBER> Maximum number of write hostgroup_id nodes
- that can be marked ONLINE
- When 0 (default), all nodes can be marked ONLINE
- -p, --priority=<HOST_LIST> Can accept comma delimited list of write nodes priority
- -m, --mode=[loadbal|singlewrite] ProxySQL read/write configuration mode,
- currently supporting: 'loadbal' and 'singlewrite'
- --writer-is-reader=<value> Defines if the writer node also accepts writes.
- Possible values are 'always', 'never', and 'ondemand'.
- 'ondemand' means that the writer node only accepts reads
- if there are no other readers.
- (default: 'never')
- --use-slave-as-writer=<yes/no> If this is 'yes' then slave nodes may
- be added to the write hostgroup if all other
- cluster nodes are down.
- (default: 'yes')
- --max-connections=<NUMBER> Value for max_connections in the mysql_servers table.
- This is the maximum number of connections that
- ProxySQL will open to the backend servers.
- (default: 1000)
- --debug Enables additional debug logging.
- -h, --help Display script usage information
- -v, --version Print version info
- Notes about the mysql_servers in ProxySQL:
- - NODE STATUS * Nodes that are in status OFFLINE_HARD will not be checked
- nor will their status be changed
- * SHUNNED nodes are not to be used with Galera based systems,
- they will be checked and their status will be changed
- to either ONLINE or OFFLINE_SOFT.
- When no nodes were found to be in wsrep_local_state=4 (SYNCED) for either
- read or write nodes, then the script will try 5 times for each node to try
- to find nodes wsrep_local_state=4 (SYNCED) or wsrep_local_state=2 (DONOR/DESYNC)
- EOF
- }
- # Check the permissions for a file or directory
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: the bash test to be applied to the file
- # 2: the lineno where this call is invoked (used for errors)
- # 3: the path to the file
- # 4: (optional) description of the path (mostly used for existence checks)
- #
- # Exits the script if the permissions test fails.
- #
- function check_permission() {
- local permission=$1
- local lineno=$2
- local path_to_check=$3
- local description=""
- if [[ $# -gt 3 ]]; then
- description="$4"
- fi
- if [ ! $permission "$path_to_check" ] ; then
- if [[ $permission == "-r" ]]; then
- error $lineno "You do not have READ permission for: $path_to_check"
- elif [[ $permission == "-w" ]]; then
- error $lineno "You do not have WRITE permission for: $path_to_check"
- elif [[ $permission == "-x" ]]; then
- error $lineno "You do not have EXECUTE permission for: $path_to_check"
- elif [[ $permission == "-e" ]]; then
- if [[ -n $description ]]; then
- error $lineno "Could not find the $description: $path_to_check"
- else
- error $lineno "Could not find: $path_to_check"
- fi
- elif [[ $permission == "-d" ]]; then
- if [[ -n $description ]]; then
- error $lineno "Could not find the $description: $path_to_check"
- else
- error $lineno "Could not find the directory: $path_to_check"
- fi
- elif [[ $permission == "-f" ]]; then
- if [[ -n $description ]]; then
- error $lineno "Could not find the $description: $path_to_check"
- else
- error $lineno "Could not find the file: $path_to_check"
- fi
- else
- error $lineno "You do not have the correct permissions for: $path_to_check"
- fi
- exit 1
- fi
- }
- # Executes a SQL query with the (fully) specified server
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: lineno
- # 2: the name of the user
- # 3: the user's password
- # 4: the hostname of the server
- # 5: the port used to connect to the server
- # 6: the query to be run
- # 7: arguments to the mysql client
- # 8: timeout in secs
- # 9: additional options, space separated
- # Available options:
- # "hide_output"
- # This will not show the output of the query when DEBUG is set.
- # Used to stop the display of sensitve information (such as passwords)
- # from being displayed when debugging.
- #
- function exec_sql() {
- local lineno=$1
- local user=$2
- local password=$3
- local hostname=$4
- local port=$5
- local query=$6
- local args=$7
- local timeout_secs=$8
- local more_options=$9
- local retvalue
- local retoutput
- debug "$lineno" "exec_sql : $user@$hostname:$port ($args) ==> $query"
- retoutput=$(printf "[client]\nuser=${user}\npassword=\"${password}\"\nhost=${hostname}\nport=${port}" \
- | timeout ${timeout_secs} mysql --defaults-file=/dev/stdin --protocol=tcp \
- ${args} -e "$query")
- retvalue=$?
- if [[ $DEBUG -eq 1 ]]; then
- local number_of_newlines=0
- local dbgoutput=$retoutput
- if [[ " $more_options " =~ [[:space:]]hide_output[[:space:]] ]]; then
- dbgoutput="**** data hidden ****"
- fi
- if [[ -n $dbgoutput ]]; then
- number_of_newlines=$(printf "%s" "${dbgoutput}" | wc -l)
- fi
- if [[ $retvalue -ne 0 ]]; then
- debug "" "--> query failed $retvalue"
- elif [[ -z $dbgoutput ]]; then
- debug "" "--> query returned $retvalue : <query returned no data>"
- elif [[ ${number_of_newlines} -eq 0 ]]; then
- debug "" "--> query returned $retvalue : ${dbgoutput}"
- else
- debug "" "--> query returned $retvalue : <data follows>"
- printf "${dbgoutput//%/%%}\n" | while IFS= read -r line; do
- debug "" "----> $line"
- done
- fi
- fi
- printf "${retoutput//%/%%}"
- return $retvalue
- }
- # Executes a SQL query on proxysql (with a timeout of $TIMEOUT seconds)
- #
- # Globals:
- # PROXYSQL_USERNAME
- # PROXYSQL_PASSWORD
- # PROXYSQL_HOSTNAME
- # PROXYSQL_PORT
- #
- # Arguments:
- # 1: lineno (used for debugging/output, may be blank)
- # 2: Additional arguments to the mysql client for the query
- # 3: The SQL query
- # 4: (optional) see the additional options for exec_sql
- #
- function proxysql_exec() {
- local lineno=$1
- local args=$2
- local query="$3"
- local more_options=""
- local retoutput
- if [[ $# -ge 4 ]]; then
- more_options=$4
- fi
- exec_sql "$lineno" "$PROXYSQL_USERNAME" "$PROXYSQL_PASSWORD" \
- "$PROXYSQL_HOSTNAME" "$PROXYSQL_PORT" \
- "$query" "$args" "$TIMEOUT" "$more_options"
- retoutput=$?
- if [[ $retoutput -eq 124 ]]; then
- error $lineno "TIMEOUT: SQL query ($PROXYSQL_HOSTNAME:$PROXYSQL_PORT) : $query"
- fi
- return $retoutput
- }
- # Executes a SQL query on mysql (with a timeout of $TIMEOUT secs)
- #
- # Globals:
- # MYSQL_USERNAME
- # MYSQL_PASSWORD
- #
- # Arguments:
- # 1: lineno (used for debugging/output, may be blank)
- # 2: the hostname of the server
- # 3: the port used to connect to the server
- # 4: arguments to the mysql client
- # 5: the query to be run
- # 6: (optional) more options see exec_sql
- #
- function mysql_exec() {
- local lineno=$1
- local hostname=$2
- local port=$3
- local args=$4
- local query=$5
- local more_options=""
- local retoutput
- if [[ $# -ge 6 ]]; then
- more_options=$6
- fi
- exec_sql "$lineno" "$MYSQL_USERNAME" "$MYSQL_PASSWORD" \
- "$hostname" "$port" \
- "$query" "$args" "$TIMEOUT" "$more_options"
- retoutput=$?
- if [[ $retoutput -eq 124 ]]; then
- error $lineno "TIMEOUT: SQL query ($hostname:$$port) : $query"
- fi
- return $retoutput
- }
- # Separates the IP address from the port in a network address
- # Works for IPv4 and IPv6
- #
- # Globals:
- # None
- #
- # Params:
- # 1. The network address to be parsed
- #
- # Outputs:
- # A string with a space separating the IP address from the port
- #
- function separate_ip_port_from_address()
- {
- #
- # Break address string into host:port/path parts
- #
- local address=$1
- # Has to have at least one ':' to separate the port from the ip address
- if [[ $address =~ : ]]; then
- ip_addr=${address%:*}
- port=${address##*:}
- else
- ip_addr=$address
- port=""
- fi
- # Remove any braces that surround the ip address portion
- ip_addr=${ip_addr#\[}
- ip_addr=${ip_addr%\]}
- echo "${ip_addr} ${port}"
- }
- # Combines the IP address and port into a network address
- # Works for IPv4 and IPv6
- # (If the IP address is IPv6, the IP portion will have brackets)
- #
- # Globals:
- # None
- #
- # Params:
- # 1: The IP address portion
- # 2: The port
- #
- # Outputs:
- # A string containing the full network address
- #
- function combine_ip_port_into_address()
- {
- local ip_addr=$1
- local port=$2
- local addr
- if [[ ! $ip_addr =~ \[.*\] && $ip_addr =~ .*:.* ]] ; then
- # If there are no brackets and it does have a ':', then add the brackets
- # because this is an unbracketed IPv6 address
- addr="[${ip_addr}]:${port}"
- else
- addr="${ip_addr}:${port}"
- fi
- echo $addr
- }
- # upgrade scheduler from old layout to new layout
- #
- # Globals:
- # PROXYSQL_DATADIR
- # TIMEOUT
- # HOST_PRIORITY_FILE
- #
- # Arguments:
- # None
- function upgrade_scheduler(){
- log $LINENO "**** Scheduler upgrade started ****"
- if [[ -f /etc/proxysql-admin.cnf ]]; then
- source /etc/proxysql-admin.cnf
- else
- error $LINENO "Assert! proxysql-admin configuration file : /etc/proxysql-admin.cnf does not exist, Terminating!"
- exit 1
- fi
- # For this function, use a shorter timeout than normal
- TIMEOUT=2
- local scheduler_rows
- local -i rows_found=0
- local -i rows_modified=0
- scheduler_rows=$(proxysql_exec $LINENO "-Ns" "SELECT * FROM scheduler")
- check_cmd_and_exit $LINENO $? "Could not retreive rows from scheduler (query failed). Exiting"
- while read i; do
- if [[ -z $i ]]; then continue; fi
- rows_found+=1
- # Extract fields from the line
- local id=$(echo "$i" | awk '{print $1}')
- local s_write_hg=$(echo "$i" | awk '{print $5}')
- local s_read_hg=$(echo "$i" | awk '{print $6}')
- local s_number_of_writes=$(echo "$i" | awk '{print $7}')
- local s_log=$(echo "$i" | awk '{print $9}')
- local s_cluster_name=$(echo "$i" | awk '{print $10}')
- local s_mode=""
- log $LINENO "Modifying the scheduler for write hostgroup: $s_write_hg"
- # Get the mode for this cluster
- local proxysql_mode_file
- if [[ -z $s_cluster_name ]]; then
- proxysql_mode_file="${PROXYSQL_DATADIR}/mode"
- else
- proxysql_mode_file="${PROXYSQL_DATADIR}/${s_cluster_name}_mode"
- fi
- if [[ -f ${proxysql_mode_file} && -r ${proxysql_mode_file} ]] ; then
- s_mode=$(cat ${proxysql_mode_file})
- else
- log $LINENO ".. Cannot find the ${proxysql_mode_file} file"
- if [[ $s_read_hg == "-1" ]]; then
- log $LINENO ".. Assuming mode='loadbal'"
- s_mode="loadbal"
- else
- log $LINENO ".. Assuming mode='singlewrite'"
- s_mode="singlewrite"
- fi
- fi
- # TODO: kennt
- # This will fail in the multi-cluster case, but may be a non-issue
- # since those nodes should appear in one cluster (may cause extra work
- # for the othre clusters though).
- # Get the host priority file
- local s_host_priority=''
- if [[ -n $HOST_PRIORITY_FILE && -f $HOST_PRIORITY_FILE ]]; then
- debug $LINENO "Found a host priority file: $HOST_PRIORITY_FILE"
- local p_priority_hosts=""
- # Get the list of hosts from the host_priority file ignoring blanks
- # and any lines that start with '#'
- p_priority_hosts=$(cat $HOST_PRIORITY_FILE | grep '^[^#]' | sed ':a;N;$!ba;s/\n/,/g')
- if [[ ! -z $p_priority_hosts ]] ; then
- s_host_priority="--priority=$p_priority_hosts"
- fi
- fi
- # Make the changes
- if [[ ! -z $s_write_hg ]] && [[ ! -z $s_read_hg ]] && [[ ! -z $s_number_of_writes ]] && [[ ! -z $s_log ]]; then
- proxysql_exec $LINENO -Ns "UPDATE scheduler SET arg1='--config-file=/etc/proxysql-admin.cnf --writer-is-reader=ondemand --write-hg=$s_write_hg --read-hg=$s_read_hg --writer-count=$s_number_of_writes $s_host_priority --mode=$s_mode --log=$s_log', arg2=NULL, arg3=NULL, arg4=NULL, arg5=NULL WHERE id=$id"
- check_cmd_and_exit $LINENO $? "Could not update the scheduler (query failed). Exiting."
- rows_modified+=1
- fi
- done< <(printf "${scheduler_rows}\n")
- if [[ $rows_modified -gt 0 ]]; then
- proxysql_exec $LINENO -Ns "LOAD SCHEDULER TO RUNTIME; SAVE SCHEDULER TO DISK;"
- check_cmd_and_exit $LINENO $? "Could not save scheduler changes to runtime or disk (query failed). Exiting."
- log_if_success $LINENO $? "Scheduler changes saved to runtime and disk"
- fi
- log $LINENO "$rows_modified row(s) modified / $rows_found row(s) found"
- log $LINENO "**** Scheduler upgrade finished ****"
- }
- # Upgrade the status of a node
- # This may also perform an INSERT (depending on the reader_status)
- #
- # Globals:
- # TIMEOUT
- #
- # Arguments:
- # 1: lineno
- # 2: hostgroup
- # 3: server address
- # 4: port
- # 5: new status
- # 6: reader_status
- # 7: comment
- #
- function change_server_status() {
- local lineno=$1
- local hostgroup=$2
- local server=$3
- local port=$4
- local status=$5
- local reader_status=$6
- local comment=$7
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- # If we have a PRIORITY_NODE, then we don't have a WRITER entry
- # to upgrade, but we do have a READER entry, so upgrade that
- if [[ $reader_status == "PRIORITY_NODE" ]]; then
- proxysql_exec $lineno -Ns "INSERT INTO mysql_servers
- (hostname,hostgroup_id,port,weight,status,comment,max_connections)
- VALUES ('$server',$hostgroup,$port,1000000,'$status','WRITE',$MAX_CONNECTIONS);"
- check_cmd_and_exit $LINENO $? "Could not create new mysql_servers row (query failed). Exiting."
- log "$lineno" "Adding server $hostgroup:$address with status $status. Reason: $comment"
- else
- proxysql_exec $lineno -Ns "UPDATE mysql_servers
- set status = '$status' WHERE hostgroup_id = $hostgroup AND hostname = '$server' AND port = $port;" 2>>${ERR_FILE}
- check_cmd_and_exit $LINENO $? "Could not update new mysql_servers row (query failed). Exiting."
- log "$lineno" "Changing server $hostgroup:$address to status $status. Reason: $comment"
- fi
- }
- # Arguments:
- # The arguments to the script.
- #
- function parse_args() {
- local go_out=""
- # TODO: kennt, what happens if we don't have a functional getopt()?
- # Check if we have a functional getopt(1)
- if ! getopt --test; then
- go_out="$(getopt --options=w:r:c:l:n:m:p:vh --longoptions=write-hg:,read-hg:,config-file:,log:,node-monitor-log:,writer-count:,mode:,priority:,writer-is-reader:,use-slave-as-writer:,log-text:,max-connections:,version,debug,help \
- --name="$(basename "$0")" -- "$@")"
- if [[ $? -ne 0 ]]; then
- # no place to send output
- echo "proxysql_galera_checker : Script error: getopt() failed" >&2
- exit 1
- fi
- eval set -- "$go_out"
- fi
- if [[ $go_out == " --" ]];then
- usage
- exit 1
- fi
- #
- # We iterate through the command-line options twice
- # (1) to handle options that don't need permissions (such as --help)
- # (2) to handle options that need to be done before other
- # options, such as loading the config file
- #
- for arg
- do
- case "$arg" in
- -- ) shift; break;;
- --config-file )
- CONFIG_FILE="$2"
- check_permission -e $LINENO "$CONFIG_FILE" "proxysql-admin configuration file"
- debug $LINENO "--config-file specified, using : $CONFIG_FILE"
- shift 2
- ;;
- --help)
- usage
- exit 0
- ;;
- -v | --version)
- echo "proxysql_galera_checker version $PROXYSQL_ADMIN_VERSION"
- exit 0
- ;;
- --debug)
- DEBUG=1
- shift
- ;;
- *)
- shift
- ;;
- esac
- done
- #
- # Load the config file before reading in the command-line options
- #
- readonly CONFIG_FILE
- if [ ! -e "$CONFIG_FILE" ]; then
- warning "" "Could not locate the configuration file: $CONFIG_FILE"
- else
- check_permission -r $LINENO "$CONFIG_FILE"
- debug $LINENO "Loading $CONFIG_FILE"
- source "$CONFIG_FILE"
- fi
- if [[ $DEBUG -ne 0 ]]; then
- # For now
- if [[ -t 1 ]]; then
- ERR_FILE=/dev/stderr
- fi
- fi
- # Reset the command line for the next invocation
- eval set -- "$go_out"
- for arg
- do
- case "$arg" in
- -- ) shift; break;;
- -w | --write-hg )
- HOSTGROUP_WRITER_ID=$2
- shift 2
- ;;
- -r | --read-hg )
- HOSTGROUP_READER_ID=$2
- shift 2
- ;;
- --config-file )
- # Do no processing of config-file here, it is processed
- # before this loop (see above)
- shift 2
- ;;
- -l | --log )
- ERR_FILE="$2"
- shift 2
- # Test if stderr is open to a terminal
- # We cannot use stdout as the log output, since it is used
- # to return values.
- if [[ $ERR_FILE == "/dev/stderr" ]]; then
- RED=$(tput setaf 1)
- NRED=$(tput sgr0)
- else
- RED=""
- NRED=""
- fi
- ;;
- --node-monitor-log )
- NODE_MONITOR_LOG_FILE="$2"
- shift 2
- ;;
- -n | --writer-count )
- NUMBER_WRITERS="$2"
- shift 2
- ;;
- -p | --priority )
- P_PRIORITY="$2"
- shift 2
- ;;
- -m | --mode )
- P_MODE="$2"
- shift 2
- if [ "$P_MODE" != "loadbal" ] && [ "$P_MODE" != "singlewrite" ]; then
- echo "ERROR: Invalid --mode passed:"
- echo " Please choose one of these modes: loadbal, singlewrite"
- exit 1
- fi
- ;;
- --writer-is-reader )
- WRITER_IS_READER="$2"
- shift 2
- ;;
- --use-slave-as-writer )
- SLAVE_IS_WRITER="$2"
- shift 2
- ;;
- --max-connections )
- MAX_CONNECTIONS="$2"
- shift 2
- ;;
- --debug )
- # Not handled here, see above
- shift
- ;;
- --log-text )
- LOG_TEXT="$2"
- shift 2
- ;;
- -v | --version )
- # Not handled here, see above
- shift
- ;;
- -h | --help )
- # Not handled here, see above
- shift
- ;;
- esac
- done
- if [[ $DEBUG -eq 1 ]]; then
- DEBUG_ERR_FILE=$ERR_FILE
- fi
- #
- # Argument validation
- #
- test $HOSTGROUP_WRITER_ID -ge 0 &>/dev/null
- if [[ $? -ne 0 ]]; then
- echo "ERROR: writer hostgroup_id is not an integer"
- usage
- exit 1
- fi
- test $HOSTGROUP_READER_ID -ge -1 &>/dev/null
- if [[ $? -ne 0 ]]; then
- echo "ERROR: reader hostgroup_id is not an integer"
- usage
- exit 1
- fi
- HOSTGROUP_SLAVEREADER_ID=$HOSTGROUP_READER_ID
- if [ $HOSTGROUP_SLAVEREADER_ID -eq $HOSTGROUP_WRITER_ID ];then
- let HOSTGROUP_SLAVEREADER_ID+=1
- fi
- if [[ $NUMBER_WRITERS -lt 0 ]]; then
- echo "ERROR: The number of writers should either be 0 to enable all possible nodes ONLINE"
- echo " or be larger than 0 to limit the number of writers"
- usage
- exit 1
- fi
- if [[ ! $WRITER_IS_READER =~ ^(always|never|ondemand)$ ]]; then
- error "" "Invalid --writer-is-reader option: '$WRITER_IS_READER'"
- echo "Please choose one of these values: always, never, or ondemand"
- exit 1
- fi
- if [[ ! $SLAVE_IS_WRITER =~ ^(yes|YES|no|NO)$ ]]; then
- error "" "Invalid --use-slave-as-writer option: '$SLAVE_IS_WRITER'"
- echo "Please choose either yes or no"
- exit 1
- fi
- if [[ $SLAVE_IS_WRITER =~ ^(yes|YES)$ ]]; then
- SLAVE_IS_WRITER="yes"
- else
- SLAVE_IS_WRITER="no"
- fi
- # These may get set in the config file, but they are not used
- # by this script. So to avoid confusion and problems, remove
- # these variables explicitly.
- unset WRITE_HOSTGROUP_ID
- unset READ_HOSTGROUP_ID
- unset SLAVEREAD_HOSTGROUP_ID
- # Verify that we have an integer
- if [[ -n $MAX_CONNECTIONS ]]; then
- if ! [ "$MAX_CONNECTIONS" -eq "$MAX_CONNECTIONS" ] 2>/dev/null
- then
- error "" "option '--max-connections' parameter (must be a number) : $MAX_CONNECTIONS"
- exit 1
- fi
- fi
- readonly ERR_FILE
- readonly CONFIG_FILE
- readonly DEBUG_ERR_FILE
- readonly HOSTGROUP_WRITER_ID
- readonly HOSTGROUP_READER_ID
- readonly HOSTGROUP_SLAVEREADER_ID
- readonly NUMBER_WRITERS
- readonly WRITER_IS_READER
- readonly SLAVE_IS_WRITER
- readonly P_PRIORITY
- readonly P_MODE
- readonly MAX_CONNECTIONS
- }
- # Checks to see if another instance of the script is running.
- # We want only one instance of this script to be running at a time.
- #
- # Globals:
- # PROXYSQL_DATADIR
- # ERR_FILE
- #
- # Arguments:
- # 1: the cluster name
- #
- # This function will exit if another instance of the script is running.
- #
- # With thanks, http://bencane.com/2015/09/22/preventing-duplicate-cron-job-executions/
- #
- function check_is_galera_checker_running() {
- local cluster_name=$1
- if [[ -z $cluster_name ]]; then
- CHECKER_PIDFILE=${PROXYSQL_DATADIR}/galera_checker.pid
- else
- CHECKER_PIDFILE=${PROXYSQL_DATADIR}/${cluster_name}_galera_checker.pid
- fi
- if [[ -f $CHECKER_PIDFILE && -r $CHECKER_PIDFILE ]] ; then
- local GPID
- GPID=$(cat "$CHECKER_PIDFILE")
- if ps -p $GPID -o args=ARGS | grep $ERR_FILE | grep -o proxysql_galera_check >/dev/null 2>&1 ; then
- ps -p $GPID > /dev/null 2>&1
- if [[ $? -eq 0 ]]; then
- log "$LINENO" "ProxySQL galera checker process already running. (pid:$GPID this pid:$$)"
- # We don't want to remove this file on cleanup
- CHECKER_PIDFILE=""
- exit 1
- else
- echo $$ > $CHECKER_PIDFILE
- if [[ $? -ne 0 ]] ; then
- warning "$LINENO" "Could not create galera checker PID file"
- exit 1
- fi
- debug "$LINENO" "Created PID file at $CHECKER_PIDFILE"
- fi
- else
- warning "$LINENO" "Existing PID($GPID) belongs to some other process. Creating new PID file."
- echo $$ > $CHECKER_PIDFILE
- if [[ $? -ne 0 ]] ; then
- warning "$LINENO" "Could not create galera checker PID file"
- exit 1
- fi
- debug "$LINENO" "Created PID file at $CHECKER_PIDFILE"
- fi
- else
- echo "$$" > "$CHECKER_PIDFILE"
- if [[ $? -ne 0 ]]; then
- warning "$LINENO" "Could not create galera checker PID file"
- exit 1
- fi
- debug "$LINENO" "Created PID file at $CHECKER_PIDFILE"
- fi
- }
- function cleanup_handler() {
- if [[ -n $CHECKER_PIDFILE ]]; then
- rm -f $CHECKER_PIDFILE
- fi
- }
- # Checks to see that the READ hostgroup entries have been created/deleted
- # This function has no meaning if mode="loadbal".
- #
- # Globals:
- # HOSTGROUP_READER_ID
- # HOSTGROUP_WRITER_ID
- # WRITER_IS_READER
- # MODE
- #
- # Arguments:
- # 1: the name of the reload_check_file
- # 2: the number of online-readers
- #
- function writer_is_reader_check() {
- local reload_check_file=$1
- local number_readers_online=$2
- local servers
- if [[ $MODE == "loadbal" ]]; then
- return
- fi
- servers=$(proxysql_exec $LINENO -Ns "SELECT hostgroup_id, hostname, port, status
- FROM mysql_servers
- WHERE hostgroup_id IN ($HOSTGROUP_READER_ID,$HOSTGROUP_WRITER_ID) AND
- status <> 'OFFLINE_HARD'
- AND comment <> 'SLAVEREAD'
- ORDER BY hostname, port, hostgroup_id")
- check_cmd_and_exit $LINENO $? "Could not retreive data from mysql_servers (query failed). Exiting."
- debug $LINENO "writer_is_reader_check : number_readers_online:$number_readers_online"
- # The extra test at the end is to ensure that the very last line read in
- # is also handled (the read may be returning EOF)
- printf "${servers}" | while read hostgroup server port stat || [ -n "$stat" ]
- do
- debug $LINENO "Examining $hostgroup:$server:$port"
- # Only look at writer nodes, so skip non-writers
- if [[ $hostgroup -ne $HOSTGROUP_WRITER_ID ]]; then
- continue
- fi
- local regex="(^|[[:space:]])${HOSTGROUP_READER_ID}[[:blank:]]${server}[[:blank:]]${port}[[:blank:]]"
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- if [[ $WRITER_IS_READER == "always" || $WRITER_IS_READER == "ondemand" ]]; then
- #
- # Ensure that there is a corresponding entry in mysql_servers
- #
- local reader_row=$(echo "$servers" | grep -E "$regex")
- if [[ -z $reader_row ]]; then
- # We did not find a matching row, insert the corresponding READER entry
- # If we are to always have a reader, or if there are no other
- # readers, add it as an ONLINE reader
- local comment
- if [[ $WRITER_IS_READER == "always" || $number_readers_online -eq 0 ]]; then
- comment="ONLINE"
- else
- comment="OFFLINE_SOFT"
- fi
- proxysql_exec $LINENO -Ns "INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,status,comment,max_connections) VALUES ('$server',$HOSTGROUP_READER_ID,$port,1000,'$comment','READ',$MAX_CONNECTIONS);"
- check_cmd_and_exit $LINENO $? "writer-is-reader($WRITER_IS_READER): Failed to add $HOSTGROUP_READER_ID:$address (query failed). Exiting."
- log_if_success $LINENO $? "writer-is-reader($WRITER_IS_READER): Adding reader $HOSTGROUP_READER_ID:$address with status OFFLINE_SOFT"
- echo "1" > ${reload_check_file}
- elif [[ $WRITER_IS_READER == "ondemand" && $stat == "ONLINE" && $number_readers_online -gt 1 ]]; then
- # If the reader node is ONLINE and there is another reader
- # Then we can deactivate this reader node (if ondemand)
- # This should only do this if there is a corresponding READ entry that is NOT "OFFLINE_SOFT"
- # Search through the $servers for an entry corresponding to the READ data
- local reader_status=$(echo -e "$reader_row" | cut -f4)
- if [[ $reader_status != "OFFLINE_SOFT" ]]; then
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status='OFFLINE_SOFT' WHERE hostgroup_id=$HOSTGROUP_READER_ID AND hostname='$server' AND port=$port"
- check_cmd_and_exit $LINENO $? "Could not update mysql_servers (query failed). Exiting."
- log_if_success $LINENO $? "Updating $hostgroup:$address to OFFLINE_SOFT because writers prefer to not be readers (writer-is-reader:ondemand)"
- echo "1" > ${reload_check_file}
- fi
- fi
- elif [[ $WRITER_IS_READER == "never" ]]; then
- #
- # Ensure that there is NO corresponding entry in mysql_servers
- #
- if [[ $servers =~ $regex ]]; then
- # Delete the corresponding READER entry
- proxysql_exec $LINENO -Ns "DELETE FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_READER_ID and hostname='$server' and port=$port"
- check_cmd_and_exit $LINENO $? "writer-is-reader(never): Failed to remove $HOSTGROUP_READER_ID:$address (query failed). Exiting."
- log_if_success $LINENO $? "writer-is-reader(never): Removing reader $HOSTGROUP_READER_ID:$address"
- echo "1" > ${reload_check_file}
- fi
- fi
- done
- }
- # Returns the address of an available (online) cluster host
- #
- # Globals:
- # None
- #
- # Arguments:
- # None
- #
- # Returns:
- # 0 : if the function succeeds (this means that no errors occurred in the SQL queries)
- # The function can return nothing and still return success
- # 1 : if an error occurs
- #
- function find_online_cluster_host() {
- # Query the proxysql database for hosts,ports in use
- # Then just go through the list until we reach one that responds
- local hosts
- hosts=$(proxysql_exec $LINENO -Ns "SELECT hostname,port FROM mysql_servers where hostgroup_id in ($HOSTGROUP_WRITER_ID, $HOSTGROUP_READER_ID) and comment <> 'SLAVEREAD' and status='ONLINE'")
- if [[ $? -ne 0 ]]; then
- return 1
- fi
- printf "$hosts" | while read server port || [[ -n $port ]]
- do
- debug $LINENO "Trying to contact $server:$port..."
- mysql_exec "$LINENO" "$server" "$port" -Bs "select @@port" 1>/dev/null 2>>${DEBUG_ERR_FILE}
- if [[ $? -eq 0 ]]; then
- printf "$server $port"
- return 0
- fi
- done
- # No cluster host available (cannot contact any)
- return 0
- }
- # Checks the number of online writers and promotes a reader to a writer
- # if needed
- #
- # Globals:
- # HOSTGROUP_WRITER_ID
- # CHECK_STATUS
- #
- # Arguments:
- # 1: The path to the reload_check_file
- #
- function ensure_one_writer_node() {
- local reload_check_file=$1
- log $LINENO "No ONLINE writers found, looking for readers to promote"
- # We are trying to find a reader node that can be promoted to
- # a writer.
- # So what we do is we ORDER BY writer.status ASC because
- # by accident ONLINE is last in the line
- local reader_proxysql_query="SELECT
- reader.hostname,
- reader.port,
- writer.status
- FROM mysql_servers as reader
- LEFT JOIN mysql_servers as writer
- ON writer.hostgroup_id = $HOSTGROUP_WRITER_ID
- AND writer.hostname = reader.hostname
- AND writer.port = reader.port
- WHERE reader.hostgroup_id = $HOSTGROUP_READER_ID
- AND reader.status = 'ONLINE'
- AND reader.comment = 'READ'
- ORDER BY writer.status ASC,
- reader.weight DESC,
- reader.hostname,
- reader.port"
- local possible_hosts
- possible_hosts=$(proxysql_exec $LINENO -Ns "$reader_proxysql_query")
- check_cmd_and_exit $LINENO $? "Could not get data from mysql_servers (query failed). Exiting."
- if [[ -z $possible_hosts ]]; then
- log $LINENO "Cannot find a reader that can be promoted to a writer"
- return
- fi
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- local host=$(echo $line | awk '{ print $1 }')
- local port=$(echo $line | awk '{ print $2 }')
- # Have to check that we can actually access the node
- local wsrep_status
- local pxc_main_mode
- local result
- local address
- address=$(combine_ip_port_into_address "$host" "$port")
- result=$(mysql_exec $LINENO "$host" "$port" -Nns "SHOW STATUS LIKE 'wsrep_local_state'; SHOW VARIABLES LIKE 'pxc_maint_mode';" 2>>${DEBUG_ERR_FILE})
- wsrep_status=$(echo "$result" | grep "wsrep_local_state" | awk '{ print $2 }')
- pxc_main_mode=$(echo "$result" | grep "pxc_maint_mode" | awk '{ print $2 }')
- if [[ -z $pxc_main_mode ]]; then
- pxc_main_mode="DISABLED"
- fi
- if [[ $wsrep_status -ne 4 || $pxc_main_mode != "DISABLED" ]]; then
- continue
- fi
- local write_stat=$(echo "$line" | awk '{print $3}')
- debug $LINENO "Looking at $address write_status:$write_stat"
- log $LINENO "Promoting $address as writer node..."
- # We have an ONLINE reader node
- # if ondemand or always keep the reader node (may need to move it to OFFLINE_SOFT)
- # if we have a writer, move to ONLINE
- # if no writer, create it
- if [[ $WRITER_IS_READER == "ondemand" || $WRITER_IS_READER == "always" ]]; then
- if [[ -z $write_stat || $write_stat == "NULL" ]]; then
- # Writer does not exist, add an ONLINE writer
- proxysql_exec $LINENO -Ns \
- "INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,status,comment,max_connections)
- VALUES ('$host',$HOSTGROUP_WRITER_ID,$port,1000000,'ONLINE','WRITE',$MAX_CONNECTIONS);"
- check_cmd_and_exit $LINENO $? "Cannot add a PXC writer node to ProxySQL (query failed). Exiting."
- log_if_success $LINENO $? "Added $HOSTGROUP_WRITER_ID:$address as a writer."
- else
- # Writer exists, move writer to ONLINE
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status='ONLINE',comment='WRITE',weight=1000000 WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND hostname='$host' AND port=$port"
- check_cmd_and_exit $LINENO $? "Cannot update the PXC node $HOSTGROUP_WRITER_ID:$address (query failed). Exiting."
- log_if_success $LINENO $? "Updated ${HOSTGROUP_WRITER_ID}:${host}:${port} node in ProxySQL."
- fi
- else
- if [[ -z $write_stat || $write_stat == "NULL" ]]; then
- # Writer does not exist, move reader to writer
- proxysql_exec $$LINENO -Ns\
- "UPDATE mysql_servers set status='ONLINE',hostgroup_id=$HOSTGROUP_WRITER_ID, comment='WRITE', weight=1000000 WHERE hostgroup_id=$HOSTGROUP_READER_ID and hostname='$host' and port=$port"
- check_cmd_and_exit $LINENO $? "Cannot update PXC writer in ProxySQL (query failed). Exiting."
- log_if_success $LINENO $? "Added $HOSTGROUP_WRITER_ID:$address as a writer node."
- else
- # Writer exists, move writer to ONLINE, remove reader
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status='ONLINE',comment='WRITE',weight=1000000 WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND hostname='$host' AND port=$port"
- check_cmd_and_exit $LINENO $? "Cannot update the PXC node $HOSTGROUP_WRITER_ID:$address (query failed). Exiting."
- log_if_success $LINENO $? "Updated ${HOSTGROUP_WRITER_ID}:${host}:${port} node in ProxySQL."
- proxysql_exec $LINENO -Ns "DELETE FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_READER_ID and hostname='$host' and port=$port"
- check_cmd_and_exit $LINENO $? "writer-is-reader(never): Failed to remove $HOSTGROUP_READER_ID:$server:$port (query failed). Exiting."
- log_if_success $LINENO $? "writer-is-reader(never): Removing reader $HOSTGROUP_READER_ID:$server:$port"
- fi
- fi
- echo '1' > ${reload_check_file}
- break
- done< <(printf "${possible_hosts}\n")
- }
- # Returns the host priority list defined by the user
- #
- # Globals:
- # HOST_PRIORITY_FILE
- # P_PRIORITY
- # PROXYSQL_DATADIR
- # CLUSTER_NAME
- #
- # Arguments:
- # 1: cluster name
- #
- function get_host_priority_list() {
- local cluster_name=$1
- local priority_hosts=""
- if [[ -z $HOST_PRIORITY_FILE || ! -f $HOST_PRIORITY_FILE ]]; then
- HOST_PRIORITY_FILE=${PROXYSQL_DATADIR}/${cluster_name}_host_priority
- fi
- if [[ ! -z "$P_PRIORITY" ]] ; then
- IFS=',' read -r -a priority_hosts <<< "$P_PRIORITY"
- debug $LINENO "host_priority = ${priority_hosts[@]}"
- elif [[ -f $HOST_PRIORITY_FILE ]];then
- # Get the list of hosts from the host_priority file ignoring blanks and
- # any lines that start with '#'
- debug $LINENO "Found a host priority file: $HOST_PRIORITY_FILE"
- priority_hosts=$(cat "$HOST_PRIORITY_FILE" | grep '^[^#]')
- if [[ -n $priority_hosts ]]; then
- priority_hosts=($(echo $priority_hosts))
- fi
- fi
- # File sample:
- # 10.11.12.21:3306
- # 10.21.12.21:3306
- # 10.31.12.21:3306
- #
- echo "${priority_hosts[@]}"
- }
- # Builds the proxysql host list and merges it with the priority list.
- # The entries from the priority list are put at the top in order.
- #
- # If they have an entry in the proxysql_list, the data is copied over.
- # If it does not exist in the proxysql_list, then it is added
- # with status=OFFLINE_SOFT.
- #
- # This ensures that the entry will not be added as
- # a writer if the node is really offline.
- #
- # Globals:
- # None
- #
- # Argument:
- # 1: field separator
- # 2: the proxysql list (array of nodes from proxysql)
- # 3: the priority list
- #
- function build_proxysql_list_with_priority() {
- local sep=$1
- local proxysql_list=$(echo "$2" | tr ' ' '\n')
- local priority_list=($3)
- local new_proxysql_list=()
- local reader_list=""
- local reader_query=0
- for prio in "${priority_list[@]}"; do
- local psql_entry
- local prio_entry
- local prio_ip
- local prio_port
- prio_entry=$(separate_ip_port_from_address "$prio")
- prio_ip=$(echo "$prio_entry" | cut -d' ' -f1)
- prio_port=$(echo "$prio_entry" | cut -d' ' -f2)
- prio_entry="$prio_ip$sep$prio_port"
- # properly escape the characters for grep
- prio_entry_re="$(printf '%s' "$prio_entry" | sed 's/[.[\*^$]/\\&/g')"
- psql_entry=$(echo "$proxysql_list" | grep "^${prio_entry_re}[^[[:space:]]]*")
- if [[ $? -eq 0 && -n ${psql_entry} ]]; then
- # Add entries that are in the priority list and in the proxysql list
- new_proxysql_list+=($psql_entry)
- else
- # Add entries that are in the priority list but not in proxysql
- # (but only if there is a reader entry for the address)
- if [[ $reader_query -eq 0 ]]; then
- reader_list=$(proxysql_exec $LINENO -Ns "SELECT hostname, port
- FROM mysql_servers
- WHERE hostgroup_id IN ($HOSTGROUP_READER_ID)
- AND status <> 'OFFLINE_HARD'
- AND comment <> 'SLAVEREAD'
- ORDER BY status DESC, hostgroup_id, weight DESC, hostname, port")
- check_cmd_and_exit $LINENO $? "Unable to obtain list of nodes from ProxySQL (query failed). Exiting."
- reader_list=$(echo "$reader_list" | tr '\t' "$sep" | tr '\n' ' ')
- reader_query=1
- fi
- if [[ -n $reader_list ]]; then
- if echo $reader_list | grep -E -q "(^|[[:space:]])${prio_entry_re}($|[[:space:]])*"; then
- # If this address is a reader, add a fake writer entry
- # If detected to be online, the writer check algorithm
- # will insert an entry
- new_proxysql_list+=("${prio_entry}${sep}${HOSTGROUP_WRITER_ID}${sep}OFFLINE_SOFT${sep}WRITE${sep}PRIORITY_NODE")
- fi
- fi
- fi
- done
- # Add the rest of the entries (not in the priority list but in proxysql)
- # For this magic, see:
- # https://stackoverflow.com/questions/7577052/bash-empty-array-expansion-with-set-u
- if [[ -n ${priority_list[@]+"${priority_list[@]}"} ]]; then
- local priority_string="${priority_list[@]}"
- for row in ${proxysql_list}; do
- local host=$(echo $row | cut -d "$sep" -f 1)
- local port=$(echo $row | cut -d "$sep" -f 2)
- local addr=$(combine_ip_port_into_address "$host" "$port")
- # properly escape the characters for grep -E
- addr_re="$(printf '%s' "$addr" | sed 's/[.[\*^$()+?{|]/\\&/g')"
- if ! echo $priority_string | grep -E -q "(^|[[:space:]])${addr_re}($|[[:space:]])"; then
- new_proxysql_list+=($row)
- fi
- done
- fi
- if [[ -n ${new_proxysql_list[@]+"${new_proxysql_list[@]}"} ]]; then
- debug $LINENO "new priority list = ${new_proxysql_list[@]}"
- echo "${new_proxysql_list[@]}"
- fi
- }
- function update_writers() {
- local reload_check_file=$1
- local proxysql_list=""
- local priority_list=""
- # Nodes are orderd by status DESC first, this allows ONLINE nodes to always
- # be processed first.
- writer_proxysql_query="SELECT
- writer.hostname,
- writer.port,
- writer.hostgroup_id,
- writer.status,
- writer.comment,
- reader.status
- FROM mysql_servers as writer
- LEFT JOIN mysql_servers as reader
- ON reader.hostgroup_id = $HOSTGROUP_READER_ID
- AND reader.hostname = writer.hostname
- AND reader.port = writer.port
- WHERE writer.hostgroup_id = $HOSTGROUP_WRITER_ID
- AND writer.status <> 'OFFLINE_HARD'
- ORDER BY writer.status DESC"
- proxysql_list=$(proxysql_exec $LINENO -Ns "$writer_proxysql_query")
- check_cmd_and_exit $LINENO $? "Unable to obtain list of nodes from ProxySQL (query failed). Exiting."
- if [[ $proxysql_list =~ [[:space:]]ONLINE[[:space:]]SLAVEREAD[[:space:]] ]]; then
- HAVE_SLAVEWRITERS=1
- fi
- # Now that we have the proxysql list, order it by the priority list
- # (with unprioritized nodes at the end)
- # Using the priority list only makes sense for non-load balancing
- if [[ $MODE != "loadbal" ]]; then
- priority_list=$(get_host_priority_list "$cluster_name")
- fi
- if [[ -n $priority_list ]]; then
- #
- # Need to do some processing of the lists so that we can pass them down
- # Assume that the fields cannot contain the ';' character'
- # Can't use "local -n" becaus of Centos6 (needs newer bash version)
- #
- local prio_list=${priority_list}
- local pxsql_list=$(echo "${proxysql_list[@]}" | tr '\t' ';' | tr '\n' ' ')
- proxysql_list=$(build_proxysql_list_with_priority ";" "$pxsql_list" "$prio_list")
- proxysql_list=$(echo "${proxysql_list}" | tr ' ' '\n' | tr ';' '\t')
- fi
- if [[ -z ${proxysql_list[@]+"${proxysql_list[@]}"} ]]; then
- log $LINENO "No writers found."
- return
- fi
- # Go through the list
- #
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- server=$(echo -e "$line" | cut -f1)
- port=$(echo -e "$line" | cut -f2)
- hostgroup=$(echo -e "$line" | cut -f3)
- stat=$(echo -e "$line" | cut -f4)
- comment=$(echo -e "$line" | cut -f5)
- rdstat=$(echo -e "$line" | cut -f6)
- if [[ $comment == "SLAVEREAD" ]]; then
- set_slave_status "${reload_check_file}" $hostgroup $server $port $stat
- continue
- fi
- local wsrep_status
- local pxc_main_mode
- local result
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- result=$(mysql_exec $LINENO "$server" "$port" -Nns "SHOW STATUS LIKE 'wsrep_local_state'; SHOW VARIABLES LIKE 'pxc_maint_mode';" 2>>${DEBUG_ERR_FILE})
- wsrep_status=$(echo "$result" | grep "wsrep_local_state" | awk '{ print $2 }')
- pxc_main_mode=$(echo "$result" | grep "pxc_maint_mode" | awk '{ print $2 }')
- if [[ -z $wsrep_status ]]; then
- wsrep_status="<unknown:query failed>"
- fi
- # For PXC 5.6 there is no pxc_maint_mode, so assume DISABLED
- if [[ -z $pxc_main_mode ]]; then
- pxc_main_mode="DISABLED"
- fi
- # If this node was added because of the priority list and the
- # node is not reporting SYNCED, then just skip it
- if [[ $rdstat == "PRIORITY_NODE" && $wsrep_status != "4" ]]; then
- continue
- fi
- log "" "--> Checking WRITE server $hostgroup:$address, current status $stat, wsrep_local_state $wsrep_status"
- # we have to limit amount of writers, WSREP status OK, AND node is marked ONLINE
- # PXC and ProxySQL agreee on the status
- # wsrep:ok pxc:-- status:online
- if [ $NUMBER_WRITERS -gt 0 -a "${wsrep_status}" = "4" -a "$stat" == "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- if [[ $number_writers_online -lt $NUMBER_WRITERS ]]; then
- number_writers_online=$(( $number_writers_online + 1 ))
- log "" "server $hostgroup:$address is already ONLINE: ${number_writers_online} of ${NUMBER_WRITERS} write nodes"
- else
- number_writers_online=$(( $number_writers_online + 1 ))
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "OFFLINE_SOFT" "$rdstat" \
- "max write nodes reached (${NUMBER_WRITERS})"
- echo "1" > ${reload_check_file}
- fi
- fi
- # WSREP status OK, but node is not marked ONLINE
- # Make the node ONLINE if possible
- # wsrep:ok pxc:ok status:not online
- if [ "${wsrep_status}" = "4" -a "$stat" != "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- # we have to limit amount of writers
- if [[ $NUMBER_WRITERS -gt 0 ]] ; then
- if [[ $number_writers_online -lt $NUMBER_WRITERS ]]; then
- number_writers_online=$(( $number_writers_online + 1 ))
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "ONLINE" "$rdstat" \
- "${number_writers_online} of ${NUMBER_WRITERS} write nodes"
- echo "1" > ${reload_check_file}
- else
- number_writers_online=$(( $number_writers_online + 1 ))
- if [ "$stat" != "OFFLINE_SOFT" ]; then
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "OFFLINE_SOFT" "" \
- "max write nodes reached (${NUMBER_WRITERS})"
- echo "1" > ${reload_check_file}
- elif [[ $rdstat != "PRIORITY_NODE" ]]; then
- log $LINENO "server $hostgroup:$address is already OFFLINE_SOFT, max write nodes reached (${NUMBER_WRITERS})"
- fi
- fi
- # we do not have to limit
- elif [[ $NUMBER_WRITERS -eq 0 ]] ; then
- # TODO: kennt, What if node is SHUNNED?
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "ONLINE" "$rdstat"\
- "Changed state, marking write node ONLINE"
- echo "1" > ${reload_check_file}
- fi
- fi
- # WSREP status is not ok, but the node is marked online, we should put it offline
- # wsrep:not ok pxc:-- status:online
- if [ "${wsrep_status}" != "4" -a "$stat" = "ONLINE" ]; then
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "OFFLINE_SOFT" "" \
- "WSREP status is ${wsrep_status} which is not ok"
- echo "1" > ${reload_check_file}
- # wsrep:-- pxc:not ok status:online
- elif [ "${pxc_main_mode}" != "DISABLED" -a "$stat" = "ONLINE" ]; then
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "OFFLINE_SOFT" "" \
- "pxc_maint_mode is $pxc_main_mode" 2>>${ERR_FILE}
- echo "1" > ${reload_check_file}
- # wsrep:not ok pxc:-- status:offline soft
- elif [ "${wsrep_status}" != "4" -a "$stat" = "OFFLINE_SOFT" -a "$rdstat" != "PRIORITY_NODE" ]; then
- log "" "server $hostgroup:$address is already OFFLINE_SOFT, WSREP status is ${wsrep_status} which is not ok"
- # wsrep:-- pxc:not ok status:offline soft
- elif [ "${pxc_main_mode}" != "DISABLED" -a "$stat" = "OFFLINE_SOFT" -a "$rdstat" != "PRIORITY_NODE" ]; then
- log "" "server $hostgroup:$address is already OFFLINE_SOFT, pxc_maint_mode is ${pxc_main_mode} which is not ok"
- fi
- done< <(printf "${proxysql_list[@]}\n")
- }
- #
- # This function checks the status of slave machines and sets their status field
- #
- # Globals:
- # PROXSQL_DATADIR
- # SLAVE_SECONDS_BEHIND
- # HOSTGROUP_SLAVEREADER_ID
- #
- # Arguments:
- # 1: Path to the reload_check_file
- # 2: Slave hostgroup
- # 3: Slave IP address
- # 4: Slave port
- # 5: Slave status in ProxySQL
- #
- function set_slave_status() {
- debug $LINENO "START set_slave_status"
- local reload_check_file=$1
- local ws_hg_id=$2
- local ws_ip=$3
- local ws_port=$4
- local ws_status=$5
- local ws_address
- ws_address=$(combine_ip_port_into_address "$ws_ip" "$ws_port")
- local node_id="${ws_hg_id}:${ws_address}"
- # This function will get and return a status of a slave node, 4=GOOD, 2=BEHIND, 0=OTHER
- local slave_status
- log $LINENO "--> Checking SLAVE server ${node_id}"
- slave_status=$(mysql_exec $LINENO "$ws_ip" "$ws_port" -nsE "SHOW SLAVE STATUS")
- check_cmd $LINENO $? "Cannot get status from the slave $ws_address, Please check cluster login credentials"
- slave_status=$(echo "$slave_status" | sed 's/ //g')
- echo "$slave_status" | grep "^Master_Host:" >/dev/null
- if [ $? -ne 0 ];then
- #
- # No status was found, this is not replicating
- # Only changing the status here as another node might be in the writer hostgroup
- #
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status = 'OFFLINE_HARD' WHERE hostname='$ws_ip' and port=$ws_port;"
- check_cmd_and_exit $LINENO $? "Cannot update Galera Cluster node $ws_address to ProxySQL database (query failed). Exiting"
- log_if_success $LINENO $? "slave server ${ws_address} set to OFFLINE_HARD status in ProxySQL (cannot determine slave status)."
- echo "1" > ${reload_check_file}
- else
- local slave_master_host slave_io_running slave_sql_running seconds_behind
- slave_master_host=$(echo "$slave_status" | grep "^Master_Host:" | cut -d: -f2)
- slave_io_running=$(echo "$slave_status" | grep "^Slave_IO_Running:" | cut -d: -f2)
- slave_sql_running=$(echo "$slave_status" | grep "^Slave_SQL_Running:" | cut -d: -f2)
- seconds_behind=$(echo "$slave_status" | grep "^Seconds_Behind_Master:" | cut -d: -f2)
- if [ "$seconds_behind" == "NULL" ];then
- #
- # When slave_io is not working, the seconds behind value will read 'NULL',
- # convert this to a number higher than the max
- #
- let seconds_behind=SLAVE_SECONDS_BEHIND+1
- fi
- if [ "$slave_sql_running" != "Yes" ];then
- #
- # Slave is not replicating, so set to OFFLINE_HARD
- #
- if [ "$ws_status" != "OFFLINE_HARD" ];then
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status = 'OFFLINE_HARD' WHERE hostname='$ws_ip' and port=$ws_port;"
- check_cmd_and_exit $LINENO $? "Cannot update Galera Cluster node $ws_address to ProxySQL database (query failed). Exiting."
- log_if_success $LINENO $? "slave server ${ws_address} set to OFFLINE_HARD status in ProxySQL (io:$slave_io_running sql:$slave_sql_running)."
- echo "1" > ${reload_check_file}
- else
- log $LINENO "slave server (${ws_hg_id}:${ws_address}) current status '$ws_status' in ProxySQL. (io:$slave_io_running sql:$slave_sql_running)"
- fi
- elif [[ $slave_io_running == "Yes" ]]; then
- #
- # The slave is replicating (and the cluster is up)
- # So set the status accordingly
- #
- if [ $seconds_behind -gt $SLAVE_SECONDS_BEHIND ];then
- # Slave is more than the set number of seconds behind, return status 2
- if [ "$ws_status" != "OFFLINE_SOFT" ];then
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status = 'OFFLINE_SOFT' WHERE hostname='$ws_ip' and port=$ws_port;"
- check_cmd_and_exit $LINENO $? "Cannot update Galera Cluster node $ws_address to ProxySQL database (query failed). Exiting."
- log_if_success $LINENO $? "slave server ${ws_address} set to OFFLINE_SOFT status in ProxySQL (slave is too far behind:$seconds_behind). (io:$slave_io_running sql:$slave_sql_running)"
- echo "1" > ${reload_check_file}
- else
- log $LINENO "slave server (${node_id}) current status '$ws_status' in ProxySQL. (io:$slave_io_running sql:$slave_sql_running)"
- fi
- else
- #
- # The slave is replicating and is caught up (relatively)
- # So it is ok for READs
- #
- if [ "$ws_status" != "ONLINE" ];then
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status = 'ONLINE' WHERE hostgroup_id=$HOSTGROUP_SLAVEREADER_ID AND hostname='$ws_ip' and port=$ws_port;"
- check_cmd_and_exit $LINENO $? "Cannot update Galera Cluster node $ws_address in ProxySQL (query failed). Exiting."
- log_if_success $LINENO $? "slave server $HOSTGROUP_SLAVEREADER_ID:${ws_address} set to ONLINE status in ProxySQL. (io:$slave_io_running sql:$slave_sql_running)"
- if [[ $ws_hg_id -ne $HOSTGROUP_SLAVEREADER_ID ]]; then
- log $LINENO "slave server (${node_id}) current status '$ws_status' in ProxySQL. (io:$slave_io_running sql:$slave_sql_running)"
- fi
- echo "1" > ${reload_check_file}
- else
- log $LINENO "slave server (${node_id}) current status '$ws_status' in ProxySQL. (io:$slave_io_running sql:$slave_sql_running)"
- fi
- fi
- else
- #
- # Note: if slave_sql_running is YES and slave_io_running is NO
- # This may indicate that the cluster is down, so we may move a
- # slave to the WRITE hostgroup (but that is not done here).
- # So leave it ONLINE in this state.
- #
- if [[ $ws_status == "ONLINE" ]]; then
- log $LINENO "slave server ${ws_address} status:'${ws_status}' maintained in case the cluster is down. (io:$slave_io_running sql:$slave_sql_running)"
- else
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers SET status = 'OFFLINE_SOFT' WHERE hostname='$ws_ip' and port=$ws_port;"
- check_cmd_and_exit $LINENO $? "Unable to update mysql_servers (query failed). Exiting."
- log $LINENO "slave server ${ws_address} status:'OFFLINE_SOFT'. (io:$slave_io_running sql:$slave_sql_running)"
- echo "1" > ${reload_check_file}
- fi
- fi
- fi
- debug $LINENO "END set_slave_status"
- }
- function update_readers() {
- local reload_check_file=$1
- if [[ $WRITER_IS_READER != "ondemand" ]]; then
- reader_proxysql_query="SELECT hostgroup_id,
- hostname,
- port,
- status,
- comment,
- 'NULL'
- FROM mysql_servers
- WHERE hostgroup_id IN ($HOSTGROUP_READER_ID)
- AND status <> 'OFFLINE_HARD'
- ORDER BY weight DESC, hostname, port"
- elif [[ $WRITER_IS_READER == "ondemand" ]]; then
- # We will not try to change reader state of nodes that are writer ONLINE,
- # so what we do is we ORDER BY writer.status ASC because by accident ONLINE
- # is last in the line
- reader_proxysql_query="SELECT reader.hostgroup_id,
- reader.hostname,
- reader.port,
- reader.status,
- reader.comment,
- writer.status
- FROM mysql_servers as reader
- LEFT JOIN mysql_servers as writer
- ON writer.hostgroup_id = $HOSTGROUP_WRITER_ID
- AND writer.hostname = reader.hostname
- AND writer.port = reader.port
- WHERE reader.hostgroup_id = $HOSTGROUP_READER_ID
- ORDER BY writer.status ASC,
- reader.weight DESC,
- reader.hostname,
- reader.port"
- fi
- # This is the count of nodes that have readers with ONLINE status
- # and writers that are not ONLINE (either no entry or !ONLINE)
- local online_readonly_nodes_found=0
- local query_result
- query_result=$(proxysql_exec $LINENO -Ns "$reader_proxysql_query")
- check_cmd_and_exit $LINENO $? "Unable to obtain list of nodes from ProxySQL (query failed). Exiting."
- if [[ $query_result =~ [[:space:]]SLAVEREAD[[:space:]] ]]; then
- HAVE_SLAVEREADERS=1
- fi
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- hostgroup=$(echo "$line" | cut -f1)
- server=$(echo "$line" | cut -f2)
- port=$(echo "$line" | cut -f3)
- stat=$(echo "$line" | cut -f4)
- comment=$(echo "$line" | cut -f5)
- writer_stat=$(echo "$line" | cut -f6)
- if [[ $comment == "SLAVEREAD" ]]; then
- set_slave_status "${reload_check_file}" $hostgroup $server $port $stat
- continue
- fi
- if [[ $stat == "OFFLINE_HARD" ]]; then
- continue
- fi
- local wsrep_status
- local pxc_main_mode
- local result
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- result=$(mysql_exec $LINENO "$server" "$port" -Nns "SHOW STATUS LIKE 'wsrep_local_state'; SHOW VARIABLES LIKE 'pxc_maint_mode';" 2>>${DEBUG_ERR_FILE})
- wsrep_status=$(echo "$result" | grep "wsrep_local_state" | awk '{ print $2 }')
- pxc_main_mode=$(echo "$result" | grep "pxc_maint_mode" | awk '{ print $2 }')
- if [[ -z $wsrep_status ]]; then
- wsrep_status="<unknown:query failed>"
- fi
- # For PXC 5.6 there is no pxc_maint_mode, so assume DISABLED
- if [[ -z $pxc_main_mode ]]; then
- pxc_main_mode="DISABLED"
- fi
- log "" "--> Checking READ server $hostgroup:$address, current status $stat, wsrep_local_state $wsrep_status"
- if [[ $WRITER_IS_READER == "ondemand" && $writer_stat == "ONLINE" ]] ; then
- if [ $online_readonly_nodes_found -eq 0 ] ; then
- # WSREP:ok PXC:ok STATUS:online
- if [ "${wsrep_status}" = "4" -a "$stat" == "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- log "" "server $hostgroup:$address is already ONLINE, is also write node in ONLINE state, not enough non-ONLINE readers found"
- fi
- # WSREP:ok PXC:ok STATUS:not online
- if [ "${wsrep_status}" = "4" -a "$stat" != "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- #
- # Enable the first one found as ONLINE
- # (when writer-is-reader=ondemand)
- #
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "ONLINE" ""\
- "marking ONLINE write node as read ONLINE state, not enough non-ONLINE readers found"
- echo "1" > ${reload_check_file}
- fi
- else
- # WSREP:ok PXC:ok STATUS:online
- if [ "${wsrep_status}" = "4" -a "$stat" == "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- # Else disable the other READ nodes
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "OFFLINE_SOFT" ""\
- "making ONLINE writer node as read OFFLINE_SOFT as well because writers should not be readers"
- echo "1" > ${reload_check_file}
- fi
- # WSREP:ok PXC:ok STATUS:not online
- if [ "${wsrep_status}" = "4" -a "$stat" != "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- log "" "server $hostgroup:$address is $stat, keeping node as $stat,as it is an ONLINE writer and we prefer not to have writers as readers (writer-is-reader:ondemand)"
- fi
- fi
- else
- # WSREP:ok PXC:ok STATUS:online
- if [ "${wsrep_status}" = "4" -a "$stat" == "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- log "" "server $hostgroup:$address is already ONLINE"
- online_readonly_nodes_found=$(( $online_readonly_nodes_found + 1 ))
- # WSREP:ok PXC:not ok STATUS:not online
- elif [ "${wsrep_status}" = "4" -a "$stat" != "ONLINE" -a "${pxc_main_mode}" != "DISABLED" ] ; then
- log "" "server $hostgroup:$address is $stat"
- fi
- # WSREP status OK, but node is not marked ONLINE
- # WSREP:ok PXC:ok STATUS:not online
- if [ "${wsrep_status}" = "4" -a "$stat" != "ONLINE" -a "${pxc_main_mode}" == "DISABLED" ] ; then
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "ONLINE" ""\
- "changed state"
- echo "1" > ${reload_check_file}
- online_readonly_nodes_found=$(( $online_readonly_nodes_found + 1 ))
- fi
- fi
- # WSREP status is not ok, but the node is marked online, we should put it offline
- # WSREP:not ok STATUS:online
- if [ "${wsrep_status}" != "4" -a "$stat" = "ONLINE" ]; then
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "OFFLINE_SOFT" ""\
- "WSREP status is ${wsrep_status} which is not ok"
- echo "1" > ${reload_check_file}
- # PXC:not ok STATUS:online
- elif [ "${pxc_main_mode}" != "DISABLED" -a "$stat" = "ONLINE" ];then
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "OFFLINE_SOFT" ""\
- "pxc_maint_mode is $pxc_main_mode" 2>>${ERR_FILE}
- echo "1" > ${reload_check_file}
- # WSREP:not ok STATUS:offline soft
- elif [ "${wsrep_status}" != "4" -a "$stat" = "OFFLINE_SOFT" ]; then
- log "" "server $hostgroup:$address is already OFFLINE_SOFT, WSREP status is ${wsrep_status} which is not ok"
- fi
- done< <(printf "$query_result\n")
- }
- # Looks specifically for nodes that are in the DONOR/DESYNCED(2) state
- #
- # Globals:
- # NUMBER_WRITERS
- # HOSTGROUP_WRITER_ID
- #
- # Arguments:
- # None
- #
- function search_for_desynced_writers() {
- local reload_check_file=$1
- local cnt=0
- local sort_order="ASC"
- # We want the writers to come before the readers
- if [[ $HOSTGROUP_WRITER_ID -gt $HOSTGROUP_READER_ID ]]; then
- sort_order="DESC"
- fi
- local writer_query="SELECT hostgroup_id, hostname, port, status
- FROM mysql_servers
- WHERE hostgroup_id IN ($HOSTGROUP_WRITER_ID,$HOSTGROUP_READER_ID)
- AND status <> 'OFFLINE_HARD'
- AND comment <> 'SLAVEREAD'
- ORDER BY hostgroup_id ${sort_order}"
- query_result=$(proxysql_exec $LINENO -Ns "$writer_query")
- check_cmd_and_exit $LINENO $? "Could not get the list of nodes (query failed). Exiting."
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- hostgroup=$(echo "$line" | cut -f1)
- server=$(echo "$line" | cut -f2)
- port=$(echo "$line" | cut -f3)
- stat=$(echo "$line" | cut -f4)
- safety_cnt=0
- while [ ${cnt} -lt $NUMBER_WRITERS -a ${safety_cnt} -lt 5 ]
- do
- local wsrep_status
- local pxc_main_mode
- local result
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- result=$(mysql_exec $LINENO "$server" "$port" -Nns "SHOW STATUS LIKE 'wsrep_local_state'; SHOW VARIABLES LIKE 'pxc_maint_mode';" 2>>${DEBUG_ERR_FILE})
- wsrep_status=$(echo "$result" | grep "wsrep_local_state" | awk '{ print $2 }')
- pxc_main_mode=$(echo "$result" | grep "pxc_maint_mode" | awk '{ print $2 }')
- if [[ -z $wsrep_status ]]; then
- wsrep_status="<unknown:query failed>"
- fi
- # PXC 5.6 does not have this, so default to DISABLED
- if [[ -z $pxc_main_mode ]]; then
- pxc_main_mode="DISABLED"
- fi
- # Nodes in maintenance are not allowed
- if [[ $pxc_main_mode != "DISABLED" ]]; then
- log "" "Skipping $hostgroup:$address node is in pxc_maint_mode:$pxc_main_mode"
- break
- fi
- log "" "Checking $hostgroup:$address for node in DONOR state, status $stat , wsrep_local_state $wsrep_status"
- if [ "${wsrep_status}" = "2" -a "$stat" != "ONLINE" ]; then
- # if we are on Donor/Desync and not online in mysql_servers -> proceed
- # If we do not have a writer row, we have to add it
- local writer_count=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND hostname='$server' AND port=$port")
- check_cmd_and_exit $LINENO $? "Could not get writer count (query failed). Exiting."
- if [[ $writer_count -eq 0 ]]; then
- proxysql_exec $LINENO -Ns \
- "INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,comment,max_connections)
- VALUES ('$server',$HOSTGROUP_WRITER_ID,$port,1000000,'WRITE',$MAX_CONNECTIONS);"
- check_cmd_and_exit $LINENO $? "Could not add writer node (query failed). Exiting."
- log $LINENO "Adding server $HOSTGROUP_WRITER_ID:$address with status ONLINE. Reason: WSREP status is DESYNC/DONOR, as this is the only node we will put this one online"
- else
- change_server_status $LINENO $HOSTGROUP_WRITER_ID "$server" "$port" "ONLINE" ""\
- "WSREP status is DESYNC/DONOR, as this is the only node we will put this one online"
- fi
- cnt=$(( $cnt + 1 ))
- local proxy_runtime_status
- proxy_runtime_status=$(proxysql_exec $LINENO -Ns "SELECT status FROM runtime_mysql_servers WHERE hostname='${server}' AND port='${port}' AND hostgroup_id='${hostgroup}'")
- check_cmd_and_exit $LINENO $? "Could not get writer node status (query failed). Exiting."
- if [ "${proxy_runtime_status}" != "ONLINE" ]; then
- # if we are not online in runtime_mysql_servers, proceed to change
- # the server status and reload mysql_servers
- echo "1" > ${reload_check_file}
- fi
- # TODO: kennt, this doesn't work. We will go through the upper
- # loop and move the writer to OFFLINE_SOFT (since it's desynced).
- # We would have to detect that state (then we could move it to 0).
- # Or we could only force the update if the tables are the same.
- # (But only for the descyned node case).
- # Maybe we can compare the table to itself, no updates needed if it
- # hasn't changed.
- # Note: If the node in the runtime is already ONLINE, then we don't
- # need to change the reload_check_file to 1
- # So if there was no change to the state, we will skip uploading
- # the in-memory tables and nothing will change. (This only makes
- # sense if all the nodes are already down). If any of the nodes are
- # up, this node will be moved to OFFLINE_SOFT since it is not in
- # the SYNCED(4) state.
- fi
- safety_cnt=$(( $safety_cnt + 1 ))
- done
- done< <(printf "$query_result\n")
- }
- #
- #
- # Globals:
- # HOSTGROUP_READER_ID
- #
- # Arguments:
- # None
- #
- function search_for_desynced_readers() {
- local reload_check_file=$1
- local cnt=0
- query_result=$(proxysql_exec $LINENO -Ns "SELECT hostgroup_id, hostname, port, status FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status <> 'OFFLINE_HARD' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get the list of nodes (query failed). Exiting."
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- hostgroup=$(echo "$line" | cut -f1)
- server=$(echo "$line" | cut -f2)
- port=$(echo "$line" | cut -f3)
- stat=$(echo "$line" | cut -f4)
- local safety_cnt=0
- while [[ ${cnt} -eq 0 && ${safety_cnt} -lt 5 ]]
- do
- local wsrep_status
- local pxc_main_mode
- local result
- local address
- address=$(combine_ip_port_into_address "$server" "$port")
- result=$(mysql_exec $LINENO "$server" "$port" -Nns "SHOW STATUS LIKE 'wsrep_local_state'; SHOW VARIABLES LIKE 'pxc_maint_mode';" 2>>${DEBUG_ERR_FILE})
- wsrep_status=$(echo "$result" | grep "wsrep_local_state" | awk '{ print $2 }')
- pxc_main_mode=$(echo "$result" | grep "pxc_maint_mode" | awk '{ print $2 }')
- if [[ -z $wsrep_status ]]; then
- wsrep_status="<unknown:query failed>"
- fi
- # PXC 5.6 does not have this, so default to DISABLED
- if [[ -z $pxc_main_mode ]]; then
- pxc_main_mode="DISABLED"
- fi
- # Nodes in maintenance are not allowed
- if [[ $pxc_main_mode != "DISABLED" ]]; then
- log "" "Skipping $hostgroup:$address node is in pxc_maint_mode:$pxc_main_mode"
- break
- fi
- log "" "Checking $hostgroup:$address for node in DONOR state, status $stat , wsrep_local_state $wsrep_status"
- if [ "${wsrep_status}" = "2" -a "$stat" != "ONLINE" ];then
- # if we are on Donor/Desync an not online in mysql_servers -> proceed
- change_server_status $LINENO $HOSTGROUP_READER_ID "$server" "$port" "ONLINE" ""\
- "WSREP status is DESYNC/DONOR, as this is the only node we will put this one online"
- cnt=$(( $cnt + 1 ))
- local proxy_runtime_status
- proxy_runtime_status=$(proxysql_exec $LINENO -Ns "SELECT status FROM runtime_mysql_servers WHERE hostname='${server}' AND port='${port}' AND hostgroup_id='${hostgroup}'")
- check_cmd_and_exit $LINENO $? "Could not get the runtime server status (query failed). Exiting."
- if [ "${proxy_runtime_status}" != "ONLINE" ]; then
- # if we are not online in runtime_mysql_servers,
- # proceed to change the server status and reload mysql_servers
- echo "1" > ${reload_check_file}
- fi
- # Note: If the node in the runtime is already ONLINE, then we don't
- # need to change the reload_check_file to 1
- # So if there was no change to the state, we will skip uploading
- # the in-memory tables and nothing will change. (This only makes
- # sense if all the nodes are already down). If any of the nodes are
- # up, this node will be moved to OFFLINE_SOFT since it is not in
- # the SYNCED(4) state.
- break 2
- fi
- safety_cnt=$(( $safety_cnt + 1 ))
- done
- done< <(printf "$query_result\n")
- }
- # Saves the reload_check_file value and resets it to 0
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: path to the reload_check_file
- #
- function save_reload_check() {
- local reload_check_file=$1
- local save_state
- save_state=$(cat "${reload_check_file}")
- echo 0 > "${reload_check_file}"
- echo "${save_state}"
- }
- # Resets the restore_reload_check value if needed
- # If nothing has changed, restores the value
- #
- # Globals:
- # None
- #
- # Arguments:
- # 1: Path to the reload_check_file
- # 2: The saved (previous) value
- #
- # Returns:
- # Returns 0 (success) if something changed
- # Else returns 1
- #
- function restore_reload_check() {
- local reload_check_file=$1
- local save_state=$2
- # Default return value is failure (non-zero)
- local rc=1
- if [[ $(cat ${reload_check_file}) -ne 0 ]] ; then
- save_state=1
- # A success means that something changed
- rc=0
- fi
- echo "${save_state}" > "${reload_check_file}"
- return $rc
- }
- # Adds slaves to the write hostgroup if needed
- #
- # Globals:
- # HOSTGROUP_WRITER_ID
- # HOSTGROUP_SLAVEREADER_ID
- #
- # Arguments:
- # None
- #
- function add_slave_to_write_hostgroup() {
- # General outline:
- # Delete all non-ONLINE slaves to the read hostgroup
- # Check for an ONLINE slave in the write hostgroup
- # If there is none, add a random slave to the write hostgroup
- # If cannot find an ONLINE, look for OFFLINE_SOFT (emergency only)
- local reload_check_file=$1
- local -i online_count=0
- local -i offline_count=0
- local query_result=""
- query_result=$(proxysql_exec $LINENO -Ns "SELECT status FROM mysql_servers
- WHERE hostgroup_id=$HOSTGROUP_WRITER_ID
- AND comment = 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get the list of slave writers (query failed). Exiting."
- # Count # of online vs offline slave nodes
- local status
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- status=$(echo $line | awk '{ print $1 }')
- if [[ $status = 'ONLINE' ]]; then
- online_count+=1
- else
- offline_count+=1
- fi
- done< <(printf "$query_result\n")
- # Remove any nodes that have been moved OFFLINE
- if [[ $offline_count -gt 0 ]]; then
- proxysql_exec $LINENO -Ns "DELETE FROM mysql_servers
- WHERE hostgroup_id=$HOSTGROUP_WRITER_ID
- AND status != 'ONLINE'
- AND comment = 'SLAVEREAD'"
- check_cmd_and_exit $LINENO $? "Failed to remove all non-ONLINE slaves acting as writers (query failed). Exiting."
- log_if_success $LINENO $? "Removed all non-ONLINE slaves acting as writers"
- echo 1 > "${reload_check_file}"
- fi
- # If there is an active slave writer node, no need to add another node
- if [[ $online_count -eq 1 ]]; then
- log $LINENO "Async-slave already ONLINE"
- return
- fi
- if [[ $online_count -eq 0 ]]; then
- # There are no ONLINE slaves, so add a random one from the
- # list of ONLINE reader slaves
- local next_host host port
- next_host=$(proxysql_exec $LINENO -Ns "select hostname,port FROM mysql_servers
- WHERE status='ONLINE'
- AND comment = 'SLAVEREAD'
- AND hostgroup_id='$HOSTGROUP_SLAVEREADER_ID'
- ORDER BY random() LIMIT 1")
- check_cmd_and_exit $LINENO $? "Could not get info for a slave node (query failed). Exiting."
- # Emergency situation, if there are no ONLINE hosts,
- # look for OFFLINE_SOFT hosts
- if [[ -z $next_host ]]; then
- next_host=$(proxysql_exec $LINENO -Ns "select hostname,port FROM mysql_servers
- WHERE status='OFFLINE_SOFT'
- AND comment = 'SLAVEREAD'
- AND hostgroup_id='$HOSTGROUP_SLAVEREADER_ID'
- ORDER BY random() LIMIT 1")
- check_cmd_and_exit $LINENO $? "Could not get info for a slave node (query failed). Exiting."
- if [[ -n $next_host ]]; then
- local host=$(echo "$next_host" | awk '{ print $1 }')
- local port=$(echo "$next_host" | awk '{ print $2 }')
- local address=$(combine_ip_port_into_address "$host" "$port")
- # Found a node, update the READER to ONLINE
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers
- SET status='ONLINE'
- WHERE hostgroup_id=$HOSTGROUP_SLAVEREADER_ID
- AND hostname='$host'
- AND port=$port"
- check_cmd_and_exit $LINENO $? "Could not update the info for a slave node (query failed). Exiting."
- log_if_success $LINENO $? "slave server ${HOSTGROUP_SLAVEREADER_ID}:$address set to ONLINE status in ProxySQL."
- echo 1 > "${reload_check_file}"
- fi
- fi
- if [[ -n $next_host ]]; then
- local host=$(echo "$next_host" | awk '{ print $1 }')
- local port=$(echo "$next_host" | awk '{ print $2 }')
- local address=$(combine_ip_port_into_address "$host" "$port")
- proxysql_exec $LINENO -Ns "INSERT INTO mysql_servers
- (hostname,hostgroup_id,port,weight,status,comment,max_connections)
- VALUES ('$host',$HOSTGROUP_WRITER_ID,$port,1000000,'ONLINE','SLAVEREAD',$MAX_CONNECTIONS);"
- check_cmd_and_exit $LINENO $? "Cannot add Galera Cluster node $address to ProxySQL (query failed). Exiting."
- log_if_success $LINENO $? "slave server ${HOSTGROUP_WRITER_ID}:$address set to ONLINE status in ProxySQL."
- echo 1 > "${reload_check_file}"
- else
- log $LINENO "Could not find any slave readers to promote to a writer"
- fi
- fi
- }
- # Removes slaves from the write hostgroup
- # (Actually moves the writers to OFFLINE_SOFT)
- #
- # Globals:
- # HOSTGROUP_WRITER_ID
- #
- # Arguments:
- # None
- #
- function remove_slave_from_write_hostgroup() {
- local reload_check_file=$1
- log $LINENO "Removing async-slaves from the writegroup"
- # Move all writer slaves to OFFLINE_SOFT
- proxysql_exec $LINENO -Ns "UPDATE mysql_servers
- SET status = 'OFFLINE_SOFT'
- WHERE hostgroup_id = $HOSTGROUP_WRITER_ID
- AND comment = 'SLAVEREAD'"
- check_cmd_and_exit $LINENO $? "Failed to move slave writers to OFFLINE_SOFT (query failed). Exiting."
- log_if_success $LINENO $? "Moved slave writers to OFFLINE_SOFT"
- echo 1 > "${reload_check_file}"
- }
- #
- # Globals:
- # TIMEOUT
- # MYSQL_USERNAME MYSQL_PASSWORD MYSQL_HOSTNAME MYSQL_PORT
- # PROXYSQL_DATADIR
- # HOSTGROUP_READER_ID HOSTGROUP_WRITER_ID
- # CONFIG_FILE
- # NUMBER_WRITERS
- #
- function main() {
- local cluster_name=""
- local mysql_credentials
- local scheduler_id
- # If this call fails, exit out. We can't do anything without the monitor
- # credentials. (Or if we can't connect to proxysql).
- mysql_credentials=$(proxysql_exec $LINENO -Ns "SELECT variable_value FROM global_variables WHERE variable_name IN ('mysql-monitor_username','mysql-monitor_password') ORDER BY variable_name DESC" "hide_output")
- check_cmd_and_exit $LINENO $? "Unable to obtain MySQL credentials from ProxySQL (query failed). Exiting."
- MYSQL_USERNAME=$(echo $mysql_credentials | awk '{print $1}')
- MYSQL_PASSWORD=$(echo $mysql_credentials | awk '{print $2}')
- # Search for the scheduler id that corresponds to our write hostgroup
- scheduler_id=$(proxysql_exec $LINENO -Ns "SELECT id FROM scheduler where arg1 LIKE '%--write-hg=$HOSTGROUP_WRITER_ID %' OR arg1 LIKE '%-w $HOSTGROUP_WRITER_ID %'")
- check_cmd_and_exit $LINENO $? "Could not retreive scheduler row (query failed). Exiting."
- if [[ -z $scheduler_id ]]; then
- error $LINENO "Cannot find the scheduler row with write hostgroup : $HOSTGROUP_WRITER_ID"
- else
- cluster_name=$(proxysql_exec $LINENO -Ns "SELECT comment FROM scheduler WHERE id=${scheduler_id}" 2>>$ERR_FILE)
- check_cmd_and_exit $LINENO $? "Cannot get the cluster name from ProxySQL (query failed). Exiting."
- fi
- if [[ -z $cluster_name ]]; then
- #
- # Could not find the cluster name from the scheduler,
- # contact the cluster directly
- #
- local available_host=""
- local server port
- # Find a PXC host that's we can connect to
- available_host=$(find_online_cluster_host)
- check_cmd_and_exit $LINENO $? "Cannot get the list of nodes in the cluster from ProxySQL (query failed). Exiting"
- if [[ -z $available_host ]]; then
- error $LINENO "Cannot contact a PXC host in hostgroups($HOSTGROUP_WRITER_ID, $HOSTGROUP_READER_ID)"
- else
- server=$(echo $available_host | awk '{print $1}')
- port=$(echo $available_host | awk '{print $2}')
- cluster_name=$(mysql_exec $LINENO "$server" "$port" -Nn \
- "SELECT @@wsrep_cluster_name" 2>>${ERR_FILE} | tail -1)
- fi
- if [[ ! -z $cluster_name ]]; then
- #
- # We've found the cluster name so update the scheduler args
- #
- if [[ -z $scheduler_id ]]; then
- warning "$LINENO" "Cannot update scheduler due to missing scheduler_id"
- else
- local arg1 my_path
- log "$LINENO" "Updating scheduler for cluster:${cluster_name} write_hg:$HOSTGROUP_WRITER_ID"
- arg1=$(proxysql_exec $LINENO -Ns "SELECT arg1 from scheduler where id=${scheduler_id}")
- check_cmd_and_exit $LINENO $? "Could not get arg1 from scheduler from (query failed)" "Please check ProxySQL credentials and status."
- if [[ -z $arg1 ]]; then
- warning $LINENO "Cannot update scheduler due to missing arguments (arg1 is empty)"
- else
- my_path=$(echo ${PROXYSQL_DATADIR}/${cluster_name}_proxysql_galera_check.log | sed 's#\/#\\\/#g')
- arg1=$(echo $arg1 | sed "s/--log=.*/--log=$my_path/g")
- proxysql_exec $LINENO -Ns "UPDATE scheduler set comment='$cluster_name',arg1='$arg1' where id=${scheduler_id};load scheduler to runtime;save scheduler to disk"
- check_cmd_and_exit $LINENO $? "Could not update scheduler from (query failed). Exiting."
- fi
- fi
- fi
- fi
- # Check to see if there are other proxysql_galera_checkers running
- # Before we can check, we need the cluster name
- check_is_galera_checker_running "$cluster_name"
- local mode=$MODE
- if [[ ! -z $P_MODE ]] ; then
- mode=$P_MODE
- else
- local proxysql_mode_file
- if [[ -z $cluster_name ]]; then
- proxysql_mode_file="${PROXYSQL_DATADIR}/mode"
- else
- proxysql_mode_file="${PROXYSQL_DATADIR}/${cluster_name}_mode"
- fi
- if [[ ! -f ${proxysql_mode_file} ]]; then
- local mode_check
- mode_check=$(proxysql_exec $LINENO -Ns "SELECT comment from mysql_servers where comment='WRITE' and hostgroup_id in ($HOSTGROUP_WRITER_ID, $HOSTGROUP_READER_ID)")
- if [[ "$mode_check" == "WRITE" ]]; then
- echo "singlewrite" > ${proxysql_mode_file}
- else
- echo "loadbal" > ${proxysql_mode_file}
- fi
- fi
- if [[ -r $proxysql_mode_file ]]; then
- mode=$(cat ${proxysql_mode_file})
- fi
- fi
- MODE=$mode
- # Running proxysql_node_monitor script.
- # First try the same directory as this script
- local monitor_dir
- monitor_dir=$(cd $(dirname $0) && pwd)
- if [[ ! -f $monitor_dir/proxysql_node_monitor ]]; then
- # Cannot find it in same directory, try default location
- monitor_dir="/usr/bin"
- if [[ ! -f $monitor_dir/proxysql_node_monitor ]]; then
- log "" "ERROR! Could not find proxysql_node_monitor. Terminating"
- exit 1
- fi
- fi
- # If the reload_check_file contains 1, then there was a change
- # made to the state and we need to upload the MYSQL SERVERS to runtime.
- # This is used because changes are made to the MYSQL SERVERS table
- # in other scripts/subshells.
- local reload_check_file
- if [[ -z $cluster_name ]]; then
- reload_check_file="${PROXYSQL_DATADIR}/reload"
- else
- reload_check_file="${PROXYSQL_DATADIR}/${cluster_name}_reload"
- fi
- echo "0" > ${reload_check_file}
- # Run the monitor script
- local proxysql_monitor_log
- if [[ -n $NODE_MONITOR_LOG_FILE ]]; then
- proxysql_monitor_log=$NODE_MONITOR_LOG_FILE
- else
- if [[ -z $cluster_name ]]; then
- proxysql_monitor_log="${PROXYSQL_DATADIR}/proxysql_node_monitor.log"
- else
- proxysql_monitor_log="${PROXYSQL_DATADIR}/${cluster_name}_proxysql_node_monitor.log"
- fi
- fi
- local more_monitor_options=""
- if [[ $DEBUG -ne 0 ]]; then
- more_monitor_options+=" --debug "
- fi
- # TODO: kennt, do we need to check the return code? do we care?
- $monitor_dir/proxysql_node_monitor --config-file=$CONFIG_FILE \
- --write-hg=$HOSTGROUP_WRITER_ID \
- --read-hg=$HOSTGROUP_READER_ID \
- --mode=$mode \
- --reload-check-file="$reload_check_file" \
- --log-text="$LOG_TEXT" \
- --max-connections="$MAX_CONNECTIONS" \
- --log="$proxysql_monitor_log" $more_monitor_options
- # print information prior to a run if ${ERR_FILE} is defined
- log "" "###### proxysql_galera_checker.sh SUMMARY ######"
- log "" "Hostgroup writers $HOSTGROUP_WRITER_ID"
- log "" "Hostgroup readers $HOSTGROUP_READER_ID"
- log "" "Number of writers $NUMBER_WRITERS"
- log "" "Writers are readers $WRITER_IS_READER"
- log "" "Log file $ERR_FILE"
- log "" "Mode $MODE"
- if [[ -n $P_PRIORITY ]]; then
- log "" "Priority $P_PRIORITY"
- fi
- if [[ -n $LOG_TEXT ]]; then
- log "" "Extra notes $LOG_TEXT"
- fi
- local number_readers_online=0
- local number_writers_online=0
- local save_reload_state=0
- log "" "###### HANDLE WRITER NODES ######"
- update_writers "${reload_check_file}"
- number_writers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND status = 'ONLINE' AND comment <> 'SLAVEREAD'" 2>>${ERR_FILE})
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- # Check to see if we need to add readers for any writer nodes
- # (depends on the writer-is-reader setting)
- number_readers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status = 'ONLINE' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- save_reload_state=$(save_reload_check "${reload_check_file}")
- writer_is_reader_check "$reload_check_file" "$number_readers_online"
- # Something changed
- if restore_reload_check "${reload_check_file}" $save_reload_state; then
- number_readers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status = 'ONLINE' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- fi
- if [ ${HOSTGROUP_READER_ID} -ne -1 ]; then
- log "" "###### HANDLE READER NODES ######"
- save_reload_state=$(save_reload_check "${reload_check_file}")
- update_readers "${reload_check_file}"
- # Something changed
- if restore_reload_check "${reload_check_file}" $save_reload_state; then
- number_readers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status = 'ONLINE' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- fi
- fi
- if [[ $MODE != "loadbal" ]]; then
- # If we have no writers, check if we can create one (just one)
- if [[ $number_writers_online -eq 0 ]]; then
- save_reload_state=$(save_reload_check "${reload_check_file}")
- ensure_one_writer_node "${reload_check_file}"
- # Something changed
- if restore_reload_check "${reload_check_file}" $save_reload_state; then
- # Check to see if we need to add readers for any writer nodes
- # (depends on the writer-is-reader setting)
- writer_is_reader_check "$reload_check_file" "$number_readers_online"
- echo 1 > "${reload_check_file}"
- number_readers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status = 'ONLINE' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- number_writers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND status = 'ONLINE' AND comment <> 'SLAVEREAD'" 2>>${ERR_FILE})
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- fi
- else
- debug $LINENO "writer nodes found: $number_writers_online, no need to add more"
- fi
- fi
- log "" "###### SUMMARY ######"
- log "" "--> Number of writers that are 'ONLINE': ${number_writers_online} : hostgroup: ${HOSTGROUP_WRITER_ID}"
- [[ ${HOSTGROUP_READER_ID} -ne -1 ]] && log "" "--> Number of readers that are 'ONLINE': ${number_readers_online} : hostgroup: ${HOSTGROUP_READER_ID}"
- # We don't have any writers... alert, try to bring some online!
- # This includes bringing a DONOR online
- if [[ ${number_writers_online} -eq 0 ]]; then
- log "" "###### TRYING TO FIX MISSING WRITERS ######"
- log "" "No writers found, Trying to enable last available node of the cluster (in Donor/Desync state)"
- save_reload_state=$(save_reload_check "${reload_check_file}")
- search_for_desynced_writers "${reload_check_file}"
- # Something changed
- if restore_reload_check "${reload_check_file}" $save_reload_state; then
- number_writers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id=$HOSTGROUP_WRITER_ID AND status = 'ONLINE' AND comment <> 'SLAVEREAD'" 2>>${ERR_FILE})
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- fi
- fi
- # We don't have any readers... alert, try to bring some online!
- if [[ ${HOSTGROUP_READER_ID} -ne -1 && ${number_readers_online} -eq 0 ]]; then
- log "" "###### TRYING TO FIX MISSING READERS ######"
- log "" "--> No readers found, Trying to enable last available node of the cluster (in Donor/Desync state) or pick the master"
- save_reload_state=$(save_reload_check "${reload_check_file}")
- search_for_desynced_readers "${reload_check_file}"
- # Something changed
- if restore_reload_check "${reload_check_file}" $save_reload_state; then
- number_readers_online=$(proxysql_exec $LINENO -Ns "SELECT count(*) FROM mysql_servers WHERE hostgroup_id IN ($HOSTGROUP_READER_ID) AND status = 'ONLINE' AND comment <> 'SLAVEREAD'")
- check_cmd_and_exit $LINENO $? "Could not get node count (query failed). Exiting."
- fi
- fi
- # Check to see if we need to enable the slaves as readers/writers
- if [[ $HAVE_SLAVEREADERS -eq 1 ]]; then
- save_reload_state=$(save_reload_check "${reload_check_file}")
- log $LINENO "###### ASYNC-SLAVE ACTIVITY ######"
- # If use-slave-as-writer is set, then we do not allow slaves to
- # be added, however we still call the remove slave writers
- # in case the option was just changed.
- if [[ $SLAVE_IS_WRITER == "yes" && $number_readers_online -eq 0 && $number_writers_online -eq 0 ]]; then
- # No cluster nodes active, add a slave to the writer hostgroup
- debug $LINENO "Nothing in the cluster, checking slavereaders"
- add_slave_to_write_hostgroup "${reload_check_file}"
- elif [[ $HAVE_SLAVEWRITERS -eq 1 ]]; then
- # Active cluster nodes discovered, remove all write nodes
- debug $LINENO "Found a cluster, removing slavereaders (acting as writers)"
- remove_slave_from_write_hostgroup "${reload_check_file}"
- fi
- local query_result
- local -i num_slave_writers=0
- local -i num_slave_readers=0
- query_result=$(proxysql_exec $LINENO -Ns "SELECT hostgroup_id FROM mysql_servers
- WHERE hostgroup_id IN ($HOSTGROUP_WRITER_ID, $HOSTGROUP_READER_ID, $HOSTGROUP_SLAVEREADER_ID)
- AND comment = 'SLAVEREAD'
- AND status = 'ONLINE'")
- check_cmd_and_exit $LINENO $? "Could not get the list of slave nodes (query failed). Exiting."
- while read line; do
- if [[ -z $line ]]; then
- continue
- fi
- if [[ $line = "$HOSTGROUP_WRITER_ID" ]]; then
- num_slave_writers+=1
- elif [[ $line = "$HOSTGROUP_READER_ID" ]]; then
- num_slave_readers+=1
- fi
- done< <(printf "$query_result\n")
- log "" "--> Number of slave writers that are 'ONLINE': ${num_slave_writers} : hostgroup: ${HOSTGROUP_WRITER_ID}"
- [[ ${HOSTGROUP_READER_ID} -ne -1 ]] && log "" "--> Number of slave readers that are 'ONLINE': ${num_slave_readers} : hostgroup: ${HOSTGROUP_READER_ID}"
- restore_reload_check "${reload_check_file}" $save_reload_state
- fi
- if [[ $(cat ${reload_check_file}) -ne 0 ]] ; then
- log "" "###### Loading mysql_servers config into runtime ######"
- proxysql_exec $LINENO -Ns "LOAD MYSQL SERVERS TO RUNTIME;" 2>>${ERR_FILE}
- check_cmd_and_exit $LINENO $? "Could not update the mysql_servers table in ProxySQL. Exiting."
- else
- log "" "###### Not loading mysql_servers, no change needed ######"
- fi
- }
- #-------------------------------------------------------------------------------
- #
- # Step 4 : Begin script execution
- #
- #
- # In the initial version, ProxySQL has 5 slots for command-line arguments
- # and would send them separately (each enclosed in double quotes,
- # such as "arg1" "arg2" etc..).
- #
- # We now configure all parameters in the arg1 field (since we need more
- # than 5 arguments). This means that we now receive all the arguments
- # in one parameter "arg1 arg2 arg3 arg4 arg5"
- #
- # This means that anytime this script is called with > 1 argument, we
- # assume that the old method is in use and we need to upgrade the script
- # to the new method (in upgrade_scheduler).
- #
- if [ "$#" -eq 1 ]; then
- # Below set will reshuffle parameters.
- # example arg1=" --one=1 --two=2" will result in:
- # $1 = --one=1
- # $2 = --two=2
- #
- # The eval is needed here to preserve whitespace in the arguments
- eval set -- $1
- else
- # Parse the arguments
- declare param value
- # We don't need all the options here, just the log/debug options
- while [[ $# -gt 0 && "$1" != "" ]]; do
- param=`echo $1 | awk -F= '{print $1}'`
- value=`echo $1 | awk -F= '{print $2}'`
- # Assume that all options start with a '-'
- # otherwise treat as a positional parameter
- if [[ ! $param =~ ^- ]]; then
- continue
- fi
- case $param in
- -h | --help)
- usage
- exit 0
- ;;
- --debug)
- DEBUG=1
- ;;
- -v | --version)
- echo "proxysql_galera_checker version $PROXYSQL_ADMIN_VERSION"
- exit 0
- ;;
- --log)
- if [[ -n $value ]]; then
- ERR_FILE=$value
- fi
- ;;
- esac
- shift
- done
- echo "Old config detected...trying to upgrade More than one parameter"
- if [[ $DEBUG -eq 1 ]]; then
- # For now
- if [[ -z $ERR_FILE && -t 1 ]]; then
- ERR_FILE=/dev/stderr
- fi
- fi
- upgrade_scheduler
- exit 1
- fi
- trap cleanup_handler EXIT
- parse_args "$@"
- main
- exit 0
|