proxysql_node_monitor 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. #!/bin/bash -u
  2. # This script will assist to setup Galera cluster ProxySQL monitoring script.
  3. #####################################################################################
  4. #-------------------------------------------------------------------------------
  5. #
  6. # Step 1 : Bash internal configuration
  7. #
  8. set -o nounset # no undefined variables
  9. set -o pipefail # internal pipe failures cause an exit
  10. #bash prompt internal configuration
  11. declare RED=""
  12. declare NRED=""
  13. #-------------------------------------------------------------------------------
  14. #
  15. # Step 2 : Global variables
  16. #
  17. declare -i DEBUG=0
  18. readonly PROXYSQL_ADMIN_VERSION="2.0.16"
  19. declare CONFIG_FILE="/etc/proxysql-admin.cnf"
  20. declare ERR_FILE="/dev/null"
  21. declare RELOAD_CHECK_FILE=""
  22. # Set to send output here when DEBUG is set
  23. declare DEBUG_ERR_FILE="/dev/null"
  24. declare -i WRITE_HOSTGROUP_ID=1
  25. declare -i READ_HOSTGROUP_ID=-1
  26. declare -i SLAVEREAD_HOSTGROUP_ID
  27. # This is the hostgroup that new nodes will be added to
  28. declare -i DEFAULT_HOSTGROUP_ID
  29. declare MODE
  30. declare CHECK_STATUS=0
  31. declare PROXYSQL_DATADIR='/var/lib/proxysql'
  32. declare -i TIMEOUT=10
  33. # Maximum time to wait for cluster status
  34. declare -i CLUSTER_TIMEOUT=3
  35. # Extra text that will be logged with the output
  36. # (useful for debugging/testing)
  37. declare LOG_TEXT=""
  38. # Default value for max_connections in mysql_servers
  39. declare MAX_CONNECTIONS="1000"
  40. #we need to be able to disable autodiscovery
  41. #This because in presence of multiple IPs on the PXC nodes the script choose to use the use the one
  42. #Nodes are registered for internal traffic which is normally different from the one
  43. #used for application traffic.
  44. #Result is that the AUTODICOVERY will cause ProxySQL to use wrong IP set
  45. declare PXC_AUTODISCOVERY=1
  46. #-------------------------------------------------------------------------------
  47. #
  48. # Step 3 : Helper functions
  49. #
  50. function log() {
  51. local lineno=$1
  52. shift
  53. if [[ -n $ERR_FILE ]]; then
  54. if [[ -n $lineno && $DEBUG -ne 0 ]]; then
  55. echo "[$(date +%Y-%m-%d\ %H:%M:%S)] (line $lineno) $*" >> $ERR_FILE
  56. else
  57. echo "[$(date +%Y-%m-%d\ %H:%M:%S)] $*" >> $ERR_FILE
  58. fi
  59. fi
  60. }
  61. function log_if_success() {
  62. local lineno=$1
  63. local rc=$2
  64. shift 2
  65. if [[ $rc -eq 0 ]]; then
  66. log "$lineno" "$*"
  67. fi
  68. }
  69. function error() {
  70. local lineno=$1
  71. shift
  72. log "$lineno" "ERROR: $*"
  73. }
  74. function warning() {
  75. local lineno=$1
  76. shift
  77. log "$lineno" "WARNING: $*"
  78. }
  79. function debug() {
  80. if [[ $DEBUG -eq 0 ]]; then
  81. return
  82. fi
  83. local lineno=$1
  84. shift
  85. log "$lineno" "${RED}debug: $*${NRED}"
  86. }
  87. function usage () {
  88. local path=$0
  89. cat << EOF
  90. Usage: ${path##*/} [ options ]
  91. Example:
  92. proxysql_node_monitor --write-hg=10 --read-hg=11 --config-file=/etc/proxysql-admin.cnf --log=/var/lib/proxysql/pxc_test_proxysql_galera_check.log
  93. Options:
  94. -w, --write-hg=<NUMBER> Specify ProxySQL write hostgroup.
  95. -r, --read-hg=<NUMBER> Specify ProxySQL read hostgroup.
  96. -m, --mode=[loadbal|singlewrite] ProxySQL read/write configuration mode, currently supporting: 'loadbal' and 'singlewrite' (the default) modes
  97. -p, --priority=<HOST_LIST> Can accept comma delimited list of write nodes priority
  98. -c, --config-file=PATH Specify ProxySQL-admin configuration file.
  99. -l, --log=PATH Specify proxysql_node_monitor log file.
  100. --log-text=TEXT This is text that will be written to the log file
  101. whenever this script is run (useful for debugging).
  102. --reload-check-file=PATH Specify file used to notify proysql_galera_checker
  103. of a change in server configuration
  104. --max-connections=<NUMBER> Value for max_connections in the mysql_servers table.
  105. This is the maximum number of connections that
  106. ProxySQL will open to the backend servers.
  107. (default: 1000)
  108. --debug Enables additional debug logging.
  109. -h, --help Display script usage information
  110. -v, --version Print version info
  111. EOF
  112. }
  113. # Check the permissions for a file or directory
  114. #
  115. # Globals:
  116. # None
  117. #
  118. # Arguments:
  119. # 1: the bash test to be applied to the file
  120. # 2: the lineno where this call is invoked (used for errors)
  121. # 3: the path to the file
  122. # 4: (optional) description of the path (mostly used for existence checks)
  123. #
  124. # Exits the script if the permissions test fails.
  125. #
  126. function check_permission() {
  127. local permission=$1
  128. local lineno=$2
  129. local path_to_check=$3
  130. local description=""
  131. if [[ $# -gt 3 ]]; then
  132. description="$4"
  133. fi
  134. if [ ! $permission "$path_to_check" ] ; then
  135. if [[ $permission == "-r" ]]; then
  136. error $lineno "You do not have READ permission for: $path_to_check"
  137. elif [[ $permission == "-w" ]]; then
  138. error $lineno "You do not have WRITE permission for: $path_to_check"
  139. elif [[ $permission == "-x" ]]; then
  140. error $lineno "You do not have EXECUTE permission for: $path_to_check"
  141. elif [[ $permission == "-e" ]]; then
  142. if [[ -n $description ]]; then
  143. error $lineno "Could not find the $description: $path_to_check"
  144. else
  145. error $lineno "Could not find: $path_to_check"
  146. fi
  147. elif [[ $permission == "-d" ]]; then
  148. if [[ -n $description ]]; then
  149. error $lineno "Could not find the $description: $path_to_check"
  150. else
  151. error $lineno "Could not find the directory: $path_to_check"
  152. fi
  153. elif [[ $permission == "-f" ]]; then
  154. if [[ -n $description ]]; then
  155. error $lineno "Could not find the $description: $path_to_check"
  156. else
  157. error $lineno "Could not find the file: $path_to_check"
  158. fi
  159. else
  160. error $lineno "You do not have the correct permissions for: $path_to_check"
  161. fi
  162. exit 1
  163. fi
  164. }
  165. #
  166. #
  167. # Globals:
  168. # None
  169. #
  170. # Arguments:
  171. # 1: the lineno
  172. # 2: the return value that is being checked
  173. # 3: the error message
  174. # 4: Additional information (only used if an error occurred) (optional)
  175. #
  176. # Returns:
  177. # Returns the return value that is passed in.
  178. # This allows the code that follows to check the return value.
  179. #
  180. # Note that this will NOT exit the script.
  181. #
  182. function check_cmd() {
  183. local lineno=$1
  184. local mpid=$2
  185. local error_msg=$3
  186. local error_info=""
  187. if [[ $# -ge 4 ]]; then
  188. error_info=$4
  189. fi
  190. if [ "$mpid" == "124" ]; then
  191. error $lineno "TIMEOUT: Connection terminated due to timeout."
  192. fi
  193. if [ ${mpid} -ne 0 ]; then
  194. warning $lineno "$error_msg."
  195. if [[ ! -z $error_info ]]; then
  196. log $lineno "$error_info."
  197. fi
  198. fi
  199. return $mpid
  200. }
  201. # Executes a SQL query with the (fully) specified server
  202. #
  203. # Globals:
  204. # None
  205. #
  206. # Arguments:
  207. # 1: lineno
  208. # 2: the name of the user
  209. # 3: the user's password
  210. # 4: the hostname of the server
  211. # 5: the port used to connect to the server
  212. # 6: timeout in secs
  213. # 7: arguments to the mysql client
  214. # 8: additional options to the [client] config
  215. # 9: the query to be run
  216. # 10: additional options, space separated
  217. # Available options:
  218. # "hide_output"
  219. # This will not show the output of the query when DEBUG is set.
  220. # Used to stop the display of sensitve information (such as passwords)
  221. # from being displayed when debugging.
  222. #
  223. function exec_sql() {
  224. local lineno=$1
  225. local user=$2
  226. local password=$3
  227. local hostname=$4
  228. local port=$5
  229. local timeout_secs=$6
  230. local args=$7
  231. local client_options=$8
  232. local query="$9"
  233. local more_options="${10}"
  234. local retvalue
  235. local retoutput
  236. debug "$lineno" "exec_sql : $user@$hostname:$port ==> $query"
  237. retoutput=$(printf "[client]\n${client_options}\nuser=${user}\npassword=\"${password}\"\nhost=${hostname}\nport=${port}" \
  238. | timeout ${timeout_secs} mysql --defaults-file=/dev/stdin --protocol=tcp \
  239. ${args} -e "$query")
  240. retvalue=$?
  241. if [[ $DEBUG -eq 1 ]]; then
  242. local number_of_newlines=0
  243. local dbgoutput=$retoutput
  244. if [[ " $more_options " =~ [[:space:]]hide_output[[:space:]] ]]; then
  245. dbgoutput="**** data hidden ****"
  246. fi
  247. if [[ -n $dbgoutput ]]; then
  248. number_of_newlines=$(printf "%s" "${dbgoutput}" | wc -l)
  249. fi
  250. if [[ $retvalue -ne 0 ]]; then
  251. debug "" "--> query failed $retvalue"
  252. elif [[ -z $dbgoutput ]]; then
  253. debug "" "--> query returned $retvalue : <query returned no data>"
  254. elif [[ ${number_of_newlines} -eq 0 ]]; then
  255. debug "" "--> query returned $retvalue : ${dbgoutput}"
  256. else
  257. debug "" "--> query returned $retvalue : <data follows>"
  258. printf "${dbgoutput//%/%%}\n" | while IFS= read -r line; do
  259. debug "" "----> $line"
  260. done
  261. fi
  262. fi
  263. printf "${retoutput//%/%%}"
  264. return $retvalue
  265. }
  266. # Executes a SQL query on proxysql (with a timeout of $TIMEOUT seconds)
  267. #
  268. # Globals:
  269. # PROXYSQL_USERNAME
  270. # PROXYSQL_PASSWORD
  271. # PROXYSQL_HOSTNAME
  272. # PROXYSQL_PORT
  273. # TIMEOUT
  274. #
  275. # Arguments:
  276. # 1: lineno (used for debugging/output, may be blank)
  277. # 2: The SQL query
  278. # 3: (optional) more options, see exec_sql
  279. #
  280. function proxysql_exec() {
  281. local lineno=$1
  282. local query="$2"
  283. local more_options=""
  284. local retoutput
  285. if [[ $# -ge 3 ]]; then
  286. more_options=$3
  287. fi
  288. exec_sql "$lineno" "$PROXYSQL_USERNAME" "$PROXYSQL_PASSWORD" \
  289. "$PROXYSQL_HOSTNAME" "$PROXYSQL_PORT" \
  290. "$TIMEOUT" "-Bs" "" "$query" "$more_options"
  291. retoutput=$?
  292. return $retoutput
  293. }
  294. # Executes a SQL query on mysql (with a timeout of $TIMEOUT secs)
  295. #
  296. # Globals:
  297. # CLUSTER_USERNAME
  298. # CLUSTER_PASSWORD
  299. # CLUSTER_HOSTNAME
  300. # CLUSTER_PORT
  301. # CLUSTER_TIMEOUT
  302. #
  303. # Arguments:
  304. # 1: lineno (used for debugging/output, may be blank)
  305. # 2: the query to be run
  306. # 3: (optional) more options, see exec_sql
  307. #
  308. function mysql_exec() {
  309. local lineno=$1
  310. local query=$2
  311. local more_options=""
  312. local retoutput
  313. if [[ $# -ge 3 ]]; then
  314. more_options=$3
  315. fi
  316. exec_sql "$lineno" "$CLUSTER_USERNAME" "$CLUSTER_PASSWORD" \
  317. "$CLUSTER_HOSTNAME" "$CLUSTER_PORT" \
  318. "$TIMEOUT" "-Bs" "connect-timeout=${CLUSTER_TIMEOUT}" "$query" "$more_options"
  319. retoutput=$?
  320. return $retoutput
  321. }
  322. # Executes a SQL query on mysql (with a timeout of $TIMEOUT secs)
  323. #
  324. # Globals:
  325. # CLUSTER_USERNAME
  326. # CLUSTER_PASSWORD
  327. # CLUSTER_TIMEOUT
  328. #
  329. # Arguments:
  330. # 1: lineno (used for debugging/output, may be blank)
  331. # 2: the hostname of the server
  332. # 3: the port used to connect to the server
  333. # 4: the query to be run
  334. # 5: (optional) more options, see exec_sql
  335. #
  336. function slave_exec() {
  337. local lineno=$1
  338. local hostname=$2
  339. local port=$3
  340. local query=$4
  341. local more_options=""
  342. local timeout_secs=$TIMEOUT
  343. local retoutput
  344. if [[ $# -ge 5 ]]; then
  345. more_options=$5
  346. fi
  347. exec_sql "$lineno" "$CLUSTER_USERNAME" "$CLUSTER_PASSWORD" \
  348. "$hostname" "$port" \
  349. "$timeout_secs" "-Bs" "" "$query" "$more_options"
  350. retoutput=$?
  351. return $retoutput
  352. }
  353. # Separates the IP address from the port in a network address
  354. # Works for IPv4 and IPv6
  355. #
  356. # Globals:
  357. # None
  358. #
  359. # Params:
  360. # 1. The network address to be parsed
  361. #
  362. # Outputs:
  363. # A string with a space separating the IP address from the port
  364. #
  365. function separate_ip_port_from_address()
  366. {
  367. #
  368. # Break address string into host:port/path parts
  369. #
  370. local address=$1
  371. # Has to have at least one ':' to separate the port from the ip address
  372. if [[ $address =~ : ]]; then
  373. ip_addr=${address%:*}
  374. port=${address##*:}
  375. else
  376. ip_addr=$address
  377. port=""
  378. fi
  379. # Remove any braces that surround the ip address portion
  380. ip_addr=${ip_addr#\[}
  381. ip_addr=${ip_addr%\]}
  382. echo "${ip_addr} ${port}"
  383. }
  384. # Combines the IP address and port into a network address
  385. # Works for IPv4 and IPv6
  386. # (If the IP address is IPv6, the IP portion will have brackets)
  387. #
  388. # Globals:
  389. # None
  390. #
  391. # Params:
  392. # 1: The IP address portion
  393. # 2: The port
  394. #
  395. # Outputs:
  396. # A string containing the full network address
  397. #
  398. function combine_ip_port_into_address()
  399. {
  400. local ip_addr=$1
  401. local port=$2
  402. local addr
  403. if [[ ! $ip_addr =~ \[.*\] && $ip_addr =~ .*:.* ]] ; then
  404. # If there are no brackets and it does have a ':', then add the brackets
  405. # because this is an unbracketed IPv6 address
  406. addr="[${ip_addr}]:${port}"
  407. else
  408. addr="${ip_addr}:${port}"
  409. fi
  410. echo $addr
  411. }
  412. # Update Galera Cluster nodes in ProxySQL database
  413. # This will take care of nodes that have gone up or gone down
  414. # (i.e. if the ProxySQL and PXC memberships differ).
  415. #
  416. # This does not take care of the policy issues, it does not
  417. # ensure there is a writer.
  418. #
  419. # Globals:
  420. # WRITE_HOSTGROUP_ID
  421. # READ_HOSTGROUP_ID
  422. # SLAVEREAD_HOSTGROUP_ID
  423. # MODE
  424. # MODE_COMMENT
  425. # CHECK_STATUS
  426. #
  427. # Arguments:
  428. # 1: active cluster host (may be empty if cluster is offline)
  429. # 1: active cluster port (may be empty if cluster is offline)
  430. #
  431. function update_cluster() {
  432. debug $LINENO "START update_cluster"
  433. local cluster_host=$1
  434. local cluster_port=$2
  435. local host_info=""
  436. local current_hosts=""
  437. local is_current_hosts_empty=0
  438. local wsrep_address=""
  439. local ws_address
  440. local ws_ip
  441. local ws_port
  442. local ws_hg_status
  443. local ws_hg_id
  444. local ws_status
  445. local ws_comment
  446. # get all nodes from ProxySQL in use by hostgroups
  447. host_info=$(proxysql_exec $LINENO "SELECT DISTINCT hostname || ':' || port,hostgroup_id,status FROM mysql_servers where status != 'OFFLINE_HARD' and hostgroup_id in ( $WRITE_HOSTGROUP_ID, $READ_HOSTGROUP_ID, $SLAVEREAD_HOSTGROUP_ID )" | tr '\t' ' ')
  448. if [[ -n host_info ]]; then
  449. # Extract the hostname and port from the rows
  450. # Creates a string of "host:port" separated by spaces
  451. current_hosts=""
  452. while read line; do
  453. if [[ -z $line ]]; then
  454. continue
  455. fi
  456. net_address=$(echo $line | cut -d' ' -f1)
  457. net_address=$(separate_ip_port_from_address $net_address)
  458. local ip_addr=$(echo "$net_address" | cut -d' ' -f1)
  459. local port=$(echo "$net_address" | cut -d' ' -f2)
  460. net_address=$(combine_ip_port_into_address "$ip_addr" "$port")
  461. current_hosts+="$net_address "
  462. done< <(printf "$host_info\n")
  463. current_hosts=${current_hosts% }
  464. fi
  465. if [[ -n $cluster_host && -n $cluster_port ]]; then
  466. # First, find a host that is online from ProxySQL
  467. ws_ip=$cluster_host
  468. ws_port=$cluster_port
  469. # Second, get the wsrep_incoming_addresses from the cluster
  470. wsrep_address=$(slave_exec $LINENO "${ws_ip}" "${ws_port}" \
  471. "SHOW STATUS LIKE 'wsrep_incoming_addresses'" | awk '{print $2}' | sed 's|,| |g')
  472. fi
  473. if [[ -z $wsrep_address && -z $current_hosts ]]; then
  474. debug $LINENO "Returning from update_cluster(), both PXC and ProxySQL have no active nodes"
  475. return
  476. fi
  477. #
  478. # Given the WSREP members, compare to ProxySQL
  479. # If missing from ProxySQL, add to ProxySQL as a reader.
  480. #
  481. debug $LINENO "Looking for PXC nodes not in ProxySQL"
  482. for i in ${wsrep_address}; do
  483. # if we have a match, the the PXC node is in ProxySQL and we can skip
  484. if [[ -n $current_hosts && " ${current_hosts} " =~ " ${i} " ]]; then
  485. continue
  486. fi
  487. log $LINENO "Cluster node (${i}) does not exist in ProxySQL, adding as a $MODE_COMMENT node"
  488. ws_address=$(separate_ip_port_from_address "$i")
  489. ws_ip=$(echo "$ws_address" | cut -d' ' -f1)
  490. ws_port=$(echo "$ws_address" | cut -d' ' -f2)
  491. # Add the node as a reader
  492. local hostgroup
  493. # Before inserting, check if a previous READ entry exists (it may be in OFFLINE_HARD state)
  494. hostgroup=$(proxysql_exec $LINENO "SELECT hostgroup_id FROM mysql_servers WHERE hostgroup_id=${DEFAULT_HOSTGROUP_ID} AND hostname='${ws_ip}' AND port=${ws_port}")
  495. if [[ -n $hostgroup ]]; then
  496. # Update reader to OFFLINE_SOFT if new PXC node in ProxySQL
  497. proxysql_exec $LINENO "UPDATE mysql_servers SET status='OFFLINE_SOFT',weight=1000,comment='$MODE_COMMENT' WHERE hostname='${ws_ip}' AND port=${ws_port} AND hostgroup_id=${hostgroup}"
  498. check_cmd $LINENO $? "Cannot update Galera Cluster node $ws_address (hostgroup $hostgroup) to ProxySQL database, Please check ProxySQL login credentials"
  499. log_if_success $LINENO $? "Updated ${hostgroup}:${i} node in the ProxySQL database."
  500. else
  501. if [[ $PXC_AUTODISCOVERY -eq 1 ]]; then
  502. # Insert a reader if new PXC node not in ProxySQL
  503. proxysql_exec $LINENO "INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,comment,max_connections) VALUES ('$ws_ip',$DEFAULT_HOSTGROUP_ID,$ws_port,1000,'$MODE_COMMENT',$MAX_CONNECTIONS);"
  504. check_cmd $LINENO $? "Cannot add Galera Cluster node $ws_address (hostgroup $DEFAULT_HOSTGROUP_ID) to ProxySQL database, Please check ProxySQL login credentials"
  505. log_if_success $LINENO $? "Added ${DEFAULT_HOSTGROUP_ID}:${i} node into ProxySQL database."
  506. else
  507. log_if_success $LINENO $? "Node identified by AUTODISCOVERY but will not add it because insert by AUTODISCOVERY for PXC is disable. Candidate node: ${DEFAULT_HOSTGROUP_ID}:${i} for ProxySQL database."
  508. fi
  509. fi
  510. CHECK_STATUS=1
  511. done
  512. #
  513. # Given the ProxySQL members, compare to WSREP
  514. # If not in WSREP, mark as OFFLINE_HARD
  515. #
  516. debug $LINENO "Looking for ProxySQL nodes not in PXC"
  517. for i in $current_hosts; do
  518. # if we have a match, then the proxysql node is in PXC
  519. # so we can skip it
  520. if [[ -n ${wsrep_address} && " ${wsrep_address} " =~ " ${i} " ]]; then
  521. continue
  522. fi
  523. debug $LINENO "ProxySQL host $i not found in cluster membership"
  524. #
  525. # The current host in current_hosts was not found in cluster membership,
  526. # set it OFFLINE_SOFT unless its a slave node
  527. #
  528. ws_address=$(separate_ip_port_from_address "$i")
  529. ws_ip=$(echo "$ws_address" | cut -d' ' -f1)
  530. ws_port=$(echo "$ws_address" | cut -d' ' -f2)
  531. # This is supported by status, so OFFLINE should come before ONLINE
  532. # Note that the status is in DESC order, so "ONLINE : OFFLINE_SOFT : OFFLINE_HARD"
  533. # This is needed because there may be multiple entries
  534. ws_hg_status=$(proxysql_exec $LINENO "SELECT hostgroup_id,status,comment from mysql_servers WHERE hostname='$ws_ip' and port=$ws_port ORDER BY status DESC LIMIT 1")
  535. ws_hg_id=$(echo -e "$ws_hg_status" | cut -f1)
  536. ws_status=$(echo -e "$ws_hg_status" | cut -f2)
  537. ws_comment=$(echo -e "$ws_hg_status" | cut -f3)
  538. if [ "$ws_comment" == "SLAVEREAD" ]; then
  539. # This update now happens in proxysql_galera_checker
  540. continue
  541. fi
  542. if [ "$ws_status" == "OFFLINE_SOFT" ]; then
  543. #
  544. # If OFFLINE_SOFT, move to OFFLINE_HARD
  545. #
  546. log $LINENO "Cluster node ${ws_hg_id}:${i} does not exist in PXC! Changing status from OFFLINE_SOFT to OFFLINE_HARD"
  547. proxysql_exec $LINENO "UPDATE mysql_servers set status='OFFLINE_HARD' WHERE hostname='$ws_ip' and port=$ws_port"
  548. check_cmd $LINENO $? "Cannot update Galera Cluster writer node in ProxySQL database, Please check ProxySQL login credentials"
  549. CHECK_STATUS=1
  550. elif [[ $ws_status == "ONLINE" ]]; then
  551. #
  552. # else if ONLINE, move to OFFLINE_SOFT
  553. # It will take another iteration to get it to OFFLINE_HARD
  554. #
  555. log $LINENO "Cluster node ${ws_hg_id}:${i} does not exist in PXC! Changing status to OFFLINE_SOFT"
  556. # Set all entries to OFFLINE_SOFT
  557. proxysql_exec $LINENO "UPDATE mysql_servers set status='OFFLINE_SOFT' WHERE hostname='$ws_ip' and port=$ws_port"
  558. check_cmd $LINENO $? "Cannot update Galera Cluster writer node in ProxySQL database, Please check ProxySQL login credentials"
  559. CHECK_STATUS=1
  560. fi
  561. node_status=$(proxysql_exec $LINENO "SELECT status from mysql_servers WHERE hostname='$ws_ip' and port=$ws_port ORDER BY status LIMIT 1")
  562. log $LINENO "Non-PXC node (${i}) current status '$node_status' in ProxySQL."
  563. done
  564. # Update the ProxySQL status for the new nodes
  565. for i in ${wsrep_address}; do
  566. if [[ -n $current_hosts && " ${current_hosts} " =~ " ${i} " ]]; then
  567. # Lookup the status in the host_info
  568. local host
  569. ws_address=$(separate_ip_port_from_address "$i")
  570. ws_ip=$(echo "$ws_address" | cut -d' ' -f1)
  571. ws_port=$(echo "$ws_address" | cut -d' ' -f2)
  572. # properly escape the characters for grep
  573. local re_i="$(printf '%s' "$ws_ip:$ws_port" | sed 's/[.[\*^$]/\\&/g')"
  574. host=$(echo "$host_info" | grep "${re_i}" | head -1)
  575. ws_hg_id=$(echo $host | cut -d' ' -f2)
  576. ws_status=$(echo $host | cut -d' ' -f3)
  577. log "" "Cluster node (${ws_hg_id}:${i}) current status '$ws_status' in ProxySQL."
  578. else
  579. ws_address=$(separate_ip_port_from_address "$i")
  580. ws_ip=$(echo "$ws_address" | cut -d' ' -f1)
  581. ws_port=$(echo "$ws_address" | cut -d' ' -f2)
  582. ws_hg_status=$(proxysql_exec $LINENO "SELECT hostgroup_id,status from mysql_servers WHERE hostname='$ws_ip' and port=$ws_port")
  583. ws_hg_id=$(echo $ws_hg_status | cut -d' ' -f1)
  584. ws_status=$(echo $ws_hg_status | cut -d' ' -f2)
  585. log $LINENO "Cluster node (${ws_hg_id}:${i}) current status '$ws_status' in ProxySQL database!"
  586. if [ "$ws_status" == "OFFLINE_HARD" ]; then
  587. # The node was OFFLINE_HARD, but its now in the cluster list
  588. # so lets make it OFFLINE_SOFT
  589. proxysql_exec $LINENO "UPDATE mysql_servers set status = 'OFFLINE_SOFT' WHERE hostname='$ws_ip' and port=$ws_port;"
  590. check_cmd $LINENO $? "Cannot update Galera Cluster node $i in the ProxySQL database, Please check the ProxySQL login credentials"
  591. log_if_success $LINENO $? "${ws_hg_id}:${i} node set to OFFLINE_SOFT status to ProxySQL database."
  592. CHECK_STATUS=1
  593. fi
  594. fi
  595. done
  596. debug $LINENO "END update_cluster"
  597. }
  598. # Move the entries in the list from writers to readers
  599. #
  600. # Globals:
  601. # READ_HOSTGROUP_ID
  602. # WRITE_HOSTGROUP_ID
  603. # CHECK_STATUS
  604. #
  605. # Arguments:
  606. # 1: A list of nodes to move to readers (entries are 'server port hostgroup')
  607. #
  608. function move_writers_to_readers() {
  609. debug $LINENO "START move_writers_to_readers($*)"
  610. local offline_writers=$1
  611. debug $LINENO "$offline_writers"
  612. printf "$offline_writers" | while read host port hostgroup status || [ -n "$hostgroup" ]
  613. do
  614. local read_count
  615. debug $LINENO "mode_change_check: Found OFFLINE_SOFT writer, changing to READ status and hostgroup $READ_HOSTGROUP_ID"
  616. read_count=$(proxysql_exec $LINENO "SELECT COUNT(*) FROM mysql_servers WHERE hostgroup_id=$READ_HOSTGROUP_ID AND hostname='$host' AND port=$port")
  617. if [[ $read_count -ne 0 ]]; then
  618. # If node is already a READER, update the READER
  619. if [[ $status == 'ONLINE' ]]; then
  620. proxysql_exec $LINENO "UPDATE mysql_servers SET status='OFFLINE_SOFT',hostgroup_id=$READ_HOSTGROUP_ID, comment='READ', weight=1000 WHERE hostgroup_id=$READ_HOSTGROUP_ID AND hostname='$host' AND port=$port"
  621. check_cmd $LINENO $? "Cannot update Galera Cluster writer node in ProxySQL database, Please check ProxySQL login credentials"
  622. log_if_success $LINENO $? "Changed OFFLINE_SOFT writer to a reader ($READ_HOSTGROUP_ID:$host:$port)"
  623. CHECK_STATUS=1
  624. fi
  625. else
  626. if [[ $status == "ONLINE" ]]; then
  627. local address
  628. address=$(combine_ip_port_into_address "$host" "$port")
  629. # If node is not a reader, add node as a reader
  630. debug $LINENO "Node is not a reader, adding node as a reader"
  631. proxysql_exec $LINENO \
  632. "INSERT INTO mysql_servers (hostname,hostgroup_id,port,status,weight,comment,max_connections)
  633. VALUES ('$host',$READ_HOSTGROUP_ID,$port,'OFFLINE_SOFT',1000,'READ',$MAX_CONNECTIONS);"
  634. check_cmd $LINENO $? "Cannot add Galera Cluster reader node in ProxySQL database, Please check ProxySQL login credentials"
  635. log $LINENO "Adding server $READ_HOSTGROUP_ID:$address with status OFFLINE_SOFT."
  636. CHECK_STATUS=1
  637. fi
  638. fi
  639. done
  640. }
  641. #
  642. # Globals:
  643. # PROXYSQL_DATADIR
  644. # CLUSTER_NAME
  645. # WRITE_HOSTGROUP_ID READ_HOSTGROUP_ID
  646. # MODE
  647. #
  648. # Arguments:
  649. # None
  650. #
  651. function mode_change_check(){
  652. debug $LINENO "START mode_change_check"
  653. # Check if the current writer is in an OFFLINE_SOFT state
  654. local offline_writers
  655. offline_writers=$(proxysql_exec $LINENO "SELECT hostname,port,hostgroup_id,status from mysql_servers where comment in ('WRITE', 'READWRITE') and status <> 'ONLINE' and hostgroup_id in ($WRITE_HOSTGROUP_ID)")
  656. if [[ -n $offline_writers ]]; then
  657. #
  658. # Found a writer node that was in 'OFFLINE_SOFT' state,
  659. # move it to the READ hostgroup unless the MODE is 'loadbal'
  660. #
  661. if [ "$MODE" != "loadbal" ]; then
  662. move_writers_to_readers "$offline_writers"
  663. fi
  664. fi
  665. debug $LINENO "END mode_change_check"
  666. }
  667. #
  668. # Globals:
  669. # DEBUG
  670. # CONFIG_FILE
  671. # WRITE_HOSTGROUP_ID READ_HOSTGROUP_ID
  672. # DEFAULT_HOSTGROUP_ID
  673. # MODE
  674. # ERR_FILE
  675. # PROXYSQL_ADMIN_VERSION
  676. # MODE_COMMENT
  677. # WRITE_WEIGHT
  678. #
  679. # Arguments:
  680. #
  681. function parse_args() {
  682. # Check if we have a functional getopt(1)
  683. if ! getopt --test; then
  684. go_out="$(getopt --options=w:r:c:l:m:p:vh --longoptions=write-hg:,read-hg:,mode:,priority:,config-file:,log:,reload-check-file:,log-text:,max-connections:,debug,version,help \
  685. --name="$(basename "$0")" -- "$@")"
  686. if [[ $? -ne 0 ]]; then
  687. # no place to send output
  688. echo "Script error: getopt() failed" >&2
  689. exit 1
  690. fi
  691. eval set -- "$go_out"
  692. fi
  693. if [[ $go_out == " --" ]];then
  694. usage
  695. exit 1
  696. fi
  697. #
  698. # We iterate through the command-line options twice
  699. # (1) to handle options that don't need permissions (such as --help)
  700. # (2) to handle options that need to be done before other
  701. # options, such as loading the config file
  702. #
  703. for arg
  704. do
  705. case "$arg" in
  706. -- ) shift; break;;
  707. --config-file )
  708. CONFIG_FILE="$2"
  709. check_permission -e $LINENO "$CONFIG_FILE" "proxysql-admin configuration file"
  710. debug $LINENO "--config-file specified, using : $CONFIG_FILE"
  711. shift 2
  712. ;;
  713. --help)
  714. usage
  715. exit 0
  716. ;;
  717. -v | --version)
  718. echo "proxysql_node_monitor version $PROXYSQL_ADMIN_VERSION"
  719. exit 0
  720. ;;
  721. --debug)
  722. DEBUG=1
  723. shift
  724. ;;
  725. *)
  726. shift
  727. ;;
  728. esac
  729. done
  730. #
  731. # Load the config file before reading in the command-line options
  732. #
  733. readonly CONFIG_FILE
  734. if [ ! -e "$CONFIG_FILE" ]; then
  735. warning "" "Could not locate the configuration file: $CONFIG_FILE"
  736. else
  737. check_permission -r $LINENO "$CONFIG_FILE"
  738. debug $LINENO "Loading $CONFIG_FILE"
  739. source "$CONFIG_FILE"
  740. fi
  741. if [[ $DEBUG -ne 0 ]]; then
  742. # For now
  743. if [[ -t 1 ]]; then
  744. ERR_FILE=/dev/stdout
  745. fi
  746. fi
  747. local p_mode=""
  748. # Reset the command line for the next invocation
  749. eval set -- "$go_out"
  750. for arg
  751. do
  752. case "$arg" in
  753. -- ) shift; break;;
  754. -w | --write-hg )
  755. WRITE_HOSTGROUP_ID=$2
  756. shift 2
  757. ;;
  758. -r | --read-hg )
  759. READ_HOSTGROUP_ID=$2
  760. shift 2
  761. ;;
  762. -m | --mode )
  763. p_mode="$2"
  764. shift 2
  765. if [ "$p_mode" != "loadbal" ] && [ "$p_mode" != "singlewrite" ]; then
  766. echo "ERROR: Invalid --mode passed:"
  767. echo " Please choose any of these modes: loadbal, singlewrite"
  768. exit 1
  769. fi
  770. ;;
  771. -p | --priority )
  772. # old parameter
  773. shift 2
  774. ;;
  775. --config-file )
  776. shift 2
  777. # The config-file is loaded before the command-line
  778. # arguments are handled.
  779. ;;
  780. -l | --log )
  781. ERR_FILE="$2"
  782. shift 2
  783. # Test if stdout and stderr are open to a terminal
  784. if [[ $ERR_FILE == "/dev/stdout" || $ERR_FILE == "/dev/stderr" ]]; then
  785. RED=$(tput setaf 1)
  786. NRED=$(tput sgr0)
  787. fi
  788. ;;
  789. --reload-check-file )
  790. RELOAD_CHECK_FILE="$2"
  791. shift 2
  792. ;;
  793. --log-text )
  794. LOG_TEXT="$2"
  795. shift 2
  796. ;;
  797. --max-connections )
  798. MAX_CONNECTIONS="$2"
  799. shift 2
  800. ;;
  801. --debug )
  802. shift;
  803. ;;
  804. -v | --version )
  805. shift;
  806. ;;
  807. -h | --help )
  808. shift;
  809. ;;
  810. esac
  811. done
  812. if [[ $DEBUG -eq 1 ]]; then
  813. DEBUG_ERR_FILE=$ERR_FILE
  814. fi
  815. #Timeout exists for instances where mysqld/proxysql may be hung
  816. TIMEOUT=5
  817. SLAVEREAD_HOSTGROUP_ID=$READ_HOSTGROUP_ID
  818. if [ $SLAVEREAD_HOSTGROUP_ID -eq $WRITE_HOSTGROUP_ID ];then
  819. let SLAVEREAD_HOSTGROUP_ID+=1
  820. fi
  821. DEFAULT_HOSTGROUP_ID=$READ_HOSTGROUP_ID
  822. if [[ $DEFAULT_HOSTGROUP_ID -eq -1 ]]; then
  823. DEFAULT_HOSTGROUP_ID=$WRITE_HOSTGROUP_ID
  824. fi
  825. CHECK_STATUS=0
  826. debug $LINENO "#### PROXYSQL NODE MONITOR ARGUMENT CHECKING"
  827. debug $LINENO "MODE: $MODE"
  828. debug $LINENO "check mode name from proxysql data directory "
  829. CLUSTER_NAME=$(proxysql_exec $LINENO "SELECT comment from scheduler where arg1 LIKE '%--write-hg=$WRITE_HOSTGROUP_ID %' OR arg1 LIKE '%-w $WRITE_HOSTGROUP_ID %'")
  830. check_cmd $LINENO $? "Cannot connect to ProxySQL at $PROXYSQL_HOSTNAME:$PROXYSQL_PORT"
  831. if [[ ! -z $p_mode ]] ; then
  832. MODE=$p_mode
  833. debug $LINENO "command-line: setting MODE to $MODE"
  834. else
  835. # Get the name of the mode file
  836. local proxysql_mode_file
  837. if [[ -z $CLUSTER_NAME ]]; then
  838. proxysql_mode_file="${PROXYSQL_DATADIR}/mode"
  839. else
  840. proxysql_mode_file="${PROXYSQL_DATADIR}/${CLUSTER_NAME}_mode"
  841. fi
  842. if [[ -f "$proxysql_mode_file" && -r "$proxysql_mode_file" ]]; then
  843. MODE=$(cat ${proxysql_mode_file})
  844. debug $LINENO "file: $proxysql_mode_file: setting MODE to $MODE"
  845. fi
  846. fi
  847. if [ "$MODE" == "loadbal" ]; then
  848. MODE_COMMENT="READWRITE"
  849. WRITE_WEIGHT="1000"
  850. else
  851. MODE_COMMENT="READ"
  852. WRITE_WEIGHT="1000000"
  853. fi
  854. if [[ -z $RELOAD_CHECK_FILE ]]; then
  855. error $LINENO "The --reload-check-file option is required."
  856. exit 1
  857. fi
  858. check_permission -r $LINENO "$RELOAD_CHECK_FILE"
  859. # Verify that we have an integer
  860. if ! [ "$MAX_CONNECTIONS" -eq "$MAX_CONNECTIONS" ] 2>/dev/null
  861. then
  862. error $LINENO "Invalid --max-connections value (must be a number) : $MAX_CONNECTIONS"
  863. exit 1
  864. fi
  865. readonly WRITE_HOSTGROUP_ID
  866. readonly READ_HOSTGROUP_ID
  867. readonly SLAVEREAD_HOSTGROUP_ID
  868. readonly MODE
  869. readonly MODE_COMMENT
  870. readonly WRITE_WEIGHT
  871. readonly CLUSTER_NAME
  872. readonly RELOAD_CHECK_FILE
  873. readonly MAX_CONNECTIONS
  874. }
  875. # Returns the address of an available (online) cluster host
  876. #
  877. # Globals:
  878. # WRITE_HOSTGROUP_ID
  879. # READ_HOSTGROUP_ID
  880. #
  881. # Arguments:
  882. # None
  883. #
  884. function find_online_cluster_host() {
  885. # Query the proxysql database for hosts,ports in use
  886. # Then just go through the list until we reach one that responds
  887. local hosts
  888. hosts=$(proxysql_exec $LINENO "SELECT DISTINCT hostname,port FROM mysql_servers WHERE comment<>'SLAVEREAD' AND hostgroup_id in ($WRITE_HOSTGROUP_ID, $READ_HOSTGROUP_ID)")
  889. printf "$hosts" | while read server port || [[ -n $port ]]
  890. do
  891. debug $LINENO "Trying to contact $server:$port..."
  892. slave_exec "$LINENO" "$server" "$port" "select @@port" 1>/dev/null 2>>${DEBUG_ERR_FILE}
  893. if [[ $? -eq 0 ]]; then
  894. printf "$server $port"
  895. return 0
  896. fi
  897. done
  898. # No cluster host available (cannot contact any)
  899. return 1
  900. }
  901. function main() {
  902. # Monitoring user needs 'REPLICATION CLIENT' privilege
  903. log $LINENO "###### Galera Cluster status ######"
  904. if [[ -n $LOG_TEXT ]]; then
  905. log $LINENO "Extra notes : $LOG_TEXT"
  906. fi
  907. debug $LINENO "write hostgroup id : $WRITE_HOSTGROUP_ID"
  908. debug $LINENO "read hostgroup id : $READ_HOSTGROUP_ID"
  909. debug $LINENO "mode : $MODE"
  910. CLUSTER_USERNAME=$(proxysql_exec $LINENO "SELECT variable_value FROM global_variables WHERE variable_name='mysql-monitor_username'")
  911. check_cmd $LINENO $? "Could not retrieve cluster login info from ProxySQL. Please check ProxySQL login credentials"
  912. CLUSTER_PASSWORD=$(proxysql_exec $LINENO "SELECT variable_value FROM global_variables WHERE variable_name='mysql-monitor_password'" "hide_output")
  913. check_cmd $LINENO $? "Could not retrieve cluster login info from ProxySQL. Please check ProxySQL login credentials"
  914. CLUSTER_TIMEOUT=$(proxysql_exec $LINENO "SELECT MAX(MAX(interval_ms / 1000 - 1, 1)) FROM scheduler")
  915. local cluster_host_info
  916. cluster_host_info=$(find_online_cluster_host)
  917. local host=""
  918. local port=""
  919. if [[ -n $cluster_host_info ]]; then
  920. host=$(echo $cluster_host_info | awk '{ print $1 }')
  921. port=$(echo $cluster_host_info | awk '{ print $2 }')
  922. fi
  923. update_cluster "$host" "$port"
  924. mode_change_check
  925. if [ $CHECK_STATUS -eq 0 ]; then
  926. if [[ -n $cluster_host_info ]]; then
  927. log $LINENO "Galera Cluster membership looks good"
  928. else
  929. log $LINENO "Galera Cluster is offline!"
  930. fi
  931. else
  932. echo "1" > ${RELOAD_CHECK_FILE}
  933. log $LINENO "###### MYSQL SERVERS was updated ######"
  934. fi
  935. }
  936. #-------------------------------------------------------------------------------
  937. #
  938. # Step 4 : Begin script execution
  939. #
  940. parse_args "$@"
  941. debug $LINENO "#### START PROXYSQL NODE MONITOR"
  942. main
  943. debug $LINENO "#### END PROXYSQL NODE MONITOR"
  944. exit 0