diff --git a/.code-workspace b/.code-workspace index 91ee21a123..c23ff2b63a 100644 --- a/.code-workspace +++ b/.code-workspace @@ -9,7 +9,10 @@ "**/bin/entrypoint": "python", "**/bin/qserv": "python", "**/bin/qserv-kraken": "python", - "**/bin/qserv-smig": "python" + "**/bin/qserv-smig": "python", + "**/templates/*.yaml": "helm", + "**/templates/**/*.yaml": "helm", + "**/templates/*.tpl": "helm" }, "python.analysis.extraPaths": [ "./python" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a50af4581d..741fa5c05d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -334,37 +334,21 @@ jobs: if: always() run: docker logs ${USER}-czar-http-1 - - name: Czar CMSD Log - if: always() - run: docker logs ${USER}-czar-cmsd-1 - - - name: Czar XROOTD Log - if: always() - run: docker logs ${USER}-czar-xrootd-1 - - name: Czar MariaDB Log if: always() run: docker logs ${USER}-czar-mariadb-1 - - name: Qzerv Worker 0 CMSD Log + - name: Qzerv Worker 0 worker-svc Log if: always() - run: docker logs ${USER}-worker-cmsd-0-1 - - - name: Qzerv Worker 0 XROOTD Log - if: always() - run: docker logs ${USER}-worker-xrootd-0-1 + run: docker logs ${USER}-worker-svc-0-1 - name: Qzerv Worker 0 MariaDB Log if: always() run: docker logs ${USER}-worker-mariadb-0-1 - - name: Qzerv Worker 1 CMSD Log - if: always() - run: docker logs ${USER}-worker-cmsd-1-1 - - - name: Qzerv Worker 1 XROOTD Log + - name: Qzerv Worker 1 worker-svc Log if: always() - run: docker logs ${USER}-worker-xrootd-1-1 + run: docker logs ${USER}-worker-svc-1-1 - name: Qzerv Worker 1 MariaDB Log if: always() diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index d1ea8ec323..7b28c51677 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -15,18 +15,11 @@ x-log-volume: - type: bind source: ./log/ target: /config-etc/log/ -x-worker-cmsd: - &worker-cmsd - image: "${QSERV_IMAGE:?err}" - init: true - # ports are published in 
worker-xrootd because this container uses that container's network stack. -x-worker-xrootd: - &worker-xrootd +x-worker-svc: + &worker-svc image: "${QSERV_IMAGE:?err}" init: true expose: - - "1094" - - "2131" - "3306" # for the worker db, which shares this container's network stack. x-repl-worker: &repl-worker @@ -43,6 +36,7 @@ volumes: volume_czar_xrootd: volume_czar_home: volume_czar_cfg: + volume_czar_transfer: volume_czar_mariadb_data: volume_czar_mariadb_cfg: @@ -53,14 +47,12 @@ volumes: volume_worker_0_data: volume_worker_0_results: - volume_worker_0_xrootd: volume_worker_0_home: volume_worker_0_mariadb_lib: volume_worker_0_mariadb_run: volume_worker_1_data: volume_worker_1_results: - volume_worker_1_xrootd: volume_worker_1_home: volume_worker_1_mariadb_lib: volume_worker_1_mariadb_run: @@ -97,29 +89,25 @@ services: - type: volume source: volume_worker_0_mariadb_run target: /var/run/mysqld # This is where the mariadb container puts the socket file - network_mode: "service:worker-xrootd-0" - worker-xrootd-0: - << : *worker-xrootd + network_mode: "service:worker-svc-0" + + worker-svc-0: + << : *worker-svc command: > - entrypoint worker-xrootd + entrypoint worker-svc --db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306 --db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306 - --vnid-config "@/usr/local/lib64/libreplica.so {{db_uri}}/qservw_worker 0 0" --repl-instance-id qserv_proj --repl-auth-key replauthkey --repl-admin-auth-key=repladminauthkey --repl-registry-host repl-registry --repl-registry-port 25082 --results-dirname /qserv/data/results - --cmsd-manager-name czar-xrootd - --log-cfg-file=/config-etc/log/log-worker-xrootd.cnf + --log-cfg-file=/config-etc/log/log-worker-svc.cfg volumes: - type: volume source: volume_worker_0_results target: /qserv/data/results - - type: volume - source: volume_worker_0_xrootd - target: /var/run/xrootd - type: volume source: volume_worker_0_home target: /home/qserv @@ -130,43 +118,14 @@ services: networks: default: aliases: - - 
worker-cmsd-0 - worker-mariadb-0 - worker-cmsd-0: - << : *worker-cmsd - command: > - entrypoint worker-cmsd - --db-uri mysql://qsmaster:CHANGEME@worker-mariadb-0:3306 - --vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0" - --results-dirname /qserv/data/results - --repl-instance-id qserv_proj - --repl-auth-key replauthkey - --repl-admin-auth-key=repladminauthkey - --repl-registry-host repl-registry - --repl-registry-port 25082 - --cmsd-manager-name czar-xrootd - network_mode: "service:worker-xrootd-0" - volumes: - - type: volume - source: volume_worker_0_results - target: /qserv/data/results - - type: volume - source: volume_worker_0_xrootd - target: /var/run/xrootd - - type: volume - source: volume_worker_0_home - target: /home/qserv - - type: volume - source: volume_worker_0_mariadb_run - target: /qserv/mariadb/run # This matches the ?socket=... location in --db-uri and --db-admin-uri - - << : *log-volume repl-worker-0: << : *repl-worker command: > entrypoint worker-repl --db-admin-uri mysql://root:CHANGEME@worker-mariadb-0:3306/qservw_worker - --repl-connection mysql://qsreplica@repl-mariadb:3306/qservReplica - --log-cfg-file=/config-etc/log/log-repl-worker.cnf + --repl-connection mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica + --log-cfg-file=/config-etc/log/log-repl-worker.cfg -- --instance-id=qserv_proj --auth-key=replauthkey @@ -184,6 +143,7 @@ services: source: volume_worker_0_home target: /home/qserv - << : *log-volume + # worker 1 uses and validates socket file (where possible) to connect to the worker-mariadb worker-mariadb-1: << : *worker-mariadb @@ -201,30 +161,26 @@ services: - type: volume source: volume_worker_1_mariadb_run target: /var/run/mysqld # This is where the mariadb container puts the socket file - network_mode: "service:worker-xrootd-1" - worker-xrootd-1: - << : *worker-xrootd + network_mode: "service:worker-svc-1" + + worker-svc-1: + << : *worker-svc command: > - entrypoint 
--log-level DEBUG worker-xrootd + entrypoint --log-level DEBUG worker-svc --db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306?socket={{db_socket}} --db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306?socket={{db_socket}} - --vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0" --repl-instance-id qserv_proj --repl-auth-key replauthkey --repl-admin-auth-key=repladminauthkey --repl-registry-host repl-registry --repl-registry-port 25082 --results-dirname /qserv/data/results - --cmsd-manager-name czar-xrootd --targs db_socket=/qserv/mariadb/run/mysqld.sock - --log-cfg-file=/config-etc/log/log-worker-xrootd.cnf + --log-cfg-file=/config-etc/log/log-worker-svc.cfg volumes: - type: volume source: volume_worker_1_results target: /qserv/data/results - - type: volume - source: volume_worker_1_xrootd - target: /var/run/xrootd - type: volume source: volume_worker_1_home target: /home/qserv @@ -235,44 +191,15 @@ services: networks: default: aliases: - - worker-cmsd-1 - worker-mariadb-1 - worker-cmsd-1: - << : *worker-cmsd - command: > - entrypoint --log-level DEBUG worker-cmsd - --db-uri mysql://qsmaster:CHANGEME@worker-mariadb-1:3306?socket=/qserv/mariadb/run/mysqld.sock - --vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0" - --results-dirname /qserv/data/results - --repl-instance-id qserv_proj - --repl-auth-key replauthkey - --repl-admin-auth-key=repladminauthkey - --repl-registry-host repl-registry - --repl-registry-port 25082 - --cmsd-manager-name czar-xrootd - network_mode: "service:worker-xrootd-1" - volumes: - - type: volume - source: volume_worker_1_results - target: /qserv/data/results - - type: volume - source: volume_worker_1_xrootd - target: /var/run/xrootd - - type: volume - source: volume_worker_1_home - target: /home/qserv - - type: volume - source: volume_worker_1_mariadb_run - target: /qserv/mariadb/run - - << : *log-volume repl-worker-1: << : *repl-worker # 
qserv-replica-worker app does not support socket file yet. command: > entrypoint worker-repl --db-admin-uri mysql://root:CHANGEME@worker-mariadb-1:3306/qservw_worker - --repl-connection mysql://qsreplica@repl-mariadb:3306/qservReplica - --log-cfg-file=/config-etc/log/log-repl-worker.cnf + --repl-connection mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica + --log-cfg-file=/config-etc/log/log-repl-worker.cfg -- --instance-id=qserv_proj --auth-key=replauthkey @@ -290,42 +217,7 @@ services: source: volume_worker_1_home target: /home/qserv - << : *log-volume - czar-xrootd: - image: "${QSERV_IMAGE:?err}" - init: true - command: > - entrypoint xrootd-manager - --cmsd-manager-name czar-xrootd - hostname: czar-xrootd - expose: - - "1094" - - "2131" - volumes: - - type: volume - source: volume_czar_xrootd - target: /var/run/xrootd - - type: volume - source: volume_worker_1_home - target: /home/qserv - - << : *log-volume - networks: - default: - aliases: - - czar-cmsd - czar-cmsd: - image: "${QSERV_IMAGE:?err}" - init: true - # NOTE!! cms-delay-servers must match the number of workers being launched! 
- command: entrypoint cmsd-manager --cms-delay-servers 2 - network_mode: "service:czar-xrootd" - volumes: - - type: volume - source: volume_czar_xrootd - target: /var/run/xrootd - - type: volume - source: volume_czar_home - target: /home/qserv - - << : *log-volume + czar-mariadb: image: "${QSERV_MARIADB_IMAGE:?err}" init: true @@ -349,6 +241,7 @@ services: - type: volume source: volume_czar_mariadb_run target: /var/run/mysqld + czar-proxy: image: "${QSERV_IMAGE:?err}" init: true @@ -357,8 +250,7 @@ services: --db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306?socket={{db_socket}} --db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306?socket={{db_socket}} --targs db_socket=/qserv/mariadb/run/mysqld.sock - --xrootd-manager czar-xrootd - --log-cfg-file=/config-etc/log/log-czar-proxy.cnf + --log-cfg-file=/config-etc/log/log-czar-proxy.cfg --repl-instance-id qserv_proj --repl-auth-key replauthkey --repl-admin-auth-key=repladminauthkey @@ -377,6 +269,10 @@ services: - type: volume source: volume_czar_mariadb_run target: /qserv/mariadb/run + - type: volume + source: volume_czar_transfer + target: /tmp + - << : *log-volume expose: - "3306" # for czar-mariadb @@ -393,7 +289,6 @@ services: command: > entrypoint --log-level DEBUG czar-http --db-uri mysql://qsmaster:CHANGEME@czar-mariadb:3306/ - --xrootd-manager czar-xrootd --czar-name http --http-port 4048 --http-threads 4 @@ -404,7 +299,7 @@ services: --http-conn-pool-size 2 --user qsmaster --password CHANGEME - --log-cfg-file=/config-etc/log/log-czar-proxy.cnf + --log-cfg-file=/config-etc/log/log-czar-proxy.cfg --repl-instance-id qserv_proj --repl-auth-key replauthkey --repl-admin-auth-key=repladminauthkey @@ -414,6 +309,9 @@ services: - type: volume source: volume_czar_cfg target: /config-etc + - type: volume + source: volume_czar_transfer + target: /tmp - type: volume source: volume_czar_home target: /home/qserv @@ -445,15 +343,14 @@ services: init: true command: > entrypoint --log-level DEBUG replication-controller - --db-uri 
mysql://qsreplica@repl-mariadb:3306/qservReplica + --db-uri mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica --db-admin-uri mysql://root:CHANGEME@repl-mariadb:3306/qservReplica --qserv-czar-db=mysql://root:CHANGEME@czar-mariadb:3306/qservMeta - --log-cfg-file=/config-etc/log/log-repl-controller.cnf + --log-cfg-file=/config-etc/log/log-repl-controller.cfg -- --instance-id=qserv_proj --auth-key=replauthkey --admin-auth-key=repladminauthkey - --xrootd-host=czar-xrootd --registry-host=repl-registry --controller-auto-register-workers=1 --qserv-sync-force @@ -479,9 +376,9 @@ services: init: true command: > entrypoint --log-level DEBUG replication-registry - --db-uri mysql://qsreplica@repl-mariadb:3306/qservReplica + --db-uri mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica --db-admin-uri mysql://root:CHANGEME@repl-mariadb:3306/qservReplica - --log-cfg-file=/config-etc/log/log-repl-registry.cnf + --log-cfg-file=/config-etc/log/log-repl-registry.cfg -- --instance-id=qserv_proj --auth-key=replauthkey diff --git a/deploy/compose/log/log-worker-xrootd.cnf b/deploy/compose/log/log-czar-proxy.cfg similarity index 87% rename from deploy/compose/log/log-worker-xrootd.cnf rename to deploy/compose/log/log-czar-proxy.cfg index 5d6ec716e2..be40eb2660 100644 --- a/deploy/compose/log/log-worker-xrootd.cnf +++ b/deploy/compose/log/log-czar-proxy.cfg @@ -4,4 +4,3 @@ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n -log4j.logger.lsst.qserv.xrdssi.msgs=WARN diff --git a/deploy/compose/log/log-repl-controller.cnf b/deploy/compose/log/log-repl-controller.cfg similarity index 100% rename from deploy/compose/log/log-repl-controller.cnf rename to deploy/compose/log/log-repl-controller.cfg diff --git a/deploy/compose/log/log-repl-registry.cnf b/deploy/compose/log/log-repl-registry.cfg similarity index 100% 
rename from deploy/compose/log/log-repl-registry.cnf rename to deploy/compose/log/log-repl-registry.cfg diff --git a/deploy/compose/log/log-repl-worker.cnf b/deploy/compose/log/log-repl-worker.cfg similarity index 100% rename from deploy/compose/log/log-repl-worker.cnf rename to deploy/compose/log/log-repl-worker.cfg diff --git a/etc/log.cnf b/deploy/compose/log/log-worker-svc.cfg similarity index 86% rename from etc/log.cnf rename to deploy/compose/log/log-worker-svc.cfg index 457d881474..be40eb2660 100644 --- a/etc/log.cnf +++ b/deploy/compose/log/log-worker-svc.cfg @@ -1,6 +1,6 @@ log4j.rootLogger=DEBUG, CONSOLE +#log4j.rootLogger=WARN, CONSOLE log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n -log4j.logger.lsst.qserv.xrdssi.msgs=WARN diff --git a/deploy/compose/log/log-czar-proxy.cnf b/deploy/compose/log/log.cfg similarity index 77% rename from deploy/compose/log/log-czar-proxy.cnf rename to deploy/compose/log/log.cfg index c8c8b11b54..be40eb2660 100644 --- a/deploy/compose/log/log-czar-proxy.cnf +++ b/deploy/compose/log/log.cfg @@ -4,5 +4,3 @@ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n -log4j.logger.lsst.qserv.xrdssi.msgs=WARN -#log4j.logger.lsst.qserv.xrdssi.msgs=DEBUG diff --git a/deploy/docker/base/Dockerfile b/deploy/docker/base/Dockerfile index 96fffa939f..b35e70c76f 100644 --- a/deploy/docker/base/Dockerfile +++ b/deploy/docker/base/Dockerfile @@ -2,7 +2,7 @@ # The 'qserv-build-base' target builds a container image to be used as the base of the supported build # environment for Qserv binaries. 
It includes the compiler toolchain and other developer tools, system # libraries, and relatively-slowly changing third-party libs and tools (boost, antlr, protobuf, mysql-proxy, -# xrootd, etc.) +# etc.) # # The idea is that this container should change relatively infrequently (i.e. every few months or so), and # that developers will be able to pull and cache the latest version generated by CI. Developers should not @@ -140,21 +140,6 @@ RUN cd /tmp \ && cd /tmp \ && rm -rf mysqlproxy -RUN cd /tmp \ - && git clone https://github.com/xrootd/xrootd.git \ - && cd xrootd \ - && git checkout tags/v5.9.0 \ - && git config --global user.email "qserv@slac.stanford.edu" \ - && git config --global user.name "Qserv" \ - && git cherry-pick b02d08c97bb2414e06bd440f120a486422a86d88 5b0983864c053a9d0de759fb65465dba4d2f21bd 9957a250e9635bc993f4b5b7ecd356de626f3e76 \ - && mkdir build \ - && cd build \ - && cmake -DCMAKE_BUILD_TYPE=Debug -DENABLE_PYTHON=off .. \ - && make -j8 \ - && make install \ - && cd /tmp \ - && rm -rf xrootd - RUN cd /tmp \ && git clone https://github.com/yhirose/cpp-httplib.git \ && cd cpp-httplib \ @@ -282,8 +267,7 @@ RUN mkdir -p /qserv/data && \ mkdir /config-etc && \ mkdir /config-etc/ssl && \ mkdir -p /qserv/run/tmp && \ - mkdir -p /var/run/xrootd && \ - chown qserv:qserv /qserv/data /config-etc /config-etc/ssl /qserv/run/tmp /var/run/xrootd + chown qserv:qserv /qserv/data /config-etc /config-etc/ssl /qserv/run/tmp RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 ENV PYTHONPATH="${PYTHONPATH}:/usr/local/python" @@ -298,12 +282,6 @@ COPY --from=qserv-build-base /usr/local/bin/mysql-proxy /usr/local/bin/ COPY --from=qserv-build-base /usr/local/lib/libmysql-*.so /usr/local/lib/ COPY --from=qserv-build-base /usr/local/lib/mysql-proxy/lua/*.so /usr/local/lib/mysql-proxy/lua/ COPY --from=qserv-build-base /usr/local/lib/mysql-proxy/plugins/*.so /usr/local/lib/mysql-proxy/plugins/ - -COPY --from=qserv-build-base /usr/local/include/xrootd/ 
/usr/local/include/ -COPY --from=qserv-build-base /usr/local/lib64/libXrd*.so /usr/local/lib64/ -COPY --from=qserv-build-base /usr/local/bin/xrootd /usr/local/bin/ -COPY --from=qserv-build-base /usr/local/bin/cmsd /usr/local/bin/ -COPY --from=qserv-build-base /usr/local/bin/xrdfs /usr/local/bin/ COPY --from=qserv-build-base /usr/local/lib64/libcpp-httplib.so /usr/local/lib64/ COPY --from=qserv-build-base /usr/local/lib64/libaws-*.so /usr/local/lib64/ diff --git a/deploy/docker/run/Dockerfile b/deploy/docker/run/Dockerfile index e12a1a21d2..36594f4eaf 100644 --- a/deploy/docker/run/Dockerfile +++ b/deploy/docker/run/Dockerfile @@ -20,7 +20,7 @@ COPY --chown=qserv:qserv lua/ /usr/local/lua/ COPY --chown=qserv:qserv python/ /usr/local/python/ COPY --chown=qserv:qserv qserv/ /usr/local/qserv/ COPY --chown=qserv:qserv etc/ /usr/local/etc/ -COPY --chown=qserv:qserv etc/log.cnf /config-etc/log/log.cnf +COPY --chown=qserv:qserv etc/log.cfg /config-etc/log/log.cfg RUN ldconfig diff --git a/deploy/helm/Chart.yaml b/deploy/helm/Chart.yaml new file mode 100644 index 0000000000..8615304689 --- /dev/null +++ b/deploy/helm/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: "v2" +type: "application" +name: "qserv" +description: "Qserv: a petascale parallel database" +version: 2026.5.2-pre +appVersion: "2026.5.2-pre" diff --git a/deploy/helm/environments/usdf-dev.yaml b/deploy/helm/environments/usdf-dev.yaml new file mode 100644 index 0000000000..0f75f4d334 --- /dev/null +++ b/deploy/helm/environments/usdf-dev.yaml @@ -0,0 +1,34 @@ +qservImageName: ghcr.io/lsst/qserv:2026.5.1-rc1-60-g56d1cae46 +mariadbImageName: ghcr.io/lsst/qserv-mariadb:2026.5.1-rc1 +sslProxyImageName: qserv/mysql-proxy-ssl:latest +ingestHelperImageName: ghcr.io/lsst/qserv-build-base:2026.5.1-rc1-1-gfd3801377 + +czars: + tier: [dev-czar] + +workers: + replicas: 70 + tier: [int-worker, prod-worker] + +replication: + tier: [int-utility] + +ingest: + enable: true + tier: [int-utility] + +czarExternalService: + enable: true + 
type: LoadBalancer + annotations: + metallb.io/address-pool: sdf-dmz + allocateLoadBalancerNodePorts: false + loadBalancerIP: 134.79.23.221 + loadBalancerSourceRanges: + - 35.232.79.62/32 + - 34.135.212.179/32 + - 35.225.239.81/32 + - 34.121.239.92/32 + - 134.79.0.0/16 + - 172.16.0.0/12 + - 172.24.49.51/32 diff --git a/deploy/helm/environments/usdf-int.yaml b/deploy/helm/environments/usdf-int.yaml new file mode 100644 index 0000000000..a56c706803 --- /dev/null +++ b/deploy/helm/environments/usdf-int.yaml @@ -0,0 +1,34 @@ +qservImageName: ghcr.io/lsst/qserv:2026.5.1-rc1-60-g56d1cae46 +mariadbImageName: ghcr.io/lsst/qserv-mariadb:2026.5.1-rc1 +sslProxyImageName: qserv/mysql-proxy-ssl:latest +ingestHelperImageName: ghcr.io/lsst/qserv-build-base:2026.5.1-rc1-1-gfd3801377 + +czars: + tier: [int-czar] + +workers: + replicas: 35 + tier: [int-worker] + +replication: + tier: [int-utility] + +ingest: + enable: true + tier: [int-utility] + +czarExternalService: + enable: true + type: LoadBalancer + annotations: + metallb.io/address-pool: sdf-dmz + allocateLoadBalancerNodePorts: false + loadBalancerIP: 134.79.23.230 + loadBalancerSourceRanges: + - 35.232.79.62/32 + - 34.135.212.179/32 + - 35.225.239.81/32 + - 34.121.239.92/32 + - 134.79.0.0/16 + - 172.16.0.0/12 + - 172.24.49.51/32 diff --git a/deploy/helm/environments/usdf-prod.yaml b/deploy/helm/environments/usdf-prod.yaml new file mode 100644 index 0000000000..5da76dc05a --- /dev/null +++ b/deploy/helm/environments/usdf-prod.yaml @@ -0,0 +1,34 @@ +qservImageName: ghcr.io/lsst/qserv:2026.5.1-rc1-40-gd22a65ccb +mariadbImageName: ghcr.io/lsst/qserv-mariadb:2026.5.1-rc1 +sslProxyImageName: qserv/mysql-proxy-ssl:latest +ingestHelperImageName: ghcr.io/lsst/qserv-build-base:2026.5.1-rc1-1-gfd3801377 + +czars: + tier: [prod-czar] + +workers: + replicas: 35 + tier: [prod-worker] + +replication: + tier: [prod-utility] + +ingest: + enable: true + tier: [prod-utility] + +czarExternalService: + enable: true + type: LoadBalancer + 
annotations: + metallb.io/address-pool: sdf-dmz + allocateLoadBalancerNodePorts: false + loadBalancerIP: 134.79.23.229 + loadBalancerSourceRanges: + - 35.232.79.62/32 + - 34.135.212.179/32 + - 35.225.239.81/32 + - 34.121.239.92/32 + - 134.79.0.0/16 + - 172.16.0.0/12 + - 172.24.49.51/32 diff --git a/deploy/helm/templates/_helpers.tpl b/deploy/helm/templates/_helpers.tpl new file mode 100644 index 0000000000..e8f0f94a8b --- /dev/null +++ b/deploy/helm/templates/_helpers.tpl @@ -0,0 +1,49 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "qserv.name" -}} +{{- .Chart.Name | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "qserv.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "qserv.labels" -}} +helm.sh/chart: {{ include "qserv.chart" . }} +{{ include "qserv.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.Version | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "qserv.selectorLabels" -}} +app.kubernetes.io/name: {{ include "qserv.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Mode flags +*/}} +{{- define "qserv.enable" -}} +{{- $m := .Values.mode | default "full" -}} +{{- if eq $m "db-only" -}} +czar: false +worker: false +registry: false +replication: false +{{- else -}} {{/* full */}} +czar: true +worker: true +registry: true +replication: true +{{- end -}} +{{- end }} diff --git a/deploy/helm/templates/czar-cm.yaml b/deploy/helm/templates/czar-cm.yaml new file mode 100644 index 0000000000..c2e9d6e30c --- /dev/null +++ b/deploy/helm/templates/czar-cm.yaml @@ -0,0 +1,124 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: czar-config + labels: + {{- include "qserv.labels" . 
| nindent 4 }} +data: + + 99-qserv-czar.cnf: | + [mysqld] + datadir=/qserv/data/mysql + max-connections=16384 + max_allowed_packet=128M + connect_timeout=28800 + net_read_timeout=90000 + net_write_timeout=90000 + wait_timeout=90000 + symbolic-links=0 + tmp_table_size=4G + max_heap_table_size=4G + use_stat_tables='preferably' + optimizer_use_condition_selectivity=3 + query-cache-size=0 + bulk-insert-buffer-size=1G + + my-proxy.cfg: | + [mysql-proxy] + proxy-address = :14040 + proxy-backend-addresses = 127.0.0.1:3306 + proxy-connect-timeout=30 + log-level=debug + + log-mysql-proxy.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP:%X{LWP} QID:%X{QID} %-5p %c{2} - %m%n + log4j.logger.lsst.qserv.sql.MySqlConnection=DEBUG + log4j.logger.lsst.qserv.qdisp.QueryRequest=DEBUG + log4j.logger.lsst.qserv.rproc.InfileMerger=DEBUG + log4j.logger.lsst.qserv.ccontrol.MergingHandler=DEBUG + + qserv-czar.cfg: | + [css] + technology = mysql + hostname = 127.0.0.1 + port = 3306 + username = qsmaster + password = file:/secrets/czar/mariadb-qsmaster-password + database = qservCssData + + [resultdb] + host = 127.0.0.1 + port = 3306 + user = qsmaster + passwd = file:/secrets/czar/mariadb-qsmaster-password + db = qservResult + transferDir = /qserv/data/results + #maxTransferMemMB = 10000 + maxTransferMemMB = 0 + #transferMinMBInMem = 10 + transferMinMBInMem = 0 + oldestResultKeptDays = 7 + maxsqlconnectionattempts = 10 + maxtablesize_mb = 5100 + engine = myisam + + [qmeta] + host = 127.0.0.1 + port = 3306 + user = qsmaster + passwd = file:/secrets/czar/mariadb-qsmaster-password + db = qservMeta + + [qstatus] + host = 127.0.0.1 + port = 3306 + user = qsmaster + passwd = file:/secrets/czar/mariadb-qsmaster-password + db = qservMeta + + [partitioner] + emptyChunkPath = /qserv/data/qserv + 
emptyChunkListFile = /qserv/data/qserv/emptyChunks.txt + + [tuning] + #memoryEngine = yes + #largeResultConcurrentMerges = 3 + largeResultConcurrentMerges = 6 + qMetaSecsBetweenChunkCompletionUpdates = 10 + + [qdisppool] + poolSize = 800 + largestPriority = 3 + vectRunSizes = 700:700:100:90 + + [uberjob] + maxChunks = 1000 + + [familymap] + usingChunkSize = 1 + + [replication] + instance_id = {{ .Release.Name }} + auth_key = file:/secrets/repl/auth-key + admin_auth_key = file:/secrets/repl/admin-auth-key + registry_host = qserv-registry + registry_port = 25082 + registry_heartbeat_ival_sec = 1 + http_port = 0 + num_http_threads = 2 + + log-czar-http.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP:%X{LWP} QID:%X{QID} %-5p %c{2} - %m%n + log4j.logger.lsst.qserv.qdisp.Executive=DEBUG + log4j.logger.lsst.qserv.qdisp.QueryRequest=DEBUG + log4j.logger.lsst.qserv.rproc.InfileMerger=DEBUG + log4j.logger.lsst.qserv.ccontrol.MergingHandler=DEBUG + #log4j.logger.lsst.qserv.ccontrol.UserQuerySelect=DEBUG + #log4j.logger.lsst.qserv.xrdreq.QueryManagementAction=DEBUG + #log4j.logger.lsst.qserv.ccontrol.UserQueryQservManager=TRACE diff --git a/deploy/helm/templates/czar-external-svc.yaml b/deploy/helm/templates/czar-external-svc.yaml new file mode 100644 index 0000000000..a42a7ba3de --- /dev/null +++ b/deploy/helm/templates/czar-external-svc.yaml @@ -0,0 +1,38 @@ +{{- $enabled := (include "qserv.enable" . | fromYaml) -}} +{{- if and $enabled.czar .Values.czarExternalService.enable }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . }}-czar-external + labels: + {{- include "qserv.labels" . | nindent 4 }} + {{- with .Values.czarExternalService.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + type: {{ .Values.czarExternalService.type | default "LoadBalancer" }} + selector: + {{- include "qserv.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: czar + statefulset.kubernetes.io/pod-name: {{ include "qserv.name" . }}-czar-0 + {{- if hasKey .Values.czarExternalService "allocateLoadBalancerNodePorts" }} + allocateLoadBalancerNodePorts: {{ .Values.czarExternalService.allocateLoadBalancerNodePorts }} + {{- end }} + {{- with .Values.czarExternalService.loadBalancerIP }} + loadBalancerIP: {{ . }} + {{- end }} + {{- with .Values.czarExternalService.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . | nindent 4 }} + {{- end }} + ports: + - name: czar-http + port: 4048 + targetPort: czar-http + protocol: TCP + - name: mysql-proxy-ssl + port: 4040 + targetPort: mysql-proxy-ssl + protocol: TCP +{{- end }} \ No newline at end of file diff --git a/deploy/helm/templates/czar-smig-job.yaml b/deploy/helm/templates/czar-smig-job.yaml new file mode 100644 index 0000000000..4855a2af2c --- /dev/null +++ b/deploy/helm/templates/czar-smig-job.yaml @@ -0,0 +1,54 @@ +{{- $myjob := .Values.czarSmig | default (dict) -}} +{{- $trig := get $myjob "runId" | default "" | toString -}} +{{- if $trig -}} +{{- $name := include "qserv.name" . -}} +{{- $ns := .Release.Namespace -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{$name}}-czar-smig-{{$trig}} + labels: + {{- include "qserv.labels" . | nindent 4 }} + app.kubernetes.io/component: schema +spec: + completionMode: Indexed + completions: {{ .Values.czars.replicas }} + parallelism: {{ .Values.czars.replicas }} + backoffLimit: 0 + ttlSecondsAfterFinished: 600 + template: + metadata: + labels: + {{- include "qserv.labels" . 
| nindent 8 }} + app.kubernetes.io/component: schema + spec: + restartPolicy: Never + containers: + - name: czar-smig + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + env: + - name: JOB_COMPLETION_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['batch.kubernetes.io/job-completion-index'] + - name: CZAR_DB_HOST + value: "{{$name}}-czar-$(JOB_COMPLETION_INDEX).{{$name}}-czar.{{$ns}}.svc" + - name: CZAR_DB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: czar-secrets + key: mariadb-root-password + - name: CZAR_DB_QSMASTER_PASSWORD + valueFrom: + secretKeyRef: + name: czar-secrets + key: mariadb-qsmaster-password + - name: CZAR_DB_CONN + value: "mysql://root:$(CZAR_DB_ROOT_PASSWORD)@$(CZAR_DB_HOST):3306/qservMeta" + command: ["entrypoint"] + args: + - "smig-update" + - "--czar-connection=$(CZAR_DB_CONN)" + - "--targs=mysqld_user_qserv_password=$(CZAR_DB_QSMASTER_PASSWORD)" +{{- end }} diff --git a/deploy/helm/templates/czar-sts.yaml b/deploy/helm/templates/czar-sts.yaml new file mode 100644 index 0000000000..5a39a4652b --- /dev/null +++ b/deploy/helm/templates/czar-sts.yaml @@ -0,0 +1,223 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "qserv.name" . }}-czar + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + serviceName: {{ include "qserv.name" . }}-czar + replicas: {{ .Values.czars.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + selector: + matchLabels: + {{- include "qserv.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: czar + template: + metadata: + labels: + {{- include "qserv.labels" . 
| nindent 8 }} + app.kubernetes.io/component: czar + spec: + {{- if .Values.czars.tier }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: qserv.lsst.io/tier + operator: In + values: {{ .Values.czars.tier | toJson }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: [ {{ include "qserv.name" . }} ] + - key: app.kubernetes.io/instance + operator: In + values: [ {{ .Release.Name }} ] + - key: app.kubernetes.io/component + operator: In + values: [ czar ] + topologyKey: kubernetes.io/hostname + {{- end }} + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + containers: + - name: mariadb + image: {{ .Values.mariadbImageName | quote }} + imagePullPolicy: {{ .Values.mariadbImagePullPolicy | quote }} + ports: + - name: mysql + containerPort: 3306 + env: + - name: MARIADB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: czar-secrets + key: mariadb-root-password + startupProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 60 + readinessProbe: + exec: + command: + - bash + - -ec + - | + mariadb --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" -N -e "SELECT 1" + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + periodSeconds: 10 + timeoutSeconds: 3 + volumeMounts: + - name: mariadb-config + mountPath: /etc/mysql/conf.d/99-qserv-czar.cnf + subPath: 99-qserv-czar.cnf + readOnly: true + - name: tmp + mountPath: /tmp + - name: czar-data + mountPath: /qserv/data + {{- if (include 
"qserv.enable" . | fromYaml).czar }} + - name: mysql-proxy + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: mysql-proxy + containerPort: 14040 + env: + - name: LSST_LOG_CONFIG + value: /config/log-mysql-proxy.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + - name: QSERV_CONFIG + value: /config/qserv-czar.cfg + command: [ "/bin/sh", "-c" ] + args: + - > + exec mysql-proxy + --proxy-lua-script=/usr/local/lua/qserv/scripts/mysqlProxy.lua + --lua-cpath=/usr/local/lua/qserv/lib/czarProxy.so + --defaults-file=/config/my-proxy.cfg + volumeMounts: + - name: czar-data + mountPath: /qserv/data + - name: czar-config + mountPath: /config + readOnly: true + - name: secrets-czar + mountPath: /secrets/czar + readOnly: true + - name: secrets-repl + mountPath: /secrets/repl + readOnly: true + - name: czar-http + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: czar-http + containerPort: 4048 + env: + - name: LSST_LOG_CONFIG + value: /config/log-czar-http.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + command: ["tini", "--", "bash", "-c"] + args: + - > + exec qserv-czar-http + --user=qsmaster + --password=$(cat /secrets/czar/mariadb-qsmaster-password) + --config=/config/qserv-czar.cfg + --port=4048 + --threads=100 + --worker-ingest-threads=100 + --ssl-cert-file=/certs/czar/tls.crt + --ssl-private-key-file=/certs/czar/tls.key + --tmp-dir=/qserv/data/ingest + --verbose + volumeMounts: + - name: czar-data + mountPath: /qserv/data + - name: czar-config + mountPath: /config + readOnly: true + - name: czar-cert + mountPath: /certs/czar + readOnly: true + - name: secrets-czar + mountPath: /secrets/czar + readOnly: true + - name: secrets-repl + mountPath: /secrets/repl + readOnly: true + - name: mysql-proxy-ssl + image: {{ .Values.sslProxyImageName | quote }} + imagePullPolicy: {{ .Values.sslProxyImagePullPolicy | quote }} + 
ports: + - name: mysql-proxy-ssl + containerPort: 4040 + command: [ "proxy" ] + args: + - "4040" + - "14040" + - "/certs/czar/tls.crt" + - "/certs/czar/tls.key" + volumeMounts: + - name: czar-cert + mountPath: /certs/czar + readOnly: true + {{- end }} + volumes: + - name: mariadb-config + configMap: + name: czar-config + defaultMode: 0440 + - name: czar-config + configMap: + name: czar-config + defaultMode: 0440 + - name: czar-cert + secret: + secretName: czar-cert + defaultMode: 0440 + - name: tmp + emptyDir: {} + - name: secrets-czar + secret: + secretName: czar-secrets + defaultMode: 0440 + - name: secrets-repl + secret: + secretName: repl-secrets + defaultMode: 0440 + volumeClaimTemplates: + - metadata: + name: czar-data + labels: + {{- include "qserv.labels" . | nindent 10 }} + spec: + accessModes: [ ReadWriteOnce ] + storageClassName: {{ .Values.czars.storage.class | quote }} + resources: + requests: + storage: {{ .Values.czars.storage.size | quote }} diff --git a/deploy/helm/templates/czar-svc.yaml b/deploy/helm/templates/czar-svc.yaml new file mode 100644 index 0000000000..bcd2d1e432 --- /dev/null +++ b/deploy/helm/templates/czar-svc.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . }}-czar + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + clusterIP: "None" + publishNotReadyAddresses: true + selector: + {{- include "qserv.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: czar + {{- if (include "qserv.enable" . 
| fromYaml).replication }} + ports: + - name: czar-http + port: 4048 + targetPort: czar-http + - name: mysql-proxy-ssl + port: 4040 + targetPort: mysql-proxy-ssl + {{- end }} diff --git a/deploy/helm/templates/ingest-cm.yaml b/deploy/helm/templates/ingest-cm.yaml new file mode 100644 index 0000000000..4acc7e66b1 --- /dev/null +++ b/deploy/helm/templates/ingest-cm.yaml @@ -0,0 +1,19 @@ +{{- if .Values.ingest.enable -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: ingest-config + labels: + {{- include "qserv.labels" . | nindent 4 }} +data: + + default.conf: | + server { + listen 80; + server_name localhost; + location / { + root /qserv/data/html; + index index.html index.htm; + } + } +{{- end }} diff --git a/deploy/helm/templates/ingest-sts.yaml b/deploy/helm/templates/ingest-sts.yaml new file mode 100644 index 0000000000..57be7729eb --- /dev/null +++ b/deploy/helm/templates/ingest-sts.yaml @@ -0,0 +1,89 @@ +{{- if .Values.ingest.enable -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "qserv.name" . }}-ingest + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + serviceName: {{ include "qserv.name" . }}-ingest + replicas: 1 + podManagementPolicy: {{ .Values.podManagementPolicy }} + selector: + matchLabels: + {{- include "qserv.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ingest + template: + metadata: + labels: + {{- include "qserv.labels" . 
| nindent 8 }} + app.kubernetes.io/component: ingest + spec: + {{- if .Values.ingest.tier }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: qserv.lsst.io/tier + operator: In + values: {{ .Values.ingest.tier | toJson }} + {{- end }} + containers: + - name: nginx + image: nginx:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 80 + volumeMounts: + - name: tmp + mountPath: /tmp + - name: ingest-config + mountPath: /etc/nginx/conf.d/default.conf + subPath: default.conf + readOnly: true + - name: ingest-data + mountPath: /qserv/data + - name: ingest-helper + image: {{ .Values.ingestHelperImageName | quote }} + imagePullPolicy: {{ .Values.ingestHelperPullPolicy | quote }} + env: + - name: QSERV_INSTANCE_ID + value: {{ .Release.Name }} + command: ["tini", "--", "tail", "-f", "/dev/null"] + volumeMounts: + - name: tmp + mountPath: /tmp + - name: ingest-data + mountPath: /qserv/data + - name: secrets-czar + mountPath: /etc/secrets/czar + readOnly: true + - name: secrets-repl + mountPath: /etc/secrets/repl + readOnly: true + volumes: + - name: tmp + emptyDir: {} + - name: ingest-config + configMap: + name: ingest-config + - name: secrets-czar + secret: + secretName: czar-secrets + - name: secrets-repl + secret: + secretName: repl-secrets + volumeClaimTemplates: + - metadata: + name: ingest-data + labels: + {{- include "qserv.labels" . | nindent 10 }} + spec: + accessModes: [ ReadWriteOnce ] + storageClassName: {{ .Values.ingest.storage.class | quote }} + resources: + requests: + storage: {{ .Values.ingest.storage.size | quote }} +{{- end }} diff --git a/deploy/helm/templates/ingest-svc.yaml b/deploy/helm/templates/ingest-svc.yaml new file mode 100644 index 0000000000..82dd4ac9a9 --- /dev/null +++ b/deploy/helm/templates/ingest-svc.yaml @@ -0,0 +1,17 @@ +{{- if .Values.ingest.enable -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . 
}}-ingest + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "qserv.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: ingest + ports: + - name: http + port: 80 + targetPort: http +{{- end }} diff --git a/deploy/helm/templates/registry-cm.yaml b/deploy/helm/templates/registry-cm.yaml new file mode 100644 index 0000000000..66e6960f4f --- /dev/null +++ b/deploy/helm/templates/registry-cm.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: registry-config + labels: + {{- include "qserv.labels" . | nindent 4 }} +data: + log-repl-registry.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP:%-5X{LWP} %-5p %c{2} - %m%n + log4j.logger.lsst.qserv.qhttp=WARN + #log4j.logger.lsst.qserv.replica=DEBUG + #log4j.logger.lsst.qserv.replica.Configuration=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationIface=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationBase=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.Controller=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.MessengerConnector=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseServicesPool=ERROR + #log4j.logger.lsst.qserv.replica.DatabaseServicesMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.AbortTransactionJob=TRACE + #log4j.logger.lsst.qserv.replica.IndexRequest=DEBUG + #log4j.logger.lsst.qserv.replica.IndexJob=DEBUG + #log4j.logger.lsst.qserv.replica.FixUpJob=DEBUG + #log4j.logger.lsst.qserv.replica.FindAllJob=DEBUG + #log4j.logger.lsst.qserv.replica.QservSyncJob=DEBUG + #log4j.logger.lsst.qserv.replica.PurgeJob=DEBUG + #log4j.logger.lsst.qserv.replica.HttpProcessor=DEBUG + #log4j.logger.lsst.qserv.replica.HttpModuleBase=DEBUG + 
#log4j.logger.lsst.qserv.replica.SqlJob=DEBUG + #log4j.logger.lsst.qserv.util=DEBUG diff --git a/deploy/helm/templates/registry-deploy.yaml b/deploy/helm/templates/registry-deploy.yaml new file mode 100644 index 0000000000..ab7ac28d31 --- /dev/null +++ b/deploy/helm/templates/registry-deploy.yaml @@ -0,0 +1,79 @@ +{{- if (include "qserv.enable" . | fromYaml).registry -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "qserv.name" . }}-registry + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + replicas: 1 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + {{- include "qserv.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: registry + template: + metadata: + labels: + {{- include "qserv.labels" . | nindent 8 }} + app.kubernetes.io/component: registry + spec: + {{- if .Values.replication.tier }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: qserv.lsst.io/tier + operator: In + values: {{ .Values.replication.tier | toJson }} + {{- end }} + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + containers: + - name: registry + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: http + containerPort: 25082 + env: + - name: LSST_LOG_CONFIG + value: /config/log-repl-registry.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + command: ["tini", "--", "bash", "-c"] + args: + - > + exec qserv-replica-registry + --config=mysql://qsreplica:$(cat /secrets/mariadb-qsreplica-password)@qserv-repl-0.qserv-repl/qservReplica + --registry-port=25082 + --instance-id={{ .Release.Name }} + --auth-key="$(cat /secrets/auth-key)" + --admin-auth-key="$(cat /secrets/admin-auth-key)" + --debug + resources: + requests: + memory: 1Gi + volumeMounts: + - name: config + 
mountPath: /config + readOnly: true + - name: secrets + mountPath: /secrets + readOnly: true + volumes: + - name: config + configMap: + name: registry-config + - name: secrets + secret: + secretName: repl-secrets +{{- end }} diff --git a/deploy/helm/templates/registry-svc.yaml b/deploy/helm/templates/registry-svc.yaml new file mode 100644 index 0000000000..7ea53aa702 --- /dev/null +++ b/deploy/helm/templates/registry-svc.yaml @@ -0,0 +1,17 @@ +{{- if (include "qserv.enable" . | fromYaml).registry -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . }}-registry + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "qserv.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: registry + ports: + - name: http + port: 25082 + targetPort: http +{{- end }} diff --git a/deploy/helm/templates/repl-cm.yaml b/deploy/helm/templates/repl-cm.yaml new file mode 100644 index 0000000000..5122965649 --- /dev/null +++ b/deploy/helm/templates/repl-cm.yaml @@ -0,0 +1,52 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: repl-config + labels: + {{- include "qserv.labels" . 
| nindent 4 }} +data: + + 99-qserv-repl.cnf: | + [mysqld] + datadir=/qserv/data/mysql + max-connections=16384 + connect_timeout=28800 + net_read_timeout=86400 + net_write_timeout=86400 + wait_timeout=86400 + innodb_lock_wait_timeout=3600 + innodb-read-io-threads=16 + innodb-random-read-ahead=1 + innodb-buffer-pool-size=8589934592 + symbolic-links=0 + tmp_table_size=4G + max_heap_table_size=4G + query-cache-size=0 + + log-repl-contr.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP:%-5X{LWP} %-5p %c{2} - %m%n + #log4j.logger.lsst.qserv.http.BaseModule=DEBUG + #log4j.logger.lsst.qserv.replica=DEBUG + #log4j.logger.lsst.qserv.replica.Configuration=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationIface=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationBase=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.Controller=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.MessengerConnector=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseServicesPool=ERROR + #log4j.logger.lsst.qserv.replica.DatabaseServicesMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.AbortTransactionJob=TRACE + #log4j.logger.lsst.qserv.replica.IndexRequest=DEBUG + #log4j.logger.lsst.qserv.replica.DirectorIndexJob=DEBUG + #log4j.logger.lsst.qserv.replica.FixUpJob=DEBUG + #log4j.logger.lsst.qserv.replica.FindAllJob=DEBUG + #log4j.logger.lsst.qserv.replica.QservSyncJob=DEBUG + #log4j.logger.lsst.qserv.replica.PurgeJob=DEBUG + #log4j.logger.lsst.qserv.replica.HttpProcessor=DEBUG + #log4j.logger.lsst.qserv.replica.HttpModuleBase=DEBUG + #log4j.logger.lsst.qserv.replica.SqlJob=DEBUG + #log4j.logger.lsst.qserv.util=TRACE diff --git a/deploy/helm/templates/repl-smig-job.yaml b/deploy/helm/templates/repl-smig-job.yaml 
new file mode 100644 index 0000000000..19fca42775 --- /dev/null +++ b/deploy/helm/templates/repl-smig-job.yaml @@ -0,0 +1,55 @@ +{{- $myjob := .Values.replSmig | default (dict) -}} +{{- $trig := get $myjob "runId" | default "" | toString -}} +{{- if $trig -}} +{{- $name := include "qserv.name" . -}} +{{- $ns := .Release.Namespace -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{$name}}-repl-smig-{{$trig}} + labels: + {{- include "qserv.labels" . | nindent 4 }} + app.kubernetes.io/component: schema +spec: + completionMode: Indexed + completions: 1 + backoffLimit: 0 + ttlSecondsAfterFinished: 600 + template: + metadata: + labels: + {{- include "qserv.labels" . | nindent 8 }} + app.kubernetes.io/component: schema + spec: + restartPolicy: Never + containers: + - name: repl-smig + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + env: + - name: JOB_COMPLETION_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['batch.kubernetes.io/job-completion-index'] + - name: REPL_DB_HOST + value: "{{$name}}-repl-$(JOB_COMPLETION_INDEX).{{$name}}-repl.{{$ns}}.svc" + - name: REPL_DB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: repl-secrets + key: mariadb-root-password + - name: REPL_DB_QSREPLICA_PASSWORD + valueFrom: + secretKeyRef: + name: repl-secrets + key: mariadb-qsreplica-password + - name: REPL_DB_ROOT_CONN + value: "mysql://root:$(REPL_DB_ROOT_PASSWORD)@$(REPL_DB_HOST):3306/qservReplica" + - name: REPL_DB_CONN + value: "mysql://qsreplica:$(REPL_DB_QSREPLICA_PASSWORD)@$(REPL_DB_HOST):3306/qservReplica" + command: ["entrypoint"] + args: + - "smig-update" + - "--repl-connection=$(REPL_DB_ROOT_CONN)" + - "--repl-connection-nonadmin=$(REPL_DB_CONN)" +{{- end }} diff --git a/deploy/helm/templates/repl-sts.yaml b/deploy/helm/templates/repl-sts.yaml new file mode 100644 index 0000000000..02241aefe8 --- /dev/null +++ b/deploy/helm/templates/repl-sts.yaml @@ -0,0 +1,164 @@ +apiVersion: apps/v1 +kind: 
StatefulSet +metadata: + name: {{ include "qserv.name" . }}-repl + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + serviceName: {{ include "qserv.name" . }}-repl + replicas: 1 + podManagementPolicy: {{ .Values.podManagementPolicy }} + selector: + matchLabels: + {{- include "qserv.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: repl + template: + metadata: + labels: + {{- include "qserv.labels" . | nindent 8 }} + app.kubernetes.io/component: repl + spec: + {{- if .Values.replication.tier }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: qserv.lsst.io/tier + operator: In + values: {{ .Values.replication.tier | toJson }} + {{- end }} + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + containers: + - name: mariadb + image: {{ .Values.mariadbImageName | quote }} + imagePullPolicy: {{ .Values.mariadbImagePullPolicy | quote }} + ports: + - name: mysql + containerPort: 3306 + env: + - name: MARIADB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: repl-secrets + key: mariadb-root-password + startupProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 60 + readinessProbe: + exec: + command: + - bash + - -ec + - | + mariadb --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" -N -e "SELECT 1" + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + periodSeconds: 10 + timeoutSeconds: 3 + volumeMounts: + - name: mariadb-config + mountPath: /etc/mysql/conf.d/99-qserv-repl.cnf + subPath: 99-qserv-repl.cnf + readOnly: true + - name: tmp + mountPath: /tmp + - name: 
repl-data + mountPath: /qserv/data + {{- if (include "qserv.enable" . | fromYaml).replication }} + - name: repl-controller + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: http + containerPort: 25081 + env: + - name: LSST_LOG_CONFIG + value: /config/log-repl-contr.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + command: ["tini", "--", "bash", "-c"] + args: + - > + exec qserv-replica-master-http + --registry-host=qserv-registry + --config=mysql://qsreplica:$(cat /secrets/repl/mariadb-qsreplica-password)@localhost/qservReplica + --instance-id={{ .Release.Name }} + --qserv-czar-db=mysql://root:$(cat /secrets/czar/mariadb-root-password)@qserv-czar-0.qserv-czar:3306/qservMeta + --qserv-chunk-map-update + --auth-key="$(cat /secrets/repl/auth-key)" + --admin-auth-key="$(cat /secrets/repl/admin-auth-key)" + --http-root=/usr/local/qserv/www/ + --health-probe-interval=120 + --replication-interval=600 + --qserv-sync-timeout=600 + --controller-ingest-job-monitor-ival-sec=1 + --controller-auto-register-workers=1 + --xrootd-auto-notify=0 + --qserv-sync-disable + --worker-evict-timeout=1800 + --purge + --debug + resources: + requests: + memory: 1Gi + volumeMounts: + - name: repl-data + mountPath: /qserv/data + - name: repl-config + mountPath: /config + readOnly: true + - name: secrets-repl + mountPath: /secrets/repl + readOnly: true + - name: secrets-czar + mountPath: /secrets/czar + readOnly: true + {{- end }} + volumes: + - name: mariadb-config + configMap: + name: repl-config + items: + - key: 99-qserv-repl.cnf + path: 99-qserv-repl.cnf + - name: repl-config + configMap: + name: repl-config + - name: tmp + emptyDir: {} + - name: secrets-repl + secret: + secretName: repl-secrets + - name: secrets-czar + secret: + secretName: czar-secrets + volumeClaimTemplates: + - metadata: + name: repl-data + labels: + {{- include "qserv.labels" . 
| nindent 10 }} + spec: + accessModes: [ ReadWriteOnce ] + storageClassName: {{ .Values.replication.storage.class | quote }} + resources: + requests: + storage: {{ .Values.replication.storage.size | quote }} diff --git a/deploy/helm/templates/repl-svc.yaml b/deploy/helm/templates/repl-svc.yaml new file mode 100644 index 0000000000..6922609905 --- /dev/null +++ b/deploy/helm/templates/repl-svc.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . }}-repl + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + clusterIP: "None" + publishNotReadyAddresses: true + selector: + {{- include "qserv.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: repl + {{- if (include "qserv.enable" . | fromYaml).replication }} + ports: + - name: http + port: 25081 + targetPort: http + {{- end }} diff --git a/deploy/helm/templates/worker-cm.yaml b/deploy/helm/templates/worker-cm.yaml new file mode 100644 index 0000000000..fb718420fe --- /dev/null +++ b/deploy/helm/templates/worker-cm.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: worker-config + labels: + {{- include "qserv.labels" . 
| nindent 4 }} +data: + + 99-qserv-worker.cnf: | + [mysqld] + datadir=/qserv/data/mysql + max-connections=16384 + connect_timeout=28800 + net_read_timeout=90000 + net_write_timeout=90000 + wait_timeout=90000 + symbolic-links=0 + tmp_table_size=4G + max_heap_table_size=4G + myisam_sort_buffer_size=16G + myisam_repair_threads=2 + use_stat_tables='preferably' + optimizer_use_condition_selectivity=3 + query-cache-size=0 + join-buffer-size=4G + + log-repl-worker.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP:%-5X{LWP} %-5p %c{2} - %m%n + #log4j.logger.lsst.qserv.replica=DEBUG + #log4j.logger.lsst.qserv.replica.Configuration=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationIface=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationBase=DEBUG + #log4j.logger.lsst.qserv.replica.ConfigurationMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.Controller=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.MessengerConnector=DEBUG + #log4j.logger.lsst.qserv.replica.DatabaseServicesPool=ERROR + #log4j.logger.lsst.qserv.replica.DatabaseServicesMySQL=DEBUG + #log4j.logger.lsst.qserv.replica.AbortTransactionJob=TRACE + #log4j.logger.lsst.qserv.replica.IndexRequest=DEBUG + #log4j.logger.lsst.qserv.replica.IndexJob=DEBUG + #log4j.logger.lsst.qserv.replica.FixUpJob=DEBUG + #log4j.logger.lsst.qserv.replica.FindAllJob=DEBUG + #log4j.logger.lsst.qserv.replica.QservSyncJob=DEBUG + #log4j.logger.lsst.qserv.replica.PurgeJob=DEBUG + #log4j.logger.lsst.qserv.replica.HttpProcessor=DEBUG + #log4j.logger.lsst.qserv.replica.HttpModuleBase=DEBUG + #log4j.logger.lsst.qserv.replica.SqlJob=DEBUG + #log4j.logger.lsst.qserv.util=DEBUG + + log-worker-svc.cfg: | + log4j.rootLogger=INFO, CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + 
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n + + worker-svc.cf: | + [mysql] + hostname = 127.0.0.1 + port = 3306 + username = qsmaster + password = file:/secrets/worker/mariadb-qsmaster-password + + [czar] + ComNumHttpThreads = 100 + + [scheduler] + thread_pool_size = 70 + required_tasks_completed = 1 + group_size = 20 + maxActiveChunks_snail = 2 + maxActiveChunks_slow = 6 + maxActiveChunks_med = 6 + maxActiveChunks_fast = 6 + + [sqlconnections] + maxsqlconn = 980 + reservedinteractivesqlconn = 930 + + [results] + dirname = /qserv/data/results + num_http_threads = 6 + clean_up_on_start = 1 + + [replication] + instance_id = {{ .Release.Name }} + auth_key = file:/secrets/repl/auth-key + admin_auth_key = file:/secrets/repl/admin-auth-key + registry_host = qserv-registry + registry_port = 25082 + registry_heartbeat_ival_sec = 1 + http_port = 25010 + num_http_threads = 2 diff --git a/deploy/helm/templates/worker-smig-job.yaml b/deploy/helm/templates/worker-smig-job.yaml new file mode 100644 index 0000000000..393a1b05ba --- /dev/null +++ b/deploy/helm/templates/worker-smig-job.yaml @@ -0,0 +1,57 @@ +{{- $myjob := .Values.workerSmig | default (dict) -}} +{{- $trig := get $myjob "runId" | default "" | toString -}} +{{- if $trig -}} +{{- $name := include "qserv.name" . -}} +{{- $ns := .Release.Namespace -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{$name}}-worker-smig-{{$trig}} + labels: + {{- include "qserv.labels" . | nindent 4 }} + app.kubernetes.io/component: schema +spec: + completionMode: Indexed + completions: {{ .Values.workers.replicas }} + parallelism: {{ .Values.workers.replicas }} + backoffLimit: 0 + ttlSecondsAfterFinished: 600 + template: + metadata: + labels: + {{- include "qserv.labels" . 
| nindent 8 }} + app.kubernetes.io/component: schema + spec: + restartPolicy: Never + containers: + - name: worker-smig + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + env: + - name: JOB_COMPLETION_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['batch.kubernetes.io/job-completion-index'] + - name: WORKER_DB_HOST + value: "{{$name}}-worker-$(JOB_COMPLETION_INDEX).{{$name}}-worker.{{$ns}}.svc" + - name: WORKER_DB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: worker-secrets + key: mariadb-root-password + - name: WORKER_DB_QSMASTER_PASSWORD + valueFrom: + secretKeyRef: + name: worker-secrets + key: mariadb-qsmaster-password + - name: WORKER_DB_CONN + value: "mysql://root:$(WORKER_DB_ROOT_PASSWORD)@$(WORKER_DB_HOST):3306/qservw_worker" + command: ["/bin/sh", "-c"] + args: + - | + WORKER_ID=$(printf "%03d" "$JOB_COMPLETION_INDEX") + exec entrypoint smig-update \ + --worker-connection="${WORKER_DB_CONN}" \ + --targs=mysqld_user_qserv_password="${WORKER_DB_QSMASTER_PASSWORD}" \ + --targs=worker_name=worker-${WORKER_ID} +{{- end }} diff --git a/deploy/helm/templates/worker-sts.yaml b/deploy/helm/templates/worker-sts.yaml new file mode 100644 index 0000000000..eb347f635f --- /dev/null +++ b/deploy/helm/templates/worker-sts.yaml @@ -0,0 +1,204 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "qserv.name" . }}-worker + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + serviceName: {{ include "qserv.name" . }}-worker + replicas: {{ .Values.workers.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + selector: + matchLabels: + {{- include "qserv.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: worker + template: + metadata: + labels: + {{- include "qserv.labels" . 
| nindent 8 }} + app.kubernetes.io/component: worker + spec: + {{- if .Values.workers.tier }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: qserv.lsst.io/tier + operator: In + values: {{ .Values.workers.tier | toJson }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: [ {{ include "qserv.name" . }} ] + - key: app.kubernetes.io/instance + operator: In + values: [ {{ .Release.Name }} ] + - key: app.kubernetes.io/component + operator: In + values: [ worker ] + topologyKey: kubernetes.io/hostname + {{- end }} + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + containers: + - name: mariadb + image: {{ .Values.mariadbImageName | quote }} + imagePullPolicy: {{ .Values.mariadbImagePullPolicy | quote }} + ports: + - name: mysql + containerPort: 3306 + env: + - name: MARIADB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: worker-secrets + key: mariadb-root-password + startupProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 60 + readinessProbe: + exec: + command: + - bash + - -ec + - | + mariadb --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" -N -e "SELECT 1" + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + exec: + command: + - bash + - -ec + - | + mariadb-admin --protocol=TCP -h 127.0.0.1 --connect-timeout=2 -uroot -p"$$MARIADB_ROOT_PASSWORD" ping + periodSeconds: 10 + timeoutSeconds: 3 + volumeMounts: + - name: mariadb-config + mountPath: /etc/mysql/conf.d/99-qserv-worker.cnf + subPath: 99-qserv-worker.cnf + readOnly: true + - name: tmp + mountPath: /tmp + - name: worker-data + mountPath: /qserv/data + {{- 
if (include "qserv.enable" . | fromYaml).worker }} + - name: repl-worker + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: svc + containerPort: 25000 + - name: fs + containerPort: 25001 + - name: loader + containerPort: 25002 + - name: exporter + containerPort: 25003 + - name: http-loader + containerPort: 25004 + env: + - name: LSST_LOG_CONFIG + value: /config/log-repl-worker.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + command: ["tini", "--", "bash", "-c"] + args: + - > + exec qserv-replica-worker + --registry-host=qserv-registry + --config=mysql://qsreplica:$(cat /secrets/repl/mariadb-qsreplica-password)@qserv-repl-0.qserv-repl/qservReplica + --instance-id={{ .Release.Name }} + --qserv-worker-db=mysql://root:$(cat /secrets/worker/mariadb-root-password)@localhost:3306/qservw_worker + --auth-key="$(cat /secrets/repl/auth-key)" + --admin-auth-key="$(cat /secrets/repl/admin-auth-key)" + --worker-ingest-num-retries=4 + --worker-num-async-loader-processing-threads=32 + --schema-upgrade-wait=1 + --schema-upgrade-wait-timeout=600 + --debug + volumeMounts: + - name: worker-config + mountPath: /config + readOnly: true + - name: secrets-worker + mountPath: /secrets/worker + readOnly: true + - name: secrets-repl + mountPath: /secrets/repl + readOnly: true + - name: worker-data + mountPath: /qserv/data + - name: worker-svc + image: {{ .Values.qservImageName | quote }} + imagePullPolicy: {{ .Values.qservImagePullPolicy | quote }} + ports: + - name: http + containerPort: 25010 + env: + - name: LSST_LOG_CONFIG + value: /config/log-worker-svc.cfg + - name: LD_PRELOAD + value: libjemalloc.so.2 + command: ["tini", "--", "bash", "-c"] + args: + - > + exec qserv-worker-http + --config=/config/worker-svc.cf + volumeMounts: + - name: worker-config + mountPath: /config + readOnly: true + - name: secrets-worker + mountPath: /secrets/worker + readOnly: true + - name: secrets-repl + mountPath: 
/secrets/repl + readOnly: true + - name: worker-data + mountPath: /qserv/data + {{- end }} + volumes: + - name: mariadb-config + configMap: + name: worker-config + items: + - key: 99-qserv-worker.cnf + path: 99-qserv-worker.cnf + - name: worker-config + configMap: + name: worker-config + - name: tmp + emptyDir: {} + - name: secrets-worker + secret: + secretName: worker-secrets + - name: secrets-repl + secret: + secretName: repl-secrets + volumeClaimTemplates: + - metadata: + name: worker-data + labels: + {{- include "qserv.labels" . | nindent 10 }} + spec: + accessModes: [ ReadWriteOnce ] + storageClassName: {{ .Values.workers.storage.class | quote }} + resources: + requests: + storage: {{ .Values.workers.storage.size | quote }} diff --git a/deploy/helm/templates/worker-svc.yaml b/deploy/helm/templates/worker-svc.yaml new file mode 100644 index 0000000000..6ce9405f02 --- /dev/null +++ b/deploy/helm/templates/worker-svc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "qserv.name" . }}-worker + labels: + {{- include "qserv.labels" . | nindent 4 }} +spec: + clusterIP: "None" + publishNotReadyAddresses: true + selector: + {{- include "qserv.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: worker diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml new file mode 100644 index 0000000000..c497834ced --- /dev/null +++ b/deploy/helm/values.yaml @@ -0,0 +1,42 @@ +qservImageName: ghcr.io/lsst/qserv:2026.5.1-rc1-40-gd22a65ccb +mariadbImageName: ghcr.io/lsst/qserv-mariadb:2026.5.1-rc1 +sslProxyImageName: qserv/mysql-proxy-ssl:latest +ingestHelperImageName: ghcr.io/lsst/qserv-build-base:2026.5.1-rc1-1-gfd3801377 + +qservImagePullPolicy: IfNotPresent +mariadbImagePullPolicy: IfNotPresent +sslProxyImagePullPolicy: IfNotPresent +ingestHelperPullPolicy: IfNotPresent + +mode: full # full | db-only +podManagementPolicy: Parallel # Parallel | OrderedReady + +czars: + replicas: 1 + # tier: [] + storage: + class: rubin-qserv-storage + size: 5Ti + +workers: + replicas: 5 + # tier: [] + storage: + class: rubin-qserv-storage + size: 10Ti + +replication: + # tier: [] + storage: + class: rubin-qserv-storage + size: 2Ti + +ingest: + enable: false + # tier: [] + storage: + class: rubin-qserv-storage + size: 200Gi + +czarExternalService: + enable: false diff --git a/etc/CMakeLists.txt b/etc/CMakeLists.txt index 4d0439fc51..9fe02e53fa 100644 --- a/etc/CMakeLists.txt +++ b/etc/CMakeLists.txt @@ -1,5 +1,5 @@ install(FILES integration_tests.yaml - log.cnf + log.cfg DESTINATION etc ) diff --git a/deploy/compose/log/log.cnf b/etc/log.cfg similarity index 68% rename from deploy/compose/log/log.cnf rename to etc/log.cfg index 3ef039121b..ec675b61a3 100644 --- a/deploy/compose/log/log.cnf +++ b/etc/log.cfg @@ -1,9 +1,5 @@ log4j.rootLogger=DEBUG, CONSOLE -#log4j.rootLogger=WARN, CONSOLE log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n -log4j.logger.lsst.qserv.xrdssi.msgs=WARN -#log4j.logger.lsst.qserv.xrdssi.msgs=DEBUG - diff --git 
a/python/lsst/qserv/admin/cli/entrypoint.py b/python/lsst/qserv/admin/cli/entrypoint.py index e7c88c1286..e7b8e52fb3 100644 --- a/python/lsst/qserv/admin/cli/entrypoint.py +++ b/python/lsst/qserv/admin/cli/entrypoint.py @@ -39,7 +39,6 @@ OptionGroup, option_case, option_cmd, - option_cmsd_manager_name, option_compare_results, option_czar_connection, option_db_admin_uri, @@ -67,9 +66,7 @@ option_targs, option_tests_yaml, option_unload, - option_vnid_config, option_worker_connection, - option_xrootd_manager, options_targs, ) from .render_targs import render_targs @@ -78,21 +75,15 @@ template_dir = "/usr/local/qserv/templates/" -mysql_proxy_cfg_template = os.path.join(template_dir, "proxy/etc/my-proxy.cnf.jinja") -czar_cfg_template = os.path.join(template_dir, "proxy/etc/qserv-czar.cnf.jinja") -czar_http_cfg_template = os.path.join(template_dir, "http/etc/qserv-czar.cnf.jinja") -cmsd_manager_cfg_template = os.path.join(template_dir, "xrootd/etc/cmsd-manager.cf.jinja") -cmsd_worker_cfg_template = os.path.join(template_dir, "xrootd/etc/cmsd-worker.cf.jinja") -xrdssi_cfg_template = os.path.join(template_dir, "xrootd/etc/xrdssi.cf.jinja") -xrootd_manager_cfg_template = os.path.join(template_dir, "xrootd/etc/xrootd-manager.cf.jinja") - -mysql_proxy_cfg_path = "/config-etc/my-proxy.cnf" -czar_cfg_path = "/config-etc/qserv-czar.cnf" -czar_http_cfg_path = "/config-etc/qserv-czar.cnf" -cmsd_manager_cfg_path = "/config-etc/cmsd-manager.cnf" -cmsd_worker_cfg_path = "/config-etc/cmsd-worker.cf" -xrdssi_cfg_path = "/config-etc/xrdssi-worker.cf" -xrootd_manager_cfg_path = "/config-etc/xrootd-manager.cf" +mysql_proxy_cfg_template = os.path.join(template_dir, "proxy/etc/my-proxy.cfg.jinja") +czar_cfg_template = os.path.join(template_dir, "proxy/etc/qserv-czar.cfg.jinja") +czar_http_cfg_template = os.path.join(template_dir, "http/etc/qserv-czar.cfg.jinja") +worker_svc_cfg_template = os.path.join(template_dir, "worker-svc/etc/worker-svc.cfg.jinja") + +mysql_proxy_cfg_path = 
"/config-etc/my-proxy.cfg" +czar_cfg_path = "/config-etc/qserv-czar.cfg" +czar_http_cfg_path = "/config-etc/qserv-czar.cfg" +worker_svc_cfg_path = "/config-etc/worker-svc.cfg" socket_option_help = f"""Accepts query key { click.style("socket", bold=True) @@ -116,7 +107,7 @@ worker_db_help = f"""Non-admin URI to the worker database. {socket_option_help} - Populates 'hostname', 'port', and 'socket' under '[mysql]' in the xrdssi config + Populates 'hostname', 'port', and 'socket' under '[mysql]' in the worker config file. Also used to wait for schema to be at the correct version in this database. """ @@ -159,20 +150,6 @@ class CommandInfo: "--verbose", ), ), - ( - "cmsd-manager", - CommandInfo( - "cmsd -c {{cmsd_manager_cfg_path}} -n manager -I v4", - ), - ), - ("xrootd-manager", CommandInfo("xrootd -c {{xrootd_manager_cfg_path}} -n manager -I v4")), - ( - "worker-cmsd", - CommandInfo( - "cmsd -c {{cmsd_worker_cfg_path}} -n worker -I v4 -l @libXrdSsiLog.so -+xrdssi " - "{{xrdssi_cfg_path}}", - ), - ), ( "worker-repl", CommandInfo( @@ -182,10 +159,9 @@ class CommandInfo: ), ), ( - "worker-xrootd", + "worker-svc", CommandInfo( - "xrootd -c {{cmsd_worker_cfg_path}} -n worker -I v4 -l @libXrdSsiLog.so -+xrdssi " - "{{xrdssi_cfg_path}}", + "qserv-worker-http -c {{worker_svc_cfg_path}}", ), ), ( @@ -217,38 +193,20 @@ class CommandInfo: ) -option_cmsd_worker_cfg_file = partial( +option_worker_svc_cfg_file = partial( click.option, - "--cmsd-worker-cfg-file", - help="Path to the cmsd worker config file.", - default=cmsd_worker_cfg_template, + "--worker-svc-cfg-file", + help="Path to the worker-svc config file.", + default=worker_svc_cfg_template, show_default=True, ) -option_cmsd_worker_cfg_path = partial( +option_worker_svc_cfg_path = partial( click.option, - "--cmsd-worker-cfg-path", - help="Location to render cmsd_worker_cfg_file.", - default=cmsd_worker_cfg_path, - show_default=True, -) - - -option_xrdssi_cfg_file = partial( - click.option, - "--xrdssi-cfg-file", - 
help="Path to the xrdssi config file.", - default=xrdssi_cfg_template, - show_default=True, -) - - -option_xrdssi_cfg_path = partial( - click.option, - "--xrdssi-cfg-path", - help="Location to render xrdssi-cfg-file.", - default=xrdssi_cfg_path, + "--worker-svc-cfg-path", + help="Location to render worker-svc-cfg-file.", + default=worker_svc_cfg_path, show_default=True, ) @@ -558,13 +516,12 @@ def delete_database( help="The admin URI to the proxy's database, used for schema initialization. " + socket_option_help, required=True, ) -@option_xrootd_manager(required=True) @click.option( "--proxy-backend-address", default="127.0.0.1:3306", show_default=True, help="This is the same as the proxy-backend-address option to mysql proxy. This value is substitued " - "into the proxy-backend-address parameter in 'my-proxy.cnf.jinja'.", + "into the proxy-backend-address parameter in 'my-proxy.cfg.jinja'.", ) @click.option( "--proxy-cfg-file", @@ -624,7 +581,6 @@ def proxy(ctx: click.Context, **kwargs: Any) -> None: help="The non-admin URI to the Czar's database, used for non-smig purposes. 
" + socket_option_help, required=True, ) -@option_xrootd_manager(required=True) @click.option( "--http-port", default="4048", @@ -730,112 +686,11 @@ def czar_http(ctx: click.Context, **kwargs: Any) -> None: ) -@entrypoint.command() -@pass_context -@click.option( - "--cms-delay-servers", - help="Populates 'cms.delay servers' in the cmsd manager config file.", -) -@click.option( - "--cmsd_manager_cfg_file", - help="Path to the cmsd manager config file.", - default=cmsd_manager_cfg_template, - show_default=True, -) -@click.option( - "--cmsd-manager-cfg-path", - help="Location to render cmsd_manager_cfg_file", - default=cmsd_manager_cfg_path, - show_default=True, -) -@options_targs() -@options_cms() -@option_options_file() -def cmsd_manager(ctx: click.Context, **kwargs: Any) -> None: - """Start as a cmsd manager node.""" - targs = utils.targs(ctx) - targs = render_targs(targs) - script.enter_manager_cmsd( - targs=targs, - cmsd_manager_cfg_file=targs["cmsd_manager_cfg_file"], - cmsd_manager_cfg_path=targs["cmsd_manager_cfg_path"], - cmd=targs["cmd"], - ) - - -@entrypoint.command() -@pass_context -@option_cmsd_manager_name() -@click.option( - "--xrootd_manager-cfg-file", - help="Path to the xrootd manager config file.", - default=xrootd_manager_cfg_template, - show_default=True, -) -@click.option( - "--xrootd-manager-cfg-path", - help="Location to render xrootd_manager_cfg_file.", - default=xrootd_manager_cfg_path, - show_default=True, -) -@options_targs() -@options_cms() -@option_options_file() -def xrootd_manager(ctx: click.Context, **kwargs: Any) -> None: - """Start as an xrootd manager node.""" - targs = utils.targs(ctx) - targs = render_targs(targs) - script.enter_xrootd_manager( - targs=targs, - xrootd_manager_cfg_file=targs["xrootd_manager_cfg_file"], - xrootd_manager_cfg_path=targs["xrootd_manager_cfg_path"], - cmd=targs["cmd"], - ) - - -@entrypoint.command(help=f"Start as a worker cmsd node.\n\n{socket_option_description}") -@pass_context 
-@option_db_uri(help=worker_db_help) -@option_vnid_config(required=True) -@option_vnid_config(required=True) -@option_repl_instance_id(required=True) -@option_repl_auth_key(required=True) -@option_repl_admin_auth_key(required=True) -@option_repl_registry_host(required=True) -@option_repl_registry_port(required=True) -@option_repl_http_port(required=True) -@option_results_dirname() -@option_cmsd_manager_name() -@option_debug() -@option_cmsd_worker_cfg_file() -@option_cmsd_worker_cfg_path() -@option_xrdssi_cfg_file() -@option_xrdssi_cfg_path() -@option_log_cfg_file() -@options_targs() -@options_cms() -@option_options_file() -def worker_cmsd(ctx: click.Context, **kwargs: Any) -> None: - targs = utils.targs(ctx) - targs = render_targs(targs) - script.enter_worker_cmsd( - targs=targs, - db_uri=targs["db_uri"], - cmsd_worker_cfg_file=targs["cmsd_worker_cfg_file"], - cmsd_worker_cfg_path=targs["cmsd_worker_cfg_path"], - xrdssi_cfg_file=targs["xrdssi_cfg_file"], - xrdssi_cfg_path=targs["xrdssi_cfg_path"], - log_cfg_file=targs["log_cfg_file"], - cmd=targs["cmd"], - ) - - -@entrypoint.command(help=f"Start as a worker xrootd node.\n\n{socket_option_description}") +@entrypoint.command(help=f"Start as a worker-svc node.\n\n{socket_option_description}") @pass_context @option_debug() @option_db_uri(help=worker_db_help) @option_db_admin_uri(help=admin_worker_db_help) -@option_vnid_config(required=True) @option_repl_instance_id(required=True) @option_repl_auth_key(required=True) @option_repl_admin_auth_key(required=True) @@ -843,27 +698,22 @@ def worker_cmsd(ctx: click.Context, **kwargs: Any) -> None: @option_repl_registry_port(required=True) @option_repl_http_port(required=True) @option_results_dirname() -@option_cmsd_manager_name() @option_db_qserv_user() -@option_cmsd_worker_cfg_file() -@option_cmsd_worker_cfg_path() -@option_xrdssi_cfg_file() -@option_xrdssi_cfg_path() +@option_worker_svc_cfg_file() +@option_worker_svc_cfg_path() @option_log_cfg_file() @options_targs() 
@options_cms() @option_options_file() -def worker_xrootd(ctx: click.Context, **kwargs: Any) -> None: +def worker_svc(ctx: click.Context, **kwargs: Any) -> None: targs = utils.targs(ctx) targs = render_targs(targs) - script.enter_worker_xrootd( + script.enter_worker_svc( targs=targs, db_uri=targs["db_uri"], db_admin_uri=targs["db_admin_uri"], - cmsd_worker_cfg_file=targs["cmsd_worker_cfg_file"], - cmsd_worker_cfg_path=targs["cmsd_worker_cfg_path"], - xrdssi_cfg_file=targs["xrdssi_cfg_file"], - xrdssi_cfg_path=targs["xrdssi_cfg_path"], + worker_svc_cfg_file=targs["worker_svc_cfg_file"], + worker_svc_cfg_path=targs["worker_svc_cfg_path"], log_cfg_file=targs["log_cfg_file"], cmd=targs["cmd"], ) @@ -919,10 +769,6 @@ def worker_repl(ctx: click.Context, **kwargs: Any) -> None: + socket_option_help, required=True, ) -@click.option( - "--xrootd-manager", - help="The host name of the xrootd manager node.", -) @option_log_cfg_file() @options_cms() @click.option( diff --git a/python/lsst/qserv/admin/cli/options.py b/python/lsst/qserv/admin/cli/options.py index 1b7d95bbcd..6220fc41b4 100644 --- a/python/lsst/qserv/admin/cli/options.py +++ b/python/lsst/qserv/admin/cli/options.py @@ -192,21 +192,6 @@ def __call__(self, f: Callable) -> Callable: ) -option_vnid_config = partial( - click.option, - "--vnid-config", - help="The config parameters used by the qserv cmsd to get the vnid from the specified " - " source (static string, a file or worker database).", -) - - -option_xrootd_manager = partial( - click.option, - "--xrootd-manager", - help="The host name of the xrootd manager.", -) - - option_tests_yaml = partial( click.option, "--tests-yaml", @@ -341,6 +326,6 @@ def decorators(self) -> list[Callable]: click.option, "--log-cfg-file", help="Path to the log4cxx config file.", - default="/config-etc/log/log.cnf", + default="/config-etc/log/log.cfg", show_default=True, ) diff --git a/python/lsst/qserv/admin/cli/script.py b/python/lsst/qserv/admin/cli/script.py index 
0d512feb3a..282aa60a23 100644 --- a/python/lsst/qserv/admin/cli/script.py +++ b/python/lsst/qserv/admin/cli/script.py @@ -266,136 +266,16 @@ def smig_worker(connection: str, update: bool) -> None: _do_smig(worker_smig_dir, "worker", connection, update) -def enter_manager_cmsd( - targs: Targs, - cmsd_manager_cfg_file: str, - cmsd_manager_cfg_path: str, - cmd: str, -) -> None: - """Start a cmsd manager qserv node. - - Parameters - ---------- - targs : `Targs` - The arguments for template expansion. - cmsd_manager_cfg_file : str - Path to the cmsd manager config file. - cmsd_manager_cfg_path : str - Location to render cmsd_manager_cfg_template. - cmd : str - The jinja2 template for the command for this function to execute. - """ - apply_template_cfg_file(cmsd_manager_cfg_file, cmsd_manager_cfg_path, targs) - - env = dict( - os.environ, - LD_PRELOAD=ld_preload, - ) - - sys.exit(_run(args=None, env=env, cmd=cmd)) - - -def enter_xrootd_manager( - targs: Targs, - xrootd_manager_cfg_file: str, - xrootd_manager_cfg_path: str, - cmd: str, -) -> None: - """Start an xrootd manager qserv node. - - Parameters - ---------- - targs : Targs - The arguments for template expansion. - xrootd_manager_cfg_file : str - Path to the cmsd manager config file. - xrootd_manager_cfg_path : str - Location to render cmsd_manager_cfg_template. - cmd : str - The jinja2 template for the command for this function to execute. - """ - apply_template_cfg_file(xrootd_manager_cfg_file, xrootd_manager_cfg_path, targs) - - env = dict( - os.environ, - LD_PRELOAD=ld_preload, - ) - - sys.exit(_run(args=None, env=env, cmd=cmd)) - - -def enter_worker_cmsd( - targs: Targs, - db_uri: str, - cmsd_worker_cfg_file: str, - cmsd_worker_cfg_path: str, - xrdssi_cfg_file: str, - xrdssi_cfg_path: str, - log_cfg_file: str, - cmd: str, -) -> None: - """Start a worker cmsd node. 
- - Parameters - ---------- - vnid_config : str - The config parameters used by the qserv cmsd to get the vnid - from the specified source (static string, a file or worker database). - targs : Targs - The arguments for template expansion. - db_uri : str - The non-admin URI to the worker's database. - cmsd_worker_cfg_file : str - The path to the worker cmsd config file. - cmsd_worker_cfg_path : str - The location to render the worker cmsd config file. - xrdssi_cfg_file : str - The path to the xrdssi config file. - xrdssi_cfg_path : str - The location to render the the xrdssi config file. - log_cfg_file : `str` - Location of the log4cxx config file. - cmd : str - The jinja2 template for the command for this function to execute. - """ - url = _process_uri( - uri=db_uri, - query_keys=("socket",), - option=options.option_db_uri.args[0], - block=True, - ) - targs["db_host"] = url.host - targs["db_port"] = url.port or "" - targs["db_socket"] = url.query.get("socket", "") - - apply_template_cfg_file(cmsd_worker_cfg_file, cmsd_worker_cfg_path, targs) - apply_template_cfg_file(xrdssi_cfg_file, xrdssi_cfg_path, targs) - - # wait before worker database will be fully initialized as needed - # for the vnid plugin to function correctly - _do_smig_block(worker_smig_dir, "worker", db_uri) - - env = dict( - os.environ, - LD_PRELOAD=ld_preload, - LSST_LOG_CONFIG=log_cfg_file, - ) - - sys.exit(_run(args=None, env=env, cmd=cmd)) - - -def enter_worker_xrootd( +def enter_worker_svc( targs: Targs, db_uri: str, db_admin_uri: str, - cmsd_worker_cfg_file: str, - cmsd_worker_cfg_path: str, - xrdssi_cfg_file: str, - xrdssi_cfg_path: str, + worker_svc_cfg_file: str, + worker_svc_cfg_path: str, log_cfg_file: str, cmd: str, ) -> None: - """Start a worker xrootd node. + """Start a worker-svc node. Parameters ---------- @@ -405,14 +285,10 @@ def enter_worker_xrootd( The non-admin URI to the proxy's database. db_admin_uri : str The admin URI to the proxy's database.
- cmsd_worker_cfg_file : str - The path to the worker cmsd config file. - cmsd_worker_cfg_path : str - The location to render to the worker cmsd config file. - xrdssi_cfg_file : str - The path to the xrdssi config file. - xrdssi_cfg_path : str - The location to render to the xrdssi config file. + worker_svc_cfg_file : str + The path to the worker config file. + worker_svc_cfg_path : str + The location to render to the worker config file. log_cfg_file : `str` Location of the log4cxx config file. cmd : `str` @@ -449,15 +325,16 @@ def enter_worker_xrootd( targs["db_socket"] = url.query.get("socket", "") save_template_cfg(targs) - save_template_cfg({"mysqld_user_qserv_password": mysqld_user_qserv_password}) + save_template_cfg( + { + "mysqld_user_qserv_password": mysqld_user_qserv_password, + "worker_name": "", + } + ) smig_worker(db_admin_uri, update=False) - # TODO worker (and manager) xrootd+cmsd pair should "share" the cfg file - # it's in different containers but should be same source & processing. - # Rename these files to be more agnostic. - apply_template_cfg_file(cmsd_worker_cfg_file, cmsd_worker_cfg_path) - apply_template_cfg_file(xrdssi_cfg_file, xrdssi_cfg_path) + apply_template_cfg_file(worker_svc_cfg_file, worker_svc_cfg_path) env = dict( os.environ, @@ -571,7 +448,7 @@ def enter_proxy( The admin URI to the proxy's database. proxy_backend_address : `str` A colon-separated ip address and port number (e.g. "127.0.0.1:3306") - substituted into my-proxy.cnf.jinja, used by mysql proxy. + substituted into my-proxy.cfg.jinja, used by mysql proxy. proxy_cfg_file : `str` Path to the mysql proxy config file. 
proxy_cfg_path : `str` diff --git a/python/lsst/qserv/admin/qservCli/launch.py b/python/lsst/qserv/admin/qservCli/launch.py index a37daf1ea0..4f1897ac87 100644 --- a/python/lsst/qserv/admin/qservCli/launch.py +++ b/python/lsst/qserv/admin/qservCli/launch.py @@ -245,7 +245,9 @@ def cmake( build_image, "cmake", "..", + "-DCMAKE_BUILD_TYPE=Debug", ] + # "-DCMAKE_BUILD_TYPE=Debug" if dry: print(" ".join(args)) return diff --git a/python/lsst/qserv/schema/README.md b/python/lsst/qserv/schema/README.md index 610be4376d..692399a9f1 100644 --- a/python/lsst/qserv/schema/README.md +++ b/python/lsst/qserv/schema/README.md @@ -51,7 +51,7 @@ positional parameter to the script. Database connection can be specified in couple different ways: - as a sqlalchemy-style URL using -c option: `-c mysql://user:pass@host:port/dbName` - as a reference to a configuration section in some INI file: - `-f etc/qserv-czar.cnf -s css`. + `-f etc/qserv-czar.cfg -s css`. Latter case can be used when database connection parameters are stores in existing configuration files (e.g. 
files in existing qserv installation), diff --git a/python/lsst/qserv/schema/migrations/czar/migrate-10-to-11.sql b/python/lsst/qserv/schema/migrations/czar/migrate-10-to-11.sql index 854f5624e6..b386806170 100644 --- a/python/lsst/qserv/schema/migrations/czar/migrate-10-to-11.sql +++ b/python/lsst/qserv/schema/migrations/czar/migrate-10-to-11.sql @@ -34,3 +34,18 @@ CREATE OR REPLACE LEFT OUTER JOIN `QTable` AS `qt` ON `qi`.`queryId`=`qt`.`queryId` LEFT OUTER JOIN `QMessages` AS `qm` ON `qi`.`queryId`=`qm`.`queryId` GROUP BY `qi`.`queryId`; + + +CREATE TABLE IF NOT EXISTS `chunkMap` ( + `worker` VARCHAR(256) NOT NULL COMMENT 'A unique identifier of a worker hosting the chunk replica', + `database` VARCHAR(256) NOT NULL COMMENT 'The name of a database', + `table` VARCHAR(256) NOT NULL COMMENT 'The name of a table', + `chunk` INT UNSIGNED NOT NULL COMMENT 'The number of a chunk', + `size` BIGINT UNSIGNED NOT NULL COMMENT 'The size of a chunk') +ENGINE = InnoDB +COMMENT = 'Chunk disposition across workers'; + +CREATE TABLE IF NOT EXISTS `chunkMapStatus` ( + `update_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'The most recent update time of the map') +ENGINE = InnoDB +COMMENT = 'Satus info on the chunk map'; diff --git a/python/lsst/qserv/schema/migrations/czar/migrate-11-to-12.sql b/python/lsst/qserv/schema/migrations/czar/migrate-11-to-12.sql index 0adc1075fc..2269c95413 100644 --- a/python/lsst/qserv/schema/migrations/czar/migrate-11-to-12.sql +++ b/python/lsst/qserv/schema/migrations/czar/migrate-11-to-12.sql @@ -1,14 +1,19 @@ --- ------------------------------------------------------------------- --- Rename table QStatsTmp into QProgress to reflect its purpose --- and add a foreign key constraint to QInfo table. --- This table tracks chunk processing progress of the running queries. 
--- ------------------------------------------------------------------- -ALTER TABLE QStatsTmp RENAME AS QProgress; -ALTER TABLE QProgress ADD CONSTRAINT `fk_queryId` FOREIGN KEY (`queryId`) REFERENCES `QInfo` (`queryId`) ON DELETE CASCADE ON UPDATE CASCADE; -ALTER TABLE QProgress COMMENT = 'Table to track chunk processing progress of the running queries.'; +-- ----------------------------------------------------- +-- Table `chunkMap` +-- ----------------------------------------------------- +CREATE TABLE IF NOT EXISTS `chunkMap` ( + `worker` VARCHAR(256) NOT NULL COMMENT 'A unique identifier of a worker hosting the chunk replica', + `database` VARCHAR(256) NOT NULL COMMENT 'The name of a database', + `table` VARCHAR(256) NOT NULL COMMENT 'The name of a table', + `chunk` INT UNSIGNED NOT NULL COMMENT 'The number of a chunk', + `size` BIGINT UNSIGNED NOT NULL COMMENT 'The size of a chunk') +ENGINE = InnoDB +COMMENT = 'Chunk disposition across workers'; --- ------------------------------------------------------------------- --- Drop the QWorker table as it is no longer needed. --- This table was used to track worker nodes and their statuses. 
--- ------------------------------------------------------------------- -DROP TABLE IF EXISTS QWorker; +-- ----------------------------------------------------- +-- Table `chunkMapStatus` +-- ----------------------------------------------------- +CREATE TABLE IF NOT EXISTS `chunkMapStatus` ( + `update_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'The most recent update time of the map') +ENGINE = InnoDB +COMMENT = 'Satus info on the chunk map'; \ No newline at end of file diff --git a/python/lsst/qserv/schema/migrations/repl/schema_migration.py b/python/lsst/qserv/schema/migrations/repl/schema_migration.py index 67f332bf6d..a97881234e 100644 --- a/python/lsst/qserv/schema/migrations/repl/schema_migration.py +++ b/python/lsst/qserv/schema/migrations/repl/schema_migration.py @@ -114,17 +114,25 @@ def _create_users(self) -> None: "A non-admin replication database connection uri must be provided to initialize the " "replication database." ) - user = make_url(self.repl_connection).username + url = make_url(self.repl_connection) + user = url.username + password = url.password + if not user: raise RuntimeError( "To initialize the replication database, the non-admin connection uri must contain a user " "name." ) + if not password: + raise RuntimeError( + "To initialize the replication database, the non-admin connection uri must contain a " + "password." 
+ ) for stmt in [ - f"CREATE USER IF NOT EXISTS {user}@localhost;", - f"CREATE USER IF NOT EXISTS {user}@'%';", - f"GRANT ALL ON {database}.* TO {user}@localhost;", - f"GRANT ALL ON {database}.* TO {user}@'%';", + f"CREATE USER IF NOT EXISTS {user}@localhost IDENTIFIED BY '{password}';", + f"CREATE USER IF NOT EXISTS {user}@'%' IDENTIFIED BY '{password}';", + f"GRANT ALL ON {database}.* TO {user}@localhost;", + f"GRANT ALL ON {database}.* TO {user}@'%';", "FLUSH PRIVILEGES;", ]: with closing(self.connection.cursor()) as cursor: diff --git a/python/lsst/qserv/schema/migrations/worker/migrate-0-to-1.sql b/python/lsst/qserv/schema/migrations/worker/migrate-0-to-1.sql index dac7afc6bc..b435c4af91 100644 --- a/python/lsst/qserv/schema/migrations/worker/migrate-0-to-1.sql +++ b/python/lsst/qserv/schema/migrations/worker/migrate-0-to-1.sql @@ -81,7 +81,9 @@ CREATE TABLE IF NOT EXISTS `Id` ( -- Add a record with a unique identifier of a worker -INSERT INTO Id (`id`) VALUES (UUID()); +INSERT INTO `Id` (`id`) VALUES ( + COALESCE(NULLIF('{{ worker_name }}', ''), CONCAT(UUID())) +); -- ----------------------------------------------------- -- Create table `QMetadata` diff --git a/python/lsst/qserv/schema/migrations/worker/migrate-None-to-3.sql.jinja b/python/lsst/qserv/schema/migrations/worker/migrate-None-to-3.sql.jinja index 2f7841922d..839050288d 100644 --- a/python/lsst/qserv/schema/migrations/worker/migrate-None-to-3.sql.jinja +++ b/python/lsst/qserv/schema/migrations/worker/migrate-None-to-3.sql.jinja @@ -56,7 +56,9 @@ CREATE TABLE `qservw_worker`.`Id` ( UNIQUE KEY (`type`)) ENGINE=InnoDB; -INSERT INTO `qservw_worker`.`Id` (`id`) VALUES (UUID()); +INSERT INTO `qservw_worker`.`Id` (`id`) VALUES ( + COALESCE(NULLIF('{{ worker_name }}', ''), CONCAT(UUID())) +); -- ----------------------------------------------------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 437776ac2c..1f5db6fac7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,6 @@ 
find_package(Protobuf REQUIRED) find_package(Python REQUIRED COMPONENTS Development Interpreter) find_package(pybind11 REQUIRED) find_package(Threads REQUIRED) -find_package(XRootD REQUIRED) find_package(httplib REQUIRED) find_package(aws-c-auth REQUIRED) find_package(aws-c-cal REQUIRED) @@ -66,7 +65,7 @@ add_subdirectory(http) add_subdirectory(mysql) add_subdirectory(parser) add_subdirectory(partition) -add_subdirectory(proto) +add_subdirectory(protojson) add_subdirectory(proxy) add_subdirectory(qana) add_subdirectory(qdisp) @@ -79,12 +78,11 @@ add_subdirectory(rproc) add_subdirectory(sql) add_subdirectory(util) add_subdirectory(wbase) +add_subdirectory(wcomms) add_subdirectory(wconfig) add_subdirectory(wcontrol) add_subdirectory(wdb) +add_subdirectory(wmain) add_subdirectory(wpublish) add_subdirectory(wsched) add_subdirectory(www) -add_subdirectory(xrdlog) -add_subdirectory(xrdreq) -add_subdirectory(xrdsvc) diff --git a/src/admin/templates/http/etc/qserv-czar.cnf.jinja b/src/admin/templates/http/etc/qserv-czar.cfg.jinja similarity index 84% rename from src/admin/templates/http/etc/qserv-czar.cnf.jinja rename to src/admin/templates/http/etc/qserv-czar.cfg.jinja index 8b9847cbe4..b2d14f7cd1 100644 --- a/src/admin/templates/http/etc/qserv-czar.cnf.jinja +++ b/src/admin/templates/http/etc/qserv-czar.cfg.jinja @@ -1,6 +1,5 @@ [frontend] -xrootd={{ xrootd_manager }}:1094 #[mgmtdb] #db=qservMeta @@ -55,26 +54,6 @@ port = {{ czar_db_port }} [tuning] largeResultConcurrentMerges = 6 -# xrootdCBThreadsInit must be less than xrootdCBThreadsMax -xrootdCBThreadsMax = 500 -xrootdCBThreadsInit = 50 - -# INSTRUCTIONS: -# -# Allowed range: 1 - 1024. Any number above 1024 will be truncated to 1024. -# The deafault value of the parameter in xrootd is 4. Set it higher in -# Qserv deployments with the large number of chunks per catalog, or/and -# for use cases where many queries are processed by Qserv simultaneously, -# even for a modest number of chunks per catalog. 
-# -# Set the parameter to 0 to enable auto-tuning. -# -# The general formula for auto-tuning implies incrementing a value of the parameter -# by 1 for every 65,000 "in-flight" requests. -# -# The value set below has been tested to work for 80 simultaneous "shared scan" queries -# of 150,000 chunks each. -xrootdSpread = 0 # Seconds between updates the czar sends to qmeta for completed chunks. # This is per user query and important milestones ignore this limit. @@ -108,8 +87,6 @@ largestPriority = 3 vectRunSizes = 50:50:50:50 # Minimum number of threads running for each queue. No spaces. Values separated by ':' vectMinRunningSizes = 0:1:3:3 -# Maximum number of QueryRequests allowed to be running at one time. -qReqPseudoFifoMaxRunning = 299 [replication] diff --git a/src/admin/templates/proxy/etc/my-proxy.cnf.jinja b/src/admin/templates/proxy/etc/my-proxy.cfg.jinja similarity index 100% rename from src/admin/templates/proxy/etc/my-proxy.cnf.jinja rename to src/admin/templates/proxy/etc/my-proxy.cfg.jinja diff --git a/src/admin/templates/proxy/etc/qserv-czar.cnf.jinja b/src/admin/templates/proxy/etc/qserv-czar.cfg.jinja similarity index 82% rename from src/admin/templates/proxy/etc/qserv-czar.cnf.jinja rename to src/admin/templates/proxy/etc/qserv-czar.cfg.jinja index 2878854e12..3c91df8796 100644 --- a/src/admin/templates/proxy/etc/qserv-czar.cnf.jinja +++ b/src/admin/templates/proxy/etc/qserv-czar.cfg.jinja @@ -1,6 +1,5 @@ [frontend] -xrootd={{ xrootd_manager }}:1094 #[mgmtdb] #db=qservMeta @@ -26,12 +25,29 @@ port = {{ czar_db_port }} # Any table in resultdb that hasn't been updated in this many days is deleted. oldestResultKeptDays = 7 +# Either this should be changed to a high performance docker volume directory +# or /tmp should be mounted as a high performance docker volume directory +# to avoid using limited docker memory to store the contents. 
+transferDir = /tmp + # maximum number of connection retries to SQL databse (per connection attempt) maxsqlconnectionattempts = 10 # maximum user query result size in MB maxtablesize_mb = 5100 +# maximum number of MB of concurrent csv transfer files allowed to be kept in +# memory, after this point they will be temporarily written to disk. +# 0 is used for testing. 10000 is usually reasonable. +maxTransferMemMB = 0 + +# minimum number of MB for each csv transfer file to be kept in memory +# before possibly going to disk. +# 0 for testing, up to 10 should be reasonable. +transferMinMBInMem = 0 + + + # database connection for QMeta database [qmeta] @@ -57,26 +73,6 @@ port = {{ czar_db_port }} [tuning] #largeResultConcurrentMerges = 3 largeResultConcurrentMerges = 6 -# xrootdCBThreadsInit must be less than xrootdCBThreadsMax -xrootdCBThreadsMax = 500 -xrootdCBThreadsInit = 50 - -# INSTRUCTIONS: -# -# Allowed range: 1 - 1024. Any number above 1024 will be truncated to 1024. -# The deafault value of the parameter in xrootd is 4. Set it higher in -# Qserv deployments with the large number of chunks per catalog, or/and -# for use cases where many queries are processed by Qserv simultaneously, -# even for a modest number of chunks per catalog. -# -# Set the parameter to 0 to enable auto-tuning. -# -# The general formula for auto-tuning implies incrementing a value of the parameter -# by 1 for every 65,000 "in-flight" requests. -# -# The value set below has been tested to work for 80 simultaneous "shared scan" queries -# of 150,000 chunks each. -xrootdSpread = 0 # Seconds between updates the czar sends to qmeta for completed chunks. # This is per user query and important milestones ignore this limit.
@@ -97,21 +93,19 @@ notifyWorkersOnCzarRestart = 1 #[debug] #chunkLimit = -1 -# Please see qdisp/QdispPool.h QdispPool::QdispPool for more information +# Please see util/QdispPool.h QdispPool::QdispPool for more information [qdisppool] #size of the pool -poolSize = 50 +poolSize = 1000 # Low numbers are higher priority. Largest priority 3 creates 4 priority queues 0, 1, 2, 3 # Must be greater than 0. largestPriority = 3 # Maximum number of threads running for each queue. No spaces. Values separated by ':' # Using largestPriority = 2 and vectRunsizes = 3:5:8 # queue 0 would have runSize 3, queue 1 would have runSize 5, and queue 2 would have runSize 8. -vectRunSizes = 50:50:50:50 +vectRunSizes = 800:800:500:500 # Minimum number of threads running for each queue. No spaces. Values separated by ':' -vectMinRunningSizes = 0:1:3:3 -# Maximum number of QueryRequests allowed to be running at one time. -qReqPseudoFifoMaxRunning = 299 +vectMinRunningSizes = 0:3:3:3 [replication] diff --git a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja b/src/admin/templates/worker-svc/etc/worker-svc.cfg.jinja similarity index 95% rename from src/admin/templates/xrootd/etc/xrdssi.cf.jinja rename to src/admin/templates/worker-svc/etc/worker-svc.cfg.jinja index 49c9987292..233b587d2a 100644 --- a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja +++ b/src/admin/templates/worker-svc/etc/worker-svc.cfg.jinja @@ -1,4 +1,4 @@ -# Qserv xrdssi plugin configuration file +# Qserv worker configuration file # Default values for parameters are commented [mysql] @@ -71,10 +71,6 @@ reservedinteractivesqlconn = 930 # The name of a folder where query results will be stored. dirname = {{ results_dirname }} -# The port number of the worker XROOTD service for serving files. 
-# NOTE: the hardcoded value may need to be replaced with a template -xrootd_port = 1094 - # The number of the BOOST ASIO threads for HTTP requests num_http_threads = 4 diff --git a/src/admin/templates/xrootd/etc/cmsd-manager.cf.jinja b/src/admin/templates/xrootd/etc/cmsd-manager.cf.jinja deleted file mode 100644 index f907bcd1e6..0000000000 --- a/src/admin/templates/xrootd/etc/cmsd-manager.cf.jinja +++ /dev/null @@ -1,49 +0,0 @@ -all.role manager - -# Path to write logging and other information -all.adminpath /var/run/xrootd - -# Do not change. This specifies valid virtual paths that can be accessed. -# "nolock" directive prevents write-locking and is important for qserv -# qserv is hardcoded for these paths. -all.export / nolock - -# Specify that no significant free space is required on servers -# Indeed current configuration doesn't expect to be dynamically -# written to, but export the space in R/W mode -cms.space 1k 2k - -# Specify the minimum number of servers that must be subscribed for load -# balancing to be effective. -cms.delay servers {{ cms_delay_servers }} - -# ssi.loglib libxrdlog.so - -# Optional: Prevent dns resolution in logs. -# This may speed up request processing. -xrd.network nodnr - -# This causes hostname resolution to occur at run-time not configuration time -# This is required by k8s -# Andy H. still have to modify the local IP-to-Name cache to account -# for dynamic DNS (it doesn't now). Unfortunately, it's a non-ABI compatible -# change so it will go into Release 5 branch not git master. The caching -# shouldn't really be a problem but if causes you grief simply turn it off by -# also specifying "xrd.network cache 0". Once Andy H. fixes the cache it will work -# correctly with a dynamic DNS with no side-effects (though it's unlikely any of -# them are observed as it is). 
-xrd.network dyndns -xrd.network cache 0 - -all.manager UNUSED:2131 - -# - cmsd redirector runs on port 2131 -# - cmsd server does not open server socket -# but only client connection to cmsd redirector -# - xrootd default port is 1094 -if exec cmsd - xrd.port 2131 -fi - -# Uncomment the following line for detailed xrootd debugging -# xrootd.trace all debug diff --git a/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja b/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja deleted file mode 100644 index 172c2edc51..0000000000 --- a/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja +++ /dev/null @@ -1,68 +0,0 @@ -# Use server mode -all.role server - -cms.vnid {{ vnid_config }} - -# Use XrdSsi plugin -xrootd.fslib -2 libXrdSsi.so -ssi.svclib libxrdsvc.so -oss.statlib -2 -arevents libXrdSsi.so - -# Force disable asyncronous access -# because of XrdSsi -xrootd.async off - -ssi.trace all debug - -######################################## -# Shared directives (manager and server) -######################################## - -# Path to write logging and other information -all.adminpath /var/run/xrootd - -# Do not change. This specifies valid virtual paths that can be accessed. -# "nolock" directive prevents write-locking and is important for qserv -# qserv is hardcoded for these paths. -all.export / nolock - -# Specify that no significant free space is required on servers -# Indeed current configuration doesn't expect to be dynamically -# written to, but export the space in R/W mode -cms.space 1k 2k - -ssi.loglib libxrdlog.so - -# Optional: Prevent dns resolution in logs. -# This may speed up request processing. -xrd.network nodnr - -# This causes hostname resolution to occur at run-time not configuration time -# This is required by k8s -# Andy H. still have to modify the local IP-to-Name cache to account -# for dynamic DNS (it doesn't now). Unfortunately, it's a non-ABI compatible -# change so it will go into Release 5 branch not git master. 
The caching -# shouldn't really be a problem but if causes you grief simply turn it off by -# also specifying "xrd.network cache 0". Once Andy H. fixes the cache it will work -# correctly with a dynamic DNS with no side-effects (though it's unlikely any of -# them are observed as it is). -xrd.network dyndns -xrd.network cache 0 - -all.manager {{cmsd_manager_name}}:2131 - -# - cmsd redirector runs on port 2131 -# - cmsd server does not open server socket -# but only client connection to cmsd redirector -# - xrootd default port is 1094 -xrd.port 1094 - -# Uncomment the following line for detailed xrootd debugging -# xrootd.trace all debug - -# Enforce the default limits for the number of threads created/managed by XROOTD. -# Formally, these defaults are supposed to be enforced by the implementation -# as stated in: https://xrootd.web.cern.ch/doc/dev57/xrd_config.htm#_Toc171719950 -# In reality, no limit is set. The problem was reported to the XROOT developers -# in: https://github.com/xrootd/xrootd/issues/2468 -xrd.sched mint 8 maxt 2048 avlt 512 idle 780 diff --git a/src/admin/templates/xrootd/etc/xrootd-manager.cf.jinja b/src/admin/templates/xrootd/etc/xrootd-manager.cf.jinja deleted file mode 100644 index 8972766c89..0000000000 --- a/src/admin/templates/xrootd/etc/xrootd-manager.cf.jinja +++ /dev/null @@ -1,43 +0,0 @@ -# Use manager mode -all.role manager - -# Path to write logging and other information -all.adminpath /var/run/xrootd - -# Do not change. This specifies valid virtual paths that can be accessed. -# "nolock" directive prevents write-locking and is important for qserv -# qserv is hardcoded for these paths. -all.export / nolock - -# Specify that no significant free space is required on servers -# Indeed current configuration doesn't expect to be dynamically -# written to, but export the space in R/W mode -cms.space 1k 2k - -# ssi.loglib libxrdlog.so - -# Optional: Prevent dns resolution in logs. -# This may speed up request processing. 
-xrd.network nodnr - -# This causes hostname resolution to occur at run-time not configuration time -# This is required by k8s -# Andy H. still have to modify the local IP-to-Name cache to account -# for dynamic DNS (it doesn't now). Unfortunately, it's a non-ABI compatible -# change so it will go into Release 5 branch not git master. The caching -# shouldn't really be a problem but if causes you grief simply turn it off by -# also specifying "xrd.network cache 0". Once Andy H. fixes the cache it will work -# correctly with a dynamic DNS with no side-effects (though it's unlikely any of -# them are observed as it is). -xrd.network dyndns -xrd.network cache 0 - -all.manager {{cmsd_manager_name}}:2131 - -# - cmsd redirector runs on port 2131 -# - cmsd server does not open server socket -# but only client connection to cmsd redirector -# - xrootd default port is 1094 - -# Uncomment the following line for detailed xrootd debugging -# xrootd.trace all debug diff --git a/src/cconfig/CMakeLists.txt b/src/cconfig/CMakeLists.txt index 37a3f75816..47a16e5106 100644 --- a/src/cconfig/CMakeLists.txt +++ b/src/cconfig/CMakeLists.txt @@ -4,15 +4,8 @@ target_sources(cconfig PRIVATE CzarConfig.cc ) -target_include_directories(cconfig PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - target_link_libraries(cconfig PUBLIC log - XrdSsiLib ) -install( - TARGETS cconfig -) +install(TARGETS cconfig) diff --git a/src/cconfig/CzarConfig.cc b/src/cconfig/CzarConfig.cc index c27e7336ca..94425c4c09 100644 --- a/src/cconfig/CzarConfig.cc +++ b/src/cconfig/CzarConfig.cc @@ -28,7 +28,6 @@ #include // Third party headers -#include "XrdSsi/XrdSsiLogger.hh" // LSST headers #include "lsst/log/Log.h" @@ -43,30 +42,15 @@ namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.cconfig.CzarConfig"); -void QservLogger(struct timeval const& mtime, unsigned long tID, const char* msg, int mlen) { - static log4cxx::spi::LocationInfo xrdLoc( - "client", log4cxx::spi::LocationInfo::calcShortFileName("client"), "", 0); - static 
LOG_LOGGER myLog = LOG_GET("lsst.qserv.xrdssi.msgs"); - - if (myLog.isInfoEnabled()) { - while (mlen && msg[mlen - 1] == '\n') --mlen; // strip all trailing newlines - std::string theMsg(msg, mlen); - lsst::log::Log::MDC("LWP", std::to_string(tID)); - myLog.logMsg(log4cxx::Level::getInfo(), xrdLoc, theMsg); - } -} - -bool dummy = XrdSsiLogger::SetMCB(QservLogger, XrdSsiLogger::mcbClient); } // namespace namespace lsst::qserv::cconfig { std::mutex CzarConfig::_mtxOnInstance; -std::shared_ptr CzarConfig::_instance; +CzarConfig::Ptr CzarConfig::_instance; -std::shared_ptr CzarConfig::create(std::string const& configFileName, - std::string const& czarName) { +CzarConfig::Ptr CzarConfig::create(std::string const& configFileName, std::string const& czarName) { std::lock_guard const lock(_mtxOnInstance); if (_instance == nullptr) { _instance = std::shared_ptr(new CzarConfig(util::ConfigStore(configFileName), czarName)); @@ -74,7 +58,7 @@ std::shared_ptr CzarConfig::create(std::string const& configFileName return _instance; } -std::shared_ptr CzarConfig::instance() { +CzarConfig::Ptr CzarConfig::instance() { std::lock_guard const lock(_mtxOnInstance); if (_instance == nullptr) { throw std::logic_error("CzarConfig::" + std::string(__func__) + ": instance has not been created."); @@ -159,7 +143,7 @@ http::AuthContext CzarConfig::httpAuthContext() const { _replicationAdminAuthKey->getVal()); } -void CzarConfig::setId(qmeta::CzarId id) { +void CzarConfig::setId(CzarId id) { _czarId = id; // Update the relevant section of the JSON-ified configuration. 
_jsonConfig["actual"]["identity"]["id"] = std::to_string(_czarId); diff --git a/src/cconfig/CzarConfig.h b/src/cconfig/CzarConfig.h index 9b05bbe2ee..de58208e36 100644 --- a/src/cconfig/CzarConfig.h +++ b/src/cconfig/CzarConfig.h @@ -34,8 +34,8 @@ #include // Qserv headers +#include "global/intTypes.h" #include "mysql/MySqlConfig.h" -#include "qmeta/types.h" #include "util/ConfigStore.h" #include "util/ConfigValMap.h" @@ -59,6 +59,7 @@ namespace lsst::qserv::cconfig { */ class CzarConfig { public: + using Ptr = std::shared_ptr; /** * Create an instance of CzarConfig and load parameters from the specifid file. * @note One has to call this method at least once before trying to obtain @@ -69,7 +70,7 @@ class CzarConfig { * @param czarName - the unique name of Czar. * @return the shared pointer to the configuration object */ - static std::shared_ptr create(std::string const& configFileName, std::string const& czarName); + static Ptr create(std::string const& configFileName, std::string const& czarName); /** * Get a pointer to an instance that was created by the last call to @@ -77,7 +78,7 @@ class CzarConfig { * @return the shared pointer to the configuration object * @throws std::logic_error when attempting to call the bethod before creating an instance. */ - static std::shared_ptr instance(); + static Ptr instance(); CzarConfig() = delete; CzarConfig(CzarConfig const&) = delete; @@ -114,34 +115,6 @@ class CzarConfig { */ int getInteractiveChunkLimit() const { return _interactiveChunkLimit->getVal(); } - /* Get hostname and port for xrootd manager - * - * "localhost:1094" is the most reasonable default, even though it is - * the wrong choice for all but small developer installations - * - * @return a string containing ":" - */ - std::string const& getXrootdFrontendUrl() const { return _xrootdFrontendUrl->getVal(); } - - /* Get the maximum number of threads for xrootd to use. - * - * @return the maximum number of threads for xrootd to use. 
- */ - int getXrootdCBThreadsMax() const { return _xrootdCBThreadsMax->getVal(); } - - /* Get the initial number of threads for xrootd to create and maintain. - * - * @return the initial number of threads for xrootd to use. - */ - int getXrootdCBThreadsInit() const { return _xrootdCBThreadsInit->getVal(); } - - /* - * @return A value of the "spread" parameter. This may improve a performance - * of xrootd for catalogs with the large number of chunks. The default value - * of this parameter in xrootd is 4. - */ - int getXrootdSpread() const { return _xrootdSpread->getVal(); } - /* Get minimum number of seconds between QMeta chunk completion updates. * * @return seconds between QMeta chunk completion updates. @@ -155,8 +128,16 @@ class CzarConfig { /// Getters for result aggregation options. int getMaxTableSizeMB() const { return _maxTableSizeMB->getVal(); } int getMaxSqlConnectionAttempts() const { return _maxSqlConnectionAttempts->getVal(); } + unsigned int getMaxTransferMemMB() const { return _resultMaxTransferMemMB->getVal(); } + /// Return the transfer directory. This is customizable to allow for a + /// high performance volume. + std::string getTransferDir() const { return _resultTransferDir->getVal(); } - /// The size of the TCP connection pool witin the client API that is used + /// Return the minimum amount of transfer data (in MB) per UberJob to keep in memory. This much + /// transfer data will be stored in memory regardless of other conditions. + unsigned int getTransferMinMBInMem() const { return _resultTransferMinMBInMem->getVal(); } + + /// The size of the TCP connection pool within the client API that is used /// by the merger to pool result files from workers via the HTTP protocol. int getResultMaxHttpConnections() const { return _resultMaxHttpConnections->getVal(); } @@ -194,6 +175,34 @@ class CzarConfig { /// the method then the monitoring will be disabled. 
unsigned int czarStatsUpdateIvalSec() const { return _czarStatsUpdateIvalSec->getVal(); } + /// Maximum number of attempts to run a given job before aborting the entire user query. + unsigned int jobMaxAttempts() const { return _jobMaxAttempts->getVal(); } + + /// A worker is considered fully ALIVE if the last update from the worker has been + /// heard in less than _activeWorkerTimeoutAliveSecs seconds. + int getActiveWorkerTimeoutAliveSecs() const { return _activeWorkerTimeoutAliveSecs->getVal(); } + + /// A worker is considered DEAD if it hasn't been heard from in more than + /// _activeWorkerTimeoutDeadSecs. + int getActiveWorkerTimeoutDeadSecs() const { return _activeWorkerTimeoutDeadSecs->getVal(); } + + /// Max lifetime of a message to be sent to an active worker. If the czar has been + /// trying to send a message to a worker and has failed for this many seconds, + /// it gives up at this point, removing elements of the message to save memory. + int getActiveWorkerMaxLifetimeSecs() const { return _activeWorkerMaxLifetimeSecs->getVal(); } + + /// The maximum number of chunks (basically Jobs) allowed in a single UberJob. + int getUberJobMaxChunks() const { return _uberJobMaxChunks->getVal(); } + + /// Return the maximum number of http connections to use for czar commands. + int getCommandMaxHttpConnections() const { return _commandMaxHttpConnections->getVal(); } + + /// Return the sleep time (in milliseconds) between messages sent to active workers. + int getMonitorSleepTimeMilliSec() const { return _monitorSleepTimeMilliSec->getVal(); } + + /// Return true if family map chunk distribution should depend on chunk size. 
+ bool getFamilyMapUsingChunkSize() const { return _familyMapUsingChunkSize->getVal(); } + // Parameters of the Czar management service std::string const& replicationInstanceId() const { return _replicationInstanceId->getVal(); } @@ -222,14 +231,14 @@ class CzarConfig { std::string const& name() const { return _czarName; } /// @return The unique identifier of Czar. - qmeta::CzarId id() const { return _czarId; } + CzarId id() const { return _czarId; } /// Set a unique identifier of Czar. /// @note In the current implementation of Qserv a value of the identifier is not /// available at a time when the configuration is initialized. The identifier is generated /// when registering Czar by name in a special table of teh Qserv database. /// This logic should be fixed in some future version of Qserv. - void setId(qmeta::CzarId id); + void setId(CzarId id); /// @return The interval in seconds for cleaning up the in-progress queries in QMeta. unsigned int getInProgressCleanupIvalSec() const { return _inProgressCleanupIvalSec->getVal(); } @@ -258,8 +267,7 @@ class CzarConfig { /// The unique identifier of the Czar instance, the real vale cannot be /// acquired until later. Using a crazy initial value in hopes of highlighting /// issues. - /// TODO: Is this really the right place for this? 
(previously undefined) - qmeta::CzarId _czarId = std::numeric_limits::max(); + CzarId _czarId = std::numeric_limits::max(); nlohmann::json _jsonConfig; ///< JSON-ified configuration @@ -293,12 +301,20 @@ class CzarConfig { CVTIntPtr _maxSqlConnectionAttempts = util::ConfigValTInt::create(_configValMap, "resultdb", "maxsqlconnectionattempts", notReq, 10); CVTIntPtr _resultMaxHttpConnections = - util::ConfigValTInt::create(_configValMap, "resultdb", "maxhttpconnections", notReq, 8192); + util::ConfigValTInt::create(_configValMap, "resultdb", "maxhttpconnections", notReq, 2000); CVTIntPtr _oldestResultKeptDays = util::ConfigValTInt::create(_configValMap, "resultdb", "oldestResultKeptDays", notReq, 30); CVTIntPtr _oldestAsyncResultKeptSeconds = util::ConfigValTInt::create( _configValMap, "resultdb", "oldestAsyncResultKeptSeconds", notReq, 3600); + // This must be larger than _maxTableSizeMB when using the "memory" TransferMethod + CVTUIntPtr _resultMaxTransferMemMB = + util::ConfigValTUInt::create(_configValMap, "resultdb", "maxTransferMemMB", notReq, 10000); + CVTStrPtr _resultTransferDir = + util::ConfigValTStr::create(_configValMap, "resultdb", "transferDir", notReq, "/tmp"); + CVTUIntPtr _resultTransferMinMBInMem = + util::ConfigValTUInt::create(_configValMap, "resultdb", "transferMinMBInMem", notReq, 10); + /// Get all the elements in the css section. 
CVTStrPtr _cssTechnology = util::ConfigValTStr::create(_configValMap, "css", "technology", notReq, "mysql"); @@ -334,8 +350,6 @@ class CzarConfig { CVTStrPtr _qstatusDb = util::ConfigValTStr::create(_configValMap, "qstatus", "db", notReq, "qservStatusData"); - CVTStrPtr _xrootdFrontendUrl = - util::ConfigValTStr::create(_configValMap, "frontend", "xrootd", notReq, "localhost:1094"); CVTStrPtr _emptyChunkPath = util::ConfigValTStr::create(_configValMap, "partitioner", "emptyChunkPath", notReq, "."); CVTIntPtr _maxMsgSourceStore = @@ -347,27 +361,27 @@ class CzarConfig { CVTIntPtr _qdispPoolSize = util::ConfigValTInt::create(_configValMap, "qdisppool", "poolSize", notReq, 1000); CVTIntPtr _qdispMaxPriority = - util::ConfigValTInt::create(_configValMap, "qdisppool", "largestPriority", notReq, 2); + util::ConfigValTInt::create(_configValMap, "qdisppool", "largestPriority", notReq, 3); CVTStrPtr _qdispVectRunSizes = - util::ConfigValTStr::create(_configValMap, "qdisppool", "vectRunSizes", notReq, "50:50:50:50"); + util::ConfigValTStr::create(_configValMap, "qdisppool", "vectRunSizes", notReq, "800:800:500:50"); CVTStrPtr _qdispVectMinRunningSizes = - util::ConfigValTStr::create(_configValMap, "qdisppool", "vectMinRunningSizes", notReq, "0:1:3:3"); + util::ConfigValTStr::create(_configValMap, "qdisppool", "vectMinRunningSizes", notReq, "0:3:3:3"); - CVTIntPtr _xrootdSpread = util::ConfigValTInt::create(_configValMap, "tuning", "xrootdSpread", notReq, 4); + // UberJobs + CVTIntPtr _uberJobMaxChunks = + util::ConfigValTInt::create(_configValMap, "uberjob", "maxChunks", notReq, 10000); CVTIntPtr _qMetaSecsBetweenChunkCompletionUpdates = util::ConfigValTInt::create( _configValMap, "tuning", "qMetaSecsBetweenChunkCompletionUpdates", notReq, 60); CVTIntPtr _interactiveChunkLimit = util::ConfigValTInt::create(_configValMap, "tuning", "interactiveChunkLimit", notReq, 10); - CVTIntPtr _xrootdCBThreadsMax = - util::ConfigValTInt::create(_configValMap, "tuning", 
"xrootdCBThreadsMax", notReq, 500); - CVTIntPtr _xrootdCBThreadsInit = - util::ConfigValTInt::create(_configValMap, "tuning", "xrootdCBThreadsInit", notReq, 50); CVTBoolPtr _notifyWorkersOnQueryFinish = util::ConfigValTBool::create(_configValMap, "tuning", "notifyWorkersOnQueryFinish", notReq, 1); CVTBoolPtr _notifyWorkersOnCzarRestart = util::ConfigValTBool::create(_configValMap, "tuning", "notifyWorkersOnCzarRestart", notReq, 1); CVTIntPtr _czarStatsUpdateIvalSec = util::ConfigValTInt::create(_configValMap, "tuning", "czarStatsUpdateIvalSec", notReq, 1); + CVTUIntPtr _jobMaxAttempts = + util::ConfigValTUInt::create(_configValMap, "tuning", "jobMaxAttempts", notReq, 150); // Replicator CVTStrPtr _replicationInstanceId = @@ -391,6 +405,24 @@ class CzarConfig { CVTStrPtr _httpUser = util::ConfigValTStr::create(_configValMap, "http", "user", notReq, ""); CVTStrPtr _httpPassword = util::ConfigValTStr::create(_configValMap, "http", "password", notReq, "", hidden); + + // Active Worker + CVTIntPtr _activeWorkerTimeoutAliveSecs = // 5min + util::ConfigValTInt::create(_configValMap, "activeworker", "timeoutAliveSecs", notReq, 60 * 5); + CVTIntPtr _activeWorkerTimeoutDeadSecs = // 10min + util::ConfigValTInt::create(_configValMap, "activeworker", "timeoutDeadSecs", notReq, 60 * 10); + CVTIntPtr _activeWorkerMaxLifetimeSecs = // 1hr + util::ConfigValTInt::create(_configValMap, "activeworker", "maxLifetimeSecs", notReq, 60 * 60); + CVTIntPtr _monitorSleepTimeMilliSec = util::ConfigValTInt::create( + _configValMap, "activeworker", "monitorSleepTimeMilliSec", notReq, 15'000); + + // FamilyMap + CVTBoolPtr _familyMapUsingChunkSize = + util::ConfigValTBool::create(_configValMap, "familymap", "usingChunkSize", notReq, 0); + + /// This may impact `_resultMaxHttpConnections` as too many connections may cause kernel memory issues. 
+ CVTIntPtr _commandMaxHttpConnections = + util::ConfigValTInt::create(_configValMap, "uberjob", "commandMaxHttpConnections", notReq, 2000); }; } // namespace lsst::qserv::cconfig diff --git a/src/ccontrol/CMakeLists.txt b/src/ccontrol/CMakeLists.txt index 625ccd331a..c1cb26ddb3 100644 --- a/src/ccontrol/CMakeLists.txt +++ b/src/ccontrol/CMakeLists.txt @@ -1,9 +1,7 @@ add_library(ccontrol SHARED) -add_dependencies(ccontrol proto) target_include_directories(ccontrol PRIVATE ${ANTLR4_INCLUDE_DIR} - ${XROOTD_INCLUDE_DIRS} ) target_sources(ccontrol PRIVATE @@ -27,17 +25,14 @@ target_sources(ccontrol PRIVATE target_link_libraries(ccontrol PUBLIC boost_regex cconfig + css + global log parser - replica sphgeom - xrdreq - XrdCl ) -install( - TARGETS ccontrol -) +install(TARGETS ccontrol) FUNCTION(ccontrol_tests) FOREACH(TEST IN ITEMS ${ARGV}) @@ -46,6 +41,7 @@ FUNCTION(ccontrol_tests) cconfig ccontrol czar + global parser qana qdisp @@ -54,7 +50,6 @@ FUNCTION(ccontrol_tests) qmeta query rproc - xrdreq Boost::unit_test_framework Threads::Threads ) diff --git a/src/ccontrol/MergingHandler.cc b/src/ccontrol/MergingHandler.cc index fd272e7904..44115fedd7 100644 --- a/src/ccontrol/MergingHandler.cc +++ b/src/ccontrol/MergingHandler.cc @@ -34,25 +34,23 @@ // Third-party headers #include "curl/curl.h" -#include "XrdCl/XrdClFile.hh" // LSST headers #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include "ccontrol/msgCode.h" #include "global/clock_defs.h" #include "global/debugUtil.h" #include "http/Client.h" #include "http/ClientConnPool.h" #include "http/Method.h" -#include "mysql/CsvBuffer.h" -#include "proto/ProtoHeaderWrap.h" -#include "proto/worker.pb.h" +#include "mysql/CsvMemDisk.h" #include "qdisp/CzarStats.h" #include "qdisp/Executive.h" #include "qdisp/JobQuery.h" -#include "qdisp/QueryRequest.h" +#include "qdisp/UberJob.h" #include "rproc/InfileMerger.h" #include "util/Bug.h" #include "util/common.h" @@ -86,24 +84,9 @@ 
lsst::qserv::TimeCountTracker::CALLBACKFUNC const reportFileRecvRate = } }; -/** - * This exception is used by the merging handler to signal the file reader - * that the query has been ended before the file has been completely read. - * The exception is meant to tell the reader to stop reading the file - * and return control to the caller. - */ -class QueryEnded : public std::runtime_error { -public: - using std::runtime_error::runtime_error; -}; - -/** - * The function for reading result files from workers over the HTTP protocol. - * The function reads the file in chunks and calls the callback function - * for each chunk of data read from the file. - */ -string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, - function const& messageIsReady) { +string readHttpFileAndMerge(lsst::qserv::qdisp::UberJob::Ptr const& uberJob, string const& httpUrl, + size_t fileSize, function const& messageIsReady, + shared_ptr const& httpConnPool) { string const context = "MergingHandler::" + string(__func__) + " "; LOGS(_log, LOG_LVL_DEBUG, context << "httpUrl=" << httpUrl); @@ -119,21 +102,24 @@ string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, size_t offset = 0; try { + auto exec = uberJob->getExecutive(); + if (exec == nullptr || exec->getCancelled()) { + throw runtime_error(context + " query was cancelled"); + } string const noClientData; vector const noClientHeaders; - - http::Client reader(http::Method::GET, httpUrl, noClientData, noClientHeaders, - qdisp::QueryRequest::makeHttpClientConfig(), - qdisp::QueryRequest::getHttpConnPool()); + http::ClientConfig clientConfig; + clientConfig.httpVersion = CURL_HTTP_VERSION_1_1; // same as in qhttp + clientConfig.bufferSize = CURL_MAX_READ_SIZE; // 10 MB in the current version of libcurl + clientConfig.tcpKeepAlive = true; + clientConfig.tcpKeepIdle = 5; // the default is 60 sec + clientConfig.tcpKeepIntvl = 5; // the default is 60 sec + http::Client reader(http::Method::GET, httpUrl, noClientData, 
noClientHeaders, clientConfig, + httpConnPool); // Starts the tracker to measure the performance of the network I/O. transmitRateTracker = make_unique>(reportFileRecvRate); - // This variable is used to track if the file reading was aborted by the callback - // function before the end of the file was reached. Knowing this condition is needed - // for proper error reporting in case the file was not completely read. - bool readAborted = false; - // Start reading the file. The read() method will call the callback function // for each chunk of data read from the file. reader.read([&](char const* inBuf, size_t inBufSize) -> size_t { @@ -143,21 +129,7 @@ string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, transmitRateTracker->addToValue(inBufSize); transmitRateTracker->setSuccess(); transmitRateTracker.reset(); - try { - messageIsReady(inBuf, inBufSize); - } catch (QueryEnded const& ex) { - // This is a normal condition which should be handled gracefully by the algorithm. - LOGS(_log, LOG_LVL_DEBUG, context << ex.what() << ", httpUrl=" << httpUrl); - readAborted = true; - - // Returning a different number of bytes will signal the reader to stop reading - // the file and return control to the caller. The actual value of the returned number - // is not important as long as it is different from the number of bytes that was - // passed in as an argument. - // For more details on the expected behavior of the callback function, - // see http::Client::CallbackType and http::Client::read() documentation. - return inBufSize == 0 ? 1 : 0; - } + messageIsReady(inBuf, inBufSize); offset += inBufSize; // Restart the tracker to measure the reading performance of the next chunk of data. @@ -167,7 +139,8 @@ string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, // the reader to continue reading the file. 
return inBufSize; }); - if (offset != fileSize && !readAborted) { + + if (offset != fileSize) { throw runtime_error(context + "short read"); } } catch (exception const& ex) { @@ -176,6 +149,16 @@ string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, LOGS(_log, LOG_LVL_ERROR, context << errMsg); return errMsg; } + + // Remove the file from the worker if it still exists. Report and ignore errors. + // The files will be garbage-collected by workers. + try { + http::Client remover(http::Method::DELETE, httpUrl); + remover.read([](char const* inBuf, size_t inBufSize) { return inBufSize; }); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, context << "failed to remove " << httpUrl << ", ex: " << ex.what()); + } + return string(); } @@ -183,172 +166,168 @@ string readHttpFileAndMerge(string const& httpUrl, size_t fileSize, namespace lsst::qserv::ccontrol { -MergingHandler::MergingHandler(std::shared_ptr merger, std::string const& tableName) - : _infileMerger{merger}, _tableName{tableName} { - _initState(); -} - -MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_DEBUG, __func__); } - -bool MergingHandler::flush(proto::ResponseSummary const& resp) { - _wName = resp.wname(); - - // This is needed to ensure the job query would be staying alive for the duration - // of the operation to prevent inconsistency within the application. 
- auto const jobQuery = getJobQuery().lock(); - if (jobQuery == nullptr) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, jobQuery was NULL"); - return false; - } - LOGS(_log, LOG_LVL_TRACE, - "MergingHandler::" << __func__ << " jobid=" << resp.jobid() << " transmitsize=" - << resp.transmitsize() << " rowcount=" << resp.rowcount() << " rowSize=" - << " attemptcount=" << resp.attemptcount() << " errorcode=" << resp.errorcode() - << " errormsg=" << resp.errormsg()); - - if (resp.errorcode() != 0 || !resp.errormsg().empty()) { - _error = util::Error(resp.errorcode(), resp.errormsg(), util::ErrorCode::MYSQLEXEC); - _setError(ccontrol::MSG_RESULT_ERROR, _error.getMsg()); - LOGS(_log, LOG_LVL_ERROR, - "MergingHandler::" << __func__ << " error from worker:" << resp.wname() << " error: " << _error); - // This way we can track if the worker has reported this error. The current implementation - // requires the large result size to be reported as an error via the InfileMerger regardless - // of an origin of the error (Czar or the worker). Note that large results can be produced - // by the Czar itself, e.g., when the aggregate result of multiple worker queries is too large - // or by the worker when the result set of a single query is too large. - // The error will be reported to the Czar as a part of the response summary. 
- if (resp.errorcode() == util::ErrorCode::WORKER_RESULT_TOO_LARGE) { - _infileMerger->setResultSizeLimitExceeded(); - } - return false; - } +shared_ptr MergingHandler::_httpConnPool; +mutex MergingHandler::_httpConnPoolMutex; - bool const success = _merge(resp, jobQuery); - if (success) { - _infileMerger->mergeCompleteFor(resp.jobid()); - qdisp::CzarStats::get()->addTotalRowsRecv(resp.rowcount()); - qdisp::CzarStats::get()->addTotalBytesRecv(resp.transmitsize()); +shared_ptr const& MergingHandler::_getHttpConnPool() { + lock_guard const lock(_httpConnPoolMutex); + if (nullptr == _httpConnPool) { + _httpConnPool = make_shared( + cconfig::CzarConfig::instance()->getResultMaxHttpConnections()); } - return success; + return _httpConnPool; } +MergingHandler::MergingHandler(std::shared_ptr const& merger, + std::shared_ptr const& exec) + : _infileMerger(merger), _executive(exec) {} + +MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_TRACE, __func__); } + void MergingHandler::errorFlush(std::string const& msg, int code) { - _setError(code, msg); + _setError(code, util::Error::NONE, msg); // Might want more info from result service. // Do something about the error. FIXME. LOGS(_log, LOG_LVL_ERROR, "Error receiving result."); } -bool MergingHandler::finished() const { return _flushed; } - -bool MergingHandler::reset() { - // If we've pushed any bits to the merger successfully, we have to undo them - // to reset to a fresh state. For now, we will just fail if we've already - // begun merging. If we implement the ability to retract a partial result - // merge, then we can use it and do something better. - if (_flushed) { - return false; // Can't reset if we have already pushed state. - } - _initState(); - return true; -} - std::ostream& MergingHandler::print(std::ostream& os) const { - return os << "MergingRequester(" << _tableName << ", flushed=" << (_flushed ? "true)" : "false)"); + return os << "MergingRequester(flushed=" << (_flushed ? 
"true)" : "false)"); } -void MergingHandler::_initState() { _setError(util::ErrorCode::NONE, string()); } - -bool MergingHandler::_queryIsNoLongerActive(shared_ptr const& jobQuery) const { - // Check if the query got cancelled for any reason. - if (jobQuery->isQueryCancelled()) return true; - - // Check for other indicators that the query may have cancelled or finished. - auto executive = jobQuery->getExecutive(); - if (executive == nullptr || executive->getCancelled() || executive->isLimitRowComplete()) { - return true; +qdisp::MergeEndStatus MergingHandler::_mergeHttp(qdisp::UberJob::Ptr const& uberJob, string const& fileUrl, + uint64_t fileSize) { + if (_flushed) { + throw util::Bug(ERR_LOC, "already flushed"); } - // The final test is to see if any errors have been reported in a context - // of the merger. A presence of errors means that further attempting of merging - // makes no sense. - return !getError().isNone(); -} - -bool MergingHandler::_merge(proto::ResponseSummary const& resp, shared_ptr const& jobQuery) { - if (_flushed) throw util::Bug(ERR_LOC, "already flushed"); - if (resp.transmitsize() == 0) return true; + if (fileSize == 0) return qdisp::MergeEndStatus(true); + auto csvMemDisk = mysql::CsvMemDisk::create(fileSize, uberJob->getQueryId(), uberJob->getUjId()); + _csvMemDisk = csvMemDisk; - // After this final test the job's result processing can't be interrupted. - if (_queryIsNoLongerActive(jobQuery)) return true; + // This must be after setting _csvStream to avoid cancelFileMerge() + // race issues, and it needs to be before the thread starts. + auto exec = uberJob->getExecutive(); + if (exec == nullptr || exec->getCancelled() || exec->isRowLimitComplete()) { + return qdisp::MergeEndStatus(true); + } - // Read from the http stream and push records into the CSV stream in a separate thread. - // Note the fixed capacity of the stream which allows up to 2 records to be buffered - // in the stream. 
This is enough to hide the latency of the HTTP connection and - // the time needed to read the file. - auto csvStream = mysql::CsvStream::create(2); string fileReadErrorMsg; - thread csvThread([&]() { + auto transferFunc = [&]() { size_t bytesRead = 0; fileReadErrorMsg = ::readHttpFileAndMerge( - resp.fileresource_http(), resp.transmitsize(), [&](char const* buf, uint32_t size) { - bool const queryEnded = _queryIsNoLongerActive(jobQuery); + uberJob, fileUrl, fileSize, + [&](char const* buf, uint32_t size) { bool last = false; - if (buf == nullptr || size == 0 || queryEnded) { + if (buf == nullptr || size == 0) { last = true; } else { - bool const closed = !csvStream->push(buf, size); - if (closed) { - throw ::QueryEnded( - "query " + jobQuery->getIdStr() + - " ended while reading the file, bytesRead=" + to_string(bytesRead) + - ", transmitsize=" + to_string(resp.transmitsize())); - } + csvMemDisk->push(buf, size); bytesRead += size; - last = bytesRead >= resp.transmitsize(); + last = bytesRead >= fileSize; } if (last) { - bool const closed = !csvStream->push(nullptr, 0); - if (queryEnded || closed) { - throw ::QueryEnded( - "query " + jobQuery->getIdStr() + - " ended while reading the file, bytesRead=" + to_string(bytesRead) + - ", transmitsize=" + to_string(resp.transmitsize())); - } + csvMemDisk->push(nullptr, 0); } - }); + }, + MergingHandler::_getHttpConnPool()); // Push the stream terminator to indicate the end of the stream. - // It may be neeeded to unblock the table merger which may be still attempting to read + // It may be needed to unblock the table merger which may be still attempting to read // from the CSV stream. if (!fileReadErrorMsg.empty()) { - csvStream->push(nullptr, 0); + csvMemDisk->push(nullptr, 0); } - }); + }; + csvMemDisk->transferDataFromWorker(transferFunc); + if (csvMemDisk->isCancelled()) { + // Since csvMemDisk was cancelled, avoid merging to avoid risks of contamination. 
+ LOGS(_log, LOG_LVL_DEBUG, __func__ << " csvMemDisk cancelled"); + return qdisp::MergeEndStatus(false); + } + + bool mergeOk = _startMerge(); + if (!mergeOk) { + LOGS(_log, LOG_LVL_DEBUG, __func__ << " merge cancelled"); + return qdisp::MergeEndStatus(false); + } // Attempt the actual merge. - bool const fileMergeSuccess = _infileMerger->merge(resp, csvStream); + bool fileMergeSuccess = _infileMerger->mergeHttp(uberJob, fileSize, csvMemDisk); if (!fileMergeSuccess) { LOGS(_log, LOG_LVL_WARN, __func__ << " merge failed"); util::Error const& err = _infileMerger->getError(); - _setError(ccontrol::MSG_RESULT_ERROR, err.getMsg()); + _setError(ccontrol::MSG_RESULT_ERROR, util::Error::RESULT_IMPORT, err.getMsg()); + } + if (csvMemDisk->getContaminated()) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " merge stream contaminated"); + fileMergeSuccess = false; + _setError(ccontrol::MSG_RESULT_ERROR, util::Error::RESULT_IMPORT, "merge stream contaminated"); } - // Close the stream to unblock the file reader thread if it is still waiting - // for the stream to be consumed. - csvStream->close(); - csvThread.join(); if (!fileReadErrorMsg.empty()) { LOGS(_log, LOG_LVL_WARN, __func__ << " result file read failed"); - _setError(ccontrol::MSG_HTTP_RESULT, fileReadErrorMsg); + _setError(ccontrol::MSG_HTTP_RESULT, util::Error::RESULT_IMPORT, fileReadErrorMsg); } _flushed = true; - return fileMergeSuccess && fileReadErrorMsg.empty(); + + qdisp::MergeEndStatus mergeEStatus(fileMergeSuccess && fileReadErrorMsg.empty()); + if (!mergeEStatus.success) { + // This error check needs to come after the csvThread.join() to ensure writing + // is finished. If any bytes were written, the result table is ruined. 
+ mergeEStatus.contaminated = csvMemDisk->getBytesFetched() > 0; + } + + return mergeEStatus; +} + +bool MergingHandler::cancelFileMerge() { + lock_guard mergeStateLock(_mergeStateMtx); + if (_mergeState == PREMERGE || _mergeState == CANCELLED) { + _mergeState = CANCELLED; + auto csvStrm = _csvMemDisk.lock(); + if (csvStrm != nullptr) { + csvStrm->cancel(); + } + // Merging to the result table hasn't been started, so + // this can be cancelled. + return true; + } + // Cancelling at this point would probably corrupt the result table. + return false; +} + +bool MergingHandler::_startMerge() { + lock_guard mergeStateLock(_mergeStateMtx); + if (_mergeState == PREMERGE) { + _mergeState = MERGING; + // Merging hasn't been cancelled, so it's ok to start. + return true; + } + // Merge was cancelled. + return false; } -void MergingHandler::_setError(int code, std::string const& msg) { - LOGS(_log, LOG_LVL_DEBUG, "_setErr: code: " << code << ", message: " << msg); - std::lock_guard lock(_errorMutex); - _error = Error(code, msg); +void MergingHandler::_setError(int code, int subError, std::string const& msg) { + LOGS(_log, LOG_LVL_DEBUG, "_setError: code: " << code << ", message: " << msg); + auto exec = _executive.lock(); + if (exec == nullptr) return; + exec->addMultiError(code, subError, msg, true); +} + +qdisp::MergeEndStatus MergingHandler::flushHttp(string const& fileUrl, uint64_t fileSize) { + // This is needed to ensure the job query would be staying alive for the duration + // of the operation to prevent inconsistency within the application. 
+ auto const uberJob = getUberJob().lock(); + if (uberJob == nullptr) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, uberJob was NULL"); + return qdisp::MergeEndStatus(false); + } + + LOGS(_log, LOG_LVL_TRACE, + "MergingHandler::" << __func__ << " uberJob=" << uberJob->getIdStr() << " fileUrl=" << fileUrl); + + qdisp::MergeEndStatus mergeStatus = _mergeHttp(uberJob, fileUrl, fileSize); + return mergeStatus; } } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/MergingHandler.h b/src/ccontrol/MergingHandler.h index a8f37d1d42..f57f1d7d1b 100644 --- a/src/ccontrol/MergingHandler.h +++ b/src/ccontrol/MergingHandler.h @@ -32,14 +32,19 @@ #include "qdisp/ResponseHandler.h" // Forward declarations +namespace lsst::qserv::http { +class ClientConnPool; +} // namespace lsst::qserv::http -namespace lsst::qserv::proto { -class ResponseData; -class ResponseSummary; -} // namespace lsst::qserv::proto +namespace lsst::qserv::mysql { +class CsvMemDisk; +} // namespace lsst::qserv::mysql namespace lsst::qserv::qdisp { +class Executive; class JobQuery; +class MergeEndStatus; +class UberJob; } // namespace lsst::qserv::qdisp namespace lsst::qserv::rproc { @@ -49,66 +54,70 @@ class InfileMerger; namespace lsst::qserv::ccontrol { /// MergingHandler is an implementation of a ResponseHandler that implements -/// czar-side knowledge of the worker's response protocol. It leverages XrdSsi's -/// API by pulling the exact number of bytes needed for the next logical -/// fragment instead of performing buffer size and offset -/// management. Fully-constructed protocol messages are then passed towards an -/// InfileMerger. -/// Do to the way the code works, MerginHandler is effectively single threaded. -/// The worker can only send the data for this job back over a single channel -/// and it can only send one transmit on that channel at a time. +/// czar-side knowledge of the worker's response protocol. 
+/// The czar collects a result file from the worker and merges that into +/// the query result table. class MergingHandler : public qdisp::ResponseHandler { public: typedef std::shared_ptr Ptr; + + enum MergeState { PREMERGE, MERGING, CANCELLED }; + virtual ~MergingHandler(); /// @param merger downstream merge acceptor - /// @param tableName target table for incoming data - MergingHandler(std::shared_ptr merger, std::string const& tableName); + MergingHandler(std::shared_ptr const& merger, + std::shared_ptr const& exec); - /// Process the response and read the result file if no error was reported by a worker. - /// @return true if successful (no error) - bool flush(proto::ResponseSummary const& resp) override; + /// @see ResponseHandler::flushHttp + /// @see MerginHandler::_mergeHttp + /// @see qdisp::MergeEndStatus + qdisp::MergeEndStatus flushHttp(std::string const& fileUrl, std::uint64_t fileSize) override; /// Signal an unrecoverable error condition. No further calls are expected. void errorFlush(std::string const& msg, int code) override; - /// @return true if the receiver has completed its duties. - bool finished() const override; - - bool reset() override; ///< Reset the state that a request can be retried. + /// Stop an ongoing file merge, if possible. + /// @return true if the merge was cancelled. + bool cancelFileMerge() override; /// Print a string representation of the receiver to an ostream std::ostream& print(std::ostream& os) const override; - /// @return an error code and description - Error getError() const override { - std::lock_guard lock(_errorMutex); - return _error; - } - private: - /// Prepare for first call to flush(). - void _initState(); - - bool _merge(proto::ResponseSummary const& resp, std::shared_ptr const& jobQuery); + /// Call InfileMerger to do the work of merging this data to the result. 
+ qdisp::MergeEndStatus _mergeHttp(std::shared_ptr const& uberJob, + std::string const& fileUrl, std::uint64_t fileSize); /// Set error code and string. - void _setError(int code, std::string const& msg); - - /// Check if the query is no longer active. - /// This is used to prevent the query from being processed after it has been cancelled - /// or finished for any reason. - /// @param jobQuery the query to check - /// @return true if the query is no longer active - bool _queryIsNoLongerActive(std::shared_ptr const& jobQuery) const; + /// @see `util::Error` for information on parameters. + void _setError(int code, int subCode, std::string const& msg); + + /// Return true if merging should be started and set _mergeState to MERGING. + /// This should only be called once after the file has been collected and + /// before merging with the result table starts. + bool _startMerge(); + + // All instances of the HTTP client class are members of the same pool. This allows + // connection reuse and a significant reduction of the kernel memory pressure. + // Note that the pool gets instantiated at the very first call to method _getHttpConnPool() + // because the instantiation depends on the availability of the Czar configuration. + static std::shared_ptr const& _getHttpConnPool(); + static std::shared_ptr _httpConnPool; + static std::mutex _httpConnPoolMutex; std::shared_ptr _infileMerger; ///< Merging delegate - std::string _tableName; ///< Target table name - Error _error; ///< Error description - mutable std::mutex _errorMutex; ///< Protect readers from partial updates + std::atomic _errorSet{false}; ///< Set to true when an error is set. bool _flushed{false}; ///< flushed to InfileMerger? std::string _wName{"~"}; ///< worker name + + std::weak_ptr _executive; ///< Weak pointer to the executive for errors. + std::weak_ptr _csvMemDisk; ///< Weak pointer to cancel infile merge. 
+ + /// Indicates merge state of the result table relating to the UberJob associated with + /// instance of MergingHandler. + MergeState _mergeState = PREMERGE; + std::mutex _mergeStateMtx; ///< Protectes _mergeState }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQuery.h b/src/ccontrol/UserQuery.h index 1989916884..1cd553e73f 100644 --- a/src/ccontrol/UserQuery.h +++ b/src/ccontrol/UserQuery.h @@ -39,12 +39,11 @@ // Qserv headers #include "ccontrol/QueryState.h" #include "global/intTypes.h" -#include "qmeta/types.h" // Forward decl -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { class MessageStore; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta namespace lsst::qserv::ccontrol { @@ -74,7 +73,7 @@ class UserQuery { virtual void discard() = 0; // Delegate objects - virtual std::shared_ptr getMessageStore() = 0; + virtual std::shared_ptr getMessageStore() = 0; /// This method should disappear when we start supporting results /// in locations other than MySQL tables. 
We'll switch to getResultLocation() diff --git a/src/ccontrol/UserQueryAsyncResult.cc b/src/ccontrol/UserQueryAsyncResult.cc index a20f615c9f..00a3a772f6 100644 --- a/src/ccontrol/UserQueryAsyncResult.cc +++ b/src/ccontrol/UserQueryAsyncResult.cc @@ -29,26 +29,29 @@ // Qserv headers #include "cconfig/CzarConfig.h" #include "qmeta/Exceptions.h" +#include "qmeta/JobStatus.h" #include "qmeta/QMeta.h" -#include "qdisp/JobStatus.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" #include "sql/SqlResults.h" +using namespace std; + namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQueryAsyncResult"); } namespace lsst::qserv::ccontrol { -UserQueryAsyncResult::UserQueryAsyncResult(QueryId queryId, qmeta::CzarId czarId, +// Constructors +UserQueryAsyncResult::UserQueryAsyncResult(QueryId queryId, CzarId czarId, std::shared_ptr const& qMeta) : UserQuery(), _queryId(queryId), _czarId(czarId), _qMeta(qMeta), - _messageStore(std::make_shared()) { + _messageStore(std::make_shared()) { LOGS(_log, LOG_LVL_DEBUG, "UserQueryAsyncResult: QID=" << queryId); // get query info from QMeta @@ -75,15 +78,17 @@ void UserQueryAsyncResult::submit() { // if there are messages already it means the error was detected, stop right here if (_messageStore->messageCount() > 0) { + LOGS(_log, LOG_LVL_WARN, + "UserQueryAsyncResult::" << __func__ + << " submit giving up, messageCount=" << _messageStore->messageCount()); return; } // Presently we cannot return query results that originated from different czar if (_qInfo.czarId() != _czarId) { - // TODO: tell user which czar was it? 
- std::string message = "Query originated from different czar"; - _messageStore->addErrorMessage("SYSTEM", message); - return; + LOGS(_log, LOG_LVL_WARN, + "UserQueryAsyncResult::submit Query originated from different czar=" << _qInfo.czarId() + << " expected=" << _czarId); } // TODO: check user name, does not matter now as we are not keeping tack of users. @@ -112,58 +117,53 @@ void UserQueryAsyncResult::submit() { auto const czarConfig = cconfig::CzarConfig::instance(); auto const resultDbConn = sql::SqlConnectionFactory::make(czarConfig->getMySqlResultConfig()); sql::SqlErrorObject sqlErrObj; - if (!resultDbConn->tableExists(_qInfo.msgTableName(), sqlErrObj) or - !resultDbConn->tableExists(resultTableName, sqlErrObj)) { - std::string message = "Result or message table does not exist, result is likely expired."; - LOGS(_log, LOG_LVL_DEBUG, message); - _messageStore->addErrorMessage("SYSTEM", message); - return; - } - // all checks are OK, copy message table from original query - // into the message store, at this point original result table must be unlocked - std::string query = "SELECT chunkId, code, message, severity, timeStamp FROM " + _qInfo.msgTableName(); - sql::SqlResults sqlResults; - if (!resultDbConn->runQuery(query, sqlResults, sqlErrObj)) { - LOGS(_log, LOG_LVL_ERROR, "Failed to retrieve message table data: " << sqlErrObj.errMsg()); - std::string message = "Failed to retrieve message table data."; - _messageStore->addErrorMessage("SYSTEM_SQL", message); + if (!resultDbConn->tableExists(resultTableName, sqlErrObj)) { + string message = "Result table does not exist, result is likely expired."; + LOGS(_log, LOG_LVL_INFO, + message << " msgTable=" << _qInfo.msgTableName() << " resultTable=" << resultTableName); + _messageStore->addErrorMessage("SYSTEM", message); return; } - // copy messages - int count = 0; - for (auto&& row : sqlResults) { - try { - int chunkId = boost::lexical_cast(row[0].first); - int code = boost::lexical_cast(row[1].first); - 
std::string message = row[2].first; - std::string sevStr = row[3].first; - int64_t timestampMilli = boost::lexical_cast(row[4].first); - MessageSeverity sev = sevStr == "INFO" ? MSG_INFO : MSG_ERROR; - qdisp::JobStatus::Clock::duration duration = std::chrono::milliseconds(timestampMilli); - qdisp::JobStatus::TimeType timestamp(duration); - _messageStore->addMessage(chunkId, "DUPLICATE", code, message, sev, timestamp); - } catch (std::exception const& exc) { - LOGS(_log, LOG_LVL_ERROR, "Error reading message table data: " << exc.what()); - std::string message = "Error reading message table data."; - _messageStore->addErrorMessage("SYSTEM", message); + if (resultDbConn->tableExists(_qInfo.msgTableName(), sqlErrObj)) { + // all checks are OK, copy message table from original query + // into the message store, at this point original result table must be unlocked + std::string query = + "SELECT chunkId, code, message, severity, timeStamp FROM " + _qInfo.msgTableName(); + sql::SqlResults sqlResults; + if (!resultDbConn->runQuery(query, sqlResults, sqlErrObj)) { + LOGS(_log, LOG_LVL_ERROR, "Failed to retrieve message table data: " << sqlErrObj.errMsg()); + std::string message = "Failed to retrieve message table data."; + _messageStore->addErrorMessage("SYSTEM_SQL", message); return; } - ++count; - } - LOGS(_log, LOG_LVL_DEBUG, "Copied " << count << " messages from " << _qInfo.msgTableName()); - - // Original message table is not useful any more because the result table - // will be deleted by proxy anyways. Until we have better lifetime management - // of results I'm going to drop this table now, meaning result can be only - // retrieved once. - query = "DROP TABLE " + _qInfo.msgTableName(); - if (!resultDbConn->runQuery(query, sqlErrObj)) { - LOGS(_log, LOG_LVL_ERROR, "Failed to drop message table: " << sqlErrObj.errMsg()); - // Users do not care about this error, so don't send it upstream. 
+ + // copy messages + int count = 0; + for (auto&& row : sqlResults) { + try { + int chunkId = boost::lexical_cast(row[0].first); + int code = boost::lexical_cast(row[1].first); + std::string message = row[2].first; + std::string sevStr = row[3].first; + int64_t timestampMilli = boost::lexical_cast(row[4].first); + MessageSeverity sev = sevStr == "INFO" ? MSG_INFO : MSG_ERROR; + qmeta::JobStatus::Clock::duration duration = std::chrono::milliseconds(timestampMilli); + qmeta::JobStatus::TimeType timestamp(duration); + _messageStore->addMessage(chunkId, "SYSTEM", code, message, sev, timestamp); + } catch (std::exception const& exc) { + LOGS(_log, LOG_LVL_ERROR, "Error reading message table data: " << exc.what()); + std::string message = "Error reading message table data."; + _messageStore->addErrorMessage("SYSTEM", message); + return; + } + ++count; + } + LOGS(_log, LOG_LVL_DEBUG, "Copied " << count << " messages from " << _qInfo.msgTableName()); } else { - LOGS(_log, LOG_LVL_DEBUG, "Deleted message table " << _qInfo.msgTableName()); + LOGS(_log, LOG_LVL_WARN, + "Message table " << _qInfo.msgTableName() << " does not exist, skipping message copy"); } // done diff --git a/src/ccontrol/UserQueryAsyncResult.h b/src/ccontrol/UserQueryAsyncResult.h index 84b5f407f3..227111b547 100644 --- a/src/ccontrol/UserQueryAsyncResult.h +++ b/src/ccontrol/UserQueryAsyncResult.h @@ -29,17 +29,13 @@ // Qserv headers #include "ccontrol/UserQuery.h" #include "qmeta/QInfo.h" -#include "qmeta/types.h" // Forward declarations -namespace lsst::qserv::qdisp { -class MessageStore; -} - namespace lsst::qserv::qmeta { +class MessageStore; class QMeta; -} +} // namespace lsst::qserv::qmeta // This header declarations @@ -62,7 +58,7 @@ class UserQueryAsyncResult : public UserQuery { * @param czarId ID for current czar * @param qMeta QMeta instance */ - UserQueryAsyncResult(QueryId queryId, qmeta::CzarId czarId, std::shared_ptr const& qMeta); + UserQueryAsyncResult(QueryId queryId, CzarId czarId, 
std::shared_ptr const& qMeta); UserQueryAsyncResult(UserQueryAsyncResult const&) = delete; UserQueryAsyncResult& operator=(UserQueryAsyncResult const&) = delete; @@ -87,7 +83,7 @@ class UserQueryAsyncResult : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// This method should disappear when we start supporting results /// in locations other than MySQL tables. We'll switch to getResultLocation() @@ -105,10 +101,10 @@ class UserQueryAsyncResult : public UserQuery { private: QueryId _queryId; - qmeta::CzarId _czarId; + CzarId _czarId; std::shared_ptr _qMeta; qmeta::QInfo _qInfo; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; QueryState _qState = UNKNOWN; }; diff --git a/src/ccontrol/UserQueryFactory.cc b/src/ccontrol/UserQueryFactory.cc index 28479d122b..3ede5d752c 100644 --- a/src/ccontrol/UserQueryFactory.cc +++ b/src/ccontrol/UserQueryFactory.cc @@ -51,10 +51,11 @@ #include "ccontrol/UserQueryType.h" #include "css/CssAccess.h" #include "css/KvInterfaceImplMem.h" +#include "czar/Czar.h" #include "mysql/MySqlConfig.h" #include "parser/ParseException.h" #include "qdisp/Executive.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMetaMysql.h" #include "qmeta/QMetaSelect.h" #include "qmeta/QProgress.h" @@ -67,6 +68,7 @@ #include "rproc/InfileMerger.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" +#include "util/QdispPool.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQueryFactory"); @@ -222,12 +224,9 @@ std::shared_ptr makeUserQuerySharedResources( //////////////////////////////////////////////////////////////////////// UserQueryFactory::UserQueryFactory(qproc::DatabaseModels::Ptr const& dbModels, std::string const& czarName) : _userQuerySharedResources(makeUserQuerySharedResources(dbModels, czarName)), + 
_qmetaSecondsBetweenUpdates(cconfig::CzarConfig::instance()->getQMetaSecondsBetweenChunkUpdates()), _useQservRowCounterOptimization(true), _asioIoService() { - auto const czarConfig = cconfig::CzarConfig::instance(); - _executiveConfig = std::make_shared( - czarConfig->getXrootdFrontendUrl(), czarConfig->getQMetaSecondsBetweenChunkUpdates()); - // When czar crashes/exits while some queries are still in flight they // are left in EXECUTING state in QMeta. We want to cleanup that state // to avoid confusion. Note that when/if clean czar restart is implemented @@ -235,7 +234,7 @@ UserQueryFactory::UserQueryFactory(qproc::DatabaseModels::Ptr const& dbModels, s _userQuerySharedResources->queryMetadata->cleanupQueriesAtStart(_userQuerySharedResources->czarId); // Add logging context with czar ID - qmeta::CzarId czarId = _userQuerySharedResources->czarId; + auto const czarId = _userQuerySharedResources->czarId; LOG_MDC_INIT([czarId]() { LOG_MDC("CZID", std::to_string(czarId)); }); // BOOST ASIO service is started to process asynchronous timer requests @@ -258,7 +257,7 @@ UserQueryFactory::~UserQueryFactory() { } UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::string const& defaultDb, - qdisp::SharedResources::Ptr const& qdispSharedResources, + util::QdispPool::Ptr const& qdispPool, std::string const& userQueryId, std::string const& msgTableName, std::string const& resultDb) { // result location could potentially be specified by SUBMIT command, for now @@ -298,6 +297,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st } auto stmt = parser->getSelectStmt(); + std::lock_guard focatoryLock(_factoryMtx); // handle special database/table names if (_stmtRefersToProcessListTable(stmt, defaultDb)) { return _makeUserQueryProcessList(stmt, _userQuerySharedResources, userQueryId, resultDb, aQuery, @@ -332,7 +332,6 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st } // This is a regular 
SELECT for qserv - // Currently using the database for results to get schema information. auto qs = std::make_shared(_userQuerySharedResources->css, _userQuerySharedResources->databaseModels, defaultDb, @@ -351,11 +350,11 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st sessionValid = false; } - auto messageStore = std::make_shared(); + auto messageStore = std::make_shared(); std::shared_ptr executive; std::shared_ptr infileMergerConfig; if (sessionValid) { - executive = qdisp::Executive::create(*_executiveConfig, messageStore, qdispSharedResources, + executive = qdisp::Executive::create(_qmetaSecondsBetweenUpdates, messageStore, qdispPool, _userQuerySharedResources->queryProgress, _userQuerySharedResources->queryProgressHistory, qs, _asioIoService); @@ -364,25 +363,32 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st infileMergerConfig->debugNoMerge = _debugNoMerge; } + auto czarConfig = cconfig::CzarConfig::instance(); + int uberJobMaxChunks = czarConfig->getUberJobMaxChunks(); + // This, effectively invalid, UserQuerySelect object should report errors from both `errorExtra` // and errors that the QuerySession `qs` has stored internally. 
auto uq = std::make_shared( qs, messageStore, executive, _userQuerySharedResources->databaseModels, infileMergerConfig, _userQuerySharedResources->secondaryIndex, _userQuerySharedResources->queryMetadata, _userQuerySharedResources->queryProgress, _userQuerySharedResources->czarId, errorExtra, - async, resultDb); + async, resultDb, uberJobMaxChunks); + if (sessionValid) { uq->qMetaRegister(resultLocation, msgTableName); uq->setupMerger(); uq->saveResultQuery(); + executive->setUserQuerySelect(uq); } return uq; } else if (UserQueryType::isSelectResult(query, userJobId)) { + std::lock_guard factoryLock(_factoryMtx); auto uq = std::make_shared(userJobId, _userQuerySharedResources->czarId, _userQuerySharedResources->queryMetadata); LOGS(_log, LOG_LVL_DEBUG, "make UserQueryAsyncResult: userJobId=" << userJobId); return uq; } else if (UserQueryType::isShowProcessList(query, full)) { + std::lock_guard factoryLock(_factoryMtx); LOGS(_log, LOG_LVL_DEBUG, "make UserQueryProcessList: full=" << (full ? 
'y' : 'n')); try { return std::make_shared(full, _userQuerySharedResources->qMetaSelect, @@ -392,6 +398,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st return std::make_shared(exc.what()); } } else if (UserQueryType::isCall(query)) { + std::lock_guard factoryLock(_factoryMtx); auto parser = std::make_shared( query, _userQuerySharedResources->makeUserQueryResources(userQueryId, resultDb)); return parser->getUserQuery(); @@ -403,6 +410,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st return std::make_shared(std::string("ParseException:") + e.what()); } auto uq = parser->getUserQuery(); + std::lock_guard factoryLock(_factoryMtx); auto setQuery = std::static_pointer_cast(uq); if (setQuery->varName() == "QSERV_ROW_COUNTER_OPTIMIZATION") { _useQservRowCounterOptimization = setQuery->varValue() != "0"; @@ -414,6 +422,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st } return uq; } else { + std::lock_guard factoryLock(_factoryMtx); // something that we don't recognize auto uq = std::make_shared("Invalid or unsupported query: " + query); return uq; diff --git a/src/ccontrol/UserQueryFactory.h b/src/ccontrol/UserQueryFactory.h index a467ea07ad..35cecbdce7 100644 --- a/src/ccontrol/UserQueryFactory.h +++ b/src/ccontrol/UserQueryFactory.h @@ -42,7 +42,7 @@ // Local headers #include "global/stringTypes.h" -#include "qdisp/SharedResources.h" +#include "util/QdispPool.h" namespace lsst::qserv::ccontrol { class UserQuery; @@ -82,7 +82,7 @@ class UserQueryFactory : private boost::noncopyable { /// @param msgTableName: Name of the message table without database name. 
/// @return new UserQuery object std::shared_ptr newUserQuery(std::string const& query, std::string const& defaultDb, - qdisp::SharedResources::Ptr const& qdispSharedResources, + std::shared_ptr const& qdispPool, std::string const& userQueryId, std::string const& msgTableName, std::string const& resultDb); @@ -92,7 +92,7 @@ class UserQueryFactory : private boost::noncopyable { private: std::shared_ptr _userQuerySharedResources; - std::shared_ptr _executiveConfig; + int _qmetaSecondsBetweenUpdates; ///< Seconds between qmeta updates. bool _useQservRowCounterOptimization; bool _debugNoMerge = false; // BOOST ASIO service is started to process asynchronous timer requests @@ -103,6 +103,13 @@ class UserQueryFactory : private boost::noncopyable { boost::asio::io_service _asioIoService; std::unique_ptr _asioWork; std::unique_ptr _asioTimerThread; + + /// This protects the CSS calls inside qs->analyzeQuery(query, stmt); as well + /// as some changes UserQueries may be making to databases. + /// TODO: It would be safer to have CSS be thread safe. + /// TODO: Go through all of the affected database interactions and make sure + /// they are thread safe without this mutex. 
+ std::mutex _factoryMtx; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQueryInvalid.h b/src/ccontrol/UserQueryInvalid.h index 3296bf47f7..1c910c3d81 100644 --- a/src/ccontrol/UserQueryInvalid.h +++ b/src/ccontrol/UserQueryInvalid.h @@ -32,8 +32,7 @@ // Qserv headers #include "ccontrol/UserQuery.h" -#include "qdisp/MessageStore.h" -#include "qmeta/types.h" +#include "qmeta/MessageStore.h" // Forward decl @@ -44,7 +43,7 @@ namespace lsst::qserv::ccontrol { class UserQueryInvalid : public UserQuery { public: UserQueryInvalid(std::string const& message) - : _message(message), _messageStore(std::make_shared()) {} + : _message(message), _messageStore(std::make_shared()) {} UserQueryInvalid(UserQueryInvalid const&) = delete; UserQueryInvalid& operator=(UserQueryInvalid const&) = delete; @@ -69,11 +68,11 @@ class UserQueryInvalid : public UserQuery { virtual void discard() override {} // Delegate objects - virtual std::shared_ptr getMessageStore() override { return _messageStore; } + virtual std::shared_ptr getMessageStore() override { return _messageStore; } private: std::string const _message; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQueryProcessList.cc b/src/ccontrol/UserQueryProcessList.cc index d83919dc30..637835b41e 100644 --- a/src/ccontrol/UserQueryProcessList.cc +++ b/src/ccontrol/UserQueryProcessList.cc @@ -36,7 +36,7 @@ #include "css/CssAccess.h" #include "css/CssError.h" #include "cconfig/CzarConfig.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/Exceptions.h" #include "qmeta/QMetaSelect.h" #include "query/FromList.h" @@ -64,11 +64,11 @@ namespace lsst::qserv::ccontrol { // Constructor UserQueryProcessList::UserQueryProcessList(std::shared_ptr const& statement, std::shared_ptr const& qMetaSelect, - qmeta::CzarId czarId, std::string const& userQueryId, + CzarId czarId, std::string const& userQueryId, 
std::string const& resultDb) : _qMetaSelect(qMetaSelect), _czarId(czarId), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _resultDb(resultDb) { // The SQL statement should be mostly OK alredy but we need to change @@ -91,11 +91,11 @@ UserQueryProcessList::UserQueryProcessList(std::shared_ptr co } UserQueryProcessList::UserQueryProcessList(bool full, std::shared_ptr const& qMetaSelect, - qmeta::CzarId czarId, std::string const& userQueryId, + CzarId czarId, std::string const& userQueryId, std::string const& resultDb) : _qMetaSelect(qMetaSelect), _czarId(czarId), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _resultDb(resultDb) { _query = "SELECT `qi`.`queryId` `ID`,`qi`.`qType` `TYPE`,`qc`.`czar` `CZAR`,`qc`.`czarId` `CZAR_ID`," diff --git a/src/ccontrol/UserQueryProcessList.h b/src/ccontrol/UserQueryProcessList.h index 67ac9cf058..a16218f4aa 100644 --- a/src/ccontrol/UserQueryProcessList.h +++ b/src/ccontrol/UserQueryProcessList.h @@ -33,7 +33,6 @@ // Qserv headers #include "ccontrol/UserQuery.h" #include "qmeta/QMetaSelect.h" -#include "qmeta/types.h" // Forward decl namespace lsst::qserv::qmeta { @@ -58,7 +57,7 @@ class UserQueryProcessList : public UserQuery { * @param userQueryId Unique string identifying query */ UserQueryProcessList(std::shared_ptr const& statement, - std::shared_ptr const& qMetaSelect, qmeta::CzarId czarId, + std::shared_ptr const& qMetaSelect, CzarId czarId, std::string const& userQueryId, std::string const& resultDb); /** @@ -69,8 +68,8 @@ class UserQueryProcessList : public UserQuery { * @param czarId Czar ID for QMeta queries * @param userQueryId Unique string identifying query */ - UserQueryProcessList(bool full, std::shared_ptr const& qMetaSelect, - qmeta::CzarId czarId, std::string const& userQueryId, std::string const& resultDb); + UserQueryProcessList(bool full, 
std::shared_ptr const& qMetaSelect, CzarId czarId, + std::string const& userQueryId, std::string const& resultDb); UserQueryProcessList(UserQueryProcessList const&) = delete; UserQueryProcessList& operator=(UserQueryProcessList const&) = delete; @@ -95,7 +94,7 @@ class UserQueryProcessList : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// @return Name of the result table for this query, can be empty std::string getResultTableName() const override { return _resultTableName; } @@ -111,9 +110,9 @@ class UserQueryProcessList : public UserQuery { std::string _getResultOrderBy() const { return _orderBy; } std::shared_ptr _qMetaSelect; - qmeta::CzarId const _czarId; ///< Czar ID in QMeta database + CzarId const _czarId; QueryState _qState = UNKNOWN; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::string _resultTableName; std::string _query; ///< query to execute on QMeta database std::string _orderBy; diff --git a/src/ccontrol/UserQueryQueries.cc b/src/ccontrol/UserQueryQueries.cc index 753d277ded..1b948a7f91 100644 --- a/src/ccontrol/UserQueryQueries.cc +++ b/src/ccontrol/UserQueryQueries.cc @@ -36,7 +36,7 @@ #include "cconfig/CzarConfig.h" #include "css/CssAccess.h" #include "css/CssError.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/Exceptions.h" #include "qmeta/QMetaSelect.h" #include "query/FromList.h" @@ -63,12 +63,11 @@ namespace lsst::qserv::ccontrol { // Constructor UserQueryQueries::UserQueryQueries(std::shared_ptr const& statement, - std::shared_ptr const& qMetaSelect, - qmeta::CzarId czarId, std::string const& userQueryId, - std::string const& resultDb) + std::shared_ptr const& qMetaSelect, CzarId czarId, + std::string const& userQueryId, std::string const& resultDb) : _qMetaSelect(qMetaSelect), _czarId(czarId), - 
_messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _resultDb(resultDb) { // The SQL statement should be mostly OK alredy but we need to change diff --git a/src/ccontrol/UserQueryQueries.h b/src/ccontrol/UserQueryQueries.h index 5318ee2afa..b762cd76eb 100644 --- a/src/ccontrol/UserQueryQueries.h +++ b/src/ccontrol/UserQueryQueries.h @@ -33,7 +33,6 @@ // Qserv headers #include "ccontrol/UserQuery.h" #include "qmeta/QMetaSelect.h" -#include "qmeta/types.h" // Forward decl namespace lsst::qserv::qmeta { @@ -58,7 +57,7 @@ class UserQueryQueries : public UserQuery { * @param userQueryId Unique string identifying query */ UserQueryQueries(std::shared_ptr const& statement, - std::shared_ptr const& qMetaSelect, qmeta::CzarId czarId, + std::shared_ptr const& qMetaSelect, CzarId czarId, std::string const& userQueryId, std::string const& resultDb); UserQueryQueries(UserQueryQueries const&) = delete; @@ -84,7 +83,7 @@ class UserQueryQueries : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// @return Name of the result table for this query, can be empty std::string getResultTableName() const override { return _resultTableName; } @@ -100,9 +99,9 @@ class UserQueryQueries : public UserQuery { std::string _getResultOrderBy() const { return _orderBy; } std::shared_ptr _qMetaSelect; - qmeta::CzarId const _czarId; ///< Czar ID in QMeta database + CzarId const _czarId; QueryState _qState = UNKNOWN; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::string _resultTableName; std::string _query; ///< query to execute on QMeta database std::string _orderBy; diff --git a/src/ccontrol/UserQueryResources.cc b/src/ccontrol/UserQueryResources.cc index aced1baeb7..1becf418bc 100644 --- a/src/ccontrol/UserQueryResources.cc +++ 
b/src/ccontrol/UserQueryResources.cc @@ -28,6 +28,7 @@ #include "cconfig/CzarConfig.h" #include "qmeta/QMeta.h" #include "qmeta/QProgress.h" + namespace lsst::qserv::ccontrol { UserQuerySharedResources::UserQuerySharedResources( diff --git a/src/ccontrol/UserQueryResources.h b/src/ccontrol/UserQueryResources.h index c325662e33..29f1e42a4c 100644 --- a/src/ccontrol/UserQueryResources.h +++ b/src/ccontrol/UserQueryResources.h @@ -27,7 +27,7 @@ #include #include -#include "qmeta/types.h" +#include "global/intTypes.h" #include "mysql/MySqlConfig.h" namespace lsst::qserv::ccontrol { @@ -83,7 +83,7 @@ class UserQuerySharedResources { std::shared_ptr queryProgressHistory; std::shared_ptr qMetaSelect; std::shared_ptr databaseModels; - qmeta::CzarId czarId; ///< Czar ID in QMeta database + CzarId czarId; int const interactiveChunkLimit; /** diff --git a/src/ccontrol/UserQueryResultDelete.cc b/src/ccontrol/UserQueryResultDelete.cc index 51024329b7..f798b91b45 100644 --- a/src/ccontrol/UserQueryResultDelete.cc +++ b/src/ccontrol/UserQueryResultDelete.cc @@ -39,7 +39,7 @@ #include "cconfig/CzarConfig.h" #include "qmeta/Exceptions.h" #include "qmeta/QMeta.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" #include "sql/SqlErrorObject.h" @@ -57,7 +57,7 @@ namespace lsst::qserv::ccontrol { UserQueryResultDelete::UserQueryResultDelete(shared_ptr const& queryResources, string const& value) - : _value(value), _queryResources(queryResources), _messageStore(make_shared()) {} + : _value(value), _queryResources(queryResources), _messageStore(make_shared()) {} void UserQueryResultDelete::submit() { LOGS(_log, LOG_LVL_DEBUG, "UserQueryResultDelete::submit: " << _value); diff --git a/src/ccontrol/UserQueryResultDelete.h b/src/ccontrol/UserQueryResultDelete.h index 759d391a07..66b51a6131 100644 --- a/src/ccontrol/UserQueryResultDelete.h +++ b/src/ccontrol/UserQueryResultDelete.h @@ -38,9 +38,9 @@ #include 
"ccontrol/QueryState.h" #include "global/intTypes.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { class MessageStore; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta namespace lsst::qserv::ccontrol { @@ -74,12 +74,13 @@ class UserQueryResultDelete : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } private: std::string const _value; std::shared_ptr const _queryResources; - std::shared_ptr _messageStore; + std::string _resultTableName; + std::shared_ptr _messageStore; QueryState _qState{UNKNOWN}; }; diff --git a/src/ccontrol/UserQuerySelect.cc b/src/ccontrol/UserQuerySelect.cc index 9d8b42cf93..206f4dde9c 100644 --- a/src/ccontrol/UserQuerySelect.cc +++ b/src/ccontrol/UserQuerySelect.cc @@ -75,28 +75,35 @@ #include "ccontrol/MergingHandler.h" #include "ccontrol/TmpTableName.h" #include "ccontrol/UserQueryError.h" +#include "czar/Czar.h" +#include "czar/CzarFamilyMap.h" +#include "czar/CzarRegistry.h" #include "global/constants.h" #include "global/LogContext.h" -#include "proto/worker.pb.h" #include "qdisp/Executive.h" -#include "qdisp/MessageStore.h" +#include "qdisp/JobQuery.h" +#include "qmeta/MessageStore.h" +#include "qmeta/QMeta.h" #include "qmeta/Exceptions.h" -#include "qdisp/QdispPool.h" #include "qmeta/QMeta.h" #include "qmeta/QProgress.h" #include "qproc/IndexMap.h" #include "qproc/QuerySession.h" -#include "qproc/TaskMsgFactory.h" #include "query/ColumnRef.h" #include "query/FromList.h" #include "query/JoinRef.h" #include "query/QueryTemplate.h" #include "query/SelectList.h" #include "query/SelectStmt.h" +#include "qdisp/UberJob.h" #include "query/ValueExpr.h" #include "rproc/InfileMerger.h" #include "sql/Schema.h" -#include "xrdreq/QueryManagementAction.h" +#include "util/Bug.h" +#include "util/IterableFormatter.h" +#include "util/QdispPool.h" + +using 
namespace std; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQuerySelect"); @@ -104,15 +111,14 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQuerySelect"); namespace lsst::qserv::ccontrol { -UserQuerySelect::UserQuerySelect(std::shared_ptr const& qs, - std::shared_ptr const& messageStore, - std::shared_ptr const& executive, - std::shared_ptr const& dbModels, - std::shared_ptr const& infileMergerConfig, - std::shared_ptr const& secondaryIndex, - std::shared_ptr const& queryMetadata, - std::shared_ptr const& queryProgress, qmeta::CzarId czarId, - std::string const& errorExtra, bool async, std::string const& resultDb) +/// Constructor +UserQuerySelect::UserQuerySelect( + shared_ptr const& qs, shared_ptr const& messageStore, + shared_ptr const& executive, shared_ptr const& dbModels, + shared_ptr const& infileMergerConfig, + shared_ptr const& secondaryIndex, + shared_ptr const& queryMetadata, shared_ptr const& queryProgress, + CzarId czarId, string const& errorExtra, bool async, string const& resultDb, int uberJobMaxChunks) : _qSession(qs), _messageStore(messageStore), _executive(executive), @@ -124,48 +130,45 @@ UserQuerySelect::UserQuerySelect(std::shared_ptr const& qs, _czarId(czarId), _errorExtra(errorExtra), _resultDb(resultDb), - _async(async) {} + _async(async), + _uberJobMaxChunks(uberJobMaxChunks) {} -std::string UserQuerySelect::getError() const { - std::string div = (_errorExtra.size() && _qSession->getError().size()) ? " " : ""; +string UserQuerySelect::getError() const { + string div = (_errorExtra.size() && _qSession->getError().size()) ? " " : ""; return _qSession->getError() + div + _errorExtra; } void UserQuerySelect::kill() { - LOGS(_log, LOG_LVL_DEBUG, "UserQuerySelect kill"); - // The lock must be held for the entire query cancellation operation to prevent - // the race condition if the query join() or discard() happens at the same time. 
- std::lock_guard lock(_killMutex); - if (_killed) return; - - // If either pointer is nullptr then it's too late for killing the query. The query - // has already been finished or it's in a process of beging finished. - if (_executive == nullptr || _infileMerger == nullptr) { - LOGS(_log, LOG_LVL_DEBUG, "UserQuerySelect kill: the query is already finished"); - return; + LOGS(_log, LOG_LVL_INFO, "UserQuerySelect KILL"); + lock_guard lock(_killMutex); + if (!_killed) { + _killed = true; + auto exec = _executive; + int64_t collectedRows = (exec) ? exec->getTotalResultRows() : -1; + size_t collectedBytes = _infileMerger->getTotalResultSize(); + try { + if (exec != nullptr) { + exec->squash("UserQuerySelect::kill"); + } + } catch (UserQueryError const& e) { + // Silence merger discarding errors, because this object is being + // released. Client no longer cares about merger errors. + } + // Since this is being aborted, collectedRows and collectedBytes are going to + // be off a bit as results were still coming in. A rough idea should be + // good enough. + _qMetaUpdateStatus(qmeta::QInfo::ABORTED, collectedRows, collectedBytes, 0); } - try { - _executive->squash(); - } catch (UserQueryError const& e) { - // Silence merger discarding errors, because this object is being - // released. Client no longer cares about merger errors. - } - // Since this is being aborted, collectedRows and collectedBytes are going to - // be off a bit as results were still coming in. A rough idea should be - // good enough. 
- _qMetaUpdateStatus(qmeta::QInfo::ABORTED, _executive->getTotalResultRows(), - _infileMerger->getTotalResultSize(), 0); - _killed = true; } -std::string UserQuerySelect::_getResultOrderBy() const { return _qSession->getResultOrderBy(); } +string UserQuerySelect::_getResultOrderBy() const { return _qSession->getResultOrderBy(); } -std::string UserQuerySelect::getResultQuery() const { +string UserQuerySelect::getResultQuery() const { query::SelectList selectList; auto const& valueExprList = *_qSession->getStmt().getSelectList().getValueExprList(); for (auto const& valueExpr : valueExprList) { if (valueExpr->isStar()) { - auto useSelectList = std::make_shared(); + auto useSelectList = make_shared(); useSelectList->addValueExpr(valueExpr); query::SelectStmt starStmt(useSelectList, _qSession->getStmt().getFromList().clone()); sql::Schema schema; @@ -181,7 +184,7 @@ std::string UserQuerySelect::getResultQuery() const { // If the value is a column ref _and_ there was not a user defined alias, then the TablePlugin // will have assigned an alias that included the table name. We don't want that table name to // appear in the results in that case, so just assign the column. Otherwise, use the alias. - std::shared_ptr newValueExpr; + shared_ptr newValueExpr; if (valueExpr->isColumnRef() && not valueExpr->getAliasIsUserDefined()) { newValueExpr = query::ValueExpr::newColumnExpr(valueExpr->getAlias()); newValueExpr->setAlias(valueExpr->getColumnRef()->getColumn()); @@ -199,9 +202,8 @@ std::string UserQuerySelect::getResultQuery() const { query::QueryTemplate qt(query::QueryTemplate::DEFINE_VALUE_ALIAS_USE_TABLE_ALIAS); selectList.renderTo(qt); - std::string resultQuery = - "SELECT " + qt.sqlFragment() + " FROM " + _resultDb + "." + getResultTableName(); - std::string orderBy = _getResultOrderBy(); + string resultQuery = "SELECT " + qt.sqlFragment() + " FROM " + _resultDb + "." 
+ getResultTableName(); + string orderBy = _getResultOrderBy(); if (not orderBy.empty()) { resultQuery += " " + orderBy; } @@ -210,6 +212,11 @@ std::string UserQuerySelect::getResultQuery() const { } void UserQuerySelect::submit() { + auto exec = _executive; + if (exec == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "UserQuerySelect::submit() executive is null at start"); + return; + } _qSession->finalize(); // Using the QuerySession, generate query specs (text, db, chunkId) and then @@ -217,11 +224,12 @@ void UserQuerySelect::submit() { LOGS(_log, LOG_LVL_DEBUG, "UserQuerySelect beginning submission"); assert(_infileMerger); - auto taskMsgFactory = std::make_shared(); - TmpTableName ttn(_queryId, _qSession->getOriginal()); - int sequence = 0; - auto queryTemplates = _qSession->makeQueryTemplates(); + _ttn = make_shared(_queryId, _qSession->getOriginal()); + vector chunks; + mutex chunksMtx; + JobId sequence = 0; + auto queryTemplates = _qSession->makeQueryTemplates(); LOGS(_log, LOG_LVL_DEBUG, "first query template:" << (queryTemplates.size() > 0 ? 
queryTemplates[0].sqlFragment() : "none produced.")); @@ -235,46 +243,286 @@ void UserQuerySelect::submit() { LOGS(_log, LOG_LVL_WARN, "Failed QProgress::insert, ex: " << e.what()); } - _executive->setScanInteractive(_qSession->getScanInteractive()); + exec->setScanInteractive(_qSession->getScanInteractive()); + exec->setScanInfo(_qSession->getScanInfo()); + + string dbName(""); + bool dbNameSet = false; - for (auto i = _qSession->cQueryBegin(), e = _qSession->cQueryEnd(); i != e && !_executive->getCancelled(); + for (auto i = _qSession->cQueryBegin(), e = _qSession->cQueryEnd(); i != e && !exec->getCancelled(); ++i) { auto& chunkSpec = *i; - std::function funcBuildJob = [this, sequence, // sequence must be a copy - &chunkSpec, &queryTemplates, &ttn, - &taskMsgFactory](util::CmdData*) { - QSERV_LOGCONTEXT_QUERY(_queryId); - - bool const fillInChunkIdTag = false; - qproc::ChunkQuerySpec::Ptr const cs = - _qSession->buildChunkQuerySpec(queryTemplates, chunkSpec, fillInChunkIdTag); - std::string chunkResultName = ttn.make(cs->chunkId); - - ResourceUnit ru; - ru.setAsDbChunk(cs->db, cs->chunkId); - qdisp::JobDescription::Ptr jobDesc = qdisp::JobDescription::create( - _czarId, _executive->getId(), sequence, ru, - std::make_shared(_infileMerger, chunkResultName), taskMsgFactory, cs, - chunkResultName); - _executive->add(jobDesc); - }; + QSERV_LOGCONTEXT_QUERY(_queryId); + + // TODO:UJTemplate The template(s) is generated here and later it is compared to other + // templates. It would be better to create the list of query templates here + // and just store the index into the list of templates in the `cs`. 
+ qproc::ChunkQuerySpec::Ptr cs; + { + lock_guard lock(chunksMtx); + bool fillInChunkIdTag = false; // do not fill in the chunkId + cs = _qSession->buildChunkQuerySpec(queryTemplates, chunkSpec, fillInChunkIdTag); + chunks.push_back(cs->chunkId); + } - auto cmd = std::make_shared(funcBuildJob); - _executive->queueJobStart(cmd); + // This should only need to be set once as all jobs should have the same database name. + if (cs->db != dbName) { + if (dbNameSet) { + LOGS(_log, LOG_LVL_ERROR, "dbName change from " << dbName << " to " << cs->db); + return; + } + dbName = cs->db; + _queryDbName = dbName; + dbNameSet = true; + } + + ResourceUnit ru; + ru.setAsDbChunk(cs->db, cs->chunkId); + qdisp::JobDescription::Ptr jobDesc = + qdisp::JobDescription::create(_czarId, exec->getId(), sequence, ru, cs); + auto job = exec->add(jobDesc); ++sequence; } + /// At this point the executive has a map of all jobs with the chunkIds as the key. + // This is needed to prevent Czar::_monitor from starting things before they are ready. + exec->setAllJobsCreated(); + buildAndSendUberJobs(); + LOGS(_log, LOG_LVL_DEBUG, "total jobs in query=" << sequence); - _executive->waitForAllJobsToStart(); + // Waiting for all jobs to start seems to provide more consistent results. 
+ exec->waitForAllJobsToStart(); +} + +bool avoidThisWorker(czar::CzarChunkMap::WorkerChunksData::Ptr const& targetWorker, + protojson::WorkerContactInfo::WCMapPtr const& wContactMap, + qdisp::JobQuery::Ptr const& jqPtr, + std::shared_ptr const& czFamilyMap) { + auto iter = wContactMap->find(targetWorker->getWorkerId()); + if (iter == wContactMap->end()) return false; + auto const wInfo = iter->second; + return wInfo == nullptr || jqPtr->isWorkerInAvoidMap(wInfo, czFamilyMap->getLastUpdateTime()); +} + +void UserQuerySelect::buildAndSendUberJobs() { + string const funcN("UserQuerySelect::" + string(__func__) + " QID=" + to_string(_queryId)); + LOGS(_log, LOG_LVL_DEBUG, funcN << " start " << _uberJobMaxChunks); + + // Ensure `_monitor()` doesn't do anything until everything is ready. + auto exec = _executive; + if (exec == nullptr) { + LOGS(_log, LOG_LVL_ERROR, funcN << " called with null exec " << getQueryIdString()); + return; + } + + if (!exec->isAllJobsCreated()) { + LOGS(_log, LOG_LVL_INFO, funcN << " executive isn't ready to generate UberJobs."); + return; + } + + if (exec->getSuperfluous()) { + LOGS(_log, LOG_LVL_INFO, funcN << " executive superfluous, result already found."); + return; + } + if (exec->getCancelled()) { + LOGS(_log, LOG_LVL_INFO, funcN << " executive cancelled."); + return; + } + + // Only one thread should be generating UberJobs for this user query at any given time. + lock_guard fcLock(_buildUberJobMtx); + LOGS(_log, LOG_LVL_DEBUG, "UserQuerySelect::" << __func__ << " totalJobs=" << exec->getTotalJobs()); + + vector uberJobs; + + qdisp::Executive::ChunkIdJobMapType unassignedChunksInQuery = exec->unassignedChunksInQuery(); + if (unassignedChunksInQuery.empty()) { + LOGS(_log, LOG_LVL_DEBUG, funcN << " no unassigned Jobs"); + return; + } + + // Get czar info and the worker contactMap. 
+ auto czarPtr = czar::Czar::getCzar(); + auto czFamilyMap = czarPtr->getCzarFamilyMap(); + auto czChunkMap = czFamilyMap->getChunkMap(_queryDbName); + auto czRegistry = czarPtr->getCzarRegistry(); + // wContactMap is constant and safe to access without mutex lock. + auto const wContactMap = czRegistry->waitForWorkerContactMap(); + + if (czChunkMap == nullptr) { + LOGS(_log, LOG_LVL_ERROR, funcN << " no map found for queryDbName=" << _queryDbName); + // Make an empty chunk map so all jobs are flagged as needing to be reassigned. + // There's a chance that a family will be replicated by the registry. + czChunkMap = czar::CzarChunkMap::create(); + } + + auto const [chunkMapPtr, workerChunkMapPtr] = czChunkMap->getMaps(); + // Make a map of all jobs in the executive. + // TODO:DM-53239 Maybe a check should be made that all databases are in the same family? + + // keep cycling through workers until no more chunks to place. + // - create a map of UberJobs key=, val=> + // - for chunkId in `unassignedChunksInQuery` + // - use `chunkMapPtr` to find the shared scan workerId for chunkId + // - if not existing in the map, make a new uberjob + // - if existing uberjob at max jobs, create a new uberjob + // - once all chunks in the query have been put in uberjobs, find contact info + // for each worker + // - add worker to each uberjob. + // - For failures - If a worker cannot be contacted, that's an uberjob failure. + // - uberjob failures (due to communications problems) will result in the uberjob + // being broken up into multiple UberJobs going to different workers. + // - If an UberJob fails, the UberJob is killed and all the Jobs it contained + // are flagged as needing re-assignment and this function will be called + // again to put those Jobs in new UberJobs. Correctly re-assigning the + // Jobs requires accurate information from the registry about which workers + // are alive or dead. 
+ struct WInfoAndUJPtr { + using Ptr = shared_ptr; + qdisp::UberJob::Ptr uberJobPtr; + protojson::WorkerContactInfo::Ptr wInf; + }; + map workerJobMap; + vector missingChunks; + + int attemptCountIncreased = 0; + // unassignedChunksInQuery needs to be in numerical order so that UberJobs contain chunk numbers in + // numerical order. The workers run shared scans in numerical order of chunkId numbers. + // Numerical order keeps the number of partially complete UberJobs running on a worker to a minimum, + // and should minimize the time for the first UberJob on the worker to complete. + for (auto const& [chunkId, jqPtr] : unassignedChunksInQuery) { + bool const increaseAttemptCount = true; + jqPtr->getDescription()->incrAttemptCount(exec, increaseAttemptCount); + attemptCountIncreased++; + + // If too many workers are down, there will be a chunk that cannot be found. + // Just continuing should leave jobs `unassigned` with their attempt count + // increased. Either the chunk will be found and jobs assigned, or the jobs' + // attempt count will reach max and the query will be cancelled + auto lambdaMissingChunk = [&](string const& msg) { + missingChunks.push_back(chunkId); + auto logLvl = (missingChunks.size() % 1000 == 1) ? LOG_LVL_WARN : LOG_LVL_TRACE; + LOGS(_log, logLvl, msg); + }; + + auto iter = chunkMapPtr->find(chunkId); + if (iter == chunkMapPtr->end()) { + lambdaMissingChunk(funcN + " No chunkData for=" + to_string(chunkId)); + continue; + } + czar::CzarChunkMap::ChunkData::Ptr chunkData = iter->second; + auto targetWorker = chunkData->getPrimaryScanWorker().lock(); + bool avoidWorker = avoidThisWorker(targetWorker, wContactMap, jqPtr, czFamilyMap); + if (targetWorker == nullptr || targetWorker->isDead() || avoidWorker) { + LOGS(_log, LOG_LVL_WARN, + funcN << " No primary scan worker for chunk=" + chunkData->dump() + << ((targetWorker == nullptr) ? 
" targ was null" : " targ was dead")); + // Try to assign a different worker to this job + auto workerHasThisChunkMap = chunkData->getWorkerHasThisMapCopy(); + bool found = false; + for (auto wIter = workerHasThisChunkMap.begin(); wIter != workerHasThisChunkMap.end() && !found; + ++wIter) { + auto maybeTarg = wIter->second.lock(); + if (maybeTarg != nullptr && !maybeTarg->isDead()) { + avoidWorker = avoidThisWorker(maybeTarg, wContactMap, jqPtr, czFamilyMap); + if (!avoidWorker) { + targetWorker = maybeTarg; + found = true; + LOGS(_log, LOG_LVL_WARN, + funcN << " Alternate worker=" << targetWorker->getWorkerId() + << " found for chunk=" << chunkData->dump()); + } + } + } + if (!found) { + lambdaMissingChunk(funcN + + " No primary or alternate worker found for chunk=" + chunkData->dump()); + continue; + } + } + // Add this job to the appropriate UberJob, making the UberJob if needed. + string workerId = targetWorker->getWorkerId(); + WInfoAndUJPtr::Ptr& wInfUJ = workerJobMap[workerId]; + if (wInfUJ == nullptr) { + wInfUJ = make_shared(); + auto iter = wContactMap->find(workerId); + if (iter == wContactMap->end()) { + // This should never happen. However, if the worker contact info isn't found in the DB, + // the attempt count for this job will eventually reach max and the job will cancel itself. + LOGS(_log, LOG_LVL_ERROR, + funcN << " workerId=" << workerId << " could not be found in wContactMap."); + break; + } + wInfUJ->wInf = iter->second; + } + + if (wInfUJ->uberJobPtr == nullptr) { + // Create a new UberJob for this worker. 
+ auto ujId = _uberJobIdSeq++; // keep ujId consistent + string uberResultName = _ttn->make(ujId); + auto respHandler = + ccontrol::MergingHandler::Ptr(new ccontrol::MergingHandler(_infileMerger, exec)); + auto uJob = qdisp::UberJob::create(exec, respHandler, ujId, _czarId, wInfUJ->wInf, + czFamilyMap->getLastUpdateTime()); + uJob->setWorkerContactInfo(wInfUJ->wInf); + wInfUJ->uberJobPtr = uJob; + }; + + wInfUJ->uberJobPtr->addJob(jqPtr); + + if (wInfUJ->uberJobPtr->getJobCount() >= _uberJobMaxChunks) { + // Queue the UberJob to be sent to a worker + exec->addAndQueueUberJob(wInfUJ->uberJobPtr); + + // Clear the pointer so a new UberJob is created later if needed. + wInfUJ->uberJobPtr = nullptr; + } + } + + if (!missingChunks.empty()) { + string errStr = funcN + " a worker could not be found for these chunks "; + int maxList = 0; + for (auto const& chk : missingChunks) { + errStr += to_string(chk) + ","; + if (++maxList > 50) { + errStr += " too many to show all."; + break; + } + } + errStr += " All will be retried later. Total missing=" + to_string(missingChunks.size()); + LOGS(_log, LOG_LVL_ERROR, errStr); + } + + if (attemptCountIncreased > 0) { + LOGS(_log, LOG_LVL_WARN, + funcN << " increased attempt count for " << attemptCountIncreased << " Jobs"); + } + + // Queue unqued UberJobs, these have less than the max number of jobs. 
+ for (auto const& [wIdKey, winfUjPtr] : workerJobMap) { + if (winfUjPtr != nullptr) { + auto& ujPtr = winfUjPtr->uberJobPtr; + if (ujPtr != nullptr) { + exec->addAndQueueUberJob(ujPtr); + } + } + } + + LOGS(_log, LOG_LVL_DEBUG, funcN << " " << exec->dumpUberJobCounts()); } QueryState UserQuerySelect::join() { - bool successful = _executive->join(); // Wait for all data + auto exec = _executive; + if (exec == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "UserQuerySelect::join() called with null exec " << getQueryIdString()); + return ERROR; + } + bool successful = exec->join(); // Wait for all data // Since all data are in, run final SQL commands like GROUP BY. size_t collectedBytes = 0; int64_t finalRows = 0; - bool const resultSizeLimitExceeded = _infileMerger->resultSizeLimitExceeded(); + bool const resultSizeLimitExceeded = exec->resultSizeLimitExceeded(); if (!_infileMerger->finalize(collectedBytes, finalRows)) { successful = false; LOGS(_log, LOG_LVL_ERROR, "InfileMerger::finalize failed"); @@ -282,13 +530,13 @@ QueryState UserQuerySelect::join() { _messageStore->addMessage(-1, "MERGE", 1105, "Failure while merging result", MessageSeverity::MSG_ERROR); } - _executive->updateProxyMessages(); + exec->updateProxyMessages(); try { // The lock is required to prevent the race condition if the query cancellation // happens at the same time as the merger is being discarded. - _discardMerger(std::lock_guard(_killMutex)); - } catch (std::exception const& exc) { + _discardMerger(lock_guard(_killMutex)); + } catch (exception const& exc) { // exception here means error in qserv logic, we do not want to leak // it or expose it to user, just dump it to log LOGS(_log, LOG_LVL_ERROR, "exception from _discardMerger: " << exc.what()); @@ -297,63 +545,62 @@ QueryState UserQuerySelect::join() { // Update the permanent message table. 
_qMetaUpdateMessages(); - int64_t collectedRows = _executive->getTotalResultRows(); + int64_t collectedRows = exec->getTotalResultRows(); // finalRows < 0 indicates there was no postprocessing, so collected rows and final rows should be the // same. if (finalRows < 0) finalRows = collectedRows; - // Notify workers on the query completion/cancellation to ensure - // resources are properly cleaned over there as well. - proto::QueryManagement::Operation operation = proto::QueryManagement::COMPLETE; + QueryState state = SUCCESS; if (successful) { _qMetaUpdateStatus(qmeta::QInfo::COMPLETED, collectedRows, collectedBytes, finalRows); - LOGS(_log, LOG_LVL_INFO, "Joined everything (success)"); + LOGS(_log, LOG_LVL_INFO, "Joined everything (success) QID=" << getQueryId()); } else if (_killed) { // status is already set to ABORTED - LOGS(_log, LOG_LVL_ERROR, "Joined everything (killed)"); - operation = proto::QueryManagement::CANCEL; + LOGS(_log, LOG_LVL_ERROR, "Joined everything (killed) QID=" << getQueryId()); state = ERROR; } else { auto const status = resultSizeLimitExceeded ? qmeta::QInfo::FAILED_LR : qmeta::QInfo::FAILED; _qMetaUpdateStatus(status, collectedRows, collectedBytes, finalRows); - LOGS(_log, LOG_LVL_ERROR, "Joined everything (failure!)"); - operation = proto::QueryManagement::CANCEL; + LOGS(_log, LOG_LVL_ERROR, + "Joined everything (failure!) QID=" << getQueryId() << " status=" << status); state = ERROR; } auto const czarConfig = cconfig::CzarConfig::instance(); - if (czarConfig->notifyWorkersOnQueryFinish()) { - try { - xrdreq::QueryManagementAction::notifyAllWorkers(czarConfig->getXrootdFrontendUrl(), operation, - _czarId, _queryId); - } catch (std::exception const& ex) { - LOGS(_log, LOG_LVL_WARN, ex.what()); - } - } + + // Notify workers on the query completion/cancellation to ensure + // resources are properly cleaned over there as well. 
+ czar::Czar::getCzar()->getActiveWorkerMap()->addToDoneDeleteFiles(exec->getId()); return state; } -void UserQuerySelect::_discardMerger(std::lock_guard const& lock) { - _infileMergerConfig.reset(); +void UserQuerySelect::_discardMerger(lock_guard const& lock) { if (_infileMerger && !_infileMerger->isFinished()) { throw UserQueryError(getQueryIdString() + " merger unfinished, cannot discard"); } - _infileMerger.reset(); + _infileMergerConfig.reset(); } void UserQuerySelect::discard() { // The lock must be held for the entire discard operation to prevent // the race condition if the query cancellation happens at the same time // the query resources are being discarded. - std::lock_guard lock(_killMutex); + lock_guard lock(_killMutex); if (_killed) return; + auto exec = _executive; + if (exec == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "UserQuerySelect::discard called with null exec " << getQueryIdString()); + return; + } + // Make sure resources are released. - if (_executive && _executive->getNumInflight() > 0) { + if (exec->getNumInflight() > 0) { throw UserQueryError(getQueryIdString() + " Executive unfinished, cannot discard"); } + + // Deleting the executive may save some time if results were found early. _executive.reset(); - _messageStore.reset(); - _qSession.reset(); + try { _discardMerger(lock); } catch (UserQueryError const& e) { @@ -371,7 +618,7 @@ void UserQuerySelect::setupMerger() { "setting mergeStmt:" << (_infileMergerConfig->mergeStmt != nullptr ? 
_infileMergerConfig->mergeStmt->getQueryTemplate().sqlFragment() : "nullptr")); - _infileMerger = std::make_shared(*_infileMergerConfig, _databaseModels); + _infileMerger = make_shared(*_infileMergerConfig, _databaseModels); auto&& preFlightStmt = _qSession->getPreFlightStmt(); if (preFlightStmt == nullptr) { @@ -389,15 +636,15 @@ void UserQuerySelect::setupMerger() { _infileMerger->setMergeStmtFromList(_infileMergerConfig->mergeStmt); } -void UserQuerySelect::_expandSelectStarInMergeStatment(std::shared_ptr const& mergeStmt) { +void UserQuerySelect::_expandSelectStarInMergeStatment(shared_ptr const& mergeStmt) { if (nullptr != mergeStmt) { auto& selectList = *(mergeStmt->getSelectList().getValueExprList()); for (auto valueExprItr = selectList.begin(); valueExprItr != selectList.end(); ++valueExprItr) { auto& valueExpr = *valueExprItr; if (valueExpr->isStar()) { - auto valueExprVec = std::make_shared(); + auto valueExprVec = make_shared(); valueExprVec->push_back(valueExpr); - auto starStmt = query::SelectStmt(std::make_shared(valueExprVec), + auto starStmt = query::SelectStmt(make_shared(valueExprVec), mergeStmt->getFromListPtr()); sql::Schema schema; if (not _infileMerger->getSchemaForQueryResults(starStmt, schema)) { @@ -411,7 +658,7 @@ void UserQuerySelect::_expandSelectStarInMergeStatment(std::shared_ptrsaveResultQuery(_queryId, getResultQuery()); } void UserQuerySelect::_setupChunking() { - LOGS(_log, LOG_LVL_TRACE, "Setup chunking _qSession: " << _qSession); + LOGS(_log, LOG_LVL_TRACE, "Setup chunking"); + std::shared_ptr im; + std::shared_ptr eSet = _qSession->getEmptyChunks(); + { + eSet = _qSession->getEmptyChunks(); + if (!eSet) { + eSet = make_shared(); + LOGS(_log, LOG_LVL_WARN, "Missing empty chunks info for dominantDbs"); + } + } + // FIXME add operator<< for QuerySession + LOGS(_log, LOG_LVL_TRACE, "_qSession: " << _qSession); if (_qSession->hasChunks()) { auto areaRestrictors = _qSession->getAreaRestrictors(); auto secIdxRestrictors = 
_qSession->getSecIdxRestrictors(); @@ -454,14 +712,14 @@ void UserQuerySelect::_setupChunking() { _qSession->setScanInteractive(); } -void UserQuerySelect::qMetaRegister(std::string const& resultLocation, std::string const& msgTableName) { +void UserQuerySelect::qMetaRegister(string const& resultLocation, string const& msgTableName) { qmeta::QInfo::QType qType = _async ? qmeta::QInfo::ASYNC : qmeta::QInfo::SYNC; - std::string user = "anonymous"; // we do not have access to that info yet + string user = "anonymous"; // we do not have access to that info yet /// Chunking information is required before registering the query. _setupChunking(); - std::string qTemplate; + string qTemplate; auto const& stmtVector = _qSession->getStmtParallel(); for (auto itr = stmtVector.begin(); itr != stmtVector.end(); ++itr) { auto stmt = *itr; @@ -475,7 +733,7 @@ void UserQuerySelect::qMetaRegister(std::string const& resultLocation, std::stri } } - std::string qMerge; + string qMerge; auto mergeStmt = _qSession->getMergeStmt(); if (mergeStmt) { qMerge = mergeStmt->getQueryTemplate().sqlFragment(); @@ -496,14 +754,14 @@ void UserQuerySelect::qMetaRegister(std::string const& resultLocation, std::stri const auto& tables = _qSession->getStmt().getFromList().getTableRefList(); for (auto itr = tables.begin(); itr != tables.end(); ++itr) { // add table name - tableNames.push_back(std::make_pair((*itr)->getDb(), (*itr)->getTable())); + tableNames.push_back(make_pair((*itr)->getDb(), (*itr)->getTable())); // add its joins if any const auto& joins = (*itr)->getJoins(); for (auto jtr = joins.begin(); jtr != joins.end(); ++jtr) { const auto& right = (*jtr)->getRight(); if (right) { - tableNames.push_back(std::make_pair(right->getDb(), right->getTable())); + tableNames.push_back(make_pair(right->getDb(), right->getTable())); } } } @@ -516,20 +774,21 @@ void UserQuerySelect::qMetaRegister(std::string const& resultLocation, std::stri LOGS(_log, LOG_LVL_DEBUG, "UserQuery registered " << 
_qSession->getOriginal()); // update #QID# with actual query ID - boost::replace_all(_resultLoc, "#QID#", std::to_string(_queryId)); + boost::replace_all(_resultLoc, "#QID#", to_string(_queryId)); // guess query result location if (_resultLoc.compare(0, 6, "table:") == 0) { _resultTable = _resultLoc.substr(6); } else { // we only support results going to tables for now, abort for anything else - std::string const msg = "Unexpected result location '" + _resultLoc + "'"; + string const msg = "Unexpected result location '" + _resultLoc + "'"; _messageStore->addMessage(-1, "SYSTEM", 1146, msg, MessageSeverity::MSG_ERROR); throw UserQueryError(getQueryIdString() + _errorExtra); } - if (_executive != nullptr) { - _executive->setQueryId(_queryId); + auto exec = _executive; + if (exec != nullptr) { + exec->setQueryId(_queryId); } else { LOGS(_log, LOG_LVL_WARN, "No Executive, assuming invalid query"); } @@ -545,7 +804,7 @@ void UserQuerySelect::qMetaRegister(std::string const& resultLocation, std::stri // Throwing exception stops submit() but it does not set any // error condition, only prints error message to the log. To communicate // error message to caller we need to set _errorExtra - std::string const msg = "Table '" + itr->first + "." + itr->second + "' does not exist"; + string const msg = "Table '" + itr->first + "." 
+ itr->second + "' does not exist"; _messageStore->addMessage(-1, "SYSTEM", 1146, msg, MessageSeverity::MSG_ERROR); throw UserQueryError(getQueryIdString() + _errorExtra); } @@ -564,11 +823,10 @@ void UserQuerySelect::_qMetaUpdateStatus(qmeta::QInfo::QStatus qStatus, size_t r } void UserQuerySelect::_qMetaUpdateMessages() { - auto msgStore = getMessageStore(); try { - _queryMetadata->addQueryMessages(_queryId, msgStore); + _queryMetadata->addQueryMessages(_queryId, _messageStore); } catch (qmeta::SqlError const& ex) { - LOGS(_log, LOG_LVL_WARN, "UserQuerySelect::_qMetaUpdateMessages failed, ex: " << ex.what()); + LOGS(_log, LOG_LVL_ERROR, "UserQuerySelect::_qMetaUpdateMessages failed, ex: " << ex.what()); } } diff --git a/src/ccontrol/UserQuerySelect.h b/src/ccontrol/UserQuerySelect.h index 942b4e9485..b59fac4efe 100644 --- a/src/ccontrol/UserQuerySelect.h +++ b/src/ccontrol/UserQuerySelect.h @@ -32,6 +32,7 @@ */ // System headers +#include #include #include #include @@ -41,19 +42,17 @@ // Qserv headers #include "ccontrol/UserQuery.h" #include "css/StripingParams.h" -#include "qdisp/SharedResources.h" #include "qmeta/QInfo.h" -#include "qmeta/types.h" #include "qproc/ChunkSpec.h" // Forward declarations namespace lsst::qserv::qdisp { class Executive; -class MessageStore; class QdispPool; } // namespace lsst::qserv::qdisp namespace lsst::qserv::qmeta { +class MessageStore; class QMeta; class QProgress; } // namespace lsst::qserv::qmeta @@ -76,18 +75,21 @@ class InfileMergerConfig; namespace lsst::qserv::ccontrol { +class TmpTableName; + /// UserQuerySelect : implementation of the UserQuery for regular SELECT statements. 
class UserQuerySelect : public UserQuery { public: UserQuerySelect(std::shared_ptr const& qs, - std::shared_ptr const& messageStore, + std::shared_ptr const& messageStore, std::shared_ptr const& executive, std::shared_ptr const& dbModels, std::shared_ptr const& infileMergerConfig, std::shared_ptr const& secondaryIndex, std::shared_ptr const& queryMetadata, - std::shared_ptr const& queryProgress, qmeta::CzarId czarId, - std::string const& errorExtra, bool async, std::string const& resultDb); + std::shared_ptr const& queryProgress, CzarId czarId, + std::string const& errorExtra, bool async, std::string const& resultDb, + int uberJobMaxChunks); UserQuerySelect(UserQuerySelect const&) = delete; UserQuerySelect& operator=(UserQuerySelect const&) = delete; @@ -121,7 +123,7 @@ class UserQuerySelect : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// @return Name of the result table for this query, can be empty std::string getResultTableName() const override { return _resultTable; } @@ -148,6 +150,11 @@ class UserQuerySelect : public UserQuery { /// save the result query in the query metadata void saveResultQuery(); + /// Use the query and jobs information in the executive to construct and run whatever + /// UberJobs are needed. This can be called multiple times by Czar::_monitor + /// to reassign failed jobs or jobs that were never assigned. 
+ void buildAndSendUberJobs(); + private: /// @return ORDER BY part of SELECT statement that gets executed by the proxy std::string _getResultOrderBy() const; @@ -168,7 +175,7 @@ class UserQuerySelect : public UserQuery { // Delegate classes std::shared_ptr _qSession; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::shared_ptr _executive; std::shared_ptr _databaseModels; std::shared_ptr _infileMergerConfig; @@ -177,16 +184,27 @@ class UserQuerySelect : public UserQuery { std::shared_ptr _queryMetadata; std::shared_ptr _queryProgress; - qmeta::CzarId _czarId; ///< Czar ID in QMeta database - QueryId _queryId = 0; ///< Query ID in QMeta database + CzarId _czarId; + QueryId _queryId = 0; std::string _queryIdStr = QueryIdHelper::makeIdStr(0, true); ///< Initialized to unknown bool _killed = false; std::mutex _killMutex; mutable std::string _errorExtra; ///< Additional error information std::string _resultTable; ///< Result table name std::string _resultLoc; ///< Result location - std::string _resultDb; ///< Result database (todo is this the same as resultLoc??) + std::string _resultDb; ///< Result database bool _async; ///< true for async query + + /// The maximum number of chunks allowed in an UberJob, set from config. + int const _uberJobMaxChunks; + std::atomic _uberJobIdSeq{1}; ///< Sequence number for UberJobs in this query. + std::shared_ptr _ttn; ///< Temporary table name generator. + + /// Primary database name for the query. + std::string _queryDbName; + + /// Only one thread should run buildAndSendUberJobs() for this query at a time. 
+ std::mutex _buildUberJobMtx; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQuerySelectCountStar.cc b/src/ccontrol/UserQuerySelectCountStar.cc index 1e19106f04..7ff60a0d71 100644 --- a/src/ccontrol/UserQuerySelectCountStar.cc +++ b/src/ccontrol/UserQuerySelectCountStar.cc @@ -33,7 +33,7 @@ #include "ccontrol/UserQueryError.h" #include "ccontrol/UserQueryType.h" #include "global/LogContext.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/QInfo.h" #include "qmeta/QMetaSelect.h" #include "query/SelectStmt.h" @@ -57,11 +57,11 @@ UserQuerySelectCountStar::UserQuerySelectCountStar(std::string query, std::shared_ptr const& queryMetadata, std::string const& userQueryId, std::string const& rowsTable, std::string const& resultDb, - std::string const& countSpelling, qmeta::CzarId czarId, + std::string const& countSpelling, CzarId czarId, bool async) : _qMetaSelect(qMetaSelect), _queryMetadata(queryMetadata), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _userQueryId(userQueryId), _rowsTable(rowsTable), _resultDb(resultDb), diff --git a/src/ccontrol/UserQuerySelectCountStar.h b/src/ccontrol/UserQuerySelectCountStar.h index 0ad7e61266..438a10e243 100644 --- a/src/ccontrol/UserQuerySelectCountStar.h +++ b/src/ccontrol/UserQuerySelectCountStar.h @@ -34,12 +34,10 @@ // Forward decl namespace lsst::qserv { -namespace qdisp { -class MessageStore; -} namespace qmeta { +class MessageStore; class QMetaSelect; -} +} // namespace qmeta namespace query { class SelectStmt; } @@ -56,8 +54,8 @@ class UserQuerySelectCountStar : public UserQuery { UserQuerySelectCountStar(std::string query, std::shared_ptr const& qMetaSelect, std::shared_ptr const& queryMetadata, std::string const& userQueryId, std::string const& rowsTable, - std::string const& resultDb, std::string const& countSpelling, - qmeta::CzarId czarId, bool async); + std::string const& resultDb, std::string const& countSpelling, CzarId czarId, + 
bool async); virtual ~UserQuerySelectCountStar() {} @@ -79,7 +77,7 @@ class UserQuerySelectCountStar : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// This method should disappear when we start supporting results /// in locations other than MySQL tables. We'll switch to getResultLocation() @@ -118,7 +116,7 @@ class UserQuerySelectCountStar : public UserQuery { std::shared_ptr _qMetaSelect; std::shared_ptr const& _queryMetadata; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::string _resultTable; std::string _resultLoc; ///< Result location std::string _userQueryId; @@ -126,7 +124,7 @@ class UserQuerySelectCountStar : public UserQuery { std::string _resultDb; std::string _countSpelling; // keeps track of how "COUNT" is spelled, for the result query. std::string _query; // The original query text (without SUBMIT if async) - qmeta::CzarId _czarId; + CzarId _czarId; QueryId _queryId; /// QueryId in a standard string form, initially set to unknown. 
std::string _queryIdStr{QueryIdHelper::makeIdStr(0, true)}; diff --git a/src/ccontrol/UserQuerySet.cc b/src/ccontrol/UserQuerySet.cc index 6007e66a6f..ce6eb55bde 100644 --- a/src/ccontrol/UserQuerySet.cc +++ b/src/ccontrol/UserQuerySet.cc @@ -22,11 +22,11 @@ #include "UserQuerySet.h" // Qserv headers -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" namespace lsst::qserv::ccontrol { UserQuerySet::UserQuerySet(std::string const& varName, std::string const& varValue) - : _varName(varName), _varValue(varValue), _messageStore(std::make_shared()) {} + : _varName(varName), _varValue(varValue), _messageStore(std::make_shared()) {} } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQuerySet.h b/src/ccontrol/UserQuerySet.h index 1d7748c1a6..f9d50a9ffb 100644 --- a/src/ccontrol/UserQuerySet.h +++ b/src/ccontrol/UserQuerySet.h @@ -68,7 +68,7 @@ class UserQuerySet : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// This method should disappear when we start supporting results /// in locations other than MySQL tables. We'll switch to getResultLocation() @@ -91,7 +91,7 @@ class UserQuerySet : public UserQuery { std::string _varName; std::string _varValue; QueryState _qState{SUCCESS}; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; }; } // namespace lsst::qserv::ccontrol diff --git a/src/czar/ActiveWorker.cc b/src/czar/ActiveWorker.cc new file mode 100644 index 0000000000..7d434255b1 --- /dev/null +++ b/src/czar/ActiveWorker.cc @@ -0,0 +1,322 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "czar/ActiveWorker.h" + +// System headers +#include + +// Qserv headers +#include "cconfig/CzarConfig.h" +#include "czar/Czar.h" +#include "http/Client.h" +#include "http/MetaModule.h" +#include "protojson/PwHideJson.h" +#include "util/common.h" +#include "util/QdispPool.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.ActiveWorker"); +} // namespace + +namespace lsst::qserv::czar { + +string ActiveWorker::getStateStr(State st) { + switch (st) { + case ALIVE: + return string("ALIVE"); + case QUESTIONABLE: + return string("QUESTIONABLE"); + case DEAD: + return string("DEAD"); + } + return string("unknown"); +} + +bool ActiveWorker::compareContactInfo(protojson::WorkerContactInfo const& wcInfo) const { + lock_guard lg(_aMtx); + auto wInfo_ = _wqsData->getWInfo(); + if (wInfo_ == nullptr) return false; + return wInfo_->isSameContactInfo(wcInfo); +} + +void ActiveWorker::setWorkerContactInfo(protojson::WorkerContactInfo::Ptr const& wcInfo) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " new info=" << wcInfo->dump()); + lock_guard lg(_aMtx); + _wqsData->setWInfo(wcInfo); +} + +void ActiveWorker::_changeStateTo(State newState, double secsSinceUpdate, string const& note) { + 
auto lLvl = (newState == DEAD) ? LOG_LVL_ERROR : LOG_LVL_WARN; + LOGS(_log, lLvl, + note << " oldState=" << getStateStr(_state) << " newState=" << getStateStr(newState) + << " secsSince=" << secsSinceUpdate); + _state = newState; +} + +void ActiveWorker::updateStateAndSendMessages(double timeoutAliveSecs, double timeoutDeadSecs, + double maxLifetime) { + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " start"); + bool newlyDeadWorker = false; + protojson::WorkerContactInfo::Ptr wInfo_; + { + lock_guard lg(_aMtx); + wInfo_ = _wqsData->getWInfo(); + if (wInfo_ == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " no WorkerContactInfo"); + return; + } + double secsSinceUpdate = wInfo_->timeSinceRegUpdateSeconds(); + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " wInfo=" << wInfo_->dump() + << " secsSince=" << wInfo_->timeSinceRegUpdateSeconds() + << " secsSinceUpdate=" << secsSinceUpdate); + + // Update the last time the registry contacted this worker. + // TODO:DM-53240 - This needs to be added to the dashboard. + switch (_state) { + case ALIVE: { + if (secsSinceUpdate >= timeoutAliveSecs) { + _changeStateTo(QUESTIONABLE, secsSinceUpdate, cName(__func__)); + } + break; + } + case QUESTIONABLE: { + if (secsSinceUpdate < timeoutAliveSecs) { + _changeStateTo(ALIVE, secsSinceUpdate, cName(__func__)); + } + if (secsSinceUpdate >= timeoutDeadSecs) { + _changeStateTo(DEAD, secsSinceUpdate, cName(__func__)); + // All uberjobs for this worker need to die. + newlyDeadWorker = true; + } + break; + } + case DEAD: { + if (secsSinceUpdate < timeoutAliveSecs) { + _changeStateTo(ALIVE, secsSinceUpdate, cName(__func__)); + } else { + // Don't waste time on this worker until the registry has heard from it. + return; + } + break; + } + } + } + + // _aMtx and _awMapMtx must not be held when calling this. 
+ if (newlyDeadWorker) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " worker " << wInfo_->wId << " appears to have died, reassigning its jobs."); + czar::Czar::getCzar()->killIncompleteUbjerJobsOn(wInfo_->wId); + } + + shared_ptr jsWorkerReqPtr; + { + // Go through the _qIdDoneKeepFiles, _qIdDoneDeleteFiles, and _qIdDeadUberJobs lists to build a + // message to send to the worker. + jsWorkerReqPtr = _wqsData->toJson(maxLifetime); + } + + // Always send the message as it's a way to inform the worker that this + // czar is functioning and capable of receiving requests. + Ptr thisPtr = shared_from_this(); + auto sendStatusMsgFunc = [thisPtr, wInfo_, jsWorkerReqPtr](util::CmdData*) { + thisPtr->_sendStatusMsg(wInfo_, jsWorkerReqPtr); + }; + + auto cmd = util::PriorityCommand::Ptr(new util::PriorityCommand(sendStatusMsgFunc)); + auto qdisppool = czar::Czar::getCzar()->getQdispPool(); + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " queuing message"); + qdisppool->queCmd(cmd, 1); +} + +void ActiveWorker::_sendStatusMsg(protojson::WorkerContactInfo::Ptr const& wInf, + std::shared_ptr const& jsWorkerReqPtr) { + auto& jsWorkerReq = *jsWorkerReqPtr; + auto const method = http::Method::POST; + if (wInf == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " wInfo was null."); + return; + } + auto [ciwId, ciwHost, ciwManag, ciwPort] = wInf->getAll(); + string const url = "http://" + ciwHost + ":" + to_string(ciwPort) + "/querystatus"; + vector const headers = {"Content-Type: application/json"}; + auto const& czarConfig = cconfig::CzarConfig::instance(); + + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " REQ " << jsWorkerReq); + string const requestContext = "Czar: '" + http::method2string(method) + "' stat request to '" + url + "'"; + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " czarPost url=" << url << " request=" << jsWorkerReq.dump() + << " headers=" << headers[0]); + http::Client client(method, url, jsWorkerReq.dump(), headers); + bool transmitSuccess = 
false; + string exceptionWhat; + json response; + try { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " read start"); + response = client.readAsJson(); + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " read end"); + auto respMsg = protojson::ResponseMsg::createFromJson(response); + if (respMsg->success) { + bool startupTimeChanged = false; + startupTimeChanged = _wqsData->handleResponseJson(response); + transmitSuccess = true; + if (startupTimeChanged) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " worker startupTime changed, likely rebooted."); + // kill all incomplete UberJobs on this worker. + czar::Czar::getCzar()->killIncompleteUbjerJobsOn(wInf->wId); + } + } else { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " transmit failure:" << *respMsg); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_ERROR, requestContext + " transmit failure, ex: " + ex.what()); + exceptionWhat = ex.what(); + } + if (!transmitSuccess) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " transmit failure " << protojson::pwHide(jsWorkerReq) + << " resp=" << protojson::pwHide(response)); + } +} + +void ActiveWorker::addToDoneDeleteFiles(QueryId qId) { _wqsData->addToDoneDeleteFiles(qId); } + +void ActiveWorker::addToDoneKeepFiles(QueryId qId) { _wqsData->addToDoneKeepFiles(qId); } + +void ActiveWorker::removeDeadUberJobsFor(QueryId qId) { _wqsData->removeDeadUberJobsFor(qId); } + +void ActiveWorker::addDeadUberJob(QueryId qId, UberJobId ujId) { + auto now = CLOCK::now(); + _wqsData->addDeadUberJob(qId, ujId, now); +} + +protojson::WorkerContactInfo::Ptr ActiveWorker::getWInfo() const { + std::lock_guard lg(_aMtx); + if (_wqsData == nullptr) return nullptr; + return _wqsData->getWInfo(); +} + +ActiveWorker::State ActiveWorker::getState() const { + std::lock_guard lg(_aMtx); + return _state; +} + +string ActiveWorker::dump() const { + lock_guard lg(_aMtx); + return _dump(); +} + +string ActiveWorker::_dump() const { + stringstream os; + os << "ActiveWorker " << 
(_wqsData->dump()); + return os.str(); +} + +void ActiveWorkerMap::setCzarCancelAfterRestart(CzarId czId, QueryId lastQId) { + _czarCancelAfterRestart = true; + _czarCancelAfterRestartCzId = czId; + _czarCancelAfterRestartQId = lastQId; +} + +ActiveWorker::Ptr ActiveWorkerMap::getActiveWorker(string const& workerId) const { + lock_guard lck(_awMapMtx); + auto iter = _awMap.find(workerId); + if (iter == _awMap.end()) return nullptr; + return iter->second; +} + +void ActiveWorkerMap::sendActiveWorkersMessages() { + // Send messages to each active worker as needed + vector awVect; + { + lock_guard lck(_awMapMtx); + for (auto const& [wName, awPtr] : _awMap) { + awVect.push_back(awPtr); + } + } + + // _awMapMtx must be free before calling updateStateAndSendMessages + for (auto&& awPtr : awVect) { + awPtr->updateStateAndSendMessages(_timeoutAliveSecs, _timeoutDeadSecs, _maxLifetime); + } +} + +void ActiveWorkerMap::addToDoneDeleteFiles(QueryId qId) { + lock_guard lck(_awMapMtx); + for (auto const& [wName, awPtr] : _awMap) { + awPtr->addToDoneDeleteFiles(qId); + awPtr->removeDeadUberJobsFor(qId); + } +} + +void ActiveWorkerMap::addToDoneKeepFiles(QueryId qId) { + lock_guard lck(_awMapMtx); + for (auto const& [wName, awPtr] : _awMap) { + awPtr->addToDoneKeepFiles(qId); + awPtr->removeDeadUberJobsFor(qId); + } +} + +ActiveWorkerMap::ActiveWorkerMap(std::shared_ptr const& czarConfig) + : _timeoutAliveSecs(czarConfig->getActiveWorkerTimeoutAliveSecs()), + _timeoutDeadSecs(czarConfig->getActiveWorkerTimeoutDeadSecs()), + _maxLifetime(czarConfig->getActiveWorkerMaxLifetimeSecs()) {} + +void ActiveWorkerMap::updateMap(protojson::WorkerContactInfo::WCMap const& wcMap, + protojson::CzarContactInfo::Ptr const& czInfo, + protojson::AuthContext const& authContext_) { + // Go through wcMap, update existing entries in _awMap, create new entries for those that don't exist, + lock_guard awLg(_awMapMtx); + for (auto const& [wcKey, wcVal] : wcMap) { + auto iter = _awMap.find(wcKey); + if 
(iter == _awMap.end()) { + auto newAW = ActiveWorker::create(wcVal, czInfo, authContext_); + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " ActiveWorker created for " << wcKey << " " << newAW->dump()); + _awMap[wcKey] = newAW; + if (_czarCancelAfterRestart) { + newAW->setCzarCancelAfterRestart(_czarCancelAfterRestartCzId, _czarCancelAfterRestartQId); + } + } else { + auto aWorker = iter->second; + if (!aWorker->compareContactInfo(*wcVal)) { + // This should not happen, but try to handle it gracefully if it does. + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " worker contact info changed for " << wcKey + << " new=" << wcVal->dump() << " old=" << aWorker->dump()); + // If there is existing information, only host and port values will change. + aWorker->setWorkerContactInfo(wcVal); + } + aWorker->getWInfo()->setRegUpdateTime(wcVal->getRegUpdateTime()); + } + } +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/ActiveWorker.h b/src/czar/ActiveWorker.h new file mode 100644 index 0000000000..0fd0373c8c --- /dev/null +++ b/src/czar/ActiveWorker.h @@ -0,0 +1,253 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_CZAR_ACTIVEWORKER_H +#define LSST_QSERV_CZAR_ACTIVEWORKER_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "protojson/WorkerQueryStatusData.h" +#include "util/Bug.h" + +namespace lsst::qserv::cconfig { +class CzarConfig; +} + +// This header declarations +namespace lsst::qserv::czar { + +/// This class is used to track information important to the czar and a +/// specific worker. Primarily the czar cares about the worker being alive +/// and informing the worker that various query IDs and UberJobs +/// have finished or need to be cancelled. +/// - maintain list of done/cancelled queries for an active worker, and send +/// that list to the worker. Once the worker has accepted the list, remove +/// all of those queryId's from the lists. +/// - maintain a list of killed UberJobs. If an UberJob is killed, nothing +/// will every look for its files, so they should be deleted, and the +/// worker should avoid working on Tasks for that UberJob. +/// The only UberJob deaths that need to be sent to a worker is when +/// the czar kills an UberJob because the worker died/vanished, and +/// the only time this would be sent is when a worker came back from +/// the dead. +/// The reason this only applies to died/vanished workers is that all +/// other workers know their UberJobs are dead because the worker killed +/// them. If the worker isn't told, it will continue working on +/// the UberJob until it finishes, and then find out the UberJob was killed +/// when it tries to return results to the czar. The worker should delete +/// files for said UberJob at that point. +/// So, this should be very rare, only results in extra load. +/// +/// If a worker goes missing from the registry, it is considered DEAD and may be +/// removed after a period of time. +/// If a worker hasn't been heard from in (timeout period), it is considered QUESIONABLE. 
+/// If a QUESTIONABLE worker hasn't been heard from in (timeout period), its state is changed +/// to DEAD. +/// +/// When a worker becomes DEAD: (see Czar::_monitor). +/// - Affected UberJobs are killed. +/// - New UberJobs are built to handle unassigned jobs where dead workers are skipped and +/// the jobs are assigned to alternate workers. +/// +class ActiveWorker : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + + enum State { ALIVE = 0, QUESTIONABLE, DEAD }; + + ActiveWorker() = delete; + ActiveWorker(ActiveWorker const&) = delete; + ActiveWorker& operator=(ActiveWorker const&) = delete; + + /// Return "ActiveWorker::" followed by `fName`, a space, and the `_wqsData` dump + /// ("?" if `_wqsData` is null). + std::string cName(const char* fName) { + auto wqsd = _wqsData; + return std::string("ActiveWorker::") + fName + " " + ((wqsd == nullptr) ? "?" : wqsd->dump()); + } + + static std::string getStateStr(State st); + + /// Create a new ActiveWorker. The constructor throws util::Bug if the + /// WorkerQueryStatusData built from these arguments is null. + static Ptr create(protojson::WorkerContactInfo::Ptr const& wInfo, + protojson::CzarContactInfo::Ptr const& czInfo, + protojson::AuthContext const& authContext_) { + return Ptr(new ActiveWorker(wInfo, czInfo, authContext_)); + } + + /// This function should only be called before the _monitor thread is started + /// and shortly after czar startup: it tells all workers to delete all + /// query information for queries with czarId `czId` and queryId less than + /// or equal to `lastQId`. + /// Only the first call has any effect: `_cancelAfterCzarResetSent` is set on that call and + /// later calls are ignored. + void setCzarCancelAfterRestart(CzarId czId, QueryId lastQId) { + if (_cancelAfterCzarResetSent.exchange(true) == false) { + _wqsData->setCzarCancelAfterRestart(czId, lastQId); + } + } + + protojson::WorkerContactInfo::Ptr getWInfo() const; + + ~ActiveWorker() = default; + + /// Compare `wcInfo` with this worker's contact information (worker id, host, and port values). + /// NOTE(review): this comment previously read "Return true if there were differences", but + /// ActiveWorkerMap::updateMap logs "worker contact info changed" when this returns false, + /// which suggests true means the values match - confirm against the implementation. 
+ bool compareContactInfo(protojson::WorkerContactInfo const& wcInfo) const; + + void setWorkerContactInfo(protojson::WorkerContactInfo::Ptr const& wcInfo); + + /// Check this worker's state (by looking at contact information) and queue + /// the WorkerQueryStatusData message `_wqsData` to be sent if this worker + /// isn't DEAD. + void updateStateAndSendMessages(double timeoutAliveSecs, double timeoutDeadSecs, double maxLifetime); + + /// Add `qId` to the list of QueryIds for which the worker can discard all tasks and + /// result files. This `qId` will be removed from the list once the worker + /// has responded to the `_wqsData` message with this `qId` in the appropriate + /// list. + /// It is expected that all completed or cancelled queries on this worker will + /// be added to this list. + void addToDoneDeleteFiles(QueryId qId); + + /// Add `qId` to the list of QueryIds for which the worker must hold + /// onto result files but whose tasks can be eliminated. This `qId` will be removed + /// from the list once the worker has responded to the `_wqsData` message with + /// this `qId` in the appropriate list. + void addToDoneKeepFiles(QueryId qId); + + /// Add the uberjob to the list of dead uberjobs. This `qId` will be removed + /// from the list once the worker has responded to the `_wqsData` message with + /// this `qId` in the appropriate list, or once the `qId` is passed to + /// removeDeadUberJobsFor(). + void addDeadUberJob(QueryId qId, UberJobId ujId); + + /// If a query is completed or cancelled, there's no reason to track the + /// individual UberJobs anymore, so this function will get rid of them. 
+ void removeDeadUberJobsFor(QueryId qId); + + State getState() const; + + std::string dump() const; + +private: + ActiveWorker(protojson::WorkerContactInfo::Ptr const& wInfo, + protojson::CzarContactInfo::Ptr const& czInfo, protojson::AuthContext const& authContext_) + : _wqsData(protojson::WorkerQueryStatusData::create(wInfo, czInfo, authContext_)) { + if (_wqsData == nullptr) { + throw util::Bug(ERR_LOC, "ActiveWorker _wqsData null"); + } + } + + /// Change the state to `newState` and log if it is different. + /// _aMtx must be held before calling. + void _changeStateTo(State newState, double secsSinceUpdate, std::string const& note); + + /// Send the `jsWorkerReqPtr` json message to the worker referenced by `wInf` to + /// transmit the `_wqsData` state. + void _sendStatusMsg(protojson::WorkerContactInfo::Ptr const& wInf, + std::shared_ptr const& jsWorkerReqPtr); + + /// Dump a log string for this object. + /// _aMtx must be held before calling. + std::string _dump() const; + + /// Contains data that needs to be sent to workers about finished/cancelled + /// user queries and UberJobs. It must not be null. + protojson::WorkerQueryStatusData::Ptr const _wqsData; + + State _state{QUESTIONABLE}; ///< current state of this worker. + + mutable std::mutex _aMtx; ///< protects _wInfo, _state, _qIdDoneKeepFiles, _qIdDoneDeleteFiles + // NOTE(review): of the names listed above, only `_state` is declared in this class; the others + // are presumably held by `_wqsData` - confirm. + + /// Flag to limit sending of czar cancel after reset message. + std::atomic _cancelAfterCzarResetSent{false}; +}; + +/// This class maintains a list of all workers, indicating which are considered active. +/// Communication problems with workers could cause interesting race conditions, so +/// workers will remain on the list for a very long time after they have disappeared +/// in case they come back from the dead. 
+class ActiveWorkerMap { +public: + using Ptr = std::shared_ptr; + ActiveWorkerMap() = default; + ActiveWorkerMap(ActiveWorkerMap const&) = delete; + // Copy assignment is deleted; it is declared with the conventional reference return type, + // matching ActiveWorker::operator=. + ActiveWorkerMap& operator=(ActiveWorkerMap const&) = delete; + + ActiveWorkerMap(std::shared_ptr const& czarConfig); + + ~ActiveWorkerMap() = default; + + /// Return "ActiveWorkerMap::" followed by `fName` and a space, for use as a log-message prefix. + std::string cName(const char* fName) { return std::string("ActiveWorkerMap::") + fName + " "; } + + /// Use information gathered from the registry to update the map. The registry + /// contains last contact time (used for determining aliveness) and worker contact information. + void updateMap(protojson::WorkerContactInfo::WCMap const& wcMap, + protojson::CzarContactInfo::Ptr const& czInfo, protojson::AuthContext const& authContext_); + + /// If this is to be called, it must be called before Czar::_monitor is started: + /// It tells the workers all queries from `czId` with QueryIds less than `lastQId` + /// should be cancelled. + /// NOTE(review): ActiveWorker::setCzarCancelAfterRestart documents the bound as + /// "less than or equal to `lastQId`" - confirm which is intended. + void setCzarCancelAfterRestart(CzarId czId, QueryId lastQId); + + /// Return a pointer to the `ActiveWorker` associated with `workerId`. + ActiveWorker::Ptr getActiveWorker(std::string const& workerId) const; + + /// Call `updateStateAndSendMessages` for all workers in this map. + void sendActiveWorkersMessages(); + + /// Add `qId` to the list of query ids where the worker can throw away all related + /// Tasks and result files. This is used for all completed user queries and cancelled + /// user queries. + void addToDoneDeleteFiles(QueryId qId); + + /// Add `qId` to the list of query ids where the worker must hold onto result + /// files but all incomplete Tasks can be stopped. This is used for `rowLimitComplete` + /// where enough rows have been found to complete a user query with a LIMIT + /// clause. The czar may still need to collect the result files from the worker. + /// Once the czar has completed the user query, the `qId` will be added to + /// `addToDoneDeleteFiles` so the workers will delete the files. 
+ void addToDoneKeepFiles(QueryId qId); + +private: + std::map _awMap; ///< Key is worker id. + mutable std::mutex _awMapMtx; ///< protects _awMap; + + /// @see CzarConfig::getActiveWorkerTimeoutAliveSecs() + double _timeoutAliveSecs = 60.0 * 5.0; + + /// @see CzarConfig::getActiveWorkerTimeoutDeadSecs() + double _timeoutDeadSecs = 60.0 * 10.0; + + /// @see CzarConfig::getActiveWorkerMaxLifetimeSecs() + double _maxLifetime = 60.0 * 60.0; + + bool _czarCancelAfterRestart = false; + CzarId _czarCancelAfterRestartCzId = 0; + QueryId _czarCancelAfterRestartQId = 0; +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_ACTIVEWORKER_H diff --git a/src/czar/CMakeLists.txt b/src/czar/CMakeLists.txt index 501b82b596..4fd8d268e1 100644 --- a/src/czar/CMakeLists.txt +++ b/src/czar/CMakeLists.txt @@ -1,14 +1,19 @@ add_library(czar SHARED) target_sources(czar PRIVATE + ActiveWorker.cc ChttpModule.cc Czar.cc + CzarChunkMap.cc + CzarFamilyMap.cc + CzarRegistry.cc CzarThreads.cc HttpCzarIngestCsvModule.cc HttpCzarIngestModuleBase.cc HttpCzarIngestModule.cc HttpCzarQueryModule.cc HttpCzarSvc.cc + HttpCzarWorkerModule.cc HttpMonitorModule.cc HttpSvc.cc MessageTable.cc @@ -16,31 +21,26 @@ target_sources(czar PRIVATE WorkerIngestProcessor.cc ) -target_include_directories(czar PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - target_link_libraries(czar PUBLIC cconfig http + partition + protojson qdisp qhttp + replica util log - XrdSsiLib cpp-httplib boost_program_options ) -install( - TARGETS czar -) +install(TARGETS czar) function(CZAR_UTILS) foreach(UTIL IN ITEMS ${ARGV}) add_executable(${UTIL}) target_sources(${UTIL} PRIVATE ${UTIL}.cc) - target_include_directories(${UTIL} PRIVATE ${XROOTD_INCLUDE_DIRS}) target_link_libraries(${UTIL} PRIVATE cconfig ccontrol @@ -48,7 +48,6 @@ function(CZAR_UTILS) global mysql parser - partition qana qdisp qproc @@ -64,3 +63,30 @@ endfunction() czar_utils( qserv-czar-http ) + +function(czar_tests) + foreach(TEST IN ITEMS ${ARGV}) + 
add_executable(${TEST} ${TEST}.cc) + target_link_libraries(${TEST} PUBLIC + cconfig + ccontrol + czar + global + mysql + parser + qana + qdisp + qproc + qmeta + query + rproc + sql + Boost::unit_test_framework + ) + add_test(NAME ${TEST} COMMAND ${TEST}) + endforeach() +endfunction() + +czar_tests( + testCzar +) diff --git a/src/czar/Czar.cc b/src/czar/Czar.cc index 19f8775b9a..d959f6b828 100644 --- a/src/czar/Czar.cc +++ b/src/czar/Czar.cc @@ -41,15 +41,23 @@ #include "ccontrol/UserQueryResources.h" #include "ccontrol/UserQuerySelect.h" #include "ccontrol/UserQueryType.h" +#include "czar/ActiveWorker.h" +#include "czar/CzarFamilyMap.h" #include "czar/CzarErrors.h" #include "czar/CzarThreads.h" #include "czar/HttpSvc.h" #include "czar/MessageTable.h" +#include "czar/CzarRegistry.h" #include "global/LogContext.h" -#include "proto/worker.pb.h" +#include "http/Client.h" +#include "http/ClientConnPool.h" +#include "http/MetaModule.h" +#include "http/Method.h" +#include "mysql/CsvMemDisk.h" +#include "protojson/UberJobErrorMsg.h" +#include "protojson/UberJobReadyMsg.h" #include "qdisp/CzarStats.h" -#include "qdisp/QdispPool.h" -#include "qdisp/SharedResources.h" +#include "qdisp/Executive.h" #include "qproc/DatabaseModels.h" #include "rproc/InfileMerger.h" #include "sql/SqlConnection.h" @@ -58,27 +66,17 @@ #include "util/common.h" #include "util/FileMonitor.h" #include "util/IterableFormatter.h" +#include "util/QdispPool.h" #include "util/String.h" -#include "xrdreq/QueryManagementAction.h" -#include "XrdSsi/XrdSsiProvider.hh" using namespace std; -extern XrdSsiProvider* XrdSsiProviderClient; - // This macro is used to convert empty strings into "0" in order to avoid // problems with calling std::atoi() when the string is empty. #define ZERO_IF_EMPTY_STR(x) ((x.empty()) ? 
"0" : (x)) namespace { -string const createAsyncResultTmpl( - "CREATE TABLE IF NOT EXISTS %1% " - "(jobId BIGINT, resultLocation VARCHAR(1024))" - "ENGINE=MEMORY;" - "INSERT INTO %1% (jobId, resultLocation) " - "VALUES (%2%, '%3%')"); - LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.Czar"); } // anonymous namespace @@ -86,15 +84,85 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.Czar"); namespace lsst::qserv::czar { Czar::Ptr Czar::_czar; +uint64_t const Czar::czarStartupTime = millisecSinceEpoch(CLOCK::now()); Czar::Ptr Czar::createCzar(string const& configFilePath, string const& czarName) { _czar.reset(new Czar(configFilePath, czarName)); return _czar; } +/// Body of the monitor thread. Until `_monitorLoop` is cleared, each pass sleeps for +/// `_monitorSleepTime`, re-reads the czar family map, asks the registry to send the +/// ActiveWorker messages, and calls assignJobsToUberJobs() on every live Executive. +void Czar::_monitor() { + string const funcN("Czar::_monitor"); + uint16_t loopCount = 0; // unsigned to wrap around + while (_monitorLoop) { + ++loopCount; + this_thread::sleep_for(_monitorSleepTime); + LOGS(_log, LOG_LVL_DEBUG, funcN << " start0"); + + /// Check database for changes in worker chunk assignments and aliveness + try { + // TODO:DM-53239 The read() is incredibly expensive until the database has + // a "changed" field of some kind (preferably timestamp) to + // indicate the last time it changed. + // For Now, just do one read every few times through this loop. + // NOTE(review): `|| true` makes this condition always true, so read() runs on every + // pass and the `loopCount % 10` throttle is inactive - confirm this is intended. + if (loopCount % 10 == 0 || true) { + _czarFamilyMap->read(); + } + } catch (ChunkMapException const& cmex) { + // There are probably chunks that don't exist on any alive worker, + // continue on in hopes that workers will show up with the missing chunks + // later. + LOGS(_log, LOG_LVL_ERROR, funcN << " family map read problems " << cmex.what()); + } + + // Send appropriate messages to all ActiveWorkers. This will + // check if workers have died by timeout. + _czarRegistry->sendActiveWorkersMessages(); + + /// Create new UberJobs (if possible) for all jobs that are + /// unassigned for any reason. 
+ map> execMap; + { + // Make a copy of all valid Executives + lock_guard execMapLock(_executiveMapMtx); + // Use an iterator so it's easy/quick to delete dead weak pointers. + auto iter = _executiveMap.begin(); + while (iter != _executiveMap.end()) { + auto qIdKey = iter->first; + shared_ptr exec = iter->second.lock(); + if (exec == nullptr) { + iter = _executiveMap.erase(iter); + } else { + execMap[qIdKey] = exec; + ++iter; + } + } + } + // Use the copy to create new UberJobs as needed + for (auto&& [qIdKey, execVal] : execMap) { + execVal->assignJobsToUberJobs(); + } + + // To prevent anything from slipping through the cracks: + // Workers will keep trying to transmit results until they think the czar is dead. + // If a worker thinks the czar died, it will cancel all related jobs that it has, + // and if the czar sends a status message to that worker, that worker will send back + // a separate message (see WorkerCzarComIssue) saying it killed everything that this + // czar gave it. Upon getting this message from a worker, this czar will reassign + // everything it had sent to that worker. + } +} + // Constructors Czar::Czar(string const& configFilePath, string const& czarName) - : _czarName(czarName), _czarConfig(cconfig::CzarConfig::create(configFilePath, czarName)) { + : _czarName(czarName), + _czarConfig(cconfig::CzarConfig::create(configFilePath, czarName)), + _idCounter(), + _uqFactory(), + _clientToQuery(), + _monitorSleepTime(_czarConfig->getMonitorSleepTimeMilliSec()), + _activeWorkerMap(new ActiveWorkerMap(_czarConfig)), + _fqdn(util::get_current_host_fqdn_wait()) { // set id counter to milliseconds since the epoch, mod 1 year. struct timeval tv; gettimeofday(&tv, nullptr); @@ -111,47 +179,50 @@ Czar::Czar(string const& configFilePath, string const& czarName) // the name of the Czar gets translated into a numeric identifier. 
_czarConfig->setId(_uqFactory->userQuerySharedResources()->czarId); + auto const czarId = _czarConfig->id(); + size_t const MB_SIZE_BYTES = 1024 * 1024; + size_t maxResultTableSizeBytes = _czarConfig->getMaxTableSizeMB() * MB_SIZE_BYTES; + size_t maxMemToUse = _czarConfig->getMaxTransferMemMB() * MB_SIZE_BYTES; + string const transferDirectory = _czarConfig->getTransferDir(); + std::size_t const transferMinBytesInMem = _czarConfig->getTransferMinMBInMem() * MB_SIZE_BYTES; + mysql::TransferTracker::setup(maxMemToUse, transferDirectory, transferMinBytesInMem, + maxResultTableSizeBytes, czarId); + // Tell workers to cancel any queries that were submitted before this restart of Czar. - // Figure out which query (if any) was recorded in Czar database before the restart. + // Figure out which query (if any) was recorded in Czar databases before the restart. // The id will be used as the high-watermark for queries that need to be cancelled. // All queries that have identifiers that are strictly less than this one will // be affected by the operation. 
// if (_czarConfig->notifyWorkersOnCzarRestart()) { try { - xrdreq::QueryManagementAction::notifyAllWorkers(_czarConfig->getXrootdFrontendUrl(), - proto::QueryManagement::CANCEL_AFTER_RESTART, - _czarConfig->id(), _lastQueryIdBeforeRestart()); + QueryId lastQId = _lastQueryIdBeforeRestart(); + _activeWorkerMap->setCzarCancelAfterRestart(_czarConfig->id(), lastQId); } catch (std::exception const& ex) { LOGS(_log, LOG_LVL_WARN, ex.what()); } } + _czarFamilyMap = CzarFamilyMap::create(_uqFactory->userQuerySharedResources()->queryMetadata); + int qPoolSize = _czarConfig->getQdispPoolSize(); int maxPriority = std::max(0, _czarConfig->getQdispMaxPriority()); string vectRunSizesStr = _czarConfig->getQdispVectRunSizes(); vector vectRunSizes = util::String::parseToVectInt(vectRunSizesStr, ":", 1); string vectMinRunningSizesStr = _czarConfig->getQdispVectMinRunningSizes(); vector vectMinRunningSizes = util::String::parseToVectInt(vectMinRunningSizesStr, ":", 0); + LOGS(_log, LOG_LVL_INFO, - "INFO qdisp config qPoolSize=" << qPoolSize << " maxPriority=" << maxPriority << " vectRunSizes=" - << vectRunSizesStr << " -> " << util::prettyCharList(vectRunSizes) - << " vectMinRunningSizes=" << vectMinRunningSizesStr << " -> " - << util::prettyCharList(vectMinRunningSizes)); - qdisp::QdispPool::Ptr qdispPool = - make_shared(qPoolSize, maxPriority, vectRunSizes, vectMinRunningSizes); - qdisp::CzarStats::setup(qdispPool); - - _qdispSharedResources = qdisp::SharedResources::create(qdispPool); - - int xrootdCBThreadsMax = _czarConfig->getXrootdCBThreadsMax(); - int xrootdCBThreadsInit = _czarConfig->getXrootdCBThreadsInit(); - LOGS(_log, LOG_LVL_INFO, "config xrootdCBThreadsMax=" << xrootdCBThreadsMax); - LOGS(_log, LOG_LVL_INFO, "config xrootdCBThreadsInit=" << xrootdCBThreadsInit); - XrdSsiProviderClient->SetCBThreads(xrootdCBThreadsMax, xrootdCBThreadsInit); - int const xrootdSpread = _czarConfig->getXrootdSpread(); - LOGS(_log, LOG_LVL_INFO, "config xrootdSpread=" << xrootdSpread); - 
XrdSsiProviderClient->SetSpread(xrootdSpread); + " qdisp config qPoolSize=" << qPoolSize << " maxPriority=" << maxPriority << " vectRunSizes=" + << vectRunSizesStr << " -> " << util::prettyCharList(vectRunSizes) + << " vectMinRunningSizes=" << vectMinRunningSizesStr << " -> " + << util::prettyCharList(vectMinRunningSizes)); + _qdispPool = make_shared(qPoolSize, maxPriority, vectRunSizes, vectMinRunningSizes); + + qdisp::CzarStats::setup(_qdispPool); + + _commandHttpPool = shared_ptr( + new http::ClientConnPool(_czarConfig->getCommandMaxHttpConnections())); LOGS(_log, LOG_LVL_INFO, "Creating czar instance with name " << czarName); LOGS(_log, LOG_LVL_INFO, "Czar config: " << *_czarConfig); @@ -177,11 +248,22 @@ Czar::Czar(string const& configFilePath, string const& czarName) _czarConfig->setReplicationHttpPort(port); // Start special threads. - startRegistryUpdate(_czarConfig); startGarbageCollect(_czarConfig); startGarbageCollectAsync(_czarConfig); startGarbageCollectInProgress(_czarConfig, _uqFactory->userQuerySharedResources()->czarId, _uqFactory->userQuerySharedResources()->queryMetadata); + _czarRegistry = CzarRegistry::create(_czarConfig, _activeWorkerMap); + + // Start the monitor thread + thread monitorThrd(&Czar::_monitor, this); + _monitorThrd = move(monitorThrd); +} + +Czar::~Czar() { + LOGS(_log, LOG_LVL_DEBUG, "Czar::~Czar()"); + _monitorLoop = false; + _monitorThrd.join(); + LOGS(_log, LOG_LVL_DEBUG, "Czar::~Czar() end"); } SubmitResult Czar::submitQuery(string const& query, map const& hints) { @@ -222,8 +304,9 @@ SubmitResult Czar::submitQuery(string const& query, map const& h } // make new UserQuery - ccontrol::UserQuery::Ptr const uq = _uqFactory->newUserQuery(query, defaultDb, getQdispSharedResources(), - userQueryId, msgTableName, resultDb); + // this is atomic + ccontrol::UserQuery::Ptr uq = + _uqFactory->newUserQuery(query, defaultDb, getQdispPool(), userQueryId, msgTableName, resultDb); // Add logging context with query ID 
QSERV_LOGCONTEXT_QUERY(uq->getQueryId()); @@ -246,19 +329,23 @@ SubmitResult Czar::submitQuery(string const& query, map const& h // spawn background thread to wait until query finishes to unlock, // note that lambda stores copies of uq and msgTable. auto finalizer = [uq, msgTable]() mutable { + string qidstr = to_string(uq->getQueryId()); // Add logging context with query ID QSERV_LOGCONTEXT_QUERY(uq->getQueryId()); LOGS(_log, LOG_LVL_DEBUG, "submitting new query"); uq->submit(); - uq->join(); + ccontrol::QueryState qState = uq->join(); + bool querySuccess = (qState == ccontrol::QueryState::SUCCESS); try { - msgTable.unlock(uq); + // This will try to save messages to the message table before unlocking. + msgTable.unlock(uq, querySuccess); if (uq) uq->discard(); } catch (std::exception const& exc) { // TODO? if this fails there is no way to notify client, and client // will likely hang because table may still be locked. LOGS(_log, LOG_LVL_ERROR, "Query finalization failed (client likely hangs): " << exc.what()); } + uq.reset(); }; LOGS(_log, LOG_LVL_DEBUG, "starting finalizer thread for query"); thread finalThread(finalizer); @@ -319,45 +406,45 @@ void Czar::killQuery(string const& query, string const& clientId) { int threadId; QueryId queryId; if (ccontrol::UserQueryType::isKill(query, threadId)) { - LOGS(_log, LOG_LVL_DEBUG, "thread ID: " << threadId); + LOGS(_log, LOG_LVL_INFO, "KILL thread ID: " << threadId); lock_guard lock(_mutex); // find it in the client map based in client/thread id ClientThreadId ctId(clientId, threadId); auto iter = _clientToQuery.find(ctId); if (iter == _clientToQuery.end()) { - LOGS(_log, LOG_LVL_INFO, "Cannot find client thread id: " << threadId); - throw std::runtime_error("Unknown thread ID: " + query); + LOGS(_log, LOG_LVL_INFO, "KILL Cannot find client thread id: " << threadId); + throw std::runtime_error("KILL Unknown thread ID: " + query); } uq = iter->second.lock(); } else if (ccontrol::UserQueryType::isCancel(query, queryId)) { 
- LOGS(_log, LOG_LVL_DEBUG, "query ID: " << queryId); + LOGS(_log, LOG_LVL_INFO, "KILL query ID: " << queryId); lock_guard lock(_mutex); // find it in the client map based in client/thread id auto iter = _idToQuery.find(queryId); if (iter == _idToQuery.end()) { - LOGS(_log, LOG_LVL_INFO, "Cannot find query id: " << queryId); - throw std::runtime_error("Unknown or finished query ID: " + query); + LOGS(_log, LOG_LVL_INFO, "KILL Cannot find query id: " << queryId); + throw std::runtime_error("KILL unknown or finished query ID: " + query); } uq = iter->second.lock(); } else { - throw std::runtime_error("Failed to parse query: " + query); + throw std::runtime_error("KILL failed to parse query: " + query); } // assume this cannot fail or throw if (uq) { - LOGS(_log, LOG_LVL_DEBUG, "Killing query: " << uq->getQueryId()); + LOGS(_log, LOG_LVL_INFO, "KILLing query: " << uq->getQueryId()); // query killing can potentially take very long and we do now want to block // proxy from serving other requests so run it in a detached thread thread killThread([uq]() { uq->kill(); - LOGS(_log, LOG_LVL_DEBUG, "Finished killing query: " << uq->getQueryId()); + LOGS(_log, LOG_LVL_INFO, "Finished KILLing query: " << uq->getQueryId()); }); killThread.detach(); } else { - LOGS(_log, LOG_LVL_DEBUG, "Query has expired/finished: " << query); - throw std::runtime_error("Query has already finished: " + query); + LOGS(_log, LOG_LVL_INFO, "KILL query has expired/finished: " << query); + throw std::runtime_error("KILL query has already finished: " + query); } } @@ -419,8 +506,15 @@ void Czar::_makeAsyncResult(string const& asyncResultTable, QueryId queryId, str throw exc; } + string const createAsyncResultTmpl( + "CREATE TABLE IF NOT EXISTS %1% " + "(jobId BIGINT, resultLocation VARCHAR(1024))" + "ENGINE=MEMORY;" + "INSERT INTO %1% (jobId, resultLocation) " + "VALUES (%2%, '%3%')"); + string query = - (boost::format(::createAsyncResultTmpl) % asyncResultTable % queryId % resultLocEscaped).str(); + 
(boost::format(createAsyncResultTmpl) % asyncResultTable % queryId % resultLocEscaped).str(); if (not sqlConn->runQuery(query, sqlErr)) { SqlError exc(ERR_LOC, "Failure creating async result table", sqlErr); @@ -451,6 +545,7 @@ SubmitResult Czar::getQueryInfo(QueryId queryId) const { ", sql=" + sql; throw runtime_error(msg); } + vector colStatus; vector colCzarId; vector colCzarType; @@ -471,6 +566,7 @@ SubmitResult Czar::getQueryInfo(QueryId queryId) const { ", sql=" + sql; throw runtime_error(msg); } + if (colStatus.size() != 1) { string const msg = context + "Unknown user query, err=" + err.printErrMsg() + ", sql=" + sql; throw runtime_error(msg); @@ -556,4 +652,147 @@ QueryId Czar::_lastQueryIdBeforeRestart() const { return stoull(queryIdStr); } +void Czar::insertExecutive(QueryId qId, std::shared_ptr const& execPtr) { + lock_guard lgMap(_executiveMapMtx); + _executiveMap[qId] = execPtr; +} + +std::shared_ptr Czar::getExecutiveFromMap(QueryId qId) { + lock_guard lgMap(_executiveMapMtx); + auto iter = _executiveMap.find(qId); + if (iter == _executiveMap.end()) { + return nullptr; + } + std::shared_ptr exec = iter->second.lock(); + if (exec == nullptr) { + _executiveMap.erase(iter); + } + return exec; +} + +std::map> Czar::getExecMapCopy() const { + // Copy list of executives so the mutex isn't held forever. + std::map> execMap; + { + lock_guard lgMap(_executiveMapMtx); + execMap = _executiveMap; + } + return execMap; +} + +void Czar::killIncompleteUbjerJobsOn(std::string const& restartedWorkerId) { + // Copy list of executives so the mutex isn't held forever. 
+ std::map> execMap; + { + lock_guard lgMap(_executiveMapMtx); + execMap = _executiveMap; + } + + // For each executive, go through its list of uberjobs and cancel those jobs + // with workerId == restartedWorkerId && + for (auto const& [eKey, wPtrExec] : execMap) { + auto exec = wPtrExec.lock(); + if (exec != nullptr) { + exec->killIncompleteUberJobsOnWorker(restartedWorkerId); + } + } +} + +protojson::ExecutiveRespMsg::Ptr Czar::handleUberJobReadyMsg( + std::shared_ptr const& jrMsg, string const& note) { + auto queryId = jrMsg->queryId; + auto czarId = jrMsg->czarId; + auto uberJobId = jrMsg->uberJobId; + + qdisp::Executive::Ptr exec = czar::Czar::getCzar()->getExecutiveFromMap(queryId); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_WARN, + note << " null exec QID:" << queryId << " ujId=" << uberJobId << " cz=" << czarId); + // This means the user query is done and the results on the worker won't be needed + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobReady No executive for qid=") + + to_string(queryId) + " czar=" + to_string(czarId)); + } + + qdisp::UberJob::Ptr uj = exec->findUberJob(uberJobId); + if (uj == nullptr) { + LOGS(_log, LOG_LVL_WARN, + note << " null uj QID:" << queryId << " ujId=" << uberJobId << " cz=" << czarId); + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobReady No UberJob for qid=") + + to_string(queryId) + " ujId=" + to_string(uberJobId) + + " czar=" + to_string(czarId)); + } + uj->setResultFileSize(jrMsg->fileUrlInfo.fileSize); + exec->checkResultFileSize(jrMsg->fileUrlInfo.fileSize); + + auto importRes = uj->importResultFile(jrMsg->fileUrlInfo); + return importRes; +} + +protojson::ExecutiveRespMsg::Ptr Czar::handleUberJobReadyMsgNoThrow( + std::shared_ptr const& jrMsg, string const& note) { + protojson::ExecutiveRespMsg::Ptr execRespMsg; + try { + execRespMsg = handleUberJobReadyMsg(jrMsg, note); + } catch (invalid_argument const& ex) { + LOGS(_log, LOG_LVL_WARN, note << " exception: " << ex.what()); + 
// The message was parsed, but this UberJob is no longer needed by the czar. + execRespMsg = protojson::ExecutiveRespMsg::create(false, true, jrMsg->queryId, jrMsg->uberJobId, + jrMsg->czarId, "uberJobEnded", ex.what()); + } + return execRespMsg; +} + +protojson::ExecutiveRespMsg::Ptr Czar::handleUberJobErrorMsg( + std::shared_ptr const& jrMsg, string const& note) { + auto queryId = jrMsg->queryId; + auto czarId = jrMsg->czarId; + auto uberJobId = jrMsg->uberJobId; + string const idMsg = + "qId=" + to_string(queryId) + " ujId=" + to_string(uberJobId) + " czId=" + to_string(czarId); + auto execRespMsg = protojson::ExecutiveRespMsg::create(false, false, queryId, uberJobId, czarId); + + // Find UberJob + qdisp::Executive::Ptr exec = czar::Czar::getCzar()->getExecutiveFromMap(queryId); + if (exec == nullptr) { + // exec==nullptr just means this czar no longer has any use for any data associated with this QID. + LOGS(_log, LOG_LVL_WARN, note << " No executive for " << idMsg); + execRespMsg->success = true; + execRespMsg->dataObsolete = true; + execRespMsg->errorType = "queryEnded"; + execRespMsg->note = "null Executive"; + return execRespMsg; + } + qdisp::UberJob::Ptr uj = exec->findUberJob(uberJobId); + if (uj == nullptr) { + LOGS(_log, LOG_LVL_WARN, note << " No UberJob for " << idMsg); + execRespMsg->success = true; + execRespMsg->dataObsolete = true; + execRespMsg->errorType = "uberJobEnded"; + execRespMsg->note = "null UberJob"; + return execRespMsg; + } + + uj->workerError(jrMsg->multiError, *execRespMsg); + return execRespMsg; +} + +void Czar::incrCommErrCount(std::string const& type, std::string const& worker, std::string const& note) { + LOGS(_log, LOG_LVL_WARN, "Czar::incrCommErrCount " << type << " worker=" << worker << " " << note); + stringstream os; + lock_guard lg(_commErrCountMtx); + auto key = std::make_pair(type, worker); + auto iter = _commErrCountMap.find(key); + if (iter == _commErrCountMap.end()) { + _commErrCountMap[key] = 1; + } else { + 
iter->second += 1; + } + os << "Czar::incrCommErrCount {"; + for (auto const& [key, val] : _commErrCountMap) { + os << "(" << key.first << " worker=" << key.second << " count=" << val << ")"; + } + os << "}"; + LOGS(_log, LOG_LVL_WARN, os.str()); +} + } // namespace lsst::qserv::czar diff --git a/src/czar/Czar.h b/src/czar/Czar.h index e25bb0f833..f08ca9c39a 100644 --- a/src/czar/Czar.h +++ b/src/czar/Czar.h @@ -37,10 +37,11 @@ #include "ccontrol/UserQuery.h" #include "ccontrol/UserQueryFactory.h" #include "czar/SubmitResult.h" +#include "global/clock_defs.h" #include "global/intTypes.h" #include "global/stringTypes.h" #include "mysql/MySqlConfig.h" -#include "qdisp/SharedResources.h" +#include "protojson/ResponseMsg.h" #include "util/ConfigStore.h" #include "util/Timer.h" @@ -51,15 +52,33 @@ class CzarConfig; } // namespace lsst::qserv::cconfig namespace lsst::qserv::czar { +class ActiveWorkerMap; class HttpSvc; } // namespace lsst::qserv::czar +namespace lsst::qserv::http { +class ClientConnPool; +} // namespace lsst::qserv::http + +namespace lsst::qserv::protojson { +class UberJobErrorMsg; +class UberJobReadyMsg; +class UberJobStatusMsg; +} // namespace lsst::qserv::protojson + namespace lsst::qserv::util { class FileMonitor; } // namespace lsst::qserv::util +namespace lsst::qserv::qdisp { +class Executive; +} // namespace lsst::qserv::qdisp + namespace lsst::qserv::czar { +class CzarFamilyMap; +class CzarRegistry; + /// @addtogroup czar /** @@ -73,6 +92,7 @@ class Czar { Czar(Czar const&) = delete; Czar& operator=(Czar const&) = delete; + ~Czar(); /** * Submit query for execution. @@ -110,13 +130,63 @@ class Czar { */ static Ptr getCzar() { return _czar; } - /// Return a pointer to QdispSharedResources - qdisp::SharedResources::Ptr getQdispSharedResources() { return _qdispSharedResources; } + /// Remove all old tables in the qservResult database.
+ void removeOldResultTables(); /// @param queryId The unique identifier of the previously submitted user query /// @return The reconstructed info for the query SubmitResult getQueryInfo(QueryId queryId) const; + std::shared_ptr getCzarFamilyMap() const { return _czarFamilyMap; } + + std::shared_ptr getCzarRegistry() const { return _czarRegistry; } + + /// Add an Executive to the map of executives. + void insertExecutive(QueryId qId, std::shared_ptr const& execPtr); + + /// Get the executive associated with `qId`, this may be nullptr. + std::shared_ptr getExecutiveFromMap(QueryId qId); + + std::shared_ptr getActiveWorkerMap() const { return _activeWorkerMap; } + + std::map> getExecMapCopy() const; + + /// This function kills incomplete UberJobs associated with `workerId`. + /// This is done when it is believed a worker has died. The executive + /// unassigns the Jobs associated with the UberJobs and then + /// adds the ids to lists for the affected worker. If the worker + /// reconnects, it will stop work on those UberJobs when it gets the + /// list. + void killIncompleteUbjerJobsOn(std::string const& workerId); + + std::shared_ptr getQdispPool() const { return _qdispPool; } + + std::shared_ptr getCommandHttpPool() const { return _commandHttpPool; } + + std::string const& getFqdn() const { return _fqdn; } + + /// Starts the process of collecting a result file from the worker. + /// @throws std::invalid_argument + protojson::ExecutiveRespMsg::Ptr handleUberJobReadyMsg( + std::shared_ptr const& jrMsg, std::string const& note); + + /// Same as handleUberJobReadyMsg but returns an altered message instead of throwing. + protojson::ExecutiveRespMsg::Ptr handleUberJobReadyMsgNoThrow( + std::shared_ptr const& jrMsg, std::string const& note); + + /// Handle an UberJob processing error from the worker, does not throw exceptions. + /// It alters the returned response message instead of throwing an exception.
+ protojson::ExecutiveRespMsg::Ptr handleUberJobErrorMsg( + std::shared_ptr const& jrMsg, std::string const& note); + + /// Increment a communication error count. Just logging them now as it probably is not an + /// issue, but maybe they have been happening and it would be useful to know. + void incrCommErrCount(std::string const& type, std::string const& worker, std::string const& note); + + /// Startup time of czar, sent to workers so they can detect that the czar + /// was restarted when this value changes. + static uint64_t const czarStartupTime; + private: /// Private constructor for singleton. Czar(std::string const& configFilePath, std::string const& czarName); @@ -136,6 +206,9 @@ class Czar { /// @return An identifier of the last query that was recorded in the query metadata table QueryId _lastQueryIdBeforeRestart() const; + /// Periodically check for system changes and use those changes to try to finish queries. + void _monitor(); + static Ptr _czar; ///< Pointer to single instance of the Czar. // combines client name (ID) and its thread ID into one unique ID @@ -152,16 +225,63 @@ class Czar { IdToQuery _idToQuery; ///< maps query ID to query (for currently running queries) std::mutex _mutex; ///< protects _uqFactory, _clientToQuery, and _idToQuery - /// Thread pool for handling Responses from XrdSsi, - /// the PsuedoFifo to prevent czar from calling most recent requests, - /// and any other resources for use by query executives. - qdisp::SharedResources::Ptr _qdispSharedResources; + util::Timer _lastRemovedTimer; ///< Timer to limit table deletions. + std::mutex _lastRemovedMtx; ///< protects _lastRemovedTimer + + /// Prevents multiple concurrent calls to _removeOldTables(). + std::atomic _removingOldTables{false}; + std::thread _oldTableRemovalThread; ///< thread needs to remain valid while running. /// Reloads the log configuration file on log config file change.
std::shared_ptr _logFileMonitor; /// The HTTP server processing Czar management requests. std::shared_ptr _controlHttpSvc; + + /// Map of which chunks are on which workers and shared scan order. + std::shared_ptr _czarFamilyMap; + + /// Connection to the registry to register the czar and get worker contact information. + std::shared_ptr _czarRegistry; + + mutable std::mutex _executiveMapMtx; ///< protects _executiveMap + std::map> + _executiveMap; ///< Map of executives for queries in progress. + + std::thread _monitorThrd; ///< Thread to run the _monitor() + + /// Set to false on system shutdown to stop _monitorThrd. + std::atomic _monitorLoop{true}; + + /// Wait time between checks made by _monitor(). + std::chrono::milliseconds _monitorSleepTime; + + /// Keeps track of all workers (alive or otherwise) that this czar + /// may communicate with. Once created, the pointer never changes. + std::shared_ptr _activeWorkerMap; + + /// A combined priority queue and thread pool to regulate czar communications + /// with workers. Once created, the pointer never changes. + /// TODO:DM-52990 - It may be better to have a pool for each worker as it + /// may be possible for a worker to have communications + /// problems in a way that would wedge the pool. This can + /// probably be done fairly easily by having pools + /// attached to ActiveWorker in _activeWorkerMap. + /// Previously, the czar had no reasonable way to + /// know where Jobs were going. + std::shared_ptr _qdispPool; + + /// Pool of http client connections for sending commands (UberJobs + /// and worker status requests). + std::shared_ptr _commandHttpPool; + + /// FQDN for this czar. + std::string const _fqdn; + + /// Map of communication error counts by type and worker, protected by _commErrCountMtx. + /// Key - (type, worker) pair, value - count of errors of that type for that worker.
+ std::map, int> _commErrCountMap; + mutable std::mutex _commErrCountMtx; ///< protects _commErrCountMap }; } // namespace lsst::qserv::czar diff --git a/src/czar/CzarChunkMap.cc b/src/czar/CzarChunkMap.cc new file mode 100644 index 0000000000..694c27833d --- /dev/null +++ b/src/czar/CzarChunkMap.cc @@ -0,0 +1,296 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "czar/CzarChunkMap.h" + +// System headers +#include + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "qmeta/QMeta.h" +#include "cconfig/CzarConfig.h" +#include "czar/Czar.h" +#include "czar/CzarRegistry.h" +#include "qmeta/Exceptions.h" +#include "util/Bug.h" +#include "util/TimeUtils.h" + +using namespace std; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarChunkMap"); +} // namespace + +namespace lsst::qserv::czar { + +CzarChunkMap::CzarChunkMap() {} + +CzarChunkMap::~CzarChunkMap() { LOGS(_log, LOG_LVL_DEBUG, "CzarChunkMap::~CzarChunkMap()"); } + +void CzarChunkMap::calcChunkMap(ChunkMap const& chunkMap, ChunkVector& chunksSortedBySize) { + // Calculate total bytes for all chunks. 
+ for (auto&& [chunkIdNum, chunkData] : chunkMap) { + chunkData->_calcTotalBytes(); + chunksSortedBySize.push_back(chunkData); + } + + sortChunks(chunksSortedBySize); +} + +void CzarChunkMap::sortChunks(std::vector& chunksSortedBySize) { + /// Return true if a->_totalBytes > b->_totalBytes + auto sortBySizeDesc = [](ChunkData::Ptr const& a, ChunkData::Ptr const& b) { + if (b == nullptr && a != nullptr) return true; + if (a == nullptr) return false; + return a->_totalBytes > b->_totalBytes; + }; + + std::sort(chunksSortedBySize.begin(), chunksSortedBySize.end(), sortBySizeDesc); +} + +void CzarChunkMap::verify(string const& familyName) const { + auto&& wcMap = *_workerChunkMap; + auto&& chunkMap = *_chunkMap; + // Use a set to prevent duplicate ids caused by replication levels > 1. + set allChunkIds; + int errorCount = 0; + for (auto const& [wkr, wkrData] : wcMap) { + for (auto const& [chunkId, chunkData] : wkrData->_chunkDataMap) { + allChunkIds.insert(chunkId); + } + } + + for (auto const& [chunkId, chunkDataPtr] : chunkMap) { + if (chunkDataPtr == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " family=" << familyName << " chunkId=" << chunkId << " had nullptr"); + ++errorCount; + continue; + } + auto primeScanWkr = chunkDataPtr->_primaryScanWorker.lock(); + if (primeScanWkr == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " family=" << familyName << " chunkId=" << chunkId + << " missing primaryScanWorker"); + ++errorCount; + continue; + } + if (primeScanWkr->_sharedScanChunkMap.find(chunkId) == primeScanWkr->_sharedScanChunkMap.end()) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " family=" << familyName << " chunkId=" << chunkId + << " should have been (and was not) in the sharedScanChunkMap for " + << primeScanWkr->_workerId); + ++errorCount; + continue; + } + auto iter = allChunkIds.find(chunkId); + if (iter != allChunkIds.end()) { + allChunkIds.erase(iter); + } else { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " family=" 
<< familyName << " chunkId=" << chunkId + << " chunkId was not in allChunks list"); + ++errorCount; + continue; + } + } + + auto missing = allChunkIds.size(); + if (missing > 0) { + string allMissingIds; + for (auto const& cId : allChunkIds) { + allMissingIds += to_string(cId) + ","; + } + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " There were " << missing << " missing chunks from the scan list " + << allMissingIds); + ++errorCount; + } + + if (errorCount > 0) { + // Original creation of the family map will keep re-reading until there are no problems. + // _monitor will log this and keep using the old maps. + throw ChunkMapException(ERR_LOC, "verification failed with " + to_string(errorCount) + " errors " + + " family=" + familyName); + } + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " family=" << familyName << " verified"); +} + +string CzarChunkMap::dumpChunkMap() const { + stringstream os; + os << "ChunkMap{"; + for (auto const& [cId, cDataPtr] : *_chunkMap) { + os << "(cId=" << cId << ":"; + os << ((cDataPtr == nullptr) ? "null" : cDataPtr->dump()) << ")"; + } + os << "}"; + return os.str(); +} + +string CzarChunkMap::dumpWorkerChunkMap(WorkerChunkMap const& wcMap) { + stringstream os; + os << "WorkerChunkMap{"; + for (auto const& [wId, wDataPtr] : wcMap) { + os << "(wId=" << wId << ":"; + os << ((wDataPtr == nullptr) ? 
"null" : wDataPtr->dump()) << ")"; + } + os << "}"; + return os.str(); +} + +void CzarChunkMap::ChunkData::_calcTotalBytes() { + _totalBytes = 0; + for (auto const& [key, val] : _dbTableMap) { + _totalBytes += val; + } +} + +void CzarChunkMap::ChunkData::addToWorkerHasThis(std::shared_ptr const& worker) { + if (worker == nullptr) { + throw ChunkMapException(ERR_LOC, cName(__func__) + " worker was null"); + } + + _workerHasThisMap[worker->_workerId] = worker; +} + +map> CzarChunkMap::ChunkData::getWorkerHasThisMapCopy() + const { + map> newMap = _workerHasThisMap; + return newMap; +} + +shared_ptr CzarChunkMap::organize() { + auto chunksSortedBySize = make_shared(); + auto missingChunks = make_shared(); + + calcChunkMap(*_chunkMap, *chunksSortedBySize); + + // At this point we have + // - _workerChunkMap has a map of workerData by worker id with each worker having a map of ChunkData + // - _chunkMap has a map of all chunkData by chunk id + // - chunksSortedBySize a list of chunks sorted with largest first. + // From here need to assign shared scan chunk priority (i.e. the worker + // that will handle the chunk in shared scans, unless it is dead.) + // Go through the chunksSortedBySize list and assign each chunk to worker that has both: + // - a copy of the chunk + // - the worker currently has the smallest totalScanSize. + // When this is done, all workers should have lists of chunks with similar total sizes + // and missing chunks should be empty. + for (auto&& chunkData : *chunksSortedBySize) { + SizeT smallest = std::numeric_limits::max(); + WorkerChunksData::Ptr smallestWkr = nullptr; + // Find worker with smallest total size. + for (auto&& [wkrId, wkrDataWeak] : chunkData->_workerHasThisMap) { + auto wkrData = wkrDataWeak.lock(); + if (wkrData == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " unexpected null weak ptr for " << wkrId); + continue; // maybe the next one will be okay. 
+ } + + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " wkrId=" << wkrData->_workerId << " tsz=" << wkrData->_sharedScanTotalSize + << " smallest=" << smallest); + if (wkrData->_sharedScanTotalSize < smallest) { + smallestWkr = wkrData; + smallest = smallestWkr->_sharedScanTotalSize; + } + } + if (smallestWkr == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) + " no smallesWkr found for chunk=" + to_string(chunkData->_chunkId)); + missingChunks->push_back(chunkData); + } else { + smallestWkr->_sharedScanChunkMap[chunkData->_chunkId] = chunkData; + smallestWkr->_sharedScanTotalSize += chunkData->_totalBytes; + chunkData->_primaryScanWorker = smallestWkr; + LOGS(_log, LOG_LVL_DEBUG, + " chunk=" << chunkData->_chunkId << " assigned to scan on " << smallestWkr->_workerId); + } + } + return missingChunks; +} + +string CzarChunkMap::ChunkData::dump() const { + stringstream os; + auto primaryWorker = _primaryScanWorker.lock(); + os << "{ChunkData id=" << _chunkId << " totalBytes=" << _totalBytes; + os << " primaryWorker=" << ((primaryWorker == nullptr) ? "null" : primaryWorker->_workerId); + os << " workers{"; + for (auto const& [wId, wData] : _workerHasThisMap) { + os << "(" << wId << ")"; + } + os << "} tables{"; + for (auto const& [dbTbl, sz] : _dbTableMap) { + os << "(" << dbTbl.first << "."
<< dbTbl.second << " sz=" << sz << ")"; + } + os << "}}"; + return os.str(); +} + +bool CzarChunkMap::WorkerChunksData::isDead() { + if (_activeWorker == nullptr) { + // At startup, these may not be available + auto czarPtr = Czar::getCzar(); + if (czarPtr == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " czarPtr is null, this should only happen in unit test."); + return false; + } + auto awMap = Czar::getCzar()->getActiveWorkerMap(); + if (awMap == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " awMap is null."); + return true; + } + _activeWorker = awMap->getActiveWorker(_workerId); + if (_activeWorker == nullptr) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " activeWorker not found."); + return true; + } + } + auto wState = _activeWorker->getState(); + bool dead = wState == ActiveWorker::DEAD; + if (dead) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " is dead"); + } + return dead; +} + +string CzarChunkMap::WorkerChunksData::dump() const { + stringstream os; + os << "{WorkerChunksData id=" << _workerId << " scanTotalSize=" << _sharedScanTotalSize; + os << " chunkDataIds{"; + for (auto const& [chunkId, chunkData] : _chunkDataMap) { + os << "(" << chunkId << ")"; + } + os << "} sharedScanChunks{"; + for (auto const& [chunkId, chunkData] : _sharedScanChunkMap) { + os << "(" << chunkId << ")"; + } + os << "}}"; + return os.str(); +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/CzarChunkMap.h b/src/czar/CzarChunkMap.h new file mode 100644 index 0000000000..8a9e290515 --- /dev/null +++ b/src/czar/CzarChunkMap.h @@ -0,0 +1,261 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +#ifndef LSST_QSERV_CZAR_CZARCHUNKMAP_H +#define LSST_QSERV_CZAR_CZARCHUNKMAP_H + +// System headers +#include +#include +#include +#include +#include +#include +#include + +// Qserv headers +#include "global/clock_defs.h" +#include "util/Issue.h" + +namespace lsst::qserv::qmeta { +class QMeta; +struct QMetaChunkMap; +} // namespace lsst::qserv::qmeta + +namespace lsst::qserv::czar { + +class ActiveWorker; +class CzarFamilyMap; + +class ChunkMapException : public util::Issue { +public: + ChunkMapException(Context const& ctx, std::string const& msg) : util::Issue(ctx, msg) {} +}; + +/// This class is used to organize worker chunk table information so that it +/// can be used to send jobs to the appropriate worker and inform workers +/// what chunks they can expect to handle in shared scans. +/// The data for the maps is provided by the Replicator and stored in the +/// QMeta database. +/// When the data is changed, there is a timestamp that is updated, which +/// will cause new maps to be made by this class. +/// +/// The maps generated are constant objects stored with shared pointers. As +/// such, it should be possible for numerous threads to use each map +/// simultaneously provided they have their own pointers to the maps. +/// The pointers to the maps are mutex protected to safely allow map updates. 
+/// +/// Generated by CzarFamilyMap::_read() and CzarFamilyMap::_insertIntoMaps(...). +/// Instances of CzarChunkMap are immutable after construction. Updates replace +/// the entire CzarChunkMap object (atomic swap), so readers can access it +/// without taking a mutex. This enables high-performance, lock-free lookups +/// when routing millions of jobs to the correct worker. +/// +/// The czar is expected to heavily use the +/// `getMaps() -> WorkerChunkMap -> getSharedScanChunkMap()` +/// to send jobs to workers, as that gets an ordered list of all chunks +/// the worker should handle during a shared scan. +/// `getMaps() -> ChunkMap` is expected to be more useful if there is a +/// failure and a chunk query needs to go to a different worker. +/// +/// Workers failing or new workers being added is expected to be a rare event. +/// The current algorithm to split chunks between the workers tries to split +/// the work evenly. However, if a new worker is added, it's likely that +/// the new distribution of chunks for shared scans will put the chunks on +/// different workers than previously, which in turn will result in the system +/// being less efficient until all the old scans are complete. If workers +/// being added or removed from the system becomes frequent, the algorithm should +/// probably change to try to maintain some chunk location consistency once +/// the system is up. +/// +/// Failed workers are generally expected to come back online at some point, +/// so switches to alternate workers tend to be temporary. Workers being +/// added is expected to be permanent and is likely to cause significant +/// changes to where most jobs for particular chunks will be sent. 
+class CzarChunkMap { +public: + using Ptr = std::shared_ptr; + using SizeT = uint64_t; + + std::string cName(const char* func) const { return std::string("CzarChunkMap::") + func; } + + CzarChunkMap(CzarChunkMap const&) = delete; + CzarChunkMap& operator=(CzarChunkMap const&) = delete; + + static Ptr create() { return Ptr(new CzarChunkMap()); } + + ~CzarChunkMap(); + + class WorkerChunksData; + + /// Essentially a structure for storing data about which tables and workers are associated with this + /// chunk. + class ChunkData { + public: + using Ptr = std::shared_ptr; + ChunkData(int64_t chunkId_) : _chunkId(chunkId_) {} + + std::string cName(const char* func) const { + return std::string("ChunkData::") + func + " " + std::to_string(_chunkId); + } + int64_t getChunkId() const { return _chunkId; } + SizeT getTotalBytes() const { return _totalBytes; } + + std::weak_ptr getPrimaryScanWorker() const { return _primaryScanWorker; } + + /// Add `worker` to the `_workerHasThisMap` to indicate that worker has a copy + /// of this chunk. + void addToWorkerHasThis(std::shared_ptr const& worker); + + /// Return a copy of _workerHasThisMap. + std::map> getWorkerHasThisMapCopy() const; + + std::string dump() const; + + friend CzarChunkMap; + friend CzarFamilyMap; + + private: + int64_t const _chunkId; ///< The Id number for this chunk. + SizeT _totalBytes = 0; ///< The total number of bytes used by all tables in this chunk. + std::weak_ptr _primaryScanWorker; ///< The worker to be used for shared scans. + + /// Key is databaseName+tableName, value is size in bytes. + std::map, SizeT> _dbTableMap; + + /// Map of workers that have this chunk + std::map> _workerHasThisMap; + + /// Add up the bytes in each table for this chunk to get `_totalBytes` + void _calcTotalBytes(); + }; + + /// Essentially a structure for storing which chunks are associated with a worker.
+ class WorkerChunksData { + public: + using Ptr = std::shared_ptr; + WorkerChunksData(std::string const& workerId) : _workerId(workerId) {} + + std::string cName(const char* func) { + return std::string("WorkerChunksData::") + func + " " + _workerId; + } + + /// Return the worker's id string. + std::string const& getWorkerId() const { return _workerId; } + + /// Return the number of bytes contained in all chunks/tables to be + /// accessed in a full table scan on this worker. + SizeT getSharedScanTotalSize() const { return _sharedScanTotalSize; } + + /// Return true if this worker is dead, according to `ActiveWorkerMap`. + bool isDead(); + + /// Return a reference to `_sharedScanChunkMap`. A copy of the pointer + /// to this class (or the containing map) should be held to ensure the reference. + std::map const& getSharedScanChunkMap() const { return _sharedScanChunkMap; } + + std::string dump() const; + + friend CzarChunkMap; + friend CzarFamilyMap; + + private: + std::string const _workerId; + + /// Map of all chunks found on the worker where key is chunkId + std::map _chunkDataMap; + + /// Map of chunks this worker will handle during shared scans. + /// Since scans are done in order of chunk id numbers, it helps + /// to have this in chunk id number order. + /// At some point, this should be sent to workers so they + /// can make more accurate time estimates for chunk completion. + std::map _sharedScanChunkMap; + + /// The total size (in bytes) of all chunks on this worker that + /// are to be used in shared scans. + SizeT _sharedScanTotalSize = 0; + + /// Used to determine if this worker is alive and set + /// when the test is made. + std::shared_ptr _activeWorker; + }; + + using WorkerChunkMap = std::map; + using ChunkMap = std::map; + using ChunkVector = std::vector; + + /// Sort the chunks in `chunksSortedBySize` in descending order by total size in bytes. 
+ static void sortChunks(ChunkVector& chunksSortedBySize); + + /// Calculate the total bytes in each chunk and then sort the resulting ChunkVector by chunk size, + /// descending. + static void calcChunkMap(ChunkMap const& chunkMap, ChunkVector& chunksSortedBySize); + + /// Verify that all chunks belong to at least one worker and that all chunks are represented in shared + /// scans. + /// @throws ChunkMapException + void verify(std::string const& familyName) const; + + std::string dumpChunkMap() const; + + static std::string dumpWorkerChunkMap(WorkerChunkMap const& wcMap); + + /// Return shared pointers to const `_chunkMap` and const `_workerChunkMap`, which should be + /// held until finished with the data. + std::pair, + std::shared_ptr> + getMaps() const { + return {_chunkMap, _workerChunkMap}; + } + + /// Use the information from the registry to `organize` `_chunkMap` and `_workerChunkMap` + /// into their expected formats, which also should define where a chunk is always + /// run during shared scans. + /// This is a critical function for defining which workers will handle which jobs. + /// @return a vector of ChunkData::Ptr of chunks where no worker was found. + std::shared_ptr organize(); + +private: + CzarChunkMap(); + + /// Return shared pointers to `_chunkMap` and `_workerChunkMap`, which should be held until + /// finished with the data. + std::pair, std::shared_ptr> + _getMaps() const { + return {_chunkMap, _workerChunkMap}; + } + + /// Map of all workers and which chunks they contain. + std::shared_ptr _workerChunkMap{new WorkerChunkMap()}; + + /// Map of all chunks in the system with chunkId number as the key and the values contain + /// information about the tables in those chunks and which worker is responsible for + /// handling the chunk in a shared scan. 
+ std::shared_ptr _chunkMap{new ChunkMap()}; + + friend CzarFamilyMap; +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_CZARCHUNKMAP_H diff --git a/src/czar/CzarFamilyMap.cc b/src/czar/CzarFamilyMap.cc new file mode 100644 index 0000000000..38005f80e4 --- /dev/null +++ b/src/czar/CzarFamilyMap.cc @@ -0,0 +1,239 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "czar/CzarFamilyMap.h" + +// System headers +#include + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "qmeta/QMeta.h" +#include "cconfig/CzarConfig.h" +#include "czar/Czar.h" +#include "czar/CzarRegistry.h" +#include "qmeta/Exceptions.h" +#include "util/Bug.h" +#include "util/TimeUtils.h" + +using namespace std; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarFamilyMap"); +} // namespace + +namespace lsst::qserv::czar { + +CzarFamilyMap::Ptr CzarFamilyMap::create(std::shared_ptr const& qmeta) { + return Ptr(new CzarFamilyMap(qmeta)); +} + +CzarFamilyMap::CzarFamilyMap(std::shared_ptr const& qmeta) : _qmeta(qmeta) {} + +bool CzarFamilyMap::read() { + bool mapsSet = false; + try { + mapsSet = _read(); + } catch (qmeta::QMetaError const& qExc) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) + " could not read DB " << qExc.what()); + } + return mapsSet; +} + +bool CzarFamilyMap::_read() { + LOGS(_log, LOG_LVL_TRACE, "CzarFamilyMap::_read() start"); + // If replacing the map, this may take a bit of time, but it's probably + // better to wait for new maps if something changed. + std::lock_guard gLock(_familyMapMtx); + qmeta::QMetaChunkMap qChunkMap = _qmeta->getChunkMap(_lastUpdateTime); + if (_lastUpdateTime == qChunkMap.updateTime) { + // If "_lastUpdateTime == qChunkMap.updateTime", qChunkMap is empty. + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " no need to read last=" + << util::TimeUtils::timePointToDateTimeString(_lastUpdateTime) + << " map=" << util::TimeUtils::timePointToDateTimeString(qChunkMap.updateTime)); + return false; + } + + // Make the new maps. 
+ auto czConfig = cconfig::CzarConfig::instance(); + bool usingChunkSize = czConfig->getFamilyMapUsingChunkSize(); + shared_ptr familyMapPtr = makeNewMaps(qChunkMap, usingChunkSize); + + verify(familyMapPtr); + + for (auto const& [fam, ccMap] : *familyMapPtr) { + LOGS(_log, LOG_LVL_INFO, "{family=" << fam << "{" << ccMap->dumpChunkMap() << "}}"); + } + + _familyMap = familyMapPtr; + + _lastUpdateTime = qChunkMap.updateTime; + + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " read and verified " + << util::TimeUtils::timePointToDateTimeString(_lastUpdateTime)); + + LOGS(_log, LOG_LVL_TRACE, "CzarFamilyMap::_read() end"); + return true; +} + +std::shared_ptr CzarFamilyMap::makeNewMaps( + qmeta::QMetaChunkMap const& qChunkMap, bool usingChunkSize) { + // Create new maps. + std::shared_ptr newFamilyMap = make_shared(); + + // Workers -> Databases map + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " workers.sz=" << qChunkMap.workers.size()); + for (auto const& [workerId, dbs] : qChunkMap.workers) { + // Databases -> Tables map + for (auto const& [dbName, tables] : dbs) { + // Tables -> Chunks map + for (auto const& [tableName, chunks] : tables) { + // vector of ChunkInfo + for (qmeta::QMetaChunkMap::ChunkInfo const& chunkInfo : chunks) { + try { + int64_t chunkNum = chunkInfo.chunk; + CzarChunkMap::SizeT sz = 1; + if (usingChunkSize) { + sz = chunkInfo.size; + } + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " workerdId=" << workerId << " db=" << dbName << " table=" + << tableName << " chunk=" << chunkNum << " sz=" << sz); + _insertIntoMaps(newFamilyMap, workerId, dbName, tableName, chunkNum, sz); + } catch (invalid_argument const& exc) { + throw ChunkMapException( + ERR_LOC, cName(__func__) + " invalid_argument workerdId=" + workerId + + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); + } catch (out_of_range const& exc) { + throw ChunkMapException( + ERR_LOC, cName(__func__) + " out_of_range workerdId=" + workerId
+ + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); + } + } + } + } + } + + // This needs to be done for each CzarChunkMap in the family map. + for (auto&& [familyName, chunkMapPtr] : *newFamilyMap) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " working on " << familyName); + auto missing = chunkMapPtr->organize(); + if (missing != nullptr && !missing->empty()) { + // TODO:DM-53240 Some element of the dashboard should be made aware of this. Also, + // TODO:DM-53239 maybe this should check all families before throwing. + // There are implications that maybe the replicator should not + // tell the czar about families/databases that do not have + // at least one copy of each chunk with data loaded on a worker. + string chunkIdStr; + for (auto const& chunkData : *missing) { + chunkIdStr += to_string(chunkData->getChunkId()) + " "; + } + throw ChunkMapException( + ERR_LOC, cName(__func__) + " family=" + familyName + " is missing chunks " + chunkIdStr); + } + } + + return newFamilyMap; +} + +void CzarFamilyMap::_insertIntoMaps(std::shared_ptr const& newFamilyMap, + string const& workerId, string const& dbName, string const& tableName, + int64_t chunkIdNum, CzarChunkMap::SizeT sz) { + // Get the CzarChunkMap for this family + auto familyName = getFamilyNameFromDbName(dbName); + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " familyInsrt{w=" << workerId << " fN=" << familyName << " dbN=" << dbName + << " tblN=" << tableName << " chunk=" << chunkIdNum << " sz=" << sz << "}"); + auto& nfMap = *newFamilyMap; + CzarChunkMap::Ptr czarChunkMap; + auto familyIter = nfMap.find(familyName); + if (familyIter == nfMap.end()) { + czarChunkMap = CzarChunkMap::Ptr(new CzarChunkMap()); + nfMap[familyName] = czarChunkMap; + } else { + czarChunkMap = familyIter->second; + } + + auto [chunkMapPtr, wcMapPtr] = czarChunkMap->_getMaps(); + + CzarChunkMap::WorkerChunkMap& wcMap = *wcMapPtr; + CzarChunkMap::ChunkMap& chunkMap = 
*chunkMapPtr; + + // Get or make the worker entry + CzarChunkMap::WorkerChunksData::Ptr workerChunksData; + auto iterWC = wcMap.find(workerId); + if (iterWC == wcMap.end()) { + workerChunksData = CzarChunkMap::WorkerChunksData::Ptr(new CzarChunkMap::WorkerChunksData(workerId)); + wcMap[workerId] = workerChunksData; + } else { + workerChunksData = iterWC->second; + } + + // Get or make the ChunkData entry in chunkMap + CzarChunkMap::ChunkData::Ptr chunkData; + auto iterChunkData = chunkMap.find(chunkIdNum); + if (iterChunkData == chunkMap.end()) { + chunkData = CzarChunkMap::ChunkData::Ptr(new CzarChunkMap::ChunkData(chunkIdNum)); + chunkMap[chunkIdNum] = chunkData; + } else { + chunkData = iterChunkData->second; + } + + // Set or verify the table information + auto iterDT = chunkData->_dbTableMap.find({dbName, tableName}); + if (iterDT == chunkData->_dbTableMap.end()) { + // doesn't exist so set it up + chunkData->_dbTableMap[{dbName, tableName}] = sz; + } else { + // Verify that it matches other data + auto const& dbTbl = iterDT->first; + auto tblSz = iterDT->second; + auto const& dbN = dbTbl.first; + auto const& tblN = dbTbl.second; + if (dbName != dbN || tblN != tableName || tblSz != sz) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " data mismatch for " << dbName << "." << tableName << "=" << sz << " vs " + << dbN << "." << tblN << "=" << tblSz); + } + } + + // Link WorkerData the single chunkData instance for the chunkId + workerChunksData->_chunkDataMap[chunkIdNum] = chunkData; + + // Add worker to the list of workers containing the chunk. 
+ chunkData->addToWorkerHasThis(workerChunksData); +} + +void CzarFamilyMap::verify(std::shared_ptr const& familyMap) { + for (auto&& [familyName, czarChunkMapPtr] : *familyMap) { + czarChunkMapPtr->verify(familyName); + } +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/CzarFamilyMap.h b/src/czar/CzarFamilyMap.h new file mode 100644 index 0000000000..18f3f1baaa --- /dev/null +++ b/src/czar/CzarFamilyMap.h @@ -0,0 +1,152 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +#ifndef LSST_QSERV_CZAR_CZARFAMILYMAP_H +#define LSST_QSERV_CZAR_CZARFAMILYMAP_H + +// System headers +#include +#include +#include +#include +#include +#include +#include + +// Qserv headers +#include "czar/CzarChunkMap.h" + +namespace lsst::qserv::qmeta { +class QMeta; +struct QMetaChunkMap; +} // namespace lsst::qserv::qmeta + +namespace lsst::qserv::czar { + +/// This class is used to organize worker chunk table information so that it +/// can be used to send jobs to the appropriate worker and inform workers +/// what chunks they can expect to handle in shared scans, focusing at the +/// family level. +/// The data for the maps is provided by the Replicator and stored in the +/// QMeta database. 
+/// When the data is changed, there is a timestamp that is updated, which +/// will cause new maps to be made by this class. +/// +/// The maps generated should be treated as immutable objects stored with +/// shared pointers. As such, it should be possible for numerous threads +/// to use each map simultaneously provided they have their own pointers +/// to the maps. +/// The pointers to the maps are mutex protected to safely allow map updates. +// +// TODO:DM-53239 Currently, each family only has one database and they share +// a name. Once a table mapping databases to families is available, it needs +// to be used to map databases to families in this class. +class CzarFamilyMap { +public: + using Ptr = std::shared_ptr; + typedef std::map FamilyMapType; + typedef std::map DbNameToFamilyNameType; + + static Ptr create(std::shared_ptr const& qmeta); + + CzarFamilyMap() = delete; + CzarFamilyMap(CzarFamilyMap const&) = delete; + CzarFamilyMap& operator=(CzarFamilyMap const&) = delete; + + ~CzarFamilyMap() = default; + + /// For unit testing only + /// @param dbNameToFamilyNameType - valid map of db to family name for the unit test. + // TODO::UJ define member instance for `_dbNameToFamilyName` + CzarFamilyMap(std::shared_ptr const& dbNameToFamilyName) {} + + std::string cName(const char* fName) const { + return std::string("CzarFamilyMap::") + ((fName == nullptr) ? "?" : fName); + } + + /// Family names are unknown until a table has been added to the database, so + /// the dbName will be used as the family name until the table exists. + std::string getFamilyNameFromDbName(std::string const& dbName) const { + // TODO:DM-53239 use a member instance of std::shared_ptr + // once info is available in QMeta. 
+ return dbName; + } + + /// Return the chunk map for the database `dbName` + CzarChunkMap::Ptr getChunkMap(std::string const& dbName) const { + auto familyName = getFamilyNameFromDbName(dbName); + return _getChunkMap(familyName); + } + + /// Returns the time the data for this Family map was put in the database. + TIMEPOINT getLastUpdateTime() const { return _lastUpdateTime; } + + /// Read the registry information from the database, if not already set. + bool read(); + + /// Make a new FamilyMapType map including ChunkMap and WorkerChunkMap from the data + /// in `qChunkMap`. Each family has its own ChunkMap and WorkerChunkMap. + /// @param qChunkMap - data source for the family map + /// @param usingChunkSize - true if the distribution of chunks will depend on the + /// size of the chunks/ + /// + /// NOTE: This is likely an expensive operation and should probably only + /// be called if new workers have been added or chunks have been moved. + std::shared_ptr makeNewMaps(qmeta::QMetaChunkMap const& qChunkMap, bool usingChunkSize); + + /// Verify the `familyMap` does not have errors. + static void verify(std::shared_ptr const& familyMap); + +private: + /// Try to `_read` values for maps from `qmeta`. + CzarFamilyMap(std::shared_ptr const& qmeta); + + /// Read the registry information from the database, stopping if + /// it hasn't been updated. + // TODO:DM-53239 add a changed timestamp (similar to the existing updated timestamp) + // to the registry database and only update when changed. + bool _read(); + + /// Insert the new element described by the parameters into the `newFamilyMap` as appropriate. 
+ void _insertIntoMaps(std::shared_ptr const& newFamilyMap, std::string const& workerId, + std::string const& dbName, std::string const& tableName, int64_t chunkIdNum, + CzarChunkMap::SizeT sz); + + /// Return the chunk map for the `familyName` + CzarChunkMap::Ptr _getChunkMap(std::string const& familyName) const { + std::lock_guard familyLock(_familyMapMtx); + auto iter = _familyMap->find(familyName); + return (iter == _familyMap->end()) ? nullptr : iter->second; + } + + std::shared_ptr _qmeta; ///< Database connection to collect json worker list. + + /// The last time the maps were updated with information from the replicator. + TIMEPOINT _lastUpdateTime; // initialized to 0; + + std::shared_ptr _familyMap{new FamilyMapType()}; + mutable std::mutex _familyMapMtx; ///< protects _familyMap, _timeStamp, and _qmeta. +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_CZARFAMILYMAP_H diff --git a/src/czar/CzarRegistry.cc b/src/czar/CzarRegistry.cc new file mode 100644 index 0000000000..82e40559a3 --- /dev/null +++ b/src/czar/CzarRegistry.cc @@ -0,0 +1,239 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "czar/CzarRegistry.h" + +// System headers +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "cconfig/CzarConfig.h" +#include "czar/CzarChunkMap.h" +#include "czar/Czar.h" +#include "http/Client.h" +#include "http/Method.h" +#include "util/common.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarRegistry"); +} // namespace + +namespace lsst::qserv::czar { + +CzarRegistry::CzarRegistry(cconfig::CzarConfig::Ptr const& czarConfig, + ActiveWorkerMap::Ptr const& activeWorkerMap) + : _czarConfig(czarConfig), _activeWorkerMap(activeWorkerMap) { + // Begin periodically updating worker's status in the Replication System's registry. + // This will continue until the application gets terminated. + thread registryUpdateThread(&CzarRegistry::_registryUpdateLoop, this); + _czarHeartbeatThrd = move(registryUpdateThread); + + thread registryWorkerUpdateThread(&CzarRegistry::_registryWorkerInfoLoop, this); + _czarWorkerInfoThrd = move(registryWorkerUpdateThread); +} + +CzarRegistry::~CzarRegistry() { + _loop = false; + if (_czarHeartbeatThrd.joinable()) { + _czarHeartbeatThrd.join(); + } + if (_czarWorkerInfoThrd.joinable()) { + _czarWorkerInfoThrd.join(); + } +} + +protojson::WorkerContactInfo::WCMapPtr CzarRegistry::getWorkerContactMap() const { + lock_guard lockG(_cmapMtx); + return _contactMap; +} + +void CzarRegistry::_registryUpdateLoop() { + auto const method = http::Method::POST; + string const url = "http://" + _czarConfig->replicationRegistryHost() + ":" + + to_string(_czarConfig->replicationRegistryPort()) + "/czar"; + vector const headers = {"Content-Type: application/json"}; + json const request = json::object({{"instance_id", _czarConfig->replicationInstanceId()}, + {"auth_key", _czarConfig->replicationAuthKey()}, + {"czar", + {{"name", _czarConfig->name()}, + {"id", 
_czarConfig->id()}, + {"management-port", _czarConfig->replicationHttpPort()}, + {"host-name", util::get_current_host_fqdn()}}}}); + string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; + LOGS(_log, LOG_LVL_TRACE, + __func__ << " czarPost url=" << url << " request=" << request.dump() << " headers=" << headers[0]); + http::Client client(method, url, request.dump(), headers); + while (_loop) { + LOGS(_log, LOG_LVL_TRACE, + __func__ << " loop url=" << url << " request=" << request.dump() << " headers=" << headers[0]); + try { + json const response = client.readAsJson(); + if (0 == response.at("success").get()) { + string const error = response.at("error").get(); + LOGS(_log, LOG_LVL_ERROR, requestContext + " was denied, error: '" + error + "'."); + // TODO: Controlled shutdown would be nice instead of abort. + abort(); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); + } + this_thread::sleep_for(chrono::seconds(max(1U, _czarConfig->replicationRegistryHearbeatIvalSec()))); + } +} + +void CzarRegistry::_registryWorkerInfoLoop() { + // Get worker information from the registry + protojson::AuthContext const authContext(_czarConfig->replicationInstanceId(), + _czarConfig->replicationAuthKey()); + uint64_t const czarStartTime = Czar::czarStartupTime; + string const fqdn = util::get_current_host_fqdn(); + + vector const headers; + auto const method = http::Method::GET; + string const url = "http://" + _czarConfig->replicationRegistryHost() + ":" + + to_string(_czarConfig->replicationRegistryPort()) + + "/services?instance_id=" + _czarConfig->replicationInstanceId(); + string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; + LOGS(_log, LOG_LVL_TRACE, __func__ << " url=" << url); + http::Client client(method, url, string(), headers); + while (_loop) { + try { + json const response = client.readAsJson(); + if (0 == 
response.at("success").get()) { + string const error = response.at("error").get(); + LOGS(_log, LOG_LVL_ERROR, requestContext + " was denied, error: '" + error + "'."); + // TODO: Is there a better thing to do than just log this here? + } else { + protojson::WorkerContactInfo::WCMapPtr wMap = _buildMapFromJson(response); + // Update the values in the map + { + auto czInfo = protojson::CzarContactInfo::create(_czarConfig->name(), _czarConfig->id(), + _czarConfig->replicationHttpPort(), fqdn, + czarStartTime); + lock_guard lck(_cmapMtx); + if (wMap != nullptr) { + _contactMap = wMap; + _latestMapUpdate = CLOCK::now(); + _activeWorkerMap->updateMap(*_contactMap, czInfo, authContext); + } + } + } + LOGS(_log, LOG_LVL_TRACE, __func__ << " resp=" << response); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); + } + this_thread::sleep_for(chrono::seconds(15)); + } +} + +protojson::WorkerContactInfo::WCMapPtr CzarRegistry::_buildMapFromJson(nlohmann::json const& response) { + auto const& jsServices = response.at("services"); + auto const& jsWorkers = jsServices.at("workers"); + auto wMap = protojson::WorkerContactInfo::WCMapPtr(new protojson::WorkerContactInfo::WCMap()); + for (auto const& [key, value] : jsWorkers.items()) { + auto const& jsQserv = value.at("qserv"); + LOGS(_log, LOG_LVL_DEBUG, __func__ << " key=" << key << " jsQ=" << jsQserv); + + // The names for items here are different than the names used by workers. 
+ auto wInfo = protojson::WorkerContactInfo::createFromJsonRegistry(key, jsQserv); + + LOGS(_log, LOG_LVL_DEBUG, __func__ << " wInfot=" << wInfo->dump()); + auto iter = wMap->find(key); + if (iter != wMap->end()) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " duplicate key " << key << " in " << response); + if (!wInfo->isSameContactInfo(*(iter->second))) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " incongruent key " << key << " in " << response); + return nullptr; + } + // ignore the duplicate, since it matches the previous one. + } else { + wMap->insert({key, wInfo}); + } + } + return wMap; +} + +bool CzarRegistry::_compareMapContactInfo(protojson::WorkerContactInfo::WCMap const& other) const { + VMUTEX_HELD(_cmapMtx); + if (_contactMap == nullptr) { + // If _contactMap is null, it needs to be replaced. + return false; + } + if (other.size() != _contactMap->size()) { + return false; + } + for (auto const& [key, wInfo] : *_contactMap) { + auto iter = other.find(key); + if (iter == other.end()) { + return false; + } else { + if (!(iter->second->isSameContactInfo(*wInfo))) { + return false; + } + } + } + return true; +} + +protojson::WorkerContactInfo::WCMapPtr CzarRegistry::waitForWorkerContactMap() const { + protojson::WorkerContactInfo::WCMapPtr contMap = nullptr; + while (contMap == nullptr) { + { + lock_guard lockG(_cmapMtx); + contMap = _contactMap; + } + if (contMap == nullptr) { + // This should only ever happen at startup if there's trouble getting data. + LOGS(_log, LOG_LVL_WARN, "waitForWorkerContactMap() _contactMap unavailable waiting for info"); + this_thread::sleep_for(1s); + } + } + return contMap; +} + +void CzarRegistry::sendActiveWorkersMessages() { + // Send messages to each active worker as needed + _activeWorkerMap->sendActiveWorkersMessages(); +} + +void CzarRegistry::endUserQueryOnWorkers(QueryId qId, bool deleteWorkerResults) { + // Add query id to the appropriate list. 
+ if (deleteWorkerResults) { + _activeWorkerMap->addToDoneDeleteFiles(qId); + } else { + _activeWorkerMap->addToDoneKeepFiles(qId); + } + + // With lists updated, send out messages. + _activeWorkerMap->sendActiveWorkersMessages(); +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/CzarRegistry.h b/src/czar/CzarRegistry.h new file mode 100644 index 0000000000..08d24a7bcc --- /dev/null +++ b/src/czar/CzarRegistry.h @@ -0,0 +1,131 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_CZAR_CZARREGISTRY_H +#define LSST_QSERV_CZAR_CZARREGISTRY_H + +// System headers +#include +#include +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "czar/ActiveWorker.h" +#include "global/clock_defs.h" +#include "util/Mutex.h" + +namespace lsst::qserv::cconfig { +class CzarConfig; +} // namespace lsst::qserv::cconfig + +namespace lsst::qserv::czar { + +/// This class connects to the Replication System's Registry to register this czar and get +/// worker contact information. 
+/// The assumptions going forward are that the CzarChunkMap provides the real location of +/// where all chunks are located and any workers in that map that are missing from this +/// map are just temporary communications problems. A real prolonged failure of a worker +/// will result in a new CzarChunkMap being created. As such, problems with missing +/// worker contact information will be handled in Job creation +/// in UserQueryFactory::newUserQuery and will be treated in similar manner as not being +/// able to contact a worker. +/// +/// There really shouldn't be communications problems, but there are, the best course of +/// action would probably be to destroy the first instance of this and create a new one. +/// +class CzarRegistry { +public: + using Ptr = std::shared_ptr; + + /// Return a pointer to a new CzarRegistry object. + static Ptr create(std::shared_ptr const& czarConfig, + std::shared_ptr const& activeWorkerMap) { + return Ptr(new CzarRegistry(czarConfig, activeWorkerMap)); + } + + ~CzarRegistry(); + + /// Return _contactMap, the object that the returned pointer points to is + /// constant and no attempts should be made to change it. + protojson::WorkerContactInfo::WCMapPtr getWorkerContactMap() const; + + /// Return _contactMap, the object that the returned pointer points to is + /// constant and no attempts should be made to change it. This + /// function will wait forever for a valid contact map to be ready. + protojson::WorkerContactInfo::WCMapPtr waitForWorkerContactMap() const; + + /// Send all live workers the `WorkerQueryStatusData` message for + /// that worker. This may result in the worker sending back the + /// `WorkerCzarComIssue` message if there were communication problems. + void sendActiveWorkersMessages(); + + /// Add the query id to the list of queries to end on workers and + /// send the messages, deleting all result files if + /// `deleteWorkerResults` is true. 
+ void endUserQueryOnWorkers(QueryId qId, bool deleteWorkerResults); + +private: + CzarRegistry() = delete; + CzarRegistry(std::shared_ptr const& czarConfig, + std::shared_ptr const& activeWorkerMap); + + /// This function will keep periodically updating Czar's info in the Replication System's Registry + /// until _loop is set to false. + /// Communications problems are logged but ignored. This should probably change. + void _registryUpdateLoop(); + + /// This function collects worker contact information from the Replication System's Registry + /// until _loop is set to false. + /// Communications problems are logged but ignored. This should probably change. + void _registryWorkerInfoLoop(); + + /// Build a new WorkerContactMap from the json `response` + protojson::WorkerContactInfo::WCMapPtr _buildMapFromJson(nlohmann::json const& response); + + /// Return true if maps are the same size and all of the elements have the same contact info. + /// NOTE: _cmapMtx must be held when calling. + bool _compareMapContactInfo(protojson::WorkerContactInfo::WCMap const& other) const; + + std::shared_ptr const _czarConfig; ///< Pointer to the CzarConfig. + + std::atomic _loop{true}; ///< Threads will continue to run until this is set false. + std::thread _czarHeartbeatThrd; ///< This thread continually registers this czar with the registry. + std::thread _czarWorkerInfoThrd; ///< This thread continuously collects worker contact information. + + /// Pointer to the map of worker contact information. + protojson::WorkerContactInfo::WCMapPtr _contactMap; + TIMEPOINT _latestMapUpdate; ///< The last time the _contactMap was updated, unrelated to + ///< WorkerContactInfo update. + mutable MUTEX _cmapMtx; /// Protects _contactMap, _latestUpdate + + /// Map for tracking worker aliveness, it has its own internal mutex. 
+ std::shared_ptr const _activeWorkerMap; +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_CZARREGISTRY_H diff --git a/src/czar/CzarThreads.cc b/src/czar/CzarThreads.cc index da53ae14a6..f55b577c10 100644 --- a/src/czar/CzarThreads.cc +++ b/src/czar/CzarThreads.cc @@ -67,41 +67,6 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.Czar"); namespace lsst::qserv::czar { -void registryUpdate(shared_ptr const& czarConfig) { - auto const method = http::Method::POST; - string const url = "http://" + czarConfig->replicationRegistryHost() + ":" + - to_string(czarConfig->replicationRegistryPort()) + "/czar"; - vector const headers = {"Content-Type: application/json"}; - json const request = json::object({{"version", http::MetaModule::version}, - {"instance_id", czarConfig->replicationInstanceId()}, - {"auth_key", czarConfig->replicationAuthKey()}, - {"czar", - {{"name", czarConfig->name()}, - {"id", czarConfig->id()}, - {"management-port", czarConfig->replicationHttpPort()}, - {"host-name", util::get_current_host_fqdn()}}}}); - string const requestContext = "'" + http::method2string(method) + "' request to '" + url + "'"; - http::Client client(method, url, request.dump(), headers); - while (true) { - try { - json const response = client.readAsJson(); - if (0 == response.at("success").get()) { - string const error = response.at("error").get(); - ERROR_(requestContext << " was denied, error: '" << error << "'."); - abort(); - } - } catch (exception const& ex) { - WARN_(requestContext << " failed, ex: " << ex.what()); - } - this_thread::sleep_for(chrono::seconds(max(1U, czarConfig->replicationRegistryHearbeatIvalSec()))); - } -} - -void startRegistryUpdate(shared_ptr czarConfig) { - thread t(registryUpdate, czarConfig); - t.detach(); -} - inline string searchForOldTablesQuery(string const& resultDbName, int const resultAgeDay) { return "SELECT table_name,create_time FROM information_schema.tables WHERE table_schema='" + resultDbName + @@ -257,7 +222,7 @@ void 
startGarbageCollectAsync(shared_ptr czarConfig) { t.detach(); } -void startGarbageCollectInProgress(shared_ptr czarConfig, qmeta::CzarId czarId, +void startGarbageCollectInProgress(shared_ptr czarConfig, CzarId czarId, shared_ptr queryMetadata) { // Sanitize a value of the configuration parameters to tolerate a misconfiguration of Czar. chrono::seconds const cleanupInterval = max(czarConfig->getInProgressCleanupIvalSec(), 1U) * 1s; diff --git a/src/czar/CzarThreads.h b/src/czar/CzarThreads.h index 0864304d42..4db846f6e7 100644 --- a/src/czar/CzarThreads.h +++ b/src/czar/CzarThreads.h @@ -25,7 +25,7 @@ #include // Qserv headers -#include "qmeta/types.h" +#include "global/intTypes.h" // Forward declarations @@ -101,7 +101,7 @@ void startGarbageCollectAsync(std::shared_ptr czarConfig); * @param czarId The identifier of the Czar instance. * @param queryMetadata A pointer to the QMeta service. */ -void startGarbageCollectInProgress(std::shared_ptr czarConfig, qmeta::CzarId czarId, +void startGarbageCollectInProgress(std::shared_ptr czarConfig, CzarId czarId, std::shared_ptr queryMetadata); } // namespace lsst::qserv::czar diff --git a/src/czar/HttpCzarQueryModule.cc b/src/czar/HttpCzarQueryModule.cc index 7877b34525..c9c3895c58 100644 --- a/src/czar/HttpCzarQueryModule.cc +++ b/src/czar/HttpCzarQueryModule.cc @@ -247,7 +247,7 @@ json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult // at any point of time after the query has been submitted. If the query is still // executing the thread will block until the query is completed or failed. 
string const messageSelectQuery = - "SELECT chunkId, code, message, severity+0, timeStamp FROM " + submitResult.messageTable; + "SELECT chunkId, code, message, severity, timeStamp FROM " + submitResult.messageTable; sql::SqlResults messageQueryResults; sql::SqlErrorObject messageQueryErr; if (!conn->runQuery(messageSelectQuery, messageQueryResults, messageQueryErr)) { @@ -258,7 +258,7 @@ json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult throw http::Error(context() + __func__, msg); } - // Read thе message table to see if the user query suceeded or failed + // Read thе message table to see if the user query succeeded or failed vector chunkId; vector code; vector message; @@ -274,13 +274,15 @@ json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult throw http::Error(context() + __func__, msg); } string errorMsg; + bool errorFound = false; for (size_t i = 0; i < chunkId.size(); ++i) { - if (stoi(code[i]) > 0) { - errorMsg += "[chunkId=" + chunkId[i] + " code=" + code[i] + " message=" + message[i] + - " severity=" + severity[i] + "], "; + errorMsg += "[chunkId=" + chunkId[i] + " code=" + code[i] + " message=" + message[i] + + " severity=" + severity[i] + "], "; + if (severity[i] == "ERROR") { // MessageSeverity::MSG_ERROR + errorFound = true; } } - if (!errorMsg.empty()) { + if (errorFound) { messageQueryResults.freeResults(); _dropTable(submitResult.messageTable); _dropTable(submitResult.resultTable); @@ -290,7 +292,6 @@ json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult messageQueryResults.freeResults(); _dropTable(submitResult.messageTable); } - // Read a result set from the result table, package it into the JSON object // and sent it back to a user. 
sql::SqlResults resultQueryResults; diff --git a/src/czar/HttpCzarSvc.cc b/src/czar/HttpCzarSvc.cc index 23d36d396b..401f293d12 100644 --- a/src/czar/HttpCzarSvc.cc +++ b/src/czar/HttpCzarSvc.cc @@ -192,4 +192,14 @@ void HttpCzarSvc::_registerHandlers() { }); } +string HttpCzarConfig::dump() const { + stringstream os; + os << "HttpCzarConfig{port=" << port << " numThreads=" << numThreads + << " numWorkerIngestThreads=" << numWorkerIngestThreads << " sslCertFile = ** sslPrivateKeyFile = **" + << " tmpDir=" << tmpDir << " maxQueuedRequests=" << maxQueuedRequests + << " clientConnPoolSize=" << clientConnPoolSize << " numBoostAsioThreads=" << numBoostAsioThreads + << "}"; + return os.str(); +}; + } // namespace lsst::qserv::czar diff --git a/src/czar/HttpCzarSvc.h b/src/czar/HttpCzarSvc.h index 2b984fbdd0..eec28f2542 100644 --- a/src/czar/HttpCzarSvc.h +++ b/src/czar/HttpCzarSvc.h @@ -43,10 +43,6 @@ namespace httplib { class SSLServer; } // namespace httplib -namespace lsst::qserv::wcontrol { -class Foreman; -} // namespace lsst::qserv::wcontrol - // This header declarations namespace lsst::qserv::czar { @@ -64,6 +60,7 @@ struct HttpCzarConfig { std::size_t maxQueuedRequests = 0; ///< 0 implies unlimited std::size_t clientConnPoolSize = 0; ///< 0 implies the default set by libcurl std::size_t numBoostAsioThreads = 0; ///< 0 implies the number of hardware threads + std::string dump() const; }; /** diff --git a/src/czar/HttpCzarWorkerModule.cc b/src/czar/HttpCzarWorkerModule.cc new file mode 100644 index 0000000000..d29a7effa2 --- /dev/null +++ b/src/czar/HttpCzarWorkerModule.cc @@ -0,0 +1,209 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "czar/HttpCzarWorkerModule.h" + +// System headers +#include +#include + +// Qserv headers +#include "cconfig/CzarConfig.h" +#include "czar/Czar.h" +#include "protojson/PwHideJson.h" +#include "protojson/ResponseMsg.h" +#include "protojson/UberJobErrorMsg.h" +#include "protojson/UberJobReadyMsg.h" +#include "protojson/WorkerCzarComIssue.h" +#include "qdisp/Executive.h" +#include "qdisp/UberJob.h" +#include "global/intTypes.h" +#include "http/Exceptions.h" +#include "http/RequestQuery.h" +#include "util/String.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.HttpCzarWorkerModule"); +} + +namespace lsst::qserv::czar { + +void HttpCzarWorkerModule::process(string const& context, shared_ptr const& req, + shared_ptr const& resp, string const& subModuleName, + http::AuthType const authType) { + HttpCzarWorkerModule module(context, req, resp); + module.execute(subModuleName, authType); +} + +HttpCzarWorkerModule::HttpCzarWorkerModule(string const& context, shared_ptr const& req, + shared_ptr const& resp) + : QhttpModule(context, req, resp) {} + +json HttpCzarWorkerModule::executeImpl(string const& subModuleName) { + string const func = string(__func__) + "[sub-module='" + 
subModuleName + "']"; + debug(func); + cconfig::CzarConfig::instance()->replicationInstanceId(); + enforceCzarName(func); + if (subModuleName == "QUERYJOB-ERROR") + return _queryJobError(); + else if (subModuleName == "QUERYJOB-READY") { + return _queryJobReady(); + } else if (subModuleName == "WORKERCZARCOMISSUE") + return _workerCzarComIssue(); + throw invalid_argument(context() + func + " unsupported sub-module"); +} + +json HttpCzarWorkerModule::_queryJobError() { + debug(__func__); + checkApiVersion(__func__, 34); + auto ret = _handleJobError(__func__); + return json::object(); +} + +json HttpCzarWorkerModule::_queryJobReady() { + debug(__func__); + checkApiVersion(__func__, 34); + auto ret = _handleJobReady(__func__); + return ret; +} + +json HttpCzarWorkerModule::_workerCzarComIssue() { + debug(__func__); + checkApiVersion(__func__, 34); + auto ret = _handleWorkerCzarComIssue(__func__); + return ret; +} + +json HttpCzarWorkerModule::_handleJobError(string const& func) { + string const fName("HttpCzarWorkerModule::_handleJobError"); + LOGS(_log, LOG_LVL_DEBUG, fName << " start"); + // Metadata-only responses for the file-based protocol should not have any data + + // Parse and verify the json message and then kill the UberJob. 
+ try { + auto const& jsReq = body().objJson; + auto jrMsg = protojson::UberJobErrorMsg::createFromJson(jsReq); + auto importRes = czar::Czar::getCzar()->handleUberJobErrorMsg(jrMsg, fName); + return importRes->toJson(); + } catch (std::invalid_argument const& iaEx) { + LOGS(_log, LOG_LVL_ERROR, + "HttpCzarWorkerModule::_handleJobError received " + << iaEx.what() << " js=" << protojson::pwHide(body().objJson)); + protojson::ExecutiveRespMsg respMsg(false, false, 0, 0, 0, "parse", iaEx.what()); + return respMsg.toJson(); + } +} + +json HttpCzarWorkerModule::_handleJobReady(string const& func) { + string const fName = "HttpCzarWorkerModule::_handleJobReady"; + LOGS(_log, LOG_LVL_DEBUG, fName << " start"); + // Metadata-only responses for the file-based protocol should not have any data + + // Parse and verify the json message and then have the uberjob import the file. + try { + auto const& jsReq = body().objJson; + auto jrMsg = protojson::UberJobReadyMsg::createFromJson(jsReq); + auto importRes = czar::Czar::getCzar()->handleUberJobReadyMsg(jrMsg, fName); + return importRes->toJson(); + } catch (std::invalid_argument const& iaEx) { + LOGS(_log, LOG_LVL_ERROR, + "HttpCzarWorkerModule::_handleJobReady received " + << iaEx.what() << " js=" << protojson::pwHide(body().objJson)); + protojson::ExecutiveRespMsg respMsg(false, false, 0, 0, 0, "parse", iaEx.what()); + return respMsg.toJson(); + } +} + +json HttpCzarWorkerModule::_handleWorkerCzarComIssue(string const& func) { + string const fName("HttpCzarWorkerModule::_handleWorkerCzarComIssue"); + LOGS(_log, LOG_LVL_DEBUG, fName << " start"); + // Parse and verify the json message and then deal with the problems. 
+ string wId = "unknown"; + try { + protojson::AuthContext const authC(cconfig::CzarConfig::instance()->replicationInstanceId(), + cconfig::CzarConfig::instance()->replicationAuthKey()); + auto const& jsReq = body().objJson; + auto wccIssue = protojson::WorkerCzarComIssue::createFromJson(jsReq, authC); + + wId = wccIssue->getWorkerInfo()->wId; + if (wccIssue->getThoughtCzarWasDeadTime() > 0) { + LOGS(_log, LOG_LVL_WARN, + "HttpCzarWorkerModule::_handleWorkerCzarComIssue worker=" + << wId << " thought czar was dead and killed related uberjobs."); + + // Find all incomplete UberJobs with this workerId and re-assign them. + // Use a copy to avoid mutex issues. + auto execMap = czar::Czar::getCzar()->getExecMapCopy(); + for (auto const& [exKey, execWeak] : execMap) { + auto execPtr = execWeak.lock(); + if (execPtr == nullptr) continue; + execPtr->killIncompleteUberJobsOnWorker(wId); + } + } + + // Responses are sent for all `failedTransmits` in the message. If + // something couldn't be parsed, the response indicates that and + // the UberJob will be abandoned by the worker. If the query + // could finish without the results of that uberjob, it indicates + // that the result file is obsolete. If the this was successful, + // the worker just waits for the czar to collect the file as usual. + // In all cases, the worker will remove the item from its + // `failedTransmits` list so it won't be tried again. + vector execRespMsgs; + auto failedTransmits = wccIssue->takeFailedTransmitsMap(); + for (auto& [key, elem] : *failedTransmits) { + protojson::UberJobStatusMsg::Ptr& statusMsg = elem; + auto rdyMsg = dynamic_pointer_cast(statusMsg); + if (rdyMsg != nullptr) { + // Put the file on a queue to be collected later. + auto exRespMsg = czar::Czar::getCzar()->handleUberJobReadyMsgNoThrow(rdyMsg, fName); + execRespMsgs.push_back(exRespMsg); + } else { + auto errMsg = dynamic_pointer_cast(statusMsg); + // Kill the UberJob or user query depending on the error. 
(Doesn't throw) + auto exRespMsg = czar::Czar::getCzar()->handleUberJobErrorMsg(errMsg, fName); + execRespMsgs.push_back(exRespMsg); + } + } + auto jsRet = wccIssue->responseToJson(wccIssue->getThoughtCzarWasDeadTime(), execRespMsgs); + LOGS(_log, LOG_LVL_TRACE, + "HttpCzarWorkerModule::_handleWorkerCzarComIssue jsRet=" << protojson::pwHide(jsRet)); + return jsRet; + } catch (std::invalid_argument const& iaEx) { + LOGS(_log, LOG_LVL_ERROR, + "HttpCzarWorkerModule::_handleWorkerCzarComIssue received " + << iaEx.what() << " js=" << protojson::pwHide(body().objJson)); + // This is very bad as there's no way to know what is going wrong. Just one of these is surviveable, + // but if it keeps happening, the system is unstable. + Czar::getCzar()->incrCommErrCount("WorkerCzarComIssue", wId, iaEx.what()); + protojson::ResponseMsg respMsg(false, "parse", iaEx.what()); + return respMsg.toJson(); + } +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/HttpCzarWorkerModule.h b/src/czar/HttpCzarWorkerModule.h new file mode 100644 index 0000000000..a6d21536c0 --- /dev/null +++ b/src/czar/HttpCzarWorkerModule.h @@ -0,0 +1,88 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H +#define LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H + +// System headers +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "czar/QhttpModule.h" + +// Forward declarations +namespace lsst::qserv::qhttp { +class Request; +class Response; +} // namespace lsst::qserv::qhttp + +// This header declarations +namespace lsst::qserv::czar { + +/// This class is used to handle messages to this czar from the workers. +class HttpCzarWorkerModule : public QhttpModule { +public: + /// @note supported values for parameter 'subModuleName' are: + /// 'QUERYJOB-ERROR' - error in a QUERYJOB + /// 'QUERYJOB-READY' - + /// @throws std::invalid_argument for unknown values of parameter 'subModuleName' + static void process(std::string const& context, std::shared_ptr const& req, + std::shared_ptr const& resp, std::string const& subModuleName, + http::AuthType const authType = http::AuthType::NONE); + + HttpCzarWorkerModule() = delete; + HttpCzarWorkerModule(HttpCzarWorkerModule const&) = delete; + HttpCzarWorkerModule& operator=(HttpCzarWorkerModule const&) = delete; + + ~HttpCzarWorkerModule() final = default; + +protected: + nlohmann::json executeImpl(std::string const& subModuleName) final; + +private: + HttpCzarWorkerModule(std::string const& context, std::shared_ptr const& req, + std::shared_ptr const& resp); + + /// Called to handle message indicating this czar needs to handle an error on a worker. + nlohmann::json _queryJobError(); + + /// Called to indicate an UberJob is ready with data that needs to be collected. + nlohmann::json _queryJobReady(); + + /// Called to indicate there were problems with the worker trying to reach this czar. + nlohmann::json _workerCzarComIssue(); + + /// Translates the message and calls the Czar to collect the data. + nlohmann::json _handleJobReady(std::string const& func); + + /// Translates the error and calls the Czar to take action. 
+ nlohmann::json _handleJobError(std::string const& func); + + /// Translates the issues and calls the Czar to take action. + nlohmann::json _handleWorkerCzarComIssue(std::string const& func); +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H diff --git a/src/czar/HttpMonitorModule.cc b/src/czar/HttpMonitorModule.cc index fdce70b05d..6a9b37b089 100644 --- a/src/czar/HttpMonitorModule.cc +++ b/src/czar/HttpMonitorModule.cc @@ -53,7 +53,7 @@ HttpMonitorModule::HttpMonitorModule(string const& context, shared_ptrreplicationInstanceId()); enforceCzarName(func); if (subModuleName == "CONFIG") diff --git a/src/czar/HttpSvc.cc b/src/czar/HttpSvc.cc index cddaf17b3f..3d953cdab8 100644 --- a/src/czar/HttpSvc.cc +++ b/src/czar/HttpSvc.cc @@ -28,6 +28,7 @@ // Qserv headers #include "cconfig/CzarConfig.h" #include "czar/HttpMonitorModule.h" +#include "czar/HttpCzarWorkerModule.h" #include "http/MetaModule.h" #include "qhttp/Server.h" @@ -90,6 +91,21 @@ uint16_t HttpSvc::start() { [self](shared_ptr const& req, shared_ptr const& resp) { HttpMonitorModule::process(::serviceName, req, resp, "STATUS"); }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob-error", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpCzarWorkerModule::process(::serviceName, req, resp, "QUERYJOB-ERROR"); + }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob-ready", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpCzarWorkerModule::process(::serviceName, req, resp, "QUERYJOB-READY"); + }}}); + _httpServerPtr->addHandlers( + {{"POST", "/workerczarcomissue", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpCzarWorkerModule::process(::serviceName, req, resp, "WORKERCZARCOMISSUE"); + }}}); _httpServerPtr->start(); // Initialize the I/O context and start the service threads. 
At this point diff --git a/src/czar/HttpSvc.h b/src/czar/HttpSvc.h index 8fd75a25f8..36d346978f 100644 --- a/src/czar/HttpSvc.h +++ b/src/czar/HttpSvc.h @@ -35,10 +35,6 @@ namespace lsst::qserv::qhttp { class Server; } // namespace lsst::qserv::qhttp -namespace lsst::qserv::wcontrol { -class Foreman; -} // namespace lsst::qserv::wcontrol - // This header declarations namespace lsst::qserv::czar { diff --git a/src/czar/MessageTable.cc b/src/czar/MessageTable.cc index 088aac835b..e027190e8e 100644 --- a/src/czar/MessageTable.cc +++ b/src/czar/MessageTable.cc @@ -35,7 +35,7 @@ #include "ccontrol/ConfigMap.h" #include "ccontrol/UserQuery.h" #include "czar/CzarErrors.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" @@ -54,10 +54,6 @@ std::string const createTmpl( std::string const createAndLockTmpl(createTmpl + "; LOCK TABLES %1% WRITE;"); -std::string const writeTmpl( - "INSERT INTO %1% (chunkId, code, message, severity, timeStamp) " - "VALUES (%2%, %3%, '%4$." 
MAX_MESSAGE_LEN "s', '%5%', %6%)"); - // mysql can only unlock all locked tables, // there is no command to unlock single table std::string const unlockTmpl("UNLOCK TABLES"); @@ -95,8 +91,8 @@ void MessageTable::lock() { } // Release lock on message table so that proxy can proceed -void MessageTable::unlock(ccontrol::UserQuery::Ptr const& userQuery) { - _saveQueryMessages(userQuery); +void MessageTable::unlock(ccontrol::UserQuery::Ptr const& userQuery, bool querySuccess) { + _saveQueryMessages(userQuery, querySuccess); sql::SqlErrorObject sqlErr; LOGS(_log, LOG_LVL_DEBUG, "unlocking message table " << _tableName); @@ -108,36 +104,42 @@ void MessageTable::unlock(ccontrol::UserQuery::Ptr const& userQuery) { } // store all messages from current session to the table -void MessageTable::_saveQueryMessages(ccontrol::UserQuery::Ptr const& userQuery) { +void MessageTable::_saveQueryMessages(ccontrol::UserQuery::Ptr const& userQuery, bool querySuccess) { if (not userQuery) { return; } - auto msgStore = userQuery->getMessageStore(); int completeCount = 0; int cancelCount = 0; std::string multiErrStr = ""; - std::string severity = "INFO"; + std::string severity = (querySuccess) ? "INFO" : "ERROR"; // Collect information about the query and put it in the message table. 
- int msgCount = msgStore->messageCount(); - for (int i = 0; i != msgCount; ++i) { - const qdisp::QueryMessage& qm = msgStore->getMessage(i); - std::string src = qm.msgSource; - if (src == "COMPLETE") { - ++completeCount; - } else if (src == "CANCEL") { - ++cancelCount; - } else if (src == "MULTIERROR") { - multiErrStr += qm.description + "\n"; - severity = "ERROR"; + { + auto msgStore = userQuery->getMessageStore(); + if (msgStore != nullptr) { + int msgCount = msgStore->messageCount(); + for (int i = 0; i != msgCount; ++i) { + const qmeta::QueryMessage& qm = msgStore->getMessage(i); + std::string src = qm.msgSource; + if (src == "COMPLETE") { + ++completeCount; + } else if (src == "CANCEL") { + ++cancelCount; + } else if (src == "MULTIERROR") { + multiErrStr += qm.description + "\n"; + } + } } } std::string cMsg("Completed chunks="); cMsg += std::to_string(completeCount) + " cancelled chunks=" + std::to_string(cancelCount) + "\n"; cMsg += multiErrStr; LOGS(_log, LOG_LVL_DEBUG, " MULTIERROR:" << cMsg); - std::string summaryQ = (boost::format(::writeTmpl) % _tableName % "-1" % "-1" % + std::string const writeTmpl( + "INSERT INTO %1% (chunkId, code, message, severity, timeStamp) " + "VALUES (%2%, %3%, '%4$." MAX_MESSAGE_LEN "s', '%5%', %6%)"); + std::string summaryQ = (boost::format(writeTmpl) % _tableName % "-1" % "-1" % _sqlConn->escapeString(cMsg) % severity % std::time(nullptr)) .str(); sql::SqlErrorObject sqlE; diff --git a/src/czar/MessageTable.h b/src/czar/MessageTable.h index 089b916721..07a109f064 100644 --- a/src/czar/MessageTable.h +++ b/src/czar/MessageTable.h @@ -59,13 +59,17 @@ class MessageTable { /// Create and lock the table void lock(); - /// Release lock on message table so that proxy can proceed - void unlock(ccontrol::UserQuery::Ptr const& userQuery); + /// Release lock on message table so that proxy can proceed. This will also + /// try to store messages in the message table before unlocking, + /// see _saveQueryMessages. 
+ void unlock(ccontrol::UserQuery::Ptr const& userQuery, bool querySuccess); protected: private: - /// store all messages from current session to the table - void _saveQueryMessages(ccontrol::UserQuery::Ptr const& userQuery); + /// This will try to store messages in the message table. + /// If `querySuccess` is false, at least one message with severity ERROR will be + /// added to the table. + void _saveQueryMessages(ccontrol::UserQuery::Ptr const& userQuery, bool querySuccess); std::string const _tableName; std::shared_ptr _sqlConn; diff --git a/src/czar/SubmitResult.h b/src/czar/SubmitResult.h index 5db5760d18..6f62f11c74 100644 --- a/src/czar/SubmitResult.h +++ b/src/czar/SubmitResult.h @@ -28,7 +28,6 @@ // Qserv headers #include "global/intTypes.h" -#include "qmeta/types.h" namespace lsst::qserv::czar { @@ -47,7 +46,7 @@ struct SubmitResult { // Populated by Czar::getQueryInfo only for queries which are still in flight std::string status; ///< 'EXECUTING','COMPLETED','FAILED','FAILED_LR','ABORTED' - qmeta::CzarId czarId = 0; ///< The identifier of the czar which is processing the query + CzarId czarId = 0; ///< The identifier of the czar which is processing the query std::string czarType; ///< The type of the czar which is processing the query int totalChunks = 0; ///< The total number of chunks required by the query int completedChunks = 0; ///< The number of chunks that have been processed so far diff --git a/src/czar/qserv-czar-http.cc b/src/czar/qserv-czar-http.cc index cc24095eae..e61bdfffd2 100644 --- a/src/czar/qserv-czar-http.cc +++ b/src/czar/qserv-czar-http.cc @@ -33,6 +33,9 @@ // Third party headers #include "boost/program_options.hpp" +// LSST headers +#include "lsst/log/Log.h" + // Qserv headers #include "cconfig/CzarConfig.h" #include "czar/Czar.h" @@ -47,6 +50,7 @@ namespace qserv = lsst::qserv; namespace { char const* const help = "The HTTP-based Czar frontend."; char const* const context = "[CZAR-HTTP-FRONTEND]"; +LOG_LOGGER _log = 
LOG_GET("lsst.qserv.czar.czarhttp"); } // namespace int main(int argc, char* argv[]) { @@ -57,7 +61,7 @@ int main(int argc, char* argv[]) { desc.add_options()("verbose,v", "Produce verbose output."); desc.add_options()("czar-name", po::value()->default_value("http"), "The name of this Czar frontend. Assign a unique name to each Czar."); - desc.add_options()("config", po::value()->default_value("/config-etc/qserv-czar.cnf"), + desc.add_options()("config", po::value()->default_value("/config-etc/qserv-czar.cfg"), "The configuration file."); desc.add_options()("user", po::value()->default_value(""), "The login name of a user for connecting to the frontend."); @@ -131,21 +135,25 @@ int main(int argc, char* argv[]) { return 0; } bool const verbose = vm.count("verbose") > 0; + + std::stringstream os; + os << ::context << " Czar name: " << czarName << "\n" + << ::context << " Configuration file: " << configFilePath << "\n" + << ::context << " Port: " << httpCzarConfig.port << "\n" + << ::context << " Number of threads: " << httpCzarConfig.numThreads << "\n" + << ::context << " Number of worker ingest threads: " << httpCzarConfig.numWorkerIngestThreads << "\n" + << ::context << " SSL certificate file: " << httpCzarConfig.sslCertFile << "\n" + << ::context << " SSL private key file: " << httpCzarConfig.sslPrivateKeyFile << "\n" + << ::context << " Temporary directory: " << httpCzarConfig.tmpDir << "\n" + << ::context << " Max.number of queued requests: " << httpCzarConfig.maxQueuedRequests << "\n" + << ::context << " Connection pool size (libcurl): " << httpCzarConfig.clientConnPoolSize << "\n" + << ::context << " Number of BOOST ASIO threads: " << httpCzarConfig.numBoostAsioThreads << "\n" + << ::context << " HTTP user: " << httpUser << "\n" + << ::context << " HTTP password: ******" << endl; + LOGS(_log, LOG_LVL_INFO, " czar-http startup " << os.str()); + if (verbose) { - cout << ::context << " Czar name: " << czarName << "\n" - << ::context << " Configuration file: " << 
configFilePath << "\n" - << ::context << " Port: " << httpCzarConfig.port << "\n" - << ::context << " Number of threads: " << httpCzarConfig.numThreads << "\n" - << ::context << " Number of worker ingest threads: " << httpCzarConfig.numWorkerIngestThreads - << "\n" - << ::context << " SSL certificate file: " << httpCzarConfig.sslCertFile << "\n" - << ::context << " SSL private key file: " << httpCzarConfig.sslPrivateKeyFile << "\n" - << ::context << " Temporary directory: " << httpCzarConfig.tmpDir << "\n" - << ::context << " Max.number of queued requests: " << httpCzarConfig.maxQueuedRequests << "\n" - << ::context << " Connection pool size (libcurl): " << httpCzarConfig.clientConnPoolSize << "\n" - << ::context << " Number of BOOST ASIO threads: " << httpCzarConfig.numBoostAsioThreads << "\n" - << ::context << " HTTP user: " << httpUser << "\n" - << ::context << " HTTP password: ******" << endl; + cout << os.str(); } try { auto const czar = czar::Czar::createCzar(configFilePath, czarName); diff --git a/src/czar/testCzar.cc b/src/czar/testCzar.cc new file mode 100644 index 0000000000..0dc612266e --- /dev/null +++ b/src/czar/testCzar.cc @@ -0,0 +1,205 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// System headers +#include +#include +#include + +// Third-party headers +#include "boost/asio.hpp" +#include "nlohmann/json.hpp" + +// Boost unit test header +#define BOOST_TEST_MODULE Czar_1 +#include + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "qmeta/QMeta.h" +#include "czar/CzarFamilyMap.h" + +namespace test = boost::test_tools; +using namespace lsst::qserv; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.testCzar"); +} + +using namespace std; + +BOOST_AUTO_TEST_SUITE(Suite) + +void insertIntoQChunkMap(qmeta::QMetaChunkMap& qChunkMap, string const& workerId, string const& dbName, + string const& tableName, unsigned int chunkNum, size_t sz) { + qChunkMap.workers[workerId][dbName][tableName].push_back(qmeta::QMetaChunkMap::ChunkInfo{chunkNum, sz}); +} + +qmeta::QMetaChunkMap convertJsonToChunkMap(nlohmann::json const& jsChunks) { + qmeta::QMetaChunkMap qChunkMap; + for (auto const& [workerId, dbs] : jsChunks.items()) { + for (auto const& [dbName, tables] : dbs.items()) { + for (auto const& [tableName, chunks] : tables.items()) { + for (auto const& [index, chunkNumNSz] : chunks.items()) { + try { + int64_t chunkNum = chunkNumNSz.at(0); + int64_t sz = chunkNumNSz.at(1); + LOGS(_log, LOG_LVL_DEBUG, + "workerdId=" << workerId << " db=" << dbName << " table=" << tableName + << " chunk=" << chunkNum << " sz=" << sz); + insertIntoQChunkMap(qChunkMap, workerId, dbName, tableName, chunkNum, sz); + } catch (invalid_argument const& exc) { + throw czar::ChunkMapException( + ERR_LOC, string(__func__) + " invalid_argument workerdId=" + workerId + + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkNumNSz) + " " + exc.what()); + } catch (out_of_range const& exc) { + throw czar::ChunkMapException( + ERR_LOC, string(__func__) + " out_of_range workerdId=" + workerId + + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkNumNSz) + " " + exc.what()); + } + } + } + } + } + return qChunkMap; 
+} + +BOOST_AUTO_TEST_CASE(CzarChunkMap) { + // Each chunk only occurs on one worker + string test1 = R"( + { + "ce1c1b79-e6fb-11ee-a46b-0242c0a80308": + {"qcase01": + {"Object":[[1234567890,0],[6630,1460],[6800,6068],[6968,1000],[6971,2716],[7140,4556],[7310,2144],[7648,1568]], + "Source":[[1234567890,0],[6630,37084],[6800,163888],[6968,33044],[6971,67016],[7140,145300],[7310,83872],[7648,30096]] + }, + "qcase02": + {"Object":[[1234567890,0],[7310,0]], + "Source":[[1234567890,0],[7310,0]] + }, + "qcase03": + {"RefDeepSrcMatch":[[1234567890,0],[7165,76356]], + "RefObject":[[1234567890,0],[7165,119616]], + "RunDeepForcedSource":[[1234567890,0],[7165,130617531]], + "RunDeepSource":[[1234567890,0],[7165,578396]] + } + }, + "ddc3f1b9-e6fb-11ee-a46b-0242c0a80304": + {"qcase01": + {"Object":[[1234567890,0],[6631,1612],[6801,4752],[6970,5780],[7138,3212],[7308,2144],[7478,4608]], + "Source":[[1234567890,0],[6631,45724],[6801,123940],[6970,151660],[7138,97252],[7308,56784],[7478,99304]] + }, + "qcase02": + {"Object":[[1234567890,0],[7480,1055000]], + "Source":[[1234567890,0],[7480,2259419]] + }, + "qcase03": + {"RefDeepSrcMatch":[[1234567890,0],[6995,7728]], + "RefObject":[[1234567890,0],[6995,10920]], + "RunDeepForcedSource":[[1234567890,0],[6995,11708834]], + "RunDeepSource":[[1234567890,0],[6995,58604]] + } + } + } + )"; + + /// 3 workers, each containing all chunks. 
+ string test2 = R"( + { + "ce1c1b79-e6fb-11ee-a46b-0242c0a80308": + {"qcase01": + {"Object":[[1234567890,0],[6631,1612],[6801,4752],[6970,5780],[7138,3212],[7308,2144],[7478,4608], + [6630,1460],[6800,6068],[6968,1000],[6971,2716],[7140,4556],[7310,2144],[7648,1568]], + "Source":[[1234567890,0],[6631,45724],[6801,123940],[6970,151660],[7138,97252],[7308,56784],[7478,99304], + [6630,37084],[6800,163888],[6968,33044],[6971,67016],[7140,145300],[7310,83872],[7648,30096]] + }, + "qcase02": + {"Object":[[1234567890,0],[7480,1055000],[7310,0]], + "Source":[[1234567890,0],[7480,2259419],[7310,0]] + }, + "qcase03": + {"RefDeepSrcMatch":[[1234567890,0],[6995,7728],[7165,76356]], + "RefObject":[[1234567890,0],[6995,10920],[7165,119616]], + "RunDeepForcedSource":[[1234567890,0],[6995,11708834],[7165,130617531]], + "RunDeepSource":[[1234567890,0],[6995,58604],[7165,578396]] + } + }, + "brnd1b79-e6fb-11ee-a46b-0242c0a80308": + {"qcase01": + {"Object":[[1234567890,0],[6631,1612],[6801,4752],[6970,5780],[7138,3212],[7308,2144],[7478,4608], + [6630,1460],[6800,6068],[6968,1000],[6971,2716],[7140,4556],[7310,2144],[7648,1568]], + "Source":[[1234567890,0],[6631,45724],[6801,123940],[6970,151660],[7138,97252],[7308,56784],[7478,99304], + [6630,37084],[6800,163888],[6968,33044],[6971,67016],[7140,145300],[7310,83872],[7648,30096]] + }, + "qcase02": + {"Object":[[1234567890,0],[7480,1055000],[7310,0]], + "Source":[[1234567890,0],[7480,2259419],[7310,0]] + }, + "qcase03": + {"RefDeepSrcMatch":[[1234567890,0],[6995,7728],[7165,76356]], + "RefObject":[[1234567890,0],[6995,10920],[7165,119616]], + "RunDeepForcedSource":[[1234567890,0],[6995,11708834],[7165,130617531]], + "RunDeepSource":[[1234567890,0],[6995,58604],[7165,578396]] + } + }, + "ddc3f1b9-e6fb-11ee-a46b-0242c0a80304": + {"qcase01": + {"Object":[[1234567890,0],[6631,1612],[6801,4752],[6970,5780],[7138,3212],[7308,2144],[7478,4608], + [6630,1460],[6800,6068],[6968,1000],[6971,2716],[7140,4556],[7310,2144],[7648,1568]], + 
"Source":[[1234567890,0],[6631,45724],[6801,123940],[6970,151660],[7138,97252],[7308,56784],[7478,99304], + [6630,37084],[6800,163888],[6968,33044],[6971,67016],[7140,145300],[7310,83872],[7648,30096]] + }, + "qcase02": + {"Object":[[1234567890,0],[7480,1055000],[7310,0]], + "Source":[[1234567890,0],[7480,2259419],[7310,0]] + }, + "qcase03": + {"RefDeepSrcMatch":[[1234567890,0],[6995,7728],[7165,76356]], + "RefObject":[[1234567890,0],[6995,10920],[7165,119616]], + "RunDeepForcedSource":[[1234567890,0],[6995,11708834],[7165,130617531]], + "RunDeepSource":[[1234567890,0],[6995,58604],[7165,578396]] + } + } + } + )"; + + auto dbToFamily = make_shared(); + czar::CzarFamilyMap czFamMap(dbToFamily); + + auto jsTest1 = nlohmann::json::parse(test1); + qmeta::QMetaChunkMap qChunkMap1 = convertJsonToChunkMap(jsTest1); + auto familyMap = czFamMap.makeNewMaps(qChunkMap1, true); + czar::CzarFamilyMap::verify(familyMap); // Throws on failure. + LOGS(_log, LOG_LVL_DEBUG, "CzarFamilyMap test 1 passed"); + + auto jsTest2 = nlohmann::json::parse(test2); + qmeta::QMetaChunkMap qChunkMap2 = convertJsonToChunkMap(jsTest2); + auto familyMap2 = czFamMap.makeNewMaps(qChunkMap2, true); + czar::CzarFamilyMap::verify(familyMap2); // Throws on failure. 
+ LOGS(_log, LOG_LVL_DEBUG, "CzarFamilyMap test 2 passed"); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/global/CMakeLists.txt b/src/global/CMakeLists.txt index f0bf7ad498..088e72f6fa 100644 --- a/src/global/CMakeLists.txt +++ b/src/global/CMakeLists.txt @@ -6,15 +6,14 @@ target_sources(global PRIVATE ResourceUnit.cc sqltoken.cc stringUtil.cc + UberJobBase.cc ) target_link_libraries(global PUBLIC log ) -install( - TARGETS global -) +install(TARGETS global) FUNCTION(global_tests) FOREACH(TEST IN ITEMS ${ARGV}) @@ -28,6 +27,6 @@ FUNCTION(global_tests) ENDFUNCTION() global_tests( - testResourceUnit testStringUtil ) + diff --git a/src/global/ResourceUnit.cc b/src/global/ResourceUnit.cc index 64144b8436..3bbe5372bd 100644 --- a/src/global/ResourceUnit.cc +++ b/src/global/ResourceUnit.cc @@ -31,43 +31,6 @@ namespace lsst::qserv { -////////////////////////////////////////////////////////////////////// -// lsst::qserv::ResourceUnit::Tokenizer -// A simple class to tokenize paths. -////////////////////////////////////////////////////////////////////// -class ResourceUnit::Tokenizer { -public: - Tokenizer(std::string const& s, char sep = '/') : _cursor(0), _next(0), _s(s), _sep(sep) { _seek(); } - - std::string token() { return _s.substr(_cursor, _next - _cursor); } - - int tokenAsInt() { - int num; - std::istringstream csm(token()); - csm >> num; - return num; - } - - void next() { - assert(!done()); - _cursor = _next + 1; - _seek(); - } - - bool done() { return _next == std::string::npos; } - -private: - void _seek() { _next = _s.find_first_of(_sep, _cursor); } - - std::string::size_type _cursor; - std::string::size_type _next; - std::string const _s; - char const _sep; -}; - -////////////////////////////////////////////////////////////////////// -ResourceUnit::ResourceUnit(std::string const& path) : _unitType(GARBAGE), _chunk(-1) { _setFromPath(path); } - std::string ResourceUnit::path() const { std::stringstream ss; ss << _pathSep << prefix(_unitType); @@ -90,14 
+53,6 @@ std::string ResourceUnit::path() const { return ss.str(); } -std::string ResourceUnit::var(std::string const& key) const { - VarMap::const_iterator ci = _vars.find(key); - if (ci != _vars.end()) { - return ci->second; - } - return std::string(); -} - std::string ResourceUnit::prefix(UnitType const& r) { switch (r) { case DBCHUNK: @@ -122,88 +77,6 @@ void ResourceUnit::setAsDbChunk(std::string const& db, int chunk) { _chunk = chunk; } -bool ResourceUnit::_markGarbageIfDone(Tokenizer& t) { - if (t.done()) { - _unitType = GARBAGE; - return true; - } - return false; -} - -void ResourceUnit::_setFromPath(std::string const& path) { - std::string rTypeString; - Tokenizer t(path, _pathSep); - if (!t.token().empty()) { // Expect leading separator (should start with /) - _unitType = UNKNOWN; - return; - } - if (_markGarbageIfDone(t)) { - return; - } // Consider using GOTO structure. - t.next(); - rTypeString = t.token(); - if (rTypeString == prefix(DBCHUNK)) { - // XrdSsi query - if (_markGarbageIfDone(t)) { - return; - } - _unitType = DBCHUNK; - t.next(); - _db = t.token(); - if (_db.empty()) { - _unitType = GARBAGE; - return; - } - if (_markGarbageIfDone(t)) { - return; - } - t.next(); - if (t.token().empty()) { - _unitType = GARBAGE; - return; - } - _chunk = t.tokenAsInt(); - _ingestLeafAndKeys(t.token()); - } else if (rTypeString == prefix(QUERY)) { - _unitType = QUERY; - if (!t.done()) { - _unitType = GARBAGE; - return; - } - } else { - _unitType = GARBAGE; - } -} - -/// Ingest key-value pairs from a string including the last portion of the path, -/// e.g., somenumber?key1=val1&key2=val2 -void ResourceUnit::_ingestLeafAndKeys(std::string const& leafPlusKeys) { - std::string::size_type start; - start = leafPlusKeys.find_first_of(_varSep, 0); - _vars.clear(); - - if (start == std::string::npos) { // No keys found - return; - } - ++start; - Tokenizer t(leafPlusKeys.substr(start), _varDelim); - for (std::string defn = t.token(); !defn.empty(); t.next()) { - 
_ingestKeyStr(defn); - } -} - -/// Ingest key-value pairs from a packed key-value representation. -/// e.g., key1=val1&key2=val2 -void ResourceUnit::_ingestKeyStr(std::string const& keyStr) { - std::string::size_type equalsPos; - equalsPos = keyStr.find_first_of('='); - if (equalsPos == std::string::npos) { // No = clause, value-less key. - _vars[keyStr] = std::string(); // empty insert. - } else { - _vars[keyStr.substr(0, equalsPos)] = keyStr.substr(equalsPos + 1); - } -} - std::ostream& operator<<(std::ostream& os, ResourceUnit const& ru) { return os << "Resource(" << ru.path() << ")"; } diff --git a/src/global/ResourceUnit.h b/src/global/ResourceUnit.h index ad4a1ef0be..50cd69b0e9 100644 --- a/src/global/ResourceUnit.h +++ b/src/global/ResourceUnit.h @@ -33,22 +33,13 @@ namespace lsst::qserv { -/// ResourceUnit contains a name for an XrdSsi-resolvable resource unit. -//// -/// Not sure this belongs in global, but czar, worker both need it. -/// Other components may as well. -//// -/// Note that while key-value specifiers are parsed from the path string at -/// construction, the code for generating a path that includes the key-value -/// portion is not implemented. It is unclear whether we need the generation -/// capability, now that key-value pairs can be packed in protobufs messages. +/// This class is used to store the database and chunk id of a resource. class ResourceUnit { public: class Checker; enum UnitType { GARBAGE, DBCHUNK, UNKNOWN, QUERY }; ResourceUnit() = default; - explicit ResourceUnit(std::string const& path); ResourceUnit(ResourceUnit const&) = default; ResourceUnit& operator=(ResourceUnit const&) = default; ~ResourceUnit() = default; @@ -62,9 +53,6 @@ class ResourceUnit { std::string const& db() const { return _db; } int chunk() const { return _chunk; } - /// Lookup extended path variables (?k=val syntax) - std::string var(std::string const& key) const; - /// @return the path prefix element for a given request type. 
static std::string prefix(UnitType const& r); @@ -75,32 +63,15 @@ class ResourceUnit { void setAsDbChunk(std::string const& db, int chunk = DUMMY_CHUNK); private: - class Tokenizer; - void _setFromPath(std::string const& path); - void _ingestLeafAndKeys(std::string const& leafPlusKeys); - void _ingestKeyStr(std::string const& keyStr); - bool _markGarbageIfDone(Tokenizer& t); - UnitType _unitType = UnitType::GARBAGE; //< Type of unit std::string _db; //< for DBCHUNK type int _chunk = -1; //< for DBCHUNK type - typedef std::map VarMap; - VarMap _vars; //< Key-value specifiers - static char const _pathSep = '/'; - static char const _varSep = '?'; - static char const _varDelim = '&'; friend std::ostream& operator<<(std::ostream& os, ResourceUnit const& ru); }; -class ResourceUnit::Checker { -public: - virtual ~Checker() {} - virtual bool operator()(ResourceUnit const& ru) = 0; -}; - } // namespace lsst::qserv #endif // LSST_QSERV_RESOURCEUNIT_H diff --git a/src/proto/ProtoHeaderWrap.h b/src/global/UberJobBase.cc similarity index 58% rename from src/proto/ProtoHeaderWrap.h rename to src/global/UberJobBase.cc index d93624b7a2..c9d4f51e6f 100644 --- a/src/proto/ProtoHeaderWrap.h +++ b/src/global/UberJobBase.cc @@ -1,7 +1,5 @@ -// -*- LSST-C++ -*- /* * LSST Data Management System - * Copyright 2015-2016 LSST Corporation. * * This product includes software developed by the * LSST Project (http://www.lsst.org/). @@ -13,7 +11,8 @@ * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the LSST License Statement and @@ -21,27 +20,33 @@ * see . 
*/ -#ifndef LSST_QSERV_PROTO_PROTO_HEADER_WRAP_H -#define LSST_QSERV_PROTO_PROTO_HEADER_WRAP_H -/** - * @file - * - * @brief Wrap the google protocol header in a fixed size container. - * - * @author John Gates, SLAC - */ +// Class header +#include "global/UberJobBase.h" // System headers -#include +#include + +// Third-party headers + +// Qserv headers + +// LSST headers + +using namespace std; + +namespace lsst::qserv { -namespace lsst::qserv::proto { +std::ostream& UberJobBase::dump(std::ostream& os) const { + os << _idStr; + return os; +} -class ProtoHeaderWrap { -public: - static const size_t PROTOBUFFER_HARD_LIMIT; - static const size_t PROTOBUFFER_DESIRED_LIMIT; -}; +std::string UberJobBase::dump() const { + std::ostringstream os; + dump(os); + return os.str(); +} -} // namespace lsst::qserv::proto +std::ostream& operator<<(std::ostream& os, UberJobBase const& uj) { return uj.dump(os); } -#endif +} // namespace lsst::qserv diff --git a/src/global/UberJobBase.h b/src/global/UberJobBase.h new file mode 100644 index 0000000000..7436e8aadb --- /dev/null +++ b/src/global/UberJobBase.h @@ -0,0 +1,74 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_GLOBAL_UBERJOBBASE_H +#define LSST_QSERV_GLOBAL_UBERJOBBASE_H + +// System headers +#include +#include + +// Qserv headers +#include "global/intTypes.h" + +namespace lsst::qserv { + +/// Base class for UberJobs. Expected children are +/// qdisp::UberJob - track and expedite an UberJob for qdisp::Executive on a czar. +/// wbase::UberJobData - track Task objects being run for an UberJob on the worker. +class UberJobBase : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + + UberJobBase() = delete; + UberJobBase(UberJobBase const&) = delete; + UberJobBase& operator=(UberJobBase const&) = delete; + + virtual ~UberJobBase() = default; + + virtual std::string cName(const char* funcN) const { + return std::string("UberJobBase::") + funcN + " " + getIdStr(); + } + + QueryId getQueryId() const { return _queryId; } + UberJobId getUjId() const { return _uberJobId; } + CzarId getCzarId() const { return _czarId; } + std::string const& getIdStr() const { return _idStr; } + + virtual std::ostream& dump(std::ostream& os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream& os, UberJobBase const& uj); + +protected: + UberJobBase(QueryId queryId_, UberJobId uberJobId_, CzarId czarId_) + : _queryId(queryId_), + _uberJobId(uberJobId_), + _czarId(czarId_), + _idStr("QID=" + std::to_string(queryId_) + "_ujId=" + std::to_string(uberJobId_)) {} + + QueryId const _queryId; + UberJobId const _uberJobId; + CzarId const _czarId; ///< At some point in the future, changing czarId may be possible. 
+ std::string const _idStr; +}; + +} // namespace lsst::qserv + +#endif // LSST_QSERV_GLOBAL_UBERJOBBASE_H diff --git a/src/global/clock_defs.h b/src/global/clock_defs.h index d582b416f8..25d3b08bf8 100644 --- a/src/global/clock_defs.h +++ b/src/global/clock_defs.h @@ -23,6 +23,7 @@ #define LSST_QSERV_GLOBAL_CLOCKDEFS_H // System headers +#include #include #include #include @@ -36,6 +37,10 @@ namespace lsst::qserv { using CLOCK = std::chrono::system_clock; using TIMEPOINT = std::chrono::time_point; +inline uint64_t millisecSinceEpoch(TIMEPOINT tm) { + return std::chrono::duration_cast(tm.time_since_epoch()).count(); +} + /// RAII class to help track a changing sum through a begin and end time. template class TimeCountTracker { diff --git a/src/global/constants.h b/src/global/constants.h index 77ed416e1e..dcbb9d1f26 100644 --- a/src/global/constants.h +++ b/src/global/constants.h @@ -48,15 +48,6 @@ const char SUBCHUNK_TAG[] = "%S\007S%"; /// when generating concrete query text from a template. const char CHUNK_TAG[] = "%C\007C%"; -/** - * The absolute maximum number of job attempts. The number - * of attempts before cancelling a query can (and probably should) - * be smaller than this. - * This is value is used for encoding jobId and attemptCount. - * For readability values should be 10, 100, 1000, etc. - */ -const int MAX_JOB_ATTEMPTS = 100; - /// Used for undefined variable which should contain positive integer const int NOTSET = -1; diff --git a/src/global/intTypes.h b/src/global/intTypes.h index 4182544f7f..582dd54ae7 100644 --- a/src/global/intTypes.h +++ b/src/global/intTypes.h @@ -37,6 +37,9 @@ typedef std::vector Int32Vector; /// Typedef for Query ID in query metadata. 
typedef std::uint64_t QueryId; +typedef std::int64_t JobId; +typedef std::int64_t UberJobId; +typedef std::uint32_t CzarId; /// Class to provide a consistent format for QueryIds in the log file class QueryIdHelper { @@ -45,15 +48,15 @@ class QueryIdHelper { /// @parameter qid - query id number. /// @parameter invalid - true, qid is not a valid user query id. static std::string makeIdStr(QueryId qid, bool invalid = false) { - if (invalid) return "QI=?:"; - return "QI=" + std::to_string(qid) + ":"; + if (invalid) return "QID=?:"; + return "QID=" + std::to_string(qid) + ":"; } /// Returns a standardized user query id string with jobId. /// @parameter qid - query id number. /// @parameter jobId - the job id number. /// @parameter invalid - true, qid is not a valid user query id. - static std::string makeIdStr(QueryId qid, int jobId, bool invalid = false) { + static std::string makeIdStr(QueryId qid, JobId jobId, bool invalid = false) { if (invalid) return makeIdStr(qid, true) + "?;"; return makeIdStr(qid) + std::to_string(jobId) + ";"; } diff --git a/src/global/testResourceUnit.cc b/src/global/testResourceUnit.cc deleted file mode 100644 index dfde0e3c23..0000000000 --- a/src/global/testResourceUnit.cc +++ /dev/null @@ -1,91 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/// testResourceUnit - -// Third-party headers - -// Qserv headers -#include "global/ResourceUnit.h" - -// Boost unit test header -#define BOOST_TEST_MODULE ResourceUnit_1 -#include - -namespace test = boost::test_tools; -using lsst::qserv::ResourceUnit; - -struct Fixture { - Fixture() : dummy(0) {} - - int dummy; - ~Fixture(void) {}; -}; -int const MAGIC_SIZE = 80; - -BOOST_FIXTURE_TEST_SUITE(Suite, Fixture) - -BOOST_AUTO_TEST_CASE(Garbage) { - char p[][MAGIC_SIZE] = {// Convert to std vector list init when available - // Missing chunk number - "/chk/qcase01", "/chk/abc/", - // Bad resource type - "/chk2/abc", "/abc/", "/abc/chk/g", - // Missing/bad params - "/q", "/q/", "/q/Hello", "/result", "/result/"}; - int const pSize = 10; - for (auto i = p, e = p + pSize; i != e; ++i) { - ResourceUnit r(*i); - BOOST_CHECK_MESSAGE(r.unitType() == ResourceUnit::GARBAGE, std::string("Expected garbage: ") + *i); - } -} - -BOOST_AUTO_TEST_CASE(DbChunk) { - char p[][MAGIC_SIZE] = { - "/chk/qcase01/123", - "/chk/abc/456", - }; - int const pSize = 2; - std::vector r; - for (auto i = p, e = p + pSize; i != e; ++i) { - r.push_back(ResourceUnit(*i)); - BOOST_CHECK_EQUAL(r.back().unitType(), ResourceUnit::DBCHUNK); - } - BOOST_CHECK_EQUAL(r[0].db(), "qcase01"); - BOOST_CHECK_EQUAL(r[1].db(), "abc"); - BOOST_CHECK_EQUAL(r[0].chunk(), 123); - BOOST_CHECK_EQUAL(r[1].chunk(), 456); - - r[0].setAsDbChunk("foo", 1111); - r[1].setAsDbChunk("bar", 968); - BOOST_CHECK_EQUAL(r[0].path(), "/chk/foo/1111"); - BOOST_CHECK_EQUAL(r[1].path(), "/chk/bar/968"); -} - -BOOST_AUTO_TEST_CASE(Query) { - ResourceUnit const res1("/query"); - BOOST_CHECK_EQUAL(res1.unitType(), ResourceUnit::QUERY); - ResourceUnit const res2("/query/abc"); - BOOST_CHECK_EQUAL(res2.unitType(), ResourceUnit::GARBAGE); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git 
a/src/http/BaseModule.cc b/src/http/BaseModule.cc index 93d15f6aeb..ddf00a107a 100644 --- a/src/http/BaseModule.cc +++ b/src/http/BaseModule.cc @@ -92,13 +92,15 @@ void BaseModule::checkApiVersion(string const& func, unsigned int minVersion, st void BaseModule::enforceInstanceId(string const& func, string const& requiredInstanceId) const { string const instanceId = method() == "GET" ? query().requiredString("instance_id") : body().required("instance_id"); - debug(func, "instance_id: " + instanceId); + trace(func, "instance_id: " + instanceId); if (instanceId != requiredInstanceId) { throw invalid_argument(context() + func + " Qserv instance identifier mismatch. Client sent '" + instanceId + "' instead of '" + requiredInstanceId + "'."); } } +void BaseModule::trace(string const& msg) const { LOGS(_log, LOG_LVL_TRACE, context() << msg); } + void BaseModule::info(string const& msg) const { LOGS(_log, LOG_LVL_INFO, context() << msg); } void BaseModule::debug(string const& msg) const { LOGS(_log, LOG_LVL_DEBUG, context() << msg); } diff --git a/src/http/BaseModule.h b/src/http/BaseModule.h index 1e615fcbbe..f6f295c9d7 100644 --- a/src/http/BaseModule.h +++ b/src/http/BaseModule.h @@ -113,6 +113,10 @@ class BaseModule { RequestBodyJSON& body() { return _body; } // Message loggers for the corresponding log levels + // TODO:DM-52998 all of these defeat the purpose of using macros, warn and error are probably + // fine to keep as they should rarely be called. 
+ void trace(std::string const& msg) const; + void trace(std::string const& context, std::string const& msg) const { trace(context + " " + msg); } void info(std::string const& msg) const; void info(std::string const& context, std::string const& msg) const { info(context + " " + msg); } @@ -215,6 +219,8 @@ class BaseModule { */ void sendData(nlohmann::json& result); + AuthContext getAuthContext() const { return _authContext; } + private: /** * Check the authorization keys provided in the request's body against the expected values. diff --git a/src/http/Client.cc b/src/http/Client.cc index 42401a2cc3..253ff3801c 100644 --- a/src/http/Client.cc +++ b/src/http/Client.cc @@ -145,7 +145,6 @@ void Client::read(CallbackType const& onDataRead) { } _curlEasyErrorChecked("curl_easy_setopt(CURLOPT_HTTPHEADER)", curl_easy_setopt(_hcurl, CURLOPT_HTTPHEADER, _hlist)); - _curlEasyErrorChecked("curl_easy_setopt(CURLOPT_FAILONERROR)", curl_easy_setopt(_hcurl, CURLOPT_FAILONERROR, 1L)); _curlEasyErrorChecked("curl_easy_setopt(CURLOPT_WRITEFUNCTION)", diff --git a/src/http/Module.h b/src/http/Module.h index 8a89a4e8f2..6bd149fb16 100644 --- a/src/http/Module.h +++ b/src/http/Module.h @@ -85,6 +85,13 @@ class Module : public BaseModule { */ virtual nlohmann::json executeImpl(std::string const& subModuleName) = 0; + /** + * Send a response back to a requester of a service. + * @param content The content to be sent back. + * @param contentType The type of the content to be sent back. + */ + virtual void sendResponse(std::string const& content, std::string const& contentType) = 0; + private: /** * Pull the raw request body and translate it into a JSON object. 
diff --git a/src/http/RequestBodyJSON.cc b/src/http/RequestBodyJSON.cc index 5c79f609c7..9d3c5247de 100644 --- a/src/http/RequestBodyJSON.cc +++ b/src/http/RequestBodyJSON.cc @@ -96,4 +96,4 @@ json RequestBodyJSON::_get(string const& func, string const& name) const { return objJson.at(name); } -} // namespace lsst::qserv::http \ No newline at end of file +} // namespace lsst::qserv::http diff --git a/src/http/RequestBodyJSON.h b/src/http/RequestBodyJSON.h index 6bf6a8068f..7e15f63db7 100644 --- a/src/http/RequestBodyJSON.h +++ b/src/http/RequestBodyJSON.h @@ -42,6 +42,15 @@ class RequestBodyJSON { /// parsed body of the request nlohmann::json objJson = nlohmann::json::object(); + RequestBodyJSON() = default; + RequestBodyJSON(RequestBodyJSON const&) = default; + RequestBodyJSON& operator=(RequestBodyJSON const&) = default; + + ~RequestBodyJSON() = default; + + /// Make a new RequestBody based on `js` + RequestBodyJSON(nlohmann::json const& js) : objJson(js) {} + /** * Check if the specified parameter is present in the input JSON object. * @param obj JSON object to be inspected. 
@@ -73,8 +82,11 @@ class RequestBodyJSON { throw std::invalid_argument("RequestBodyJSON::" + std::string(__func__) + "[static] parameter 'obj' is not a valid JSON object"); } - if (obj.find(name) != obj.end()) return obj[name]; - throw std::invalid_argument("RequestBodyJSON::" + std::string(__func__) + + + if (auto const iter = obj.find(name); iter != obj.end()) { + return *iter; + } + throw std::invalid_argument("RequestBody::" + std::string(__func__) + "[static] required parameter " + name + " is missing in the request body"); } diff --git a/src/mysql/CMakeLists.txt b/src/mysql/CMakeLists.txt index 3438141219..dbe2cd6cc9 100644 --- a/src/mysql/CMakeLists.txt +++ b/src/mysql/CMakeLists.txt @@ -1,28 +1,28 @@ add_library(mysql SHARED) -add_dependencies(mysql proto) target_sources(mysql PRIVATE + CsvBuffer.cc + CsvMemDisk.cc LocalInfile.cc MySqlConfig.cc MySqlConnection.cc MySqlUtils.cc - CsvBuffer.cc SchemaFactory.cc ) target_link_libraries(mysql PUBLIC log mysqlclient_r + util ) -install( - TARGETS mysql -) +install(TARGETS mysql) add_executable(testLocalInfile testLocalInfile.cc) target_link_libraries(testLocalInfile mysql + util Boost::unit_test_framework ) diff --git a/src/mysql/CsvBuffer.cc b/src/mysql/CsvBuffer.cc index 40df209521..9cc3aa801a 100644 --- a/src/mysql/CsvBuffer.cc +++ b/src/mysql/CsvBuffer.cc @@ -33,11 +33,15 @@ // Third-party headers #include +// LSST headers +#include "lsst/log/Log.h" + // Qserv headers #include "mysql/LocalInfileError.h" #include "mysql/MySqlUtils.h" namespace { + std::string const mysqlNull("\\N"); int const largeRowThreshold = 500 * 1024; // should be less than 0.5 * infileBufferSize @@ -259,6 +263,7 @@ CsvStream::CsvStream(std::size_t maxRecords) : _maxRecords(maxRecords) { bool CsvStream::push(char const* data, std::size_t size) { std::unique_lock lock(_mtx); _cv.wait(lock, [this] { return _records.size() < _maxRecords || _closed; }); + if (_closed) return false; if (data != nullptr && size != 0) { 
_records.emplace_back(std::make_shared(data, size)); @@ -272,8 +277,20 @@ bool CsvStream::push(char const* data, std::size_t size) { std::shared_ptr CsvStream::pop() { std::unique_lock lock(_mtx); - _cv.wait(lock, [this] { return !_records.empty() || _closed; }); - if (_closed && _records.empty()) return std::make_shared(); + _cv.wait(lock, [this]() { return (!_records.empty() || _closed); }); + + if (_records.empty()) { + // _closed must be true. + // The hope is that this never happens, but to keep the system + // from locking up, send out illegal characters to force fail + // the merge. Need to keep sending characters until the + // database stops asking for them. + // See CsvStream::cancel() + _contaminated = true; + auto pstr = std::make_shared("$"); + _cv.notify_one(); + return pstr; + } std::shared_ptr front = _records.front(); _records.pop_front(); _cv.notify_one(); @@ -304,6 +321,8 @@ class CsvStreamBuffer : public CsvBuffer { public: explicit CsvStreamBuffer(std::shared_ptr const& csvStream) : _csvStream(csvStream) {} + ~CsvStreamBuffer() override = default; + unsigned fetch(char* buffer, unsigned bufLen) override { if (bufLen == 0) { throw LocalInfileError("CsvStreamBuffer::fetch Can't fetch non-positive bytes"); @@ -321,6 +340,7 @@ class CsvStreamBuffer : public CsvBuffer { unsigned const bytesToCopy = std::min(bufLen, static_cast(_str->size() - _offset)); ::memcpy(buffer, _str->data() + _offset, bytesToCopy); _offset += bytesToCopy; + _csvStream->increaseBytesWrittenBy(bytesToCopy); return bytesToCopy; } diff --git a/src/mysql/CsvBuffer.h b/src/mysql/CsvBuffer.h index f52981adb2..2fd4dc9604 100644 --- a/src/mysql/CsvBuffer.h +++ b/src/mysql/CsvBuffer.h @@ -44,6 +44,8 @@ namespace lsst::qserv::mysql { */ class CsvBuffer { public: + virtual ~CsvBuffer() = default; + /// Fetch a number of bytes into a buffer. Return the number of bytes /// fetched. Returning less than bufLen does NOT indicate EOF. 
virtual unsigned fetch(char* buffer, unsigned bufLen) = 0; @@ -119,10 +121,19 @@ class CsvStream { * Close the stream. The method is meant to be used to unblock the push() method * in case the stream is still being used by multiple threads. After the method is called, * the push() method will not accept new records and will return false to indicate that - * the stream is closed. + * the stream is closed. May be called if results are no longer needed. */ void close(); + void increaseBytesWrittenBy(size_t bytesToCopy) { _bytesWritten += bytesToCopy; } + size_t getBytesWritten() const { return _bytesWritten; } + + /** + * If this returns true, the result table has been contaminated by bad characters + * in an effort to keep the system from hanging, and the UserQuery is done. + */ + bool getContaminated() const { return _contaminated; } + private: CsvStream(std::size_t maxRecords); @@ -131,6 +142,8 @@ class CsvStream { std::atomic _closed{false}; std::size_t const _maxRecords; std::list> _records; + std::atomic _bytesWritten; + std::atomic _contaminated = false; }; /** diff --git a/src/mysql/CsvMemDisk.cc b/src/mysql/CsvMemDisk.cc new file mode 100644 index 0000000000..1c00d0f7b4 --- /dev/null +++ b/src/mysql/CsvMemDisk.cc @@ -0,0 +1,249 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2014-2015 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "mysql/CsvMemDisk.h" + +// System headers +#include +#include +#include +#include +#include +#include + +// Third-party headers +#include + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "mysql/LocalInfileError.h" +#include "mysql/MySqlUtils.h" +#include "util/Bug.h" + +using namespace std; +namespace sfs = std::filesystem; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.mysql.CsvMemDisk"); +} // namespace + +namespace lsst::qserv::mysql { + +TransferTracker::Ptr TransferTracker::_globalMt; + +void TransferTracker::setup(std::size_t max, string const& directory, std::size_t minMBInMem, + std::size_t maxResultTableSizeBytes, CzarId czarId) { + if (_globalMt != nullptr) { + throw util::Bug(ERR_LOC, "MemoryTracker::setup called when MemoryTracker already setup!"); + } + _globalMt = TransferTracker::Ptr(new TransferTracker(max, directory, minMBInMem, czarId)); +} + +bool TransferTracker::verifyDir(string const& dirName) { + sfs::path dir = dirName; + if (!(sfs::exists(dir) && sfs::is_directory(dir))) { + LOGS(_log, LOG_LVL_ERROR, "verifyDir, " + dirName + " is not a valid directory"); + return false; + } + return true; +} + +TransferTracker::MemoryRaii::Ptr TransferTracker::createRaii(size_t fileSize) { + MemoryRaii::Ptr pRaii(new MemoryRaii(fileSize)); + return pRaii; +} + +void TransferTracker::_incrTotal(size_t sz) { + lock_guard ulck(_mtx); + _total += sz; +} + +void TransferTracker::_decrTotal(size_t sz) { + lock_guard ulck(_mtx); + if (sz > _total) { + throw util::Bug(ERR_LOC, + "MemoryTracker::_decrTotal sz=" + to_string(sz) + " > total=" + to_string(_total)); + } + _total -= sz; +} + +CsvMemDisk::CsvMemDisk(std::size_t expectedBytes, QueryId qId, UberJobId ujId) + : _expectedBytes(expectedBytes), _qId(qId), _ujId(ujId) { + auto memTrack = 
TransferTracker::get(); + if (memTrack == nullptr) { + throw util::Bug(ERR_LOC, "CsvStrMemDisk constructor MemoryTracker is NULL"); + } + sfs::path fPath = memTrack->getDirectory(); + string fileName = memTrack->getBaseFileName() + "_" + to_string(memTrack->getCzarId()) + "_" + + to_string(_qId) + "_" + to_string(ujId); + fPath /= fileName; + _filePath = fPath; + _minBytesInMem = memTrack->getMinBytesInMem(); +} + +void CsvMemDisk::transferDataFromWorker(std::function transferFunc) { + auto memTrack = TransferTracker::get(); + if (memTrack == nullptr) { + throw util::Bug(ERR_LOC, "CsvStrMemDisk::waitReadyToRead MemoryTracker is NULL"); + } + _memRaii = memTrack->createRaii(_expectedBytes); + transferFunc(); +} + +bool CsvMemDisk::_mustWriteToTmpFile() { + // Once writing to file, this instance must keep writing to file. + if (_writingToTmpFile) return true; + + auto memTrack = TransferTracker::get(); + // If too much memory is being used for transfers, start writing large transfers to files. + if (memTrack->getTotal() > memTrack->getMax()) { + if (_records.size() > _minRecordsSize && _bytesRead > _minBytesInMem) { + _writingToTmpFile = true; + } + } + return _writingToTmpFile; +} + +void CsvMemDisk::push(char const* data, size_t size) { + // Push is always ok, no need to wait. + if (_cancelled) return; + _bytesRead += size; + if (_mustWriteToTmpFile()) { + _writeToTmpfile(data, size); + return; + } + if (data != nullptr && size != 0) { + _records.emplace_back(make_shared(data, size)); + } else { + // Empty string is meant to indicate the end of the stream. 
+ _records.emplace_back(make_shared()); + } +} + +shared_ptr CsvMemDisk::pop() { + if (_records.size() > 0) { + shared_ptr front = _records.front(); + _records.pop_front(); + return front; + } + return _readFromTmpFile(); +} + +void CsvMemDisk::_writeToTmpfile(char const* data, std::size_t size) { + // Open the file if needed + auto oldState = _fState.exchange(OPEN_W); + if (oldState == INIT) { + _file.open(_filePath, fstream::out); + } + if (!_file.is_open() || _fState != OPEN_W) { + LOGS(_log, LOG_LVL_ERROR, + "CsvStrMemDisk::_writeTofile file isn't open " << _filePath << " or bad state=" << _fState); + _fileError = true; + return; + } + + _file.write(data, size); + _bytesWrittenToTmp += size; +} + +std::shared_ptr CsvMemDisk::_readFromTmpFile() { + if (_fState == OPEN_W) { + _fState = CLOSE_W; + _file.close(); + } + auto oldState = _fState.exchange(OPEN_R); + if (oldState == CLOSE_W) { + _file.open(_filePath, fstream::in); + _bytesLeft = _bytesWrittenToTmp; + } + if (!_file.is_open() || _fState != OPEN_R) { + // This is extremely unlikely and means something has gone wrong with the file system. + // If something has gone wrong with the file system, a crash may be incoming. 
+ if (!getContaminated()) + LOGS(_log, LOG_LVL_ERROR, + "CsvStrMemDisk::_readFromfile file isn't open " << _filePath << " or bad state=" << _fState); + _setContaminated(); + return make_shared("$"); + } + + std::size_t buffSz = std::min(1'000'000ul, _bytesLeft); + auto strPtr = make_shared(); + strPtr->resize(buffSz); + _file.read(strPtr->data(), buffSz); + _bytesLeft -= buffSz; + return strPtr; +} + +CsvMemDisk::~CsvMemDisk() { + if (_fState != INIT) { + LOGS(_log, LOG_LVL_INFO, "~CsvStrMemDisk() remove " << _filePath); + _file.close(); + std::remove(_filePath.c_str()); + } +} + +class CsvMemDiskBuffer : public CsvBuffer { +public: + explicit CsvMemDiskBuffer(shared_ptr const& csvMemDisk) : _csvMemDisk(csvMemDisk) {} + + ~CsvMemDiskBuffer() override = default; + + unsigned fetch(char* buffer, unsigned bufLen) override { + if (bufLen == 0) { + throw LocalInfileError("CsvMemDiskBuffer::fetch Can't fetch non-positive bytes"); + } + auto csvMd = _csvMemDisk.lock(); + if (csvMd == nullptr) return 0; + if (_str == nullptr) { + _str = csvMd->pop(); + _offset = 0; + } + if (_str->empty()) return 0; + if (_offset >= _str->size()) { + _str = csvMd->pop(); + _offset = 0; + if (_str->empty()) return 0; + } + unsigned const bytesToCopy = min(bufLen, static_cast(_str->size() - _offset)); + ::memcpy(buffer, _str->data() + _offset, bytesToCopy); + _offset += bytesToCopy; + csvMd->increaseBytesFetched(bytesToCopy); + return bytesToCopy; + } + + string dump() const override { return "CsvMemDiskBuffer"; } + +private: + weak_ptr _csvMemDisk; + shared_ptr _str; + size_t _offset = 0; +}; + +shared_ptr newCsvMemDiskBuffer(shared_ptr const& csvMemDisk) { + return make_shared(csvMemDisk); +} + +} // namespace lsst::qserv::mysql diff --git a/src/mysql/CsvMemDisk.h b/src/mysql/CsvMemDisk.h new file mode 100644 index 0000000000..ec4b5a231a --- /dev/null +++ b/src/mysql/CsvMemDisk.h @@ -0,0 +1,218 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2015 LSST Corporation. 
+ * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_MYSQL_CSVMEMDISK_H +#define LSST_QSERV_MYSQL_CSVMEMDISK_H + +// System headers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Third-party headers +#include + +// qserv headers +#include "global/intTypes.h" +#include "mysql/CsvBuffer.h" + +namespace lsst::qserv::mysql { + +/// Track how much space is needed to store the current UberJob results while +/// transferring them from the workers and merging them to the result table. +/// How this effects the process depends on the TransferMethod. RAII methods +/// are used to ensure all allocations are freed. +/// MEMORYDISK - Instead new transfers waiting for memory to be +/// freed, most of the data will be written to disk when `_max` is +/// reached. The current plan is, per UberJob, to write create a +/// few CsvBuffers as is done now, and then write everything to +/// disk, and have pop read off disk when it runs out of existing +/// CsvBuffers. UberJobs with reasonable result sizes should be +/// unaffected. 
+/// @see CsvStrMemDisk +class TransferTracker { +public: + using Ptr = std::shared_ptr; + + TransferTracker() = delete; + + static bool verifyDir(std::string const& dirName); + static std::string getBaseFileName() { return std::string("qservtransfer"); } + + /// This class makes certain that any memory added to MemoryTracker + /// is removed from MemoryTracker. + class MemoryRaii { + public: + using Ptr = std::shared_ptr; + MemoryRaii() = delete; + ~MemoryRaii() { _globalMt->_decrTotal(memSize); } + + size_t const memSize; + friend class TransferTracker; + + private: + /// Only to be called by createRaii(), which locks the mutex. + explicit MemoryRaii(size_t memSize_) : memSize(memSize_) { _globalMt->_incrTotal(memSize); } + }; + friend class MemoryRaii; + + static void setup(std::size_t max, std::string const& directory, std::size_t minBytesInMem, + std::size_t maxResultTableSizeBytes, CzarId czarId); + static Ptr get() { return _globalMt; } + + /// Create a MemoryRaii instance to track `fileSize` bytes, and wait for free memory if `wait` is true. + MemoryRaii::Ptr createRaii(size_t fileSize); + + size_t getTotal() const { + std::lock_guard lg(_mtx); + return _total; + } + + std::size_t getMax() const { return _max; } + std::string getDirectory() const { return _directory; } + std::size_t getMinBytesInMem() const { return _minBytesInMem; } + CzarId getCzarId() const { return _czarId; } + +private: + TransferTracker(std::size_t max, std::string const& directory, std::size_t minBytesInMem, CzarId czarId) + : _max(max), _directory(directory), _minBytesInMem(minBytesInMem), _czarId(czarId) {} + + /// This function only to be called via createRaii. 
+ void _incrTotal(size_t sz); + + /// This function only to be called by ~MemoryRaii() + void _decrTotal(size_t sz); + + static Ptr _globalMt; + + mutable std::mutex _mtx; + std::size_t _total = 0; + std::size_t const _max; + std::string const _directory; + std::size_t const _minBytesInMem; + CzarId const _czarId; +}; + +/// Store transfer data in memory until too much memory is being used. +/// By setting the maximum acceptable amount of memory to 0, this +/// effectively becomes writing results to disk. +/// Collecting data from the worker, writing it to disk, reading +/// it back, and merging is expected to be linear, run within a +/// single thread. +/// The intention is that most reasonable size requests can be handled +/// within memory, which is highly likely to be the fastest method. +/// If a lot of memory (more than TransferTraker::_max) is being used by +/// all current transfers, then transfers greater than _minBytesInMem +/// will be written to disk until memory is free. +/// If _contaminated or _fileError get set to true, there are probably +/// catastrophic file system problems. +class CsvMemDisk { +public: + enum FileState { INIT, OPEN_W, CLOSE_W, OPEN_R, CLOSED }; + + static std::shared_ptr create(std::size_t expectedBytes, QueryId qId, UberJobId ujId) { + return std::shared_ptr(new CsvMemDisk(expectedBytes, qId, ujId)); + } + + CsvMemDisk() = delete; + CsvMemDisk(CsvMemDisk const&) = delete; + CsvMemDisk& operator=(CsvMemDisk const&) = delete; + ~CsvMemDisk(); + + void push(char const* data, std::size_t size); + + std::shared_ptr pop(); + + /// This version never waits. + void transferDataFromWorker(std::function transferFunc); + + /// True if a file error happened before results would be contaminated. + bool isFileError() const { return _fileError; } + + /// Stop transferring data if the query has been cancelled. + /// @return true if merging could be cancelled. 
If merging + /// to the result table has started, it must finish + /// or the table will be corrupted. + void cancel() { _cancelled = true; } + + bool isCancelled() const { return _cancelled; } + + /// Indicates there was a file system error and the file could not be opened. + bool getContaminated() const { return _contaminated; } + + void increaseBytesFetched(size_t bytesToCopy) { _bytesFetched += bytesToCopy; } + size_t getBytesFetched() const { return _bytesFetched; } + +private: + CsvMemDisk(std::size_t expectedBytes, QueryId qId, UberJobId ujId); + + void _writeToTmpfile(char const* data, std::size_t size); + + /// Read from the file, which should only happen after all writing has finished. + std::shared_ptr _readFromTmpFile(); + + bool _mustWriteToTmpFile(); + + void _setContaminated() { _contaminated = true; } + + std::atomic _cancelled = false; + size_t _bytesFetched = 0; + std::list> _records; + + size_t _bytesRead = 0; + size_t const _expectedBytes; + + /// Indicates there was a file system error and the file could not be opened. + bool _contaminated = false; + + /// Have at least on record ready to be pushed + unsigned int const _minRecordsSize = 1; + std::size_t _minBytesInMem; + + bool _writingToTmpFile = false; + std::string const _directory; + std::string const _baseName; + QueryId const _qId; + UberJobId const _ujId; + + std::atomic _fState = INIT; + std::string _filePath; ///< file path, constant once set. 
+ std::fstream _file; + + bool _fileError = false; + std::size_t _bytesWrittenToTmp = 0; + std::size_t _bytesLeft = 0; + + TransferTracker::MemoryRaii::Ptr _memRaii; +}; + +std::shared_ptr newCsvMemDiskBuffer(std::shared_ptr const& csvMemDisk); + +} // namespace lsst::qserv::mysql +#endif // LSST_QSERV_MYSQL_CSVMEMDISK_H diff --git a/src/mysql/LocalInfile.cc b/src/mysql/LocalInfile.cc index b073654972..e999996267 100644 --- a/src/mysql/LocalInfile.cc +++ b/src/mysql/LocalInfile.cc @@ -77,6 +77,7 @@ LocalInfile::LocalInfile(char const* filename, std::shared_ptr csvBuf } LocalInfile::~LocalInfile() { + LOGS(_log, LOG_LVL_TRACE, "~LocalInfile"); if (_buffer) { delete[] _buffer; } @@ -153,7 +154,7 @@ std::string LocalInfile::Mgr::prepareSrc(std::shared_ptr const& csvBu int LocalInfile::Mgr::local_infile_init(void** ptr, const char* filename, void* userdata) { assert(userdata); LocalInfile::Mgr* m = static_cast(userdata); - auto csvBuffer = m->get(std::string(filename)); + auto csvBuffer = m->getCsv(std::string(filename)); assert(csvBuffer); LocalInfile* lf = new LocalInfile(filename, csvBuffer); *ptr = lf; @@ -187,10 +188,10 @@ void LocalInfile::Mgr::setBuffer(std::string const& filename, std::shared_ptr LocalInfile::Mgr::get(std::string const& filename) { +std::shared_ptr LocalInfile::Mgr::getCsv(std::string const& filename) { std::lock_guard lock(_mapMutex); - CsvBufferMap::iterator i = _map.find(filename); - if (i == _map.end()) { + auto i = _mapCsv.find(filename); + if (i == _mapCsv.end()) { return std::shared_ptr(); } return i->second; @@ -206,7 +207,7 @@ std::string LocalInfile::Mgr::_nextFilename() { bool LocalInfile::Mgr::_set(std::string const& filename, std::shared_ptr const& csvBuffer) { std::lock_guard lock(_mapMutex); - auto res = _map.insert(std::pair>(filename, csvBuffer)); + auto res = _mapCsv.insert(std::pair>(filename, csvBuffer)); return res.second; } diff --git a/src/mysql/LocalInfile.h b/src/mysql/LocalInfile.h index d9c46ba93b..9c03b16b34 100644 
--- a/src/mysql/LocalInfile.h +++ b/src/mysql/LocalInfile.h @@ -125,7 +125,7 @@ class LocalInfile::Mgr : boost::noncopyable { std::string insertBuffer(std::shared_ptr const& csvBuffer); void setBuffer(std::string const& s, std::shared_ptr const& csvBuffer); - std::shared_ptr get(std::string const& filename); + std::shared_ptr getCsv(std::string const& filename); private: /// @return next filename @@ -135,7 +135,7 @@ class LocalInfile::Mgr : boost::noncopyable { bool _set(std::string const& filename, std::shared_ptr const& csvBuffer); typedef std::map> CsvBufferMap; - CsvBufferMap _map; + CsvBufferMap _mapCsv; std::mutex _mapMutex; }; diff --git a/src/mysql/testCsvBuffer.cc b/src/mysql/testCsvBuffer.cc index 971bfff32d..3e0c8c927b 100644 --- a/src/mysql/testCsvBuffer.cc +++ b/src/mysql/testCsvBuffer.cc @@ -107,13 +107,15 @@ BOOST_AUTO_TEST_CASE(TestClosingCsvStreamBuffer) { BOOST_CHECK(!csvStream->push("0123456789", 10)); }); - std::thread consumer([csvBuf]() { + std::thread consumer([csvBuf, csvStream]() { char buffer[20]; BOOST_CHECK_EQUAL(csvBuf->fetch(buffer, sizeof(buffer)), 3); BOOST_CHECK_EQUAL(std::string(buffer, 3), "abc"); BOOST_CHECK_EQUAL(csvBuf->fetch(buffer, sizeof(buffer)), 3); BOOST_CHECK_EQUAL(std::string(buffer, 3), "def"); - BOOST_CHECK_EQUAL(csvBuf->fetch(buffer, sizeof(buffer)), 0); + BOOST_CHECK_EQUAL(csvStream->getContaminated(), false); + csvBuf->fetch(buffer, sizeof(buffer)); + BOOST_CHECK_EQUAL(csvStream->getContaminated(), true); }); producer.join(); consumer.join(); diff --git a/src/mysql/testMySqlUtils.cc b/src/mysql/testMySqlUtils.cc index c8725f7864..f7185388cb 100644 --- a/src/mysql/testMySqlUtils.cc +++ b/src/mysql/testMySqlUtils.cc @@ -29,7 +29,6 @@ #include namespace test = boost::test_tools; -// namespace gio = google::protobuf::io; struct Fixture { Fixture(void) {} diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 9c4de547ec..99d55c57eb 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt 
@@ -10,7 +10,7 @@ antlr_target(QSMySqlParser QSMySqlParser.g4 PARSER LISTENER add_library(parser SHARED) -target_include_directories(parser PRIVATE +target_include_directories(parser PRIVATE ${ANTLR4_INCLUDE_DIR} ) @@ -29,6 +29,4 @@ target_link_libraries(parser PUBLIC antlr4-runtime ) -install( - TARGETS parser -) +install(TARGETS parser) diff --git a/src/partition/Chunker.h b/src/partition/Chunker.h index f6a810d54d..bf80d5e2e6 100644 --- a/src/partition/Chunker.h +++ b/src/partition/Chunker.h @@ -62,8 +62,8 @@ class ChunkLocation { bool overlap = false; ChunkLocation() = default; - ChunkLocation(ChunkLocation const &) = default; - ChunkLocation &operator=(ChunkLocation const &) = default; + ChunkLocation(ChunkLocation const&) = default; + ChunkLocation& operator=(ChunkLocation const&) = default; ChunkLocation(int32_t chunkId_, int32_t subChunkId_, bool overlap_) : chunkId(chunkId_), subChunkId(subChunkId_), overlap(overlap_) {} @@ -72,7 +72,7 @@ class ChunkLocation { uint32_t hash() const { return std::hash{}(static_cast(chunkId)); } /// Order chunk locations by chunk ID. - bool operator<(ChunkLocation const &loc) const { return chunkId < loc.chunkId; } + bool operator<(ChunkLocation const& loc) const { return chunkId < loc.chunkId; } }; /// A Chunker locates points according to the following simple partitioning scheme. @@ -91,7 +91,7 @@ class Chunker { public: Chunker(double overlap, int32_t numStripes, int32_t numSubStripesPerStripe); - Chunker(ConfigStore const &config); + Chunker(ConfigStore const& config); ~Chunker(); @@ -104,36 +104,36 @@ class Chunker { SphericalBox const getSubChunkBounds(int32_t chunkId, int32_t subChunkId) const; /// Find the non-overlap location of the given position. - ChunkLocation const locate(std::pair const &position) const; + ChunkLocation const locate(std::pair const& position) const; /// Append the locations of the given position to the `locations` vector. /// If `chunkId` is negative, all locations are appended. 
Otherwise, only /// those in the corresponding chunk are appended. - void locate(std::pair const &position, int32_t chunkId, - std::vector &locations) const; + void locate(std::pair const& position, int32_t chunkId, + std::vector& locations) const; /// Return the IDs of all chunks overlapping the given box and belonging /// to the given node. The target node is specified as an integer in the /// range `[0, numNodes)` and a chunk with ID C belongs to the node given /// by hash(C) modulo `numNodes`. - std::vector const getChunksIn(SphericalBox const ®ion, uint32_t node, + std::vector const getChunksIn(SphericalBox const& region, uint32_t node, uint32_t numNodes) const; /// Return the IDs of all chunks overlapping the given box. - std::vector const getChunksIn(SphericalBox const ®ion) const { + std::vector const getChunksIn(SphericalBox const& region) const { return getChunksIn(region, 0u, 1u); } /// Append IDs for all sub-chunks of `chunkId` to `subChunks`. - void getSubChunks(std::vector &subChunks, int32_t chunkId) const; + void getSubChunks(std::vector& subChunks, int32_t chunkId) const; /// Define configuration variables for partitioning. - static void defineOptions(boost::program_options::options_description &opts); + static void defineOptions(boost::program_options::options_description& opts); private: // Disable copy construction and assignment. 
- Chunker(Chunker const &); - Chunker &operator=(Chunker const &); + Chunker(Chunker const&); + Chunker& operator=(Chunker const&); void _initialize(double overlap, int32_t numStripes, int32_t numSubStripesPerStripe); @@ -154,7 +154,7 @@ class Chunker { } void _upDownOverlap(double lon, int32_t chunkId, int32_t stripe, int32_t subStripe, - std::vector &locations) const; + std::vector& locations) const; double _overlap; double _subStripeHeight; diff --git a/src/partition/Csv.cc b/src/partition/Csv.cc index 4ffd2ef6ed..72e3dd6001 100644 --- a/src/partition/Csv.cc +++ b/src/partition/Csv.cc @@ -76,7 +76,7 @@ Dialect::Dialect(char delimiter, char escape, char quote) _validate(); } -Dialect::Dialect(std::string const &null, char delimiter, char escape, char quote) +Dialect::Dialect(std::string const& null, char delimiter, char escape, char quote) : _null(null), _scanLut(new uint8_t[NUM_CHARS]), _delimiter(delimiter), @@ -85,7 +85,7 @@ Dialect::Dialect(std::string const &null, char delimiter, char escape, char quot _validate(); } -Dialect::Dialect(ConfigStore const &config, std::string const &prefix) +Dialect::Dialect(ConfigStore const& config, std::string const& prefix) : _null(), _scanLut(new uint8_t[NUM_CHARS]) { _delimiter = config.get(prefix + "delimiter"); if (config.flag(prefix + "no-quote")) { @@ -109,7 +109,7 @@ Dialect::Dialect(ConfigStore const &config, std::string const &prefix) _validate(); } -Dialect::Dialect(Dialect const &dialect) +Dialect::Dialect(Dialect const& dialect) : _null(dialect._null), _scanLut(new uint8_t[NUM_CHARS]), _nullHasSpecial(dialect._nullHasSpecial), @@ -121,7 +121,7 @@ Dialect::Dialect(Dialect const &dialect) Dialect::~Dialect() {} -Dialect &Dialect::operator=(Dialect const &dialect) { +Dialect& Dialect::operator=(Dialect const& dialect) { if (this != &dialect) { _null = dialect._null; _nullHasSpecial = dialect._nullHasSpecial; @@ -133,7 +133,7 @@ Dialect &Dialect::operator=(Dialect const &dialect) { return *this; } -size_t 
Dialect::decode(char *buf, char const *value, size_t size) const { +size_t Dialect::decode(char* buf, char const* value, size_t size) const { if (_quote == '\0' && _escape == '\0') { if (size > MAX_FIELD_SIZE) { throw std::runtime_error("CSV field value is too long to decode."); @@ -175,7 +175,7 @@ size_t Dialect::decode(char *buf, char const *value, size_t size) const { return j; } -size_t Dialect::encode(char *buf, char const *value, size_t size) const { +size_t Dialect::encode(char* buf, char const* value, size_t size) const { if (value == 0) { std::memcpy(buf, _null.data(), _null.size()); return _null.size(); @@ -265,7 +265,7 @@ size_t Dialect::encode(char *buf, char const *value, size_t size) const { return j; } -void Dialect::defineOptions(po::options_description &opts, std::string const &prefix) { +void Dialect::defineOptions(po::options_description& opts, std::string const& prefix) { opts.add_options()((prefix + "null").c_str(), po::value(), "NULL CSV field value string. Leaving this option unspecified " "results in a dialect specific default - if quoting is enabled, " @@ -287,7 +287,7 @@ void Dialect::defineOptions(po::options_description &opts, std::string const &pr // Scan the given string for occurrences of the CR, LF, escape, quote or // delimiter characters, and return a bitwise or of the HAS_xxx constants // indicating which were found. 
-int Dialect::_scan(char const *value, size_t size) const { +int Dialect::_scan(char const* value, size_t size) const { uint8_t flags = 0; for (size_t i = 0; i < size; ++i) { uint8_t c = static_cast(value[i]); @@ -363,9 +363,9 @@ Editor::Field::~Field() { } } -Editor::Editor(Dialect const &inputDialect, Dialect const &outputDialect, - std::vector const &inputFieldNames, - std::vector const &outputFieldNames) +Editor::Editor(Dialect const& inputDialect, Dialect const& outputDialect, + std::vector const& inputFieldNames, + std::vector const& outputFieldNames) : _inputDialect(inputDialect), _outputDialect(outputDialect), _dialectsMatch(_inputDialect == _outputDialect), @@ -377,7 +377,7 @@ Editor::Editor(Dialect const &inputDialect, Dialect const &outputDialect, _initialize(inputFieldNames, outputFieldNames); } -Editor::Editor(ConfigStore const &config) +Editor::Editor(ConfigStore const& config) : _inputDialect(config, "in.csv."), _outputDialect(config, "out.csv."), _dialectsMatch(_inputDialect == _outputDialect), @@ -403,7 +403,7 @@ Editor::Editor(ConfigStore const &config) Editor::~Editor() {} -char const *Editor::readRecord(char const *const begin, char const *const end) { +char const* Editor::readRecord(char const* const begin, char const* const end) { if (end <= begin || begin == 0) { throw std::runtime_error("Empty or invalid input line."); } else if (_numInputFields == 0) { @@ -414,10 +414,10 @@ char const *Editor::readRecord(char const *const begin, char const *const end) { bool quoted = false; bool escaped = false; bool decode = false; - Field *f = _fields.get(); - Field *fend = f + _numInputFields; + Field* f = _fields.get(); + Field* fend = f + _numInputFields; f->inputValue = begin; - char const *cur = begin; + char const* cur = begin; for (; cur < end; ++cur) { char const c = *cur; if (c == '\n' || c == '\r') { @@ -505,7 +505,7 @@ char const *Editor::readRecord(char const *const begin, char const *const end) { // Set output values for remaining fields to 
NULL. fend = _fields.get() + _numFields; for (++f; f != fend; ++f) { - std::string const &null = _outputDialect.getNull(); + std::string const& null = _outputDialect.getNull(); std::memcpy(f->outputValue, null.data(), null.size()); f->outputSize = static_cast(null.size()); f->flags = 0; @@ -520,15 +520,15 @@ char const *Editor::readRecord(char const *const begin, char const *const end) { return cur; } -char *Editor::writeRecord(char *buf) const { +char* Editor::writeRecord(char* buf) const { char decodeBuf[MAX_FIELD_SIZE]; char encodeBuf[MAX_FIELD_SIZE]; size_t size = 0; char const delimiter = _outputDialect.getDelimiter(); for (int i = 0; i < _numOutputFields; ++i) { - Field const &f = _fields[_outputs[i]]; - char const *val; + Field const& f = _fields[_outputs[i]]; + char const* val; size_t sz; if (!f.inputValue || (f.flags & Field::EDITED) != 0) { // Output values are always encoded in the output dialect. @@ -576,8 +576,8 @@ std::string const Editor::get(int i, bool decode) const { if (i < 0 || i >= _numInputFields) { throw std::runtime_error("Invalid input field."); } - Field const &f = _fields[i]; - char const *val = f.inputValue; + Field const& f = _fields[i]; + char const* val = f.inputValue; size_t sz = f.inputSize; if (decode) { if (_inputDialect.isNull(val, sz)) { @@ -594,22 +594,22 @@ bool Editor::setNull(int i) { if (i < 0 || i >= _numFields) { return false; } - Field *f = &_fields[i]; + Field* f = &_fields[i]; if (!f->outputValue) { return false; } - std::string const &null = _outputDialect.getNull(); + std::string const& null = _outputDialect.getNull(); std::memcpy(f->outputValue, null.data(), null.size()); f->outputSize = static_cast(null.size()); f->flags |= Field::EDITED; return true; } -bool Editor::set(int i, std::string const &val) { +bool Editor::set(int i, std::string const& val) { if (i < 0 || i >= _numFields) { return false; } - Field *f = &_fields[i]; + Field* f = &_fields[i]; if (!f->outputValue) { return false; } @@ -622,7 +622,7 @@ bool 
Editor::set(int i, char c) { if (i < 0 || i >= _numFields) { return false; } - Field *f = &_fields[i]; + Field* f = &_fields[i]; if (!f->outputValue) { return false; } @@ -637,7 +637,7 @@ bool Editor::set(int i, char c) { if (i < 0 || i >= _numFields) { \ return false; \ } \ - Field *f = &_fields[i]; \ + Field* f = &_fields[i]; \ if (!f->outputValue) { \ return false; \ } \ @@ -668,7 +668,7 @@ IMPLEMENT_SET(double, .17g) #undef IMPLEMENT_SET_IMPL #undef IMPLEMENT_SET -void Editor::defineOptions(po::options_description &opts) { +void Editor::defineOptions(po::options_description& opts) { po::options_description in("\\___________ Input CSV format", 80); Dialect::defineOptions(in, "in.csv."); in.add_options()("in.csv.field", po::value>(), @@ -688,38 +688,38 @@ void Editor::defineOptions(po::options_description &opts) { opts.add(in).add(out); } -void Editor::_initialize(std::vector const &inputFieldNames, - std::vector const &outputFieldNames) { +void Editor::_initialize(std::vector const& inputFieldNames, + std::vector const& outputFieldNames) { typedef std::pair Mapping; int i = 0; // total number of fields for (; i < _numInputFields; ++i) { - std::string const &name = inputFieldNames[i]; + std::string const& name = inputFieldNames[i]; Mapping m = _fieldMap.insert(std::pair(name, i)); if (!m.second) { throw std::runtime_error( "The input CSV field name list contains " "duplicates."); } - Field *f = &_fields[i]; + Field* f = &_fields[i]; // Before the first readRecord() call, assign NULL to all input // fields. - std::string const &null = _inputDialect.getNull(); + std::string const& null = _inputDialect.getNull(); f->inputValue = null.data(); f->inputSize = static_cast(null.size()); } for (int j = 0; j < _numOutputFields; ++j) { - std::string const &name = outputFieldNames[j]; + std::string const& name = outputFieldNames[j]; Mapping m = _fieldMap.insert(std::pair(name, i)); if (m.second) { // The output field name does not match any input field. 
Create // a new output field and assign NULL to the output value. - Field *f = &_fields[i]; - f->outputValue = static_cast(std::malloc(MAX_FIELD_SIZE)); + Field* f = &_fields[i]; + f->outputValue = static_cast(std::malloc(MAX_FIELD_SIZE)); if (!f->outputValue) { throw std::bad_alloc(); } - std::string const &null = _outputDialect.getNull(); + std::string const& null = _outputDialect.getNull(); std::memcpy(f->outputValue, null.data(), null.size()); f->outputSize = static_cast(null.size()); _outputs[j] = i++; @@ -727,11 +727,11 @@ void Editor::_initialize(std::vector const &inputFieldNames, // The output field name matched an existing field - // make sure space is available for an output value. int k = m.first->second; - Field *f = &_fields[k]; + Field* f = &_fields[k]; if (!f->outputValue) { // f is also an input field, so there is no need // to set an output value here. - f->outputValue = static_cast(std::malloc(MAX_FIELD_SIZE)); + f->outputValue = static_cast(std::malloc(MAX_FIELD_SIZE)); if (!f->outputValue) { throw std::bad_alloc(); } @@ -748,8 +748,8 @@ bool Editor::_get(int i) const { if (i < 0 || i >= _numInputFields) { throw std::runtime_error("Invalid input field"); } - Field const &f = _fields[i]; - char const *val = f.inputValue; + Field const& f = _fields[i]; + char const* val = f.inputValue; size_t sz = f.inputSize; if (_inputDialect.isNull(val, sz)) { throw std::runtime_error("Input field value is NULL."); @@ -760,7 +760,7 @@ bool Editor::_get(int i) const { val = buf; } // Trim leading and trailing whitespace. 
- char const *end = val + sz; + char const* end = val + sz; for (; val < end && isspace(*val); ++val) { } for (; end > val && isspace(end[-1]); --end) { @@ -786,8 +786,8 @@ char Editor::_get(int i) const { if (i < 0 || i >= _numInputFields) { throw std::runtime_error("Invalid input field"); } - Field const &f = _fields[i]; - char const *val = f.inputValue; + Field const& f = _fields[i]; + char const* val = f.inputValue; size_t sz = f.inputSize; if (_inputDialect.isNull(val, sz)) { throw std::runtime_error("Input field value is NULL."); @@ -810,12 +810,12 @@ char Editor::_get(int i) const { // converted. This can lead to crashes or incorrect results. For example, // consider what happens when the field delimiter is a digit. -Editor::CharConstPtrPair const Editor::_getFieldText(int i, char *buf) const { +Editor::CharConstPtrPair const Editor::_getFieldText(int i, char* buf) const { if (i < 0 || i >= _numInputFields) { throw std::runtime_error("Invalid input field"); } - Field const &f = _fields[i]; - char const *val = f.inputValue; + Field const& f = _fields[i]; + char const* val = f.inputValue; size_t sz = f.inputSize; if (_inputDialect.isNull(val, sz)) { throw std::runtime_error("Input field value is NULL."); @@ -825,7 +825,7 @@ Editor::CharConstPtrPair const Editor::_getFieldText(int i, char *buf) const { val = buf; buf[sz] = '\0'; } - char const *end = val + sz; + char const* end = val + sz; for (; val < end && isspace(*val); ++val) { } for (; end > val && isspace(end[-1]); --end) { @@ -848,7 +848,7 @@ Editor::CharConstPtrPair const Editor::_getFieldText(int i, char *buf) const { U Editor::_get(int i) const { \ char buf[MAX_FIELD_SIZE + 1]; \ CharConstPtrPair f = _getFieldText(i, buf); \ - char *e = 0; \ + char* e = 0; \ errno = 0; \ V v = strto##suffix(f.first, &e, 10); \ if (e != f.second) { \ @@ -865,7 +865,7 @@ Editor::CharConstPtrPair const Editor::_getFieldText(int i, char *buf) const { U Editor::_get(int i) const { \ char buf[MAX_FIELD_SIZE + 1]; \ 
CharConstPtrPair f = _getFieldText(i, buf); \ - char *e = 0; \ + char* e = 0; \ U u = strto##suffix(f.first, &e); \ if (e != f.second) { \ throw std::runtime_error("Cannot convert field value to a C++ " #U); \ diff --git a/src/partition/Csv.h b/src/partition/Csv.h index 6ebcb8cb71..02edb7aa7a 100644 --- a/src/partition/Csv.h +++ b/src/partition/Csv.h @@ -96,7 +96,7 @@ class Dialect { /// Create a dialect with an explicit NULL string. /// To disable quoting, specify '\0' as the quote character. To disable /// escaping, specify '\0' as the escape character. - Dialect(std::string const &null, char delimiter, char escape, char quote); + Dialect(std::string const& null, char delimiter, char escape, char quote); /// Create a dialect. The NULL string is set to "NULL" if quoting /// is enabled, "\N" if escaping enabled, and "" otherwise. To disable @@ -107,35 +107,35 @@ class Dialect { /// Build a dialect from configuration variables with names given by the /// concatenation of prefix and "null", "delimiter", "escape", /// "no-escape", "quote" and "no-quote". - Dialect(ConfigStore const &config, std::string const &prefix); + Dialect(ConfigStore const& config, std::string const& prefix); - Dialect(Dialect const &dialect); + Dialect(Dialect const& dialect); ~Dialect(); - Dialect &operator=(Dialect const &dialect); + Dialect& operator=(Dialect const& dialect); - std::string const &getNull() const { return _null; } + std::string const& getNull() const { return _null; } char getDelimiter() const { return _delimiter; } char getEscape() const { return _escape; } char getQuote() const { return _quote; } - bool operator==(Dialect const &d) const { + bool operator==(Dialect const& d) const { return _null == d._null && _delimiter == d._delimiter && _escape == d._escape && _quote == d._quote; } /// Is the encoded field value identical to the NULL string? 
- bool isNull(char const *value, size_t size) const { + bool isNull(char const* value, size_t size) const { return _null.compare(0, _null.size(), value, size) == 0; } /// Decode a value encoded in this dialect into `buf` and return the /// number of characters written. No more than MAX_FIELD_SIZE characters /// are written - if more are required an exception is thrown. Leading /// and trailing whitespace is preserved. - size_t decode(char *buf, char const *value, size_t size) const; + size_t decode(char* buf, char const* value, size_t size) const; /// Decode a field encoded in this dialect. - std::string const decode(char const *value, size_t size) const { + std::string const decode(char const* value, size_t size) const { char buf[MAX_FIELD_SIZE]; size = decode(buf, value, size); return std::string(buf, size); @@ -144,16 +144,16 @@ class Dialect { /// Encode a field according to this dialect into `buf` and return the /// number of characters written. No more than MAX_FIELD_SIZE characters /// are written - if more are required an exception is thrown. - size_t encode(char *buf, char const *value, size_t size) const; + size_t encode(char* buf, char const* value, size_t size) const; /// Encode a value in this dialect. - std::string const encode(char const *value, size_t size) const { + std::string const encode(char const* value, size_t size) const { char buf[MAX_FIELD_SIZE]; size = encode(buf, value, size); return std::string(buf, size); } /// Define configuration variables for specifying a dialect. - static void defineOptions(boost::program_options::options_description &opts, std::string const &prefix); + static void defineOptions(boost::program_options::options_description& opts, std::string const& prefix); private: static size_t const NUM_CHARS = 256; // Number of distinct character values. 
@@ -162,7 +162,7 @@ class Dialect { enum { HAS_CRLF = 0x1, HAS_DELIM = 0x2, HAS_QUOTE = 0x4, HAS_ESCAPE = 0x8 }; - int _scan(char const *value, size_t size) const; + int _scan(char const* value, size_t size) const; void _validate(); std::string _null; @@ -206,10 +206,10 @@ class Dialect { /// drop CSV fields, while simultaneously performing CSV format conversion. class Editor { public: - Editor(Dialect const &inputDialect, Dialect const &outputDialect, - std::vector const &inputFieldNames, std::vector const &outputFieldNames); + Editor(Dialect const& inputDialect, Dialect const& outputDialect, + std::vector const& inputFieldNames, std::vector const& outputFieldNames); - Editor(ConfigStore const &config); + Editor(ConfigStore const& config); ~Editor(); @@ -220,24 +220,24 @@ class Editor { /// input line must remain live until the next call to `readRecord()` /// or editor destruction, whichever comes first. Raw input is never /// modified. - char const *readRecord(char const *begin, char const *end); + char const* readRecord(char const* begin, char const* end); /// Write the combination of the current input fields and any edits /// performed to `buf`, returning a pointer to the character following the /// last character written. At most `MAX_LINE_SIZE` bytes are written - /// if the output record is longer, an exception is thrown. - char *writeRecord(char *buf) const; + char* writeRecord(char* buf) const; // -- Metadata ---- - Dialect const &getInputDialect() const { return _inputDialect; } - Dialect const &getOutputDialect() const { return _outputDialect; } + Dialect const& getInputDialect() const { return _inputDialect; } + Dialect const& getOutputDialect() const { return _outputDialect; } /// Return the number of input fields `readRecord()` expects to find in /// a line of text. int getNumInputFields() const { return _numInputFields; } /// Return an index for the named field or -1 if no such field exists. 
- int getFieldIndex(std::string const &name) const { + int getFieldIndex(std::string const& name) const { FieldMap::const_iterator i = _fieldMap.find(name); return i == _fieldMap.end() ? -1 : i->second; } @@ -245,7 +245,7 @@ class Editor { ///@{ /// Is the given field an input field? bool isInputField(int i) const { return i >= 0 && i < _numInputFields; } - bool isInputField(std::string const &name) const { return isInputField(getFieldIndex(name)); } + bool isInputField(std::string const& name) const { return isInputField(getFieldIndex(name)); } ///@} // -- Field access ---- @@ -258,17 +258,17 @@ class Editor { if (i < 0 || i >= _numInputFields) { return true; } - Field const &f = _fields[i]; + Field const& f = _fields[i]; return _inputDialect.isNull(f.inputValue, f.inputSize); } - bool isNull(std::string const &name) const { return isNull(getFieldIndex(name)); } + bool isNull(std::string const& name) const { return isNull(getFieldIndex(name)); } ///@} ///@{ /// Return the value of an input field value as a string. The decode flag /// controls whether the encoded value is decoded prior to return. std::string const get(int i, bool decode) const; - std::string const get(std::string const &name, bool decode) const { + std::string const get(std::string const& name, bool decode) const { return get(getFieldIndex(name), decode); } ///@} @@ -281,7 +281,7 @@ class Editor { return _get(i); } template - T get(std::string const &name) const { + T get(std::string const& name) const { return get(getFieldIndex(name)); } ///@} @@ -292,13 +292,13 @@ class Editor { /// Set the value of an output field to NULL. Return true if the field was /// set, and false if it is not an output field and cannot be modified. bool setNull(int i); - bool setNull(std::string const &name) { return setNull(getFieldIndex(name)); } + bool setNull(std::string const& name) { return setNull(getFieldIndex(name)); } ///@} ///@{ /// Set the value of an output field. 
Return true if the field was set, /// and false if it is not an output field and cannot be modified. - bool set(int i, std::string const &value); + bool set(int i, std::string const& value); bool set(int i, bool value) { return set(i, value ? '\1' : '\0'); } bool set(int i, char value); bool set(int i, int value); @@ -311,28 +311,28 @@ class Editor { bool set(int i, double value); template - bool set(std::string const &name, T value) { + bool set(std::string const& name, T value) { return set(getFieldIndex(name), value); } ///@} /// Define configuration variables for CSV editing. - static void defineOptions(boost::program_options::options_description &opts); + static void defineOptions(boost::program_options::options_description& opts); private: // Disable copy construction and assignment. - Editor(Editor const &); - Editor &operator=(Editor const &); + Editor(Editor const&); + Editor& operator=(Editor const&); - typedef std::pair CharConstPtrPair; + typedef std::pair CharConstPtrPair; typedef boost::unordered_map FieldMap; struct Field { static uint16_t const DECODE = 0x01; static uint16_t const EDITED = 0x02; - char const *inputValue; - char *outputValue; + char const* inputValue; + char* outputValue; uint16_t inputSize; uint16_t outputSize; uint16_t flags; @@ -341,9 +341,9 @@ class Editor { ~Field(); }; - void _initialize(std::vector const &inputFieldNames, - std::vector const &outputFieldNames); - CharConstPtrPair const _getFieldText(int i, char *buf) const; + void _initialize(std::vector const& inputFieldNames, + std::vector const& outputFieldNames); + CharConstPtrPair const _getFieldText(int i, char* buf) const; template T _get(int i) const; diff --git a/src/partition/FileUtils.cc b/src/partition/FileUtils.cc index 37f3b9e357..202826296f 100644 --- a/src/partition/FileUtils.cc +++ b/src/partition/FileUtils.cc @@ -46,7 +46,7 @@ namespace fs = boost::filesystem; namespace lsst::partition { -InputFile::InputFile(fs::path const &path) : _path(path), _fd(-1), 
_sz(-1) { +InputFile::InputFile(fs::path const& path) : _path(path), _fd(-1), _sz(-1) { struct ::stat st; int fd = ::open(path.string().c_str(), O_RDONLY); if (fd == -1) { @@ -72,8 +72,8 @@ InputFile::~InputFile() { } } -void InputFile::read(void *buf, off_t off, size_t sz) const { - uint8_t *cur = static_cast(buf); +void InputFile::read(void* buf, off_t off, size_t sz) const { + uint8_t* cur = static_cast(buf); while (sz > 0) { ssize_t n = ::pread(_fd, cur, sz, off); if (n == 0) { @@ -91,12 +91,12 @@ void InputFile::read(void *buf, off_t off, size_t sz) const { } } -void InputFile::read(void *buf, off_t off, size_t sz, int & /*bufferSize*/, - ConfigParamArrow const & /*params*/) const { +void InputFile::read(void* buf, off_t off, size_t sz, int& /*bufferSize*/, + ConfigParamArrow const& /*params*/) const { read(buf, off, sz); } -InputFileArrow::InputFileArrow(fs::path const &path, off_t blockSize) +InputFileArrow::InputFileArrow(fs::path const& path, off_t blockSize) : InputFile(path), _path(path), _fd(-1), _sz(-1) { struct ::stat st; @@ -131,9 +131,9 @@ InputFileArrow::~InputFileArrow() { int InputFileArrow::getBatchNumber() const { return _batchReader->getTotalBatchNumber(); } -void InputFileArrow::read(void *buf, off_t off, size_t sz, int &csvBufferSize, - ConfigParamArrow const ¶ms) const { - uint8_t *cur = static_cast(buf); +void InputFileArrow::read(void* buf, off_t off, size_t sz, int& csvBufferSize, + ConfigParamArrow const& params) const { + uint8_t* cur = static_cast(buf); auto const success = _batchReader->readNextBatch_Table2CSV(cur, csvBufferSize, params.columns, params.optionalColumns, @@ -151,7 +151,7 @@ void InputFileArrow::read(void *buf, off_t off, size_t sz, int &csvBufferSize, } } -OutputFile::OutputFile(fs::path const &path, bool truncate) : _path(path), _fd(-1) { +OutputFile::OutputFile(fs::path const& path, bool truncate) : _path(path), _fd(-1) { int flags = O_CREAT | O_WRONLY; if (truncate) { flags |= O_TRUNC; @@ -182,11 +182,11 @@ 
OutputFile::~OutputFile() { } } -void OutputFile::append(void const *buf, size_t sz) { +void OutputFile::append(void const* buf, size_t sz) { if (!buf || sz == 0) { return; } - char const *b = static_cast(buf); + char const* b = static_cast(buf); while (sz > 0) { ssize_t n = ::write(_fd, b, sz); if (n < 0) { @@ -212,13 +212,13 @@ BufferedAppender::~BufferedAppender() { _cur = 0; } -void BufferedAppender::append(void const *buf, size_t size) { +void BufferedAppender::append(void const* buf, size_t size) { if (!_file) { throw std::logic_error( std::string("BufferedAppender: append() called after " "close() and/or before open().\n")); } - uint8_t const *b = static_cast(buf); + uint8_t const* b = static_cast(buf); while (size > 0) { size_t n = std::min(size, static_cast(_end - _cur)); ::memcpy(_cur, b, n); @@ -232,13 +232,13 @@ void BufferedAppender::append(void const *buf, size_t size) { } } -void BufferedAppender::open(fs::path const &path, bool truncate) { +void BufferedAppender::open(fs::path const& path, bool truncate) { close(); - OutputFile *f = new OutputFile(path, truncate); + OutputFile* f = new OutputFile(path, truncate); if (!_buf) { // Allocate buffer. 
size_t sz = static_cast(_end - _buf); - uint8_t *buf = static_cast(malloc(sz)); + uint8_t* buf = static_cast(malloc(sz)); if (!buf) { delete f; throw std::bad_alloc(); diff --git a/src/partition/FileUtils.h b/src/partition/FileUtils.h index 68c49c2a1c..be71b23eac 100644 --- a/src/partition/FileUtils.h +++ b/src/partition/FileUtils.h @@ -50,8 +50,8 @@ struct ConfigParamArrow { std::string str_escape; bool quote = false; - ConfigParamArrow(std::vector const &columns, std::set const &optionalColumns, - std::string const &vnull, std::string const &vdelimiter, std::string const &vescape, + ConfigParamArrow(std::vector const& columns, std::set const& optionalColumns, + std::string const& vnull, std::string const& vdelimiter, std::string const& vescape, bool vquote) : columns(columns), optionalColumns(optionalColumns), @@ -61,8 +61,8 @@ struct ConfigParamArrow { quote(vquote) {} ConfigParamArrow() = default; - ConfigParamArrow(const ConfigParamArrow &v) = default; - ConfigParamArrow &operator=(const ConfigParamArrow &) = delete; + ConfigParamArrow(const ConfigParamArrow& v) = default; + ConfigParamArrow& operator=(const ConfigParamArrow&) = delete; }; typedef struct ConfigParamArrow ConfigParamArrow; @@ -70,25 +70,25 @@ typedef struct ConfigParamArrow ConfigParamArrow; /// An input file. Safe for use from multiple threads. class InputFile { public: - explicit InputFile(boost::filesystem::path const &path); + explicit InputFile(boost::filesystem::path const& path); virtual ~InputFile(); // Disable copy construction and assignment. - InputFile(InputFile const &) = delete; - InputFile &operator=(InputFile const &) = delete; + InputFile(InputFile const&) = delete; + InputFile& operator=(InputFile const&) = delete; /// Return the size of the input file. off_t size() const { return _sz; } /// Return the path of the input file. 
- boost::filesystem::path const &path() const { return _path; } + boost::filesystem::path const& path() const { return _path; } // Needed in derived class InputFileArrow virtual int getBatchNumber() const { return -1; } /// Read a range of bytes into `buf`. - void read(void *buf, off_t off, size_t sz) const; - virtual void read(void *buf, off_t off, size_t sz, int &bufferSize, ConfigParamArrow const ¶ms) const; + void read(void* buf, off_t off, size_t sz) const; + virtual void read(void* buf, off_t off, size_t sz, int& bufferSize, ConfigParamArrow const& params) const; private: mutable char _msg[1024]; @@ -100,18 +100,18 @@ class InputFile { class InputFileArrow : public InputFile { public: - InputFileArrow(boost::filesystem::path const &path, off_t blockSize); + InputFileArrow(boost::filesystem::path const& path, off_t blockSize); virtual ~InputFileArrow(); // Disable copy construction and assignment. - InputFileArrow(InputFileArrow const &) = delete; - InputFileArrow &operator=(InputFileArrow const &) = delete; + InputFileArrow(InputFileArrow const&) = delete; + InputFileArrow& operator=(InputFileArrow const&) = delete; virtual int getBatchNumber() const override; /// Read a range of bytes into `buf`. - virtual void read(void *buf, off_t off, size_t sz, int &bufferSize, - ConfigParamArrow const ¶ms) const override; + virtual void read(void* buf, off_t off, size_t sz, int& bufferSize, + ConfigParamArrow const& params) const override; private: mutable char _msg[1024]; @@ -130,18 +130,18 @@ class OutputFile { /// Open the given file for writing, creating it if necessary. /// Setting `truncate` to true will cause the file to be overwritten /// if it already exists. - OutputFile(boost::filesystem::path const &path, bool truncate); + OutputFile(boost::filesystem::path const& path, bool truncate); ~OutputFile(); // Disable copy construction and assignment. 
- OutputFile(OutputFile const &) = delete; - OutputFile &operator=(OutputFile const &) = delete; + OutputFile(OutputFile const&) = delete; + OutputFile& operator=(OutputFile const&) = delete; /// Return the path of the output file. - boost::filesystem::path const &path() const { return _path; } + boost::filesystem::path const& path() const { return _path; } /// Append `size` bytes from `buf` to the file. - void append(void const *buf, size_t size); + void append(void const* buf, size_t size); private: mutable char _msg[1024]; @@ -159,33 +159,33 @@ class BufferedAppender { ~BufferedAppender(); /// Append `size` bytes from `buf` to the currently open file. - void append(void const *buf, size_t size); + void append(void const* buf, size_t size); /// Is there a currently open file? If not, calling `append()` is forbidden. bool isOpen() const { return _file != 0; } /// Close the currently open file and open a new one. - void open(boost::filesystem::path const &path, bool truncate); + void open(boost::filesystem::path const& path, bool truncate); /// Write any buffered data to the currently open file and close it. void close(); private: // Disable copy-construction and assignment. - BufferedAppender(BufferedAppender const &); - BufferedAppender &operator=(BufferedAppender const &); + BufferedAppender(BufferedAppender const&); + BufferedAppender& operator=(BufferedAppender const&); - uint8_t *_buf; - uint8_t *_end; - uint8_t *_cur; - OutputFile *_file; + uint8_t* _buf; + uint8_t* _end; + uint8_t* _cur; + OutputFile* _file; }; // TODO(smm): the functions below should be moved to their own header. /// Encode a 32 bit integer as a little-endian sequence of 4 bytes. Return /// `buf + 4`. 
-inline uint8_t *encode(uint8_t *buf, uint32_t x) { +inline uint8_t* encode(uint8_t* buf, uint32_t x) { buf[0] = static_cast(x & 0xff); buf[1] = static_cast((x >> 8) & 0xff); buf[2] = static_cast((x >> 16) & 0xff); @@ -194,7 +194,7 @@ inline uint8_t *encode(uint8_t *buf, uint32_t x) { } /// Encode a 64 bit integer as a little-endian sequence of 8 bytes. Return /// `buf + 8`. -inline uint8_t *encode(uint8_t *buf, uint64_t x) { +inline uint8_t* encode(uint8_t* buf, uint64_t x) { buf[0] = static_cast(x & 0xff); buf[1] = static_cast((x >> 8) & 0xff); buf[2] = static_cast((x >> 16) & 0xff); @@ -207,18 +207,18 @@ inline uint8_t *encode(uint8_t *buf, uint64_t x) { } template -inline T decode(uint8_t const *) { +inline T decode(uint8_t const*) { BOOST_STATIC_ASSERT(sizeof(T) == 0); } /// Decode a little-endian sequence of 4 bytes to a 32-bit integer. template <> -inline uint32_t decode(uint8_t const *buf) { +inline uint32_t decode(uint8_t const* buf) { return static_cast(buf[0]) | (static_cast(buf[1]) << 8) | (static_cast(buf[2]) << 16) | (static_cast(buf[3]) << 24); } /// Decode a little-endian sequence of 8 bytes to a 64-bit integer. template <> -inline uint64_t decode(uint8_t const *buf) { +inline uint64_t decode(uint8_t const* buf) { return static_cast(buf[0]) | (static_cast(buf[1]) << 8) | (static_cast(buf[2]) << 16) | (static_cast(buf[3]) << 24) | (static_cast(buf[4]) << 32) | (static_cast(buf[5]) << 40) | diff --git a/src/partition/Geometry.cc b/src/partition/Geometry.cc index 793a9c6ea0..c65a7a80a3 100644 --- a/src/partition/Geometry.cc +++ b/src/partition/Geometry.cc @@ -101,7 +101,7 @@ Vector3d const NY(0.0, -1.0, 0.0); Vector3d const NZ(0.0, 0.0, -1.0); // Vertex triplet for each HTM root triangle. 
-Vector3d const *const htmRootVert[24] = { +Vector3d const* const htmRootVert[24] = { &X, &NZ, &Y, // S0 &Y, &NZ, &NX, // S1 &NX, &NZ, &NY, // S2 @@ -113,7 +113,7 @@ Vector3d const *const htmRootVert[24] = { }; // Return the number of the HTM root triangle containing v. -inline uint32_t rootNumFor(Vector3d const &v) { +inline uint32_t rootNumFor(Vector3d const& v) { if (v(2) < 0.0) { // S0, S1, S2, S3 if (v(1) > 0.0) { @@ -166,7 +166,7 @@ double maxAlpha(double r, double centerLat) { return DEG_PER_RAD * std::fabs(atan(y / x)); } -uint32_t htmId(Vector3d const &v, int level) { +uint32_t htmId(Vector3d const& v, int level) { // See http://research.microsoft.com/apps/pubs/default.aspx?id=64531 if (level < 0 || level > HTM_MAX_LEVEL) { throw std::runtime_error("Invalid HTM subdivision level."); @@ -240,7 +240,7 @@ int htmLevel(uint32_t id) { return static_cast(level >> 1); } -Vector3d const cartesian(std::pair const &lonLat) { +Vector3d const cartesian(std::pair const& lonLat) { double lon = lonLat.first * RAD_PER_DEG; double lat = lonLat.second * RAD_PER_DEG; double sinLon = std::sin(lon); @@ -250,7 +250,7 @@ Vector3d const cartesian(std::pair const &lonLat) { return Vector3d(cosLon * cosLat, sinLon * cosLat, sinLat); } -std::pair const spherical(Vector3d const &v) { +std::pair const spherical(Vector3d const& v) { std::pair sc(0.0, 0.0); double d2 = v(0) * v(0) + v(1) * v(1); if (d2 != 0.0) { @@ -269,7 +269,7 @@ std::pair const spherical(Vector3d const &v) { return sc; } -double angSep(Vector3d const &v0, Vector3d const &v1) { +double angSep(Vector3d const& v0, Vector3d const& v1) { double cs = v0.dot(v1); Vector3d n = v0.cross(v1); double ss = n.norm(); @@ -329,7 +329,7 @@ SphericalTriangle::SphericalTriangle(uint32_t id) : _m(), _mi() { _mi = _m.inverse(); } -SphericalTriangle::SphericalTriangle(Vector3d const &v0, Vector3d const &v1, Vector3d const &v2) +SphericalTriangle::SphericalTriangle(Vector3d const& v0, Vector3d const& v1, Vector3d const& v2) : _m(), _mi() 
{ _m.col(0) = v0; _m.col(1) = v1; @@ -427,10 +427,10 @@ double SphericalTriangle::area() const { namespace { // Intersect the input spherical convex polygon with the given half-space. -size_t intersect(Vector3d const *inVe, // input (vertex, edge) pair array +size_t intersect(Vector3d const* inVe, // input (vertex, edge) pair array size_t numVerts, // # of input vertices - Vector3d const &plane, // plane normal of half-space - Vector3d *outVe) // output (vertex, edge) pair array + Vector3d const& plane, // plane normal of half-space + Vector3d* outVe) // output (vertex, edge) pair array { assert(numVerts > 1 && inVe != 0 && outVe != 0); size_t i = 0, j = numVerts - 1, n = 0; @@ -540,7 +540,7 @@ double LonRangeList::extent() const { } // Compute the area of the input polygon intersected with zmin <= z <= zmax. -double zArea(Vector3d const *inVe, // input (vertex, edge) pair array +double zArea(Vector3d const* inVe, // input (vertex, edge) pair array size_t numVerts, // # of input vertices double zmin, double zmax) { // A = 2πχ(M) - Σ αᵥ - Σ cos(φᵤ) ∆θᵤ (see above) @@ -560,7 +560,7 @@ double zArea(Vector3d const *inVe, // input (vertex, edge) pair array double z = inVe[2 * i](2); // n is parallel to the edge plane normal (but does not necessarily // have unit norm). - Vector3d const &n = inVe[2 * j + 1]; + Vector3d const& n = inVe[2 * j + 1]; if (z >= zmin && z <= zmax) { // Vertex i is in z-range; compute and accumulate turning angle αᵥ // at vertex v = i. Here αᵥ is just the angle between the normal @@ -695,7 +695,7 @@ double zArea(Vector3d const *inVe, // input (vertex, edge) pair array } // unnamed namespace -double SphericalTriangle::intersectionArea(SphericalBox const &box) const { +double SphericalTriangle::intersectionArea(SphericalBox const& box) const { if (box.getLonMin() == box.getLonMax() || box.getLatMin() >= 90.0 - EPSILON_DEG || box.getLatMax() <= -90.0 - +EPSILON_DEG) { // box is degenerate or very small. 
@@ -711,8 +711,8 @@ double SphericalTriangle::intersectionArea(SphericalBox const &box) const { } Vector3d veBuf0[(3 + 2) * 2]; Vector3d veBuf1[(3 + 2) * 2]; - Vector3d *in = veBuf0; - Vector3d *out = veBuf1; + Vector3d* in = veBuf0; + Vector3d* out = veBuf1; size_t numVerts = 3; // Populate in with (vertex, edge) pairs. in[0] = vertex(0); @@ -774,7 +774,7 @@ SphericalBox::SphericalBox(double lonMin, double lonMax, double latMin, double l _latMax = clampLat(latMax); } -SphericalBox::SphericalBox(Vector3d const &v0, Vector3d const &v1, Vector3d const &v2) { +SphericalBox::SphericalBox(Vector3d const& v0, Vector3d const& v1, Vector3d const& v2) { // Find the bounding circle of the triangle. Vector3d cv = v0 + v1 + v2; double r = angSep(cv, v0); @@ -839,7 +839,7 @@ double SphericalBox::area() const { return RAD_PER_DEG * getLonExtent() * (std::sin(RAD_PER_DEG * _latMax) - std::sin(RAD_PER_DEG * _latMin)); } -void SphericalBox::htmIds(std::vector &ids, int level) const { +void SphericalBox::htmIds(std::vector& ids, int level) const { if (level < 0 || level > HTM_MAX_LEVEL) { throw std::runtime_error("Invalid HTM subdivision level."); } @@ -855,10 +855,10 @@ void SphericalBox::htmIds(std::vector &ids, int level) const { // Slow method for finding triangles overlapping a box. For the subdivision // levels and box sizes encountered in practice, this is very unlikely to be // a performance problem. -void SphericalBox::_findIds(std::vector &ids, // Storage for overlapping triangle IDs. +void SphericalBox::_findIds(std::vector& ids, // Storage for overlapping triangle IDs. uint32_t id, // HTM ID of triangle `m`. int level, // Number of recursions remaining. - Matrix3d const &m) const // Triangle vertices. + Matrix3d const& m) const // Triangle vertices. 
{ if (!intersects(SphericalBox(m.col(0), m.col(1), m.col(2)))) { return; diff --git a/src/partition/Geometry.h b/src/partition/Geometry.h index 204a60dc75..5bb2881931 100644 --- a/src/partition/Geometry.h +++ b/src/partition/Geometry.h @@ -81,14 +81,14 @@ double reduceLon(double lon); double maxAlpha(double r, double centerLat); /// Compute the HTM ID of `v`. -uint32_t htmId(Vector3d const &v, int level); +uint32_t htmId(Vector3d const& v, int level); /// Return the HTM subdivision level of `id` or -1 if `id` is invalid. int htmLevel(uint32_t id); /// Return the unit 3-vector corresponding to the given spherical /// coordinates (in degrees). -Vector3d const cartesian(std::pair const &lonLat); +Vector3d const cartesian(std::pair const& lonLat); inline Vector3d const cartesian(double lon, double lat) { return cartesian(std::pair(lon, lat)); @@ -96,14 +96,14 @@ inline Vector3d const cartesian(double lon, double lat) { /// Return the longitude and latitude angles (in degrees) corresponding /// to the given 3-vector. -std::pair const spherical(Vector3d const &v); +std::pair const spherical(Vector3d const& v); inline std::pair const spherical(double x, double y, double z) { return spherical(Vector3d(x, y, z)); } /// Return the angular separation between `v0` and `v1` in radians. -double angSep(Vector3d const &v0, Vector3d const &v1); +double angSep(Vector3d const& v0, Vector3d const& v1); class SphericalBox; @@ -160,17 +160,17 @@ class SphericalTriangle { explicit SphericalTriangle(uint32_t htmId); /// Construct the triangle with the given vertices. - SphericalTriangle(Vector3d const &v0, Vector3d const &v1, Vector3d const &v2); + SphericalTriangle(Vector3d const& v0, Vector3d const& v1, Vector3d const& v2); /// Get the i-th vertex (i = 0,1,2). No bounds checking is performed. 
- Vector3d const &vertex(int i) const { return _m.col(i); } + Vector3d const& vertex(int i) const { return _m.col(i); } /// Get the matrix that converts from cartesian to /// spherical barycentric coordinates. - Matrix3d const &getBarycentricTransform() const { return _mi; } + Matrix3d const& getBarycentricTransform() const { return _mi; } /// Get the matrix that converts from spherical barycentric /// to cartesian coordinates. - Matrix3d const &getCartesianTransform() const { return _m; } + Matrix3d const& getCartesianTransform() const { return _m; } /// Compute the area of the triangle in steradians. double area() const; @@ -179,7 +179,7 @@ class SphericalTriangle { /// with a spherical box. The routine is not fully general - for simplicity /// of implementation, spherical boxes with RA extent strictly between 180 /// and 360 degrees are not supported. - double intersectionArea(SphericalBox const &box) const; + double intersectionArea(SphericalBox const& box) const; private: /// [V0 V1 V2], where column vectors V0, V1, V2 are the triangle @@ -204,7 +204,7 @@ class SphericalBox { SphericalBox(double lonMin, double lonMax, double latMin, double latMax); /// Create a conservative bounding box for a spherical triangle. - SphericalBox(Vector3d const &v0, Vector3d const &v1, Vector3d const &v2); + SphericalBox(Vector3d const& v0, Vector3d const& v1, Vector3d const& v2); /// Expand the box by the given radius. void expand(double radius); @@ -242,12 +242,12 @@ class SphericalBox { return lon >= _lonMin && lon <= _lonMax; } - bool contains(std::pair const &position) const { + bool contains(std::pair const& position) const { return contains(position.first, position.second); } /// Does this box intersect the given box? 
- bool intersects(SphericalBox const &box) const { + bool intersects(SphericalBox const& box) const { if (isEmpty() || box.isEmpty()) { return false; } else if (box._latMin > _latMax || box._latMax < _latMin) { @@ -265,10 +265,10 @@ class SphericalBox { /// Compute a conservative approximation to the list of HTM triangles /// potentially overlapping this box and store them in `ids`. - void htmIds(std::vector &ids, int level) const; + void htmIds(std::vector& ids, int level) const; private: - void _findIds(std::vector &ids, uint32_t id, int level, Matrix3d const &m) const; + void _findIds(std::vector& ids, uint32_t id, int level, Matrix3d const& m) const; double _lonMin; double _lonMax; diff --git a/src/partition/InputLines.cc b/src/partition/InputLines.cc index 56c626f6c8..5196284814 100644 --- a/src/partition/InputLines.cc +++ b/src/partition/InputLines.cc @@ -39,18 +39,18 @@ namespace lsst::partition { namespace { -typedef std::pair CharPtrPair; +typedef std::pair CharPtrPair; struct LineFragmentStorage { size_t size; char buf[MAX_LINE_SIZE]; - LineFragmentStorage(size_t sz, char *b) : size(sz) { std::memcpy(buf, b, sz); } + LineFragmentStorage(size_t sz, char* b) : size(sz) { std::memcpy(buf, b, sz); } }; // One side of a line split in two by a block boundary. struct LineFragment { - LineFragmentStorage *data; + LineFragmentStorage* data; LineFragment() : data(0) {} ~LineFragment() { @@ -63,17 +63,17 @@ struct LineFragment { // caller is absolved of any responsibility for the line. The second // call will fail and return the data stored by the first call. In this // case, the caller is responsible for the line. 
- LineFragmentStorage *tryStore(LineFragmentStorage *newval) { + LineFragmentStorage* tryStore(LineFragmentStorage* newval) { assert(newval != 0); #if __GNUC__ && ((__SIZEOF_POINTER__ == 4 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ (__SIZEOF_POINTER__ == 8 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) - LineFragmentStorage *oldval = 0; + LineFragmentStorage* oldval = 0; return __sync_val_compare_and_swap(&data, oldval, newval); #else #warning CAS not supported on this platform - falling back to locking. static boost::mutex m; boost::lock_guard lock(m); - LineFragmentStorage *oldval = data; + LineFragmentStorage* oldval = data; if (oldval == 0) { data = newval; } @@ -92,14 +92,14 @@ struct Block { Block() : file(), offset(0), size(0), head(), tail() {} - CharPtrPair const read(char *buf, bool skipFirstLine, ConfigParamArrow const ¶ms); + CharPtrPair const read(char* buf, bool skipFirstLine, ConfigParamArrow const& params); }; // Read a file block and handle the lines crossing its boundaries. -CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow const &configArrow) { +CharPtrPair const Block::read(char* buf, bool skipFirstLine, ConfigParamArrow const& configArrow) { // Read into buf, leaving space for a line on either side of the block. - char *readBeg = buf + MAX_LINE_SIZE; - char *readEnd = readBeg + size; + char* readBeg = buf + MAX_LINE_SIZE; + char* readEnd = readBeg + size; // Arrow/Parquet : retrieve the real size of the arrow CSV block int bufferSize = 0; @@ -112,7 +112,7 @@ CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow co // The responsibility for returning a line which crosses the beginning // or end of this block lies with the last thread to encounter the // line. - char *beg = readBeg; + char* beg = readBeg; if (head || skipFirstLine) { // Scan past the first line. 
for (; beg < readEnd && *beg != '\n' && *beg != '\r'; ++beg) { @@ -137,8 +137,8 @@ CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow co // This is not the first block in the enclosing file. If the // initial part of the first line has already been read by the // reader of the previous block, return the entire line in buf. - LineFragmentStorage *right = new LineFragmentStorage(static_cast(beg - readBeg), readBeg); - LineFragmentStorage *left = head->tryStore(right); + LineFragmentStorage* right = new LineFragmentStorage(static_cast(beg - readBeg), readBeg); + LineFragmentStorage* left = head->tryStore(right); if (left != 0) { beg = readBeg - left->size; std::memcpy(beg, left->buf, left->size); @@ -146,7 +146,7 @@ CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow co } } } - char *end = readEnd; + char* end = readEnd; if (tail) { // This is not the last block in the enclosing file - // scan to the beginning of the last line. @@ -157,8 +157,8 @@ CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow co } // If the trailing part of the last line has already been read by // the reader of the following block, return the entire line in buf. - LineFragmentStorage *left = new LineFragmentStorage(static_cast(readEnd - end), end); - LineFragmentStorage *right = tail->tryStore(left); + LineFragmentStorage* left = new LineFragmentStorage(static_cast(readEnd - end), end); + LineFragmentStorage* right = tail->tryStore(left); if (right != 0) { std::memcpy(readEnd, right->buf, right->size); end = readEnd + right->size; @@ -169,7 +169,7 @@ CharPtrPair const Block::read(char *buf, bool skipFirstLine, ConfigParamArrow co } // Split a file into a series of blocks. 
-std::vector const split(fs::path const &path, off_t blockSize) { +std::vector const split(fs::path const& path, off_t blockSize) { std::vector blocks; Block b; @@ -220,9 +220,9 @@ std::vector const split(fs::path const &path, off_t blockSize) { class InputLines::Impl { public: - Impl(std::vector const &paths, size_t blockSize, bool skipFirstLine); - Impl(std::vector const &paths, size_t blockSize, bool skipFirstLine, - ConfigParamArrow const &config); + Impl(std::vector const& paths, size_t blockSize, bool skipFirstLine); + Impl(std::vector const& paths, size_t blockSize, bool skipFirstLine, + ConfigParamArrow const& config); ~Impl() {} size_t getBlockSize() const { return _blockSize; } @@ -232,13 +232,13 @@ class InputLines::Impl { return _blockCount == 0; } - CharPtrPair const read(char *buf); + CharPtrPair const read(char* buf); private: BOOST_STATIC_ASSERT(MAX_LINE_SIZE < 1 * MiB); - Impl(Impl const &); - Impl &operator=(Impl const &); + Impl(Impl const&); + Impl& operator=(Impl const&); size_t const _blockSize; bool const _skipFirstLine; @@ -254,7 +254,7 @@ class InputLines::Impl { char _pad1[CACHE_LINE_SIZE]; }; -InputLines::Impl::Impl(std::vector const &paths, size_t blockSize, bool skipFirstLine) +InputLines::Impl::Impl(std::vector const& paths, size_t blockSize, bool skipFirstLine) : _blockSize(std::min(std::max(blockSize, 1 * MiB), 1 * GiB)), _skipFirstLine(skipFirstLine), _configArrow(ConfigParamArrow()), @@ -263,8 +263,8 @@ InputLines::Impl::Impl(std::vector const &paths, size_t blockSize, boo _queue(), _paths(paths) {} -InputLines::Impl::Impl(std::vector const &paths, size_t blockSize, bool skipFirstLine, - ConfigParamArrow const &config) +InputLines::Impl::Impl(std::vector const& paths, size_t blockSize, bool skipFirstLine, + ConfigParamArrow const& config) : _blockSize(std::min(std::max(blockSize, 1 * MiB), 1 * GiB)), _skipFirstLine(skipFirstLine), _configArrow(config), @@ -273,7 +273,7 @@ InputLines::Impl::Impl(std::vector const &paths, size_t 
blockSize, boo _queue(), _paths(paths) {} -CharPtrPair const InputLines::Impl::read(char *buf) { +CharPtrPair const InputLines::Impl::read(char* buf) { boost::unique_lock lock(_mutex); while (_blockCount > 0) { if (!_queue.empty()) { @@ -318,16 +318,16 @@ CharPtrPair const InputLines::Impl::read(char *buf) { } } // All lines have been read. - return CharPtrPair(static_cast(0), static_cast(0)); + return CharPtrPair(static_cast(0), static_cast(0)); } // Method delegation. -InputLines::InputLines(std::vector const &paths, size_t blockSize, bool skipFirstLine) +InputLines::InputLines(std::vector const& paths, size_t blockSize, bool skipFirstLine) : _impl(std::make_shared(paths, blockSize, skipFirstLine)) {} -InputLines::InputLines(std::vector const &paths, size_t blockSize, bool skipFirstLine, - ConfigParamArrow const &configArrow) +InputLines::InputLines(std::vector const& paths, size_t blockSize, bool skipFirstLine, + ConfigParamArrow const& configArrow) : _impl(std::make_shared(paths, blockSize, skipFirstLine, configArrow)) {} size_t InputLines::getBlockSize() const { return _impl ? _impl->getBlockSize() : 0; } @@ -336,11 +336,11 @@ size_t InputLines::getMinimumBufferCapacity() const { return _impl ? _impl->getM bool InputLines::empty() const { return _impl ? 
_impl->empty() : true; } -CharPtrPair const InputLines::read(char *buf) { +CharPtrPair const InputLines::read(char* buf) { if (_impl) { return _impl->read(buf); } - return CharPtrPair(static_cast(0), static_cast(0)); + return CharPtrPair(static_cast(0), static_cast(0)); } } // namespace lsst::partition diff --git a/src/partition/MapReduce.h b/src/partition/MapReduce.h index fa236b217f..87ef6d0a7c 100644 --- a/src/partition/MapReduce.h +++ b/src/partition/MapReduce.h @@ -63,15 +63,15 @@ template struct Record { K key; uint32_t size; - char *data; + char* data; Record() : key(), size(0), data(0) {} - explicit Record(K const &k) : key(k), size(0), data(0) {} + explicit Record(K const& k) : key(k), size(0), data(0) {} /// Return a hash of the record key. uint32_t hash() const { return key.hash(); } - bool operator<(Record const &r) const { return key < r.key; } + bool operator<(Record const& r) const { return key < r.key; } }; /// An append-only record silo. @@ -97,9 +97,9 @@ class Silo { size_t getBytesUsed() const { return _bytesUsed; } /// Order silos by memory usage, from largest to smallest. - bool operator<(Silo const &silo) const { return silo._bytesUsed < _bytesUsed; } + bool operator<(Silo const& silo) const { return silo._bytesUsed < _bytesUsed; } - std::vector const &getRecords() const { return _records; } + std::vector const& getRecords() const { return _records; } void reserve(size_t cap) { _records.reserve(cap); } @@ -112,14 +112,14 @@ class Silo { /// Add a record to the silo, using `Editor::writeRecord()` to produce /// the record text. Passing in the editor allows records to be written /// directly to silo memory, avoiding a copy. - void add(K const &key, csv::Editor const &editor); + void add(K const& key, csv::Editor const& editor); /// Add a record to the silo. - void add(K const &key, char const *data, uint32_t size); + void add(K const& key, char const* data, uint32_t size); private: // Disable copy construction and assignment. 
- Silo(Silo const &); - Silo &operator=(Silo const &); + Silo(Silo const&); + Silo& operator=(Silo const&); void _grow(); @@ -127,9 +127,9 @@ class Silo { std::vector _records; size_t _bytesUsed; - char *_head; // Head of linked allocation list. - char *_cur; - char *_end; // End of current allocation. + char* _head; // Head of linked allocation list. + char* _cur; + char* _end; // End of current allocation. char _pad1[CACHE_LINE_SIZE]; }; @@ -138,9 +138,9 @@ template Silo::~Silo() { // Traverse linked-list, freeing each allocation. Forward // pointers are located at the beginning of each allocation. - char *head = _head; + char* head = _head; while (head) { - char *next = *reinterpret_cast(head); + char* next = *reinterpret_cast(head); std::free(head); head = next; } @@ -156,15 +156,15 @@ void Silo::clear() { if (_head) { // Set data insertion point to the beginning of the // first allocation. - _cur = _head + sizeof(char *); + _cur = _head + sizeof(char*); _end = _head + ALLOC_SIZE; } } template -void Silo::add(K const &key, csv::Editor const &editor) { - char *buf = _cur; - char *end = _end; +void Silo::add(K const& key, csv::Editor const& editor) { + char* buf = _cur; + char* end = _end; if (end - buf < MAX_LINE_SIZE) { // The size of the line being written isn't known in advance, so // the silo must always present MAX_LINE_SIZE or more contiguous @@ -183,12 +183,12 @@ void Silo::add(K const &key, csv::Editor const &editor) { } template -void Silo::add(K const &key, char const *data, uint32_t size) { +void Silo::add(K const& key, char const* data, uint32_t size) { if (size > MAX_LINE_SIZE) { throw std::runtime_error("Record too long."); } - char *buf = _cur; - char *end = _end; + char* buf = _cur; + char* end = _end; if (static_cast(end - buf) < size) { _grow(); buf = _cur; @@ -207,26 +207,26 @@ template void Silo::_grow() { // [_cur, _end) has no room for data, so either advance to the next // allocation in the linked-list, or append a new allocation at the 
tail. - char *tail = 0; - char *next = 0; + char* tail = 0; + char* next = 0; if (_end) { tail = _end - ALLOC_SIZE; - next = *reinterpret_cast(tail); + next = *reinterpret_cast(tail); } if (!next) { - next = static_cast(std::malloc(ALLOC_SIZE)); + next = static_cast(std::malloc(ALLOC_SIZE)); if (!next) { throw std::bad_alloc(); } if (tail) { - *reinterpret_cast(tail) = next; + *reinterpret_cast(tail) = next; } - *reinterpret_cast(next) = 0; + *reinterpret_cast(next) = 0; if (!_head) { _head = next; } } - _cur = next + sizeof(char *); + _cur = next + sizeof(char*); _end = next + ALLOC_SIZE; } @@ -304,7 +304,7 @@ namespace detail { /// Comparator for shared pointers to `Silo`s. template struct SiloPtrCmp { - bool operator()(std::shared_ptr > const &s, std::shared_ptr > const &t) const { + bool operator()(std::shared_ptr > const& s, std::shared_ptr > const& t) const { return *s < *t; } }; @@ -325,7 +325,7 @@ struct SortedRecordRange { /// Order sorted ranges by their minimum records, /// from largest to smallest. - bool operator<(SortedRecordRange const &r) const { return *r.cur < *cur; } + bool operator<(SortedRecordRange const& r) const { return *r.cur < *cur; } }; /// CRTP base-class containing the meat of the map-reduce implementation. 
@@ -336,21 +336,21 @@ class JobBase { public: typedef WorkerT Worker; - JobBase(ConfigStore const &config); + JobBase(ConfigStore const& config); ~JobBase(); void run(InputLines input); void operator()(); - static void defineOptions(boost::program_options::options_description &opts); + static void defineOptions(boost::program_options::options_description& opts); private: - JobBase(JobBase const &); - JobBase &operator=(JobBase const &); + JobBase(JobBase const&); + JobBase& operator=(JobBase const&); void _work(); void _cleanup(); - void _fail(std::exception const &ex); + void _fail(std::exception const& ex); typedef typename Worker::Key Key; typedef detail::SortedRecordRange SortedRecordRange; @@ -360,7 +360,7 @@ class JobBase { typedef detail::SiloPtrCmp SiloPtrCmp; typedef typename std::vector::const_iterator SiloPtrIter; - ConfigStore const *_config; + ConfigStore const* _config; InputLines _input; size_t _threshold; @@ -383,11 +383,11 @@ class JobBase { // DerivedT is responsible for storing worker results. Note // that _mutex is locked when this is called. - void _storeResultImpl(Worker &worker) { static_cast(this)->_storeResult(worker); } + void _storeResultImpl(Worker& worker) { static_cast(this)->_storeResult(worker); } }; template -JobBase::JobBase(ConfigStore const &config) +JobBase::JobBase(ConfigStore const& config) : _config(&config), _threshold(0), _numWorkers(config.get("mr.num-workers")), @@ -420,7 +420,7 @@ void JobBase::_cleanup() { } template -void JobBase::_fail(std::exception const &ex) { +void JobBase::_fail(std::exception const& ex) { boost::unique_lock lock(_mutex); if (!_failed) { // Mark job as failed, and set error message. 
@@ -449,7 +449,7 @@ void JobBase::run(InputLines input) { for (; i < _numWorkers - 1; ++i) { threads[i] = boost::thread(boost::ref(*this)); } - } catch (std::exception const &ex) { + } catch (std::exception const& ex) { _fail(ex); } // The caller participates in job execution, avoiding thread @@ -473,13 +473,13 @@ template void JobBase::operator()() { try { _work(); - } catch (std::exception const &ex) { + } catch (std::exception const& ex) { _fail(ex); } } template -void JobBase::defineOptions(boost::program_options::options_description &opts) { +void JobBase::defineOptions(boost::program_options::options_description& opts) { namespace po = boost::program_options; po::options_description mr("\\_________________ Map-Reduce", 80); mr.add_options()("mr.block-size", po::value()->default_value(4), @@ -503,7 +503,7 @@ void JobBase::defineOptions(boost::program_options::options_d template void JobBase::_work() { // Pre-allocate disk read buffer. - std::shared_ptr buffer(static_cast(std::malloc(_input.getMinimumBufferCapacity())), + std::shared_ptr buffer(static_cast(std::malloc(_input.getMinimumBufferCapacity())), std::free); if (!buffer) { throw std::bad_alloc(); @@ -541,7 +541,7 @@ void JobBase::_work() { SiloPtr silo = _silos.back(); _silos.pop_back(); lock.unlock(); - std::pair data = _input.read(buffer.get()); + std::pair data = _input.read(buffer.get()); if (data.first == 0 && data.second == 0) { // No input left. 
silo->sort(); @@ -601,7 +601,7 @@ void JobBase::_work() { std::make_heap(ranges.begin(), ranges.end()); while (!ranges.empty()) { std::pop_heap(ranges.begin(), ranges.end()); - SortedRecordRange *r = &ranges.back(); + SortedRecordRange* r = &ranges.back(); RecordIter i = r->cur; r->advance(); if (i->hash() % _numWorkers == rank) { @@ -650,7 +650,7 @@ template class JobImpl : private JobBase, WorkerT> { typedef JobBase, WorkerT> Base; - void _storeResult(WorkerT &w) { + void _storeResult(WorkerT& w) { std::shared_ptr r = w.result(); if (!_result) { _result = r; @@ -665,7 +665,7 @@ class JobImpl : private JobBase, WorkerT> { friend class JobBase, WorkerT>; public: - explicit JobImpl(ConfigStore const &config) : Base(config) {} + explicit JobImpl(ConfigStore const& config) : Base(config) {} std::shared_ptr const run(InputLines input) { try { @@ -687,13 +687,13 @@ template class JobImpl : private JobBase, WorkerT> { typedef JobBase, WorkerT> Base; - void _storeResult(WorkerT &) {} + void _storeResult(WorkerT&) {} // Allow JobBase to call _storeResult. 
friend class JobBase, WorkerT>; public: - explicit JobImpl(ConfigStore const &config) : Base(config) {} + explicit JobImpl(ConfigStore const& config) : Base(config) {} void run(InputLines input) { Base::run(input); } @@ -724,7 +724,7 @@ class JobImpl : private JobBase, WorkerT> template class Job : public detail::JobImpl { public: - explicit Job(ConfigStore const &config) : detail::JobImpl(config) {} + explicit Job(ConfigStore const& config) : detail::JobImpl(config) {} }; } // namespace lsst::partition diff --git a/src/partition/sph-duplicate2.cc b/src/partition/sph-duplicate2.cc index 5775716a22..6714350d7f 100644 --- a/src/partition/sph-duplicate2.cc +++ b/src/partition/sph-duplicate2.cc @@ -57,7 +57,7 @@ namespace { class CmdLineOptions { private: template - T _getMandatoryOption(std::string const &name, po::variables_map const &vm) { + T _getMandatoryOption(std::string const& name, po::variables_map const& vm) { if (!vm.count(name)) throw new std::invalid_argument("missing command line option: " + name); return vm[name].as(); @@ -74,7 +74,7 @@ class CmdLineOptions { /** * @return 'false' if the appplication was run in the 'hel' mode. 
*/ - bool parse(int argc, char const *const *argv) { + bool parse(int argc, char const* const* argv) { po::options_description desc( "\n" "DESCRIPTION\n" @@ -203,10 +203,10 @@ class CmdLineOptions { private: /// Copy constructor (not allowed) - CmdLineOptions(CmdLineOptions const &); + CmdLineOptions(CmdLineOptions const&); /// Assignment operator (ot allowed) - CmdLineOptions &operator=(CmdLineOptions const &); + CmdLineOptions& operator=(CmdLineOptions const&); public: bool verbose; @@ -255,7 +255,7 @@ struct RaDecl { * * @return the translated coordinates */ -RaDecl transformRaDecl(double ra, double decl, part::SphericalBox const &box) { +RaDecl transformRaDecl(double ra, double decl, part::SphericalBox const& box) { RaDecl coord{ra, decl}; coord.ra += opt.raShift; @@ -273,7 +273,7 @@ class PrimaryKeyGenerator { public: /// Construct the generator for the specified table - PrimaryKeyGenerator(CmdLineOptions const &opt, std::string const &table) : _opt(opt), _table(table) {} + PrimaryKeyGenerator(CmdLineOptions const& opt, std::string const& table) : _opt(opt), _table(table) {} ~PrimaryKeyGenerator() {} @@ -295,7 +295,7 @@ class PrimaryKeyGenerator { } /// Allocate and return the next key in a series - uint64_t next(uint64_t const oldId, RaDecl const &coord) { + uint64_t next(uint64_t const oldId, RaDecl const& coord) { // Compute new ID for the shifted RA/DECL using the requested // algorithm. 
@@ -366,10 +366,10 @@ class PrimaryKeyGenerator { PrimaryKeyGenerator(); /// Assignment operator (is disabled) - PrimaryKeyGenerator &operator=(PrimaryKeyGenerator const &lhs); + PrimaryKeyGenerator& operator=(PrimaryKeyGenerator const& lhs); private: - CmdLineOptions const &_opt; + CmdLineOptions const& _opt; std::string _table; HtmIdMap _maxId; @@ -393,7 +393,7 @@ class ColDef { virtual ~ColDef() {} /// Load column definitions from a file - void load(std::string const &filename) { + void load(std::string const& filename) { std::ifstream infile(filename, std::ifstream::in); std::string name; for (int colnum = 0; std::getline(infile, name); colnum++) { @@ -410,7 +410,7 @@ class ColDef { ColDef() : maxLen(0) {} /// Evaluate the column - virtual void _evaluateColumn(std::string const &name, int colnum) = 0; + virtual void _evaluateColumn(std::string const& name, int colnum) = 0; /// Validator for the definitions /** @@ -420,10 +420,10 @@ class ColDef { private: /// Copy constructor (is disabled) - ColDef(ColDef const &); + ColDef(ColDef const&); /// Assignment operator (is disabled) - ColDef &operator=(ColDef const &lhs); + ColDef& operator=(ColDef const& lhs); public: std::vector columns; @@ -442,7 +442,7 @@ class ColDefObject : public ColDef { protected: /// Evaluate the column - virtual void _evaluateColumn(std::string const &name, int colnum) { + virtual void _evaluateColumn(std::string const& name, int colnum) { if ("deepSourceId" == name) { idxDeepSourceId = colnum; } else if ("ra" == name) { @@ -463,10 +463,10 @@ class ColDefObject : public ColDef { private: /// Copy constructor (is disabled) - ColDefObject(ColDefObject const &); + ColDefObject(ColDefObject const&); /// Assignment operator (is disabled) - ColDefObject &operator=(ColDefObject const &lhs); + ColDefObject& operator=(ColDefObject const& lhs); public: int idxDeepSourceId; @@ -499,7 +499,7 @@ class ColDefSource : public ColDef { protected: /// Evaluate the column - virtual void 
_evaluateColumn(std::string const &name, int colnum) { + virtual void _evaluateColumn(std::string const& name, int colnum) { if ("id" == name) { idxId = colnum; } else if ("coord_ra" == name) { @@ -528,10 +528,10 @@ class ColDefSource : public ColDef { private: /// Copy constructor (is disabled) - ColDefSource(ColDefSource const &); + ColDefSource(ColDefSource const&); /// Assignment operator (is disabled) - ColDefSource &operator=(ColDefSource const &lhs); + ColDefSource& operator=(ColDefSource const& lhs); public: int idxId; @@ -557,7 +557,7 @@ class ColDefForcedSource : public ColDef { protected: /// Evaluate the column - virtual void _evaluateColumn(std::string const &name, int colnum) { + virtual void _evaluateColumn(std::string const& name, int colnum) { if ("deepSourceId" == name) { idxDeepSourceId = colnum; } else if ("chunkId" == name) { @@ -572,10 +572,10 @@ class ColDefForcedSource : public ColDef { private: /// Copy constructor (is disabled) - ColDefForcedSource(ColDefForcedSource const &); + ColDefForcedSource(ColDefForcedSource const&); /// Assignment operator (is disabled) - ColDefForcedSource &operator=(ColDefForcedSource const &lhs); + ColDefForcedSource& operator=(ColDefForcedSource const& lhs); public: int idxDeepSourceId; @@ -587,7 +587,7 @@ class ColDefForcedSource : public ColDef { ColDefForcedSource coldefForcedSource; /// Write a row into a stream -void writeRow(std::vector const &tokens, std::ofstream &os) { +void writeRow(std::vector const& tokens, std::ofstream& os) { if (opt.dryRun) return; for (size_t idx = 0; idx < tokens.size(); ++idx) { @@ -609,7 +609,7 @@ ObjectIdTransformMap objIdTransformInput, objIdTransformDuplicate; std::set objIdOutOfBox; /// Duplicate the next row of the chunk's Object table -size_t duplicateObjectRow(std::string &line, part::SphericalBox const &box, std::ofstream &os) { +size_t duplicateObjectRow(std::string& line, part::SphericalBox const& box, std::ofstream& os) { // Split the input line into tokens and 
store them // in a temporrary array at positions which are supposed to match // the correposnding ColDef @@ -636,7 +636,7 @@ size_t duplicateObjectRow(std::string &line, part::SphericalBox const &box, std: double decl(0.); int idx = 0; - for (std::string const &token : tokens) { + for (std::string const& token : tokens) { if (coldefObject.idxDeepSourceId == idx) { deepSourceId = boost::lexical_cast(token); } else if (coldefObject.idxRa == idx) { @@ -712,7 +712,7 @@ size_t duplicateObjectRow(std::string &line, part::SphericalBox const &box, std: } /// Duplicate all rows of the chunk's Object table -std::pair duplicateObject(part::SphericalBox const &box) { +std::pair duplicateObject(part::SphericalBox const& box) { std::string const inFileName = opt.indir + "/Object_" + std::to_string(opt.chunkId) + ".txt", outFileName = opt.outdir + "/Object_" + std::to_string(opt.chunkId) + ".txt"; @@ -733,7 +733,7 @@ std::pair duplicateObject(part::SphericalBox const &box) { } /// Duplicate the next row of the chunk's Source table -size_t duplicateSourceRow(std::string &line, part::SphericalBox const &box, std::ofstream &os) { +size_t duplicateSourceRow(std::string& line, part::SphericalBox const& box, std::ofstream& os) { // Split the input line into tokens and store them // in a temporrary array at positions which are supposed to match // the correposnding ColDef @@ -764,7 +764,7 @@ size_t duplicateSourceRow(std::string &line, part::SphericalBox const &box, std: double cluster_coord_decl(0.); int idx = 0; - for (std::string const &token : tokens) { + for (std::string const& token : tokens) { if (coldefSource.idxId == idx) { id = boost::lexical_cast(token); } else if (coldefSource.idxCoordRa == idx) { @@ -872,7 +872,7 @@ size_t duplicateSourceRow(std::string &line, part::SphericalBox const &box, std: } /// Duplicate all rows of the chunk's Source table -std::pair duplicateSource(part::SphericalBox const &box) { +std::pair duplicateSource(part::SphericalBox const& box) { 
std::string const inFileName = opt.indir + "/Source_" + std::to_string(opt.chunkId) + ".txt", outFileName = opt.outdir + "/Source_" + std::to_string(opt.chunkId) + ".txt"; @@ -890,7 +890,7 @@ std::pair duplicateSource(part::SphericalBox const &box) { } /// Duplicate the next row of the chunk's ForcedSource table -size_t duplicateForcedSourceRow(std::string &line, part::SphericalBox const &box, std::ofstream &os) { +size_t duplicateForcedSourceRow(std::string& line, part::SphericalBox const& box, std::ofstream& os) { // Split the input line into tokens and store them // in a temporrary array at positions which are supposed to match // the correposnding ColDef @@ -915,7 +915,7 @@ size_t duplicateForcedSourceRow(std::string &line, part::SphericalBox const &box uint64_t deepSourceId(0ULL); int idx = 0; - for (std::string const &token : tokens) { + for (std::string const& token : tokens) { if (coldefForcedSource.idxDeepSourceId == idx) { deepSourceId = boost::lexical_cast(token); } @@ -966,7 +966,7 @@ size_t duplicateForcedSourceRow(std::string &line, part::SphericalBox const &box } /// Duplicate all rows of the chunk's ForcedSource table -std::pair duplicateForcedSource(part::SphericalBox const &box) { +std::pair duplicateForcedSource(part::SphericalBox const& box) { std::string const inFileName = opt.indir + "/ForcedSource_" + std::to_string(opt.chunkId) + ".txt", outFileName = opt.outdir + "/ForcedSource_" + std::to_string(opt.chunkId) + ".txt"; @@ -992,7 +992,7 @@ void duplicate() { } part::Chunker chunker(opt.overlap, opt.numStripes, opt.numSubStripesPerStripe); - part::SphericalBox const &box(chunker.getChunkBounds(opt.chunkId)); + part::SphericalBox const& box(chunker.getChunkBounds(opt.chunkId)); if (opt.verbose) std::cout << "\n" @@ -1023,7 +1023,7 @@ void duplicate() { } } // namespace -int main(int argc, char const *const *argv) { +int main(int argc, char const* const* argv) { try { if (!::opt.parse(argc, argv)) return EXIT_FAILURE; @@ -1035,7 +1035,7 @@ int 
main(int argc, char const *const *argv) { // Process the chunk(s) ::duplicate(); - } catch (std::exception const &ex) { + } catch (std::exception const& ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } diff --git a/src/partition/tests/mapReduce.cc b/src/partition/tests/mapReduce.cc index ae5163c660..29317cec56 100644 --- a/src/partition/tests/mapReduce.cc +++ b/src/partition/tests/mapReduce.cc @@ -53,7 +53,7 @@ unsigned int const NUM_LINES = 1024 * 1024; /// Generate CSV files containing a total of NUM_LINES lines, where /// each line consists of a single line number. -void buildInput(TempFile const &t1, TempFile const &t2) { +void buildInput(TempFile const& t1, TempFile const& t2) { char buf[17]; unsigned int line; BufferedAppender a(1 * MiB); @@ -74,7 +74,7 @@ void buildInput(TempFile const &t1, TempFile const &t2) { struct Key { uint32_t line; uint32_t hash() const { return line; } - bool operator<(Key const &k) const { return line < k.line; } + bool operator<(Key const& k) const { return line < k.line; } }; // 2-bits per line that indicate whether a line has been mapped/reduced. 
@@ -99,7 +99,7 @@ class Lines { _reduced[line] = true; } - void merge(Lines const &lines) { + void merge(Lines const& lines) { _failed = _failed || lines._failed; for (size_t i = 0; i < NUM_LINES; ++i) { if (lines._mapped[i]) { @@ -134,9 +134,9 @@ class Lines { class Worker : public WorkerBase { public: - Worker(ConfigStore const &config) : _editor(config), _lines(new Lines()) {} + Worker(ConfigStore const& config) : _editor(config), _lines(new Lines()) {} - void map(char const *beg, char const *end, Silo &silo) { + void map(char const* beg, char const* end, Silo& silo) { Key k; while (beg < end) { beg = _editor.readRecord(beg, end); @@ -156,7 +156,7 @@ class Worker : public WorkerBase { shared_ptr const result() { return _lines; } - static void defineOptions(po::options_description &opts) { csv::Editor::defineOptions(opts); } + static void defineOptions(po::options_description& opts) { csv::Editor::defineOptions(opts); } private: csv::Editor _editor; @@ -168,7 +168,7 @@ typedef Job TestJob; } // unnamed namespace BOOST_AUTO_TEST_CASE(MapReduceTest) { - char const *argv[4] = { + char const* argv[4] = { "dummy", "--in.csv.field=line", "--mr.pool-size=8", @@ -187,7 +187,7 @@ BOOST_AUTO_TEST_CASE(MapReduceTest) { argv[3] = s.c_str(); po::variables_map vm; // Older boost versions (1.41) require the const_cast. 
- po::store(po::parse_command_line(4, const_cast(argv), options), vm); + po::store(po::parse_command_line(4, const_cast(argv), options), vm); po::notify(vm); ConfigStore config; config.add(vm); diff --git a/src/proto/CMakeLists.txt b/src/proto/CMakeLists.txt deleted file mode 100644 index 7f10de2e62..0000000000 --- a/src/proto/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -add_library(proto SHARED) - -target_compile_options(proto PRIVATE - -Wno-maybe-uninitialized -) - -protobuf_generate_cpp(PROTO_PB_SRCS PROTO_PB_HDRS worker.proto) - -target_sources(proto PRIVATE - ${PROTO_PB_SRCS} - ${PROTO_PB_HDRS} - FrameBuffer.cc - ProtoHeaderWrap.cc - ScanTableInfo.cc -) - -target_link_libraries(proto PUBLIC - log - protobuf -) - -install( - TARGETS proto -) - -add_executable(testProtocol testProtocol.cc) - -target_link_libraries(testProtocol - proto - crypto - Boost::unit_test_framework -) - -add_test(NAME testProtocol COMMAND testProtocol) - diff --git a/src/proto/COPYRIGHT b/src/proto/COPYRIGHT deleted file mode 100644 index b457fc9841..0000000000 --- a/src/proto/COPYRIGHT +++ /dev/null @@ -1 +0,0 @@ -2011-2015 LSST Corp, 2015-2018 SLAC diff --git a/src/proto/FakeProtocolFixture.h b/src/proto/FakeProtocolFixture.h deleted file mode 100644 index e4c232edaa..0000000000 --- a/src/proto/FakeProtocolFixture.h +++ /dev/null @@ -1,92 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_PROTO_FAKEPROTOCOLFIXTURE_H -#define LSST_QSERV_PROTO_FAKEPROTOCOLFIXTURE_H - -// System headers -#include -#include - -namespace lsst::qserv::proto { - -/// FakeProtocolFixture is a utility class containing code for making fake -/// versions of the protobufs messages used in Qserv. Its intent was -/// only to be used for test code. -class FakeProtocolFixture { -public: - FakeProtocolFixture() : _counter(0) {} - - TaskMsg* makeTaskMsg() { - TaskMsg* t(new TaskMsg()); - t->set_chunkid(20 + _counter); - t->set_db("elephant"); - t->set_jobid(0); - t->set_queryid(49); - t->set_scaninteractive(true); - - auto sTbl = t->add_scantable(); - sTbl->set_db("orange"); - sTbl->set_table("cart"); - sTbl->set_lockinmemory(false); - sTbl->set_scanrating(1); - - sTbl = t->add_scantable(); - sTbl->set_db("plum"); - sTbl->set_table("bike"); - sTbl->set_lockinmemory(false); - sTbl->set_scanrating(1); - - for (int i = 0; i < 3; ++i) { - TaskMsg::Fragment* f = t->add_fragment(); - f->add_query("Hello, this is a query."); - addSubChunk(*f, 100 + i); - f->set_resulttable("r_341"); - } - ++_counter; - return t; - } - - void addSubChunk(TaskMsg_Fragment& f, int scId) { - TaskMsg_Subchunk* s; - if (!f.has_subchunks()) { - TaskMsg_Subchunk subc; - // f.add_scgroup(); // How do I add optional objects? 
- subc.set_database("subdatabase_default"); - proto::TaskMsg_Subchunk_DbTbl* dbTbl = subc.add_dbtbl(); - dbTbl->set_db("subdatabase"); - dbTbl->set_tbl("subtable"); - f.mutable_subchunks()->CopyFrom(subc); - s = f.mutable_subchunks(); - } - s = f.mutable_subchunks(); - s->add_id(scId); - } - -private: - int _counter; -}; - -} // namespace lsst::qserv::proto - -#endif // #define LSST_QSERV_PROTO_FAKEPROTOCOLFIXTURE_H diff --git a/src/proto/FrameBuffer.cc b/src/proto/FrameBuffer.cc deleted file mode 100644 index 798908f9a6..0000000000 --- a/src/proto/FrameBuffer.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * LSST Data Management System - * Copyright 2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ - -// Class header -#include "proto/FrameBuffer.h" - -// System headers -#include - -// Qserv headers - -namespace lsst::qserv::proto { - -/////////////////////////// -// Class FrameBufferView // -/////////////////////////// - -FrameBufferView::FrameBufferView(char const* data, size_t size) : _data(data), _next(data), _size(size) {} - -uint32_t FrameBufferView::parseLength() { - uint32_t const headerLength = sizeof(uint32_t); - if (_size - (_next - _data) < headerLength) - FrameBufferError( - "FrameBufferView::parseLength() ** not enough data to be interpreted as the frame header " - "**"); - - uint32_t const messageLength = ntohl(*(reinterpret_cast(_next))); - - // Move the pointer to the next message (if any) - _next += headerLength; - - return messageLength; -} - -/////////////////////// -// Class FrameBuffer // -/////////////////////// - -const size_t FrameBuffer::DEFAULT_SIZE = 1024; -const size_t FrameBuffer::DESIRED_LIMIT = 2000000; -const size_t FrameBuffer::HARD_LIMIT = 64000000; - -FrameBuffer::FrameBuffer(size_t capacity) : _data(new char[capacity]), _capacity(capacity), _size(0) { - if (_capacity > HARD_LIMIT) - throw FrameBufferError( - "FrameBuffer::FrameBuffer() ** requested capacity " + std::to_string(capacity) + - " exceeds the hard limit of Google protobuf: " + std::to_string(HARD_LIMIT) + " **"); -} - -FrameBuffer::~FrameBuffer() { - delete[] _data; - _data = 0; - _capacity = 0; - _size = 0; -} - -void FrameBuffer::resize(size_t newSizeBytes) { - // Make sure there is enough space in the buffer to accomodate - // the request. 
- - extend(newSizeBytes); - - _size = newSizeBytes; -} - -void FrameBuffer::extend(size_t newCapacityBytes) { - if (newCapacityBytes <= _capacity) return; - - // Allocate a larger buffer - - if (newCapacityBytes > HARD_LIMIT) - throw FrameBufferError( - "FrameBuffer::extend() ** requested capacity " + std::to_string(newCapacityBytes) + - " exceeds the hard limit of Google protobuf " + std::to_string(HARD_LIMIT) + " **"); - - char* ptr = new char[newCapacityBytes]; - if (!ptr) - throw FrameBufferError("FrameBuffer::extend() ** failed to allocate a buffer of requested size " + - std::to_string(newCapacityBytes) + " **"); - - // Carry over the meaningful content of the older buffer into the new one - // before disposing the old buffer. - std::copy(_data, _data + _size, ptr); - - delete[] _data; - _data = ptr; - - _capacity = newCapacityBytes; -} - -} // namespace lsst::qserv::proto diff --git a/src/proto/FrameBuffer.h b/src/proto/FrameBuffer.h deleted file mode 100644 index 8cb90716d1..0000000000 --- a/src/proto/FrameBuffer.h +++ /dev/null @@ -1,253 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -#ifndef LSST_QSERV_PROTO_FRAME_BUFFER_H -#define LSST_QSERV_PROTO_FRAME_BUFFER_H - -/// FrameBuffer.h declares: -/// -/// struct FrameBufferError -/// class FrameBufferView -/// class FrameBuffer -/// -/// (see individual class documentation for more information) - -// System headers -#include // ntohl -#include // uint32_t -#include -#include - -// Qserv headers - -// Forward declarations - -// This header declarations - -namespace lsst::qserv::proto { - -/** - * Class FrameBufferError is used for throwing exceptions on various - * ubnormal conditions seen in the implementations of the buffer - * classes. - */ -struct FrameBufferError : std::runtime_error { - /// Normal constructor of the exception class - FrameBufferError(std::string const& msg) : std::runtime_error(msg) {} -}; - -/** - * The helper class encapsulating deserialization operations with Google - * protobuf objects on a staticly definied input byte stream. The stream - * is expected to contain an arbitrary sequence of the following pairs of - * records: - * - * 4-bytes: frame header containing 'N' - the length of a message - * N-bytes: the message serialized as a Protobuf object - * ... - */ -class FrameBufferView { -public: - /** - * Construct the buffer - * - * @param data - pointer to the data blob to be parsed - * @param size - the length (bytes) in the data blob - */ - explicit FrameBufferView(char const* data, size_t size); - - // Default construction and copy semantics are proxibited - - FrameBufferView() = delete; - FrameBufferView(FrameBufferView const&) = delete; - FrameBufferView& operator=(FrameBufferView const&) = delete; - - /// Destructor - ~FrameBufferView() = default; - - /* - * Parse and deserialize the message given the specified size of - * the message as informed by a prior frame header. - * If successful the method will also advance the current pointer within - * the data blob past the parsed message thus allowing it to parse the next - * message. 
- * - * The method will throw exception FrameBufferError if: - * - the buffer doesn't have enough data to be interpreted as - * the message of the required size - * - message deserialization failed - * - * @param message - protobuf object to be initialized upon a successful - * completion of the operation - */ - template - void parse(T& message) { - uint32_t const messageLength = parseLength(); - - if (_size - (_next - _data) < messageLength) - throw FrameBufferError("FrameBufferView::parse() ** not enough data (" + - std::to_string(_size - (_next - _data)) + " bytes instead of " + - std::to_string(messageLength) + " to be interpreted as the message"); - - if (not message.ParseFromArray(_next, messageLength) || not message.IsInitialized()) { - throw FrameBufferError("FrameBufferView::parse() ** message deserialization failed **"); - } - - // Move the pointer to the next message (if any) - _next += messageLength; - } - -private: - /** - * Parse and deserialize the length of a message from the frame header - * at a curren position of the data pointer. - * If succeeded the method will also advance the current pointer within - * the data blob past the parsed message thus allowing to parse the next - * message. - * - * The method will throw one of these exceptions: - * - * std::underflow_error - * if the buffer doesn't have enough data to be interpreted as the - * frame header - * - * @return the length (bytes) of of the next message - */ - uint32_t parseLength(); - -private: - char const* _data; // start of the data blob - char const* _next; // start of the next message within the blob - - size_t _size; -}; - -/** - * The helper class encapsulating serialization operations - * with Google protobuf objects. 
- */ -class FrameBuffer { -public: - /// The default capacity of teh buffer - static const size_t DEFAULT_SIZE; - - /// Google protobuffers are more efficient below this size (bytes) - static const size_t DESIRED_LIMIT; - - /// The hard limit (bytes) for a single Google protobuffer - static const size_t HARD_LIMIT; - - /** - * Construct the buffer of the specified initial capacity (bytes). - */ - explicit FrameBuffer(size_t capacity = DEFAULT_SIZE); - - // Copy semantics are proxibited - - FrameBuffer(FrameBuffer const&) = delete; - FrameBuffer& operator=(FrameBuffer const&) = delete; - - /// Destructor - ~FrameBuffer(); - - /** - * @return pointer to the data blob - */ - char* data() { return _data; } - - /** - * @return maximum capacity (bytes) of the buffer - */ - size_t capacity() const { return _capacity; } - - /** - * @return meaninful size (bytes) of the buffer - */ - size_t size() const { return _size; } - - /** - * Set the size of the meaningful content of the buffer. If the buffer - * capacity is insufficient to accomodate the requested size the buffer - * will be extended. In the later case its previous content (if any) will - * be preserved. - * - * The method will throw one of these exceptions: - * - * std::overflow_error - * if the buffer doesn't have enough space to accomodate the request - * - * @param newSizeBytes - new size (bytes) of the buffer - */ - void resize(size_t newSizeBytes = 0); - - /** - * Add a message into the buffer. The message will be preceeed - * by a frame header carrying the length of the message. 
- * - * The method will throw one of these exceptions: - * - * std::overflow_error - * if the buffer doesn't have enough space to accomodate the data - * - * std::runtime_error - * if the serialization failed - * - * @param message - protobuf message to be serialized into the buffer - */ - template - void serialize(T const& message) { - uint32_t const headerLength = sizeof(uint32_t); - uint32_t const messageLength = message.ByteSizeLong(); - - // Make sure we have enough space to accomodate the frame header - // and the message body. - extend(_size + headerLength + messageLength); - - // Serialize the message header carrying the length of the message - *(reinterpret_cast(_data + _size)) = htonl(messageLength); - _size += headerLength; - - // Serialize the message itself - if (!message.SerializeToArray(_data + _size, _capacity - _size)) - throw FrameBufferError("FrameBuffer::serialize() ** message serialization failed **"); - - _size += messageLength; - } - -private: - /** - * Ensure the buffer capacity is no less than the specified number of bytes. - * Extend it otherwise. The previous contents (as per its 'size') of the buffer - * as well as its size will be preserved. - */ - void extend(size_t newCapacityBytes); - -private: - char* _data; // start of the allocated buffer - - size_t _capacity; - size_t _size; -}; - -} // namespace lsst::qserv::proto - -#endif // LSST_QSERV_PROTO_FRAME_BUFFER_H diff --git a/src/proto/ProtoImporter.h b/src/proto/ProtoImporter.h deleted file mode 100644 index 4173d7cfe9..0000000000 --- a/src/proto/ProtoImporter.h +++ /dev/null @@ -1,63 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015-2017 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_PROTO_PROTOIMPORTER_H -#define LSST_QSERV_PROTO_PROTOIMPORTER_H - -// System headers -#include -#include - -namespace lsst::qserv::proto { - -/// ProtoImporter -/// Minimal-copy import of an arbitrary proto msg from a raw buffer. -/// Example: -/// struct TaskMsgAcceptor : public ProtoImporter { -/// virtual void operator()(std::shared_ptr m) { ...} -/// }; -/// ProtoImporter p(std::shared_ptr()); -/// p(data,size); // calls operator() defined above. -template -class ProtoImporter { -public: - ProtoImporter() {} - - bool messageAcceptable(std::string const& msg) { - Msg m; - return setMsgFrom(m, msg.data(), msg.size()); - } - - static bool setMsgFrom(Msg& m, char const* buf, int bufLen) { - // For dev/debugging: accepts a partially-formed message - // bool ok = m.ParsePartialFromArray(buf, bufLen); - - // Accept only complete, compliant messages. 
- bool ok = m.ParseFromArray(buf, bufLen); - return ok && m.IsInitialized(); - } -}; - -} // namespace lsst::qserv::proto - -#endif // #define LSST_QSERV_PROTO_PROTOIMPORTER_H diff --git a/src/proto/testProtocol.cc b/src/proto/testProtocol.cc deleted file mode 100644 index 175eeeb98b..0000000000 --- a/src/proto/testProtocol.cc +++ /dev/null @@ -1,183 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2011-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ - -// System headers -#include -#include -#include -#include -#include - -// Third-party headers -#include -#include - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "proto/ScanTableInfo.h" -#include "proto/worker.pb.h" - -#include "proto/FakeProtocolFixture.h" - -// Boost unit test header -#define BOOST_TEST_MODULE Protocol_1 -#include - -namespace test = boost::test_tools; -namespace gio = google::protobuf::io; - -using namespace lsst::qserv; - -struct ProtocolFixture : public lsst::qserv::proto::FakeProtocolFixture { - ProtocolFixture(void) : FakeProtocolFixture(), counter(0) {} - ~ProtocolFixture(void) {} - - bool compareTaskMsgs(lsst::qserv::proto::TaskMsg& t1, lsst::qserv::proto::TaskMsg& t2) { - bool nonFragEq = (t1.chunkid() == t2.chunkid()) && (t1.db() == t2.db()); - bool sTablesEq = t1.scantable_size() == t2.scantable_size(); - for (int i = 0; i < t1.scantable_size(); ++i) { - auto const& sTbl1 = t1.scantable(i); - auto const& sTbl2 = t2.scantable(i); - bool eq = (sTbl1.db().compare(sTbl2.db()) == 0 && sTbl1.table() == sTbl2.table() && - sTbl1.lockinmemory() == sTbl2.lockinmemory() && - sTbl1.scanrating() == sTbl2.scanrating()); - sTablesEq = sTablesEq && eq; - } - - bool fEqual = (t1.fragment_size() == t2.fragment_size()); - for (int i = 0; i < t1.fragment_size(); ++i) { - fEqual = fEqual && compareFragment(t1.fragment(i), t2.fragment(i)); - } - return nonFragEq && fEqual && sTablesEq; - } - - bool compareSubchunk(lsst::qserv::proto::TaskMsg_Subchunk const& s1, - lsst::qserv::proto::TaskMsg_Subchunk const& s2) { - if (s1.database() != s2.database()) { - return false; - } - if (s1.dbtbl_size() != s2.dbtbl_size()) { - return false; - } - for (int i = 0; i < s1.dbtbl_size(); ++i) { - if (s1.dbtbl(i).db() != s2.dbtbl(i).db() && s1.dbtbl(i).tbl() != s2.dbtbl(i).tbl()) return false; - } - if (s1.id_size() != s2.id_size()) { - return false; - } - for (int i = 0; i < s1.id_size(); ++i) { - if (s1.id(i) != s2.id(i)) return false; - 
} - return true; - } - - bool compareFragment(lsst::qserv::proto::TaskMsg_Fragment const& f1, - lsst::qserv::proto::TaskMsg_Fragment const& f2) { - bool qEqual = true; - if (f1.query_size() == f2.query_size()) { - for (int i = 0; i < f1.query_size(); ++i) { - if (f1.query(i) != f2.query(i)) return false; - } - } else { - return false; - } - bool sEqual = true; - if (f1.has_subchunks()) { - if (f2.has_subchunks()) { - sEqual = sEqual && compareSubchunk(f1.subchunks(), f2.subchunks()); - } else { - sEqual = false; - } - } else if (f2.has_subchunks()) { - sEqual = false; - } - return qEqual && sEqual; - } - - int counter; -}; - -BOOST_FIXTURE_TEST_SUITE(ProtocolTestSuite, ProtocolFixture) - -BOOST_AUTO_TEST_CASE(TaskMsgMsgSanity) { - GOOGLE_PROTOBUF_VERIFY_VERSION; - std::stringstream ss; - std::unique_ptr t1(makeTaskMsg()); - BOOST_CHECK(t1.get()); - t1->SerializeToOstream(&ss); - - std::string blah = ss.str(); - std::stringstream ss2(blah); - std::unique_ptr t2(new lsst::qserv::proto::TaskMsg()); - BOOST_CHECK(t1.get()); - t2->ParseFromIstream(&ss2); - BOOST_CHECK(compareTaskMsgs(*t1, *t2)); -} - -BOOST_AUTO_TEST_CASE(ScanTableInfo) { - lsst::qserv::proto::ScanTableInfo stiA{"dba", "fruit", false, 1}; - lsst::qserv::proto::ScanTableInfo stiB{"dba", "fruit", true, 1}; - BOOST_CHECK(stiA.compare(stiB) < 0); - BOOST_CHECK(stiB.compare(stiA) > 0); - BOOST_CHECK(stiA.compare(stiA) == 0); - BOOST_CHECK(stiB.compare(stiB) == 0); - - lsst::qserv::proto::ScanTableInfo stiC{"dba", "fruit", true, 1}; - lsst::qserv::proto::ScanTableInfo stiD{"dba", "fruit", true, 2}; - BOOST_CHECK(stiC.compare(stiD) < 0); - BOOST_CHECK(stiD.compare(stiC) > 0); - BOOST_CHECK(stiC.compare(stiC) == 0); - BOOST_CHECK(stiD.compare(stiD) == 0); - - lsst::qserv::proto::ScanTableInfo stiE{"dba", "fruit", true, 2}; - lsst::qserv::proto::ScanTableInfo stiF{"dbb", "fruit", true, 2}; - BOOST_CHECK(stiE.compare(stiF) < 0); - BOOST_CHECK(stiF.compare(stiE) > 0); - BOOST_CHECK(stiE.compare(stiE) == 0); - 
BOOST_CHECK(stiF.compare(stiF) == 0); - - lsst::qserv::proto::ScanTableInfo stiG{"dbb", "fruit", true, 2}; - lsst::qserv::proto::ScanTableInfo stiH{"dbb", "veggie", true, 2}; - BOOST_CHECK(stiG.compare(stiH) < 0); - BOOST_CHECK(stiH.compare(stiG) > 0); - BOOST_CHECK(stiG.compare(stiG) == 0); - BOOST_CHECK(stiH.compare(stiH) == 0); - - lsst::qserv::proto::ScanTableInfo::ListOf list = {stiE, stiH, stiC, stiD, stiB, stiA, stiG, stiF}; - lsst::qserv::proto::ScanInfo scanInfo; - scanInfo.infoTables = list; - scanInfo.sortTablesSlowestFirst(); - int j = 0; - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiH) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiG) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiF) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiE) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiD) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiC) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiB) == 0); - BOOST_CHECK(scanInfo.infoTables[j++].compare(stiA) == 0); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/proto/worker.proto b/src/proto/worker.proto deleted file mode 100644 index 52c19e3929..0000000000 --- a/src/proto/worker.proto +++ /dev/null @@ -1,145 +0,0 @@ -/* - * LSST Data Management System - * Copyright 2011-2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/// worker.proto -/// This defines the wire-messages sent between czar and worker. - -// After enabling this option, please visit Qserv code to allow -// Arena where it's protected by macro QSERV_USE_PROTO_ARENA. -syntax = "proto2"; -option cc_enable_arenas = true; - -package lsst.qserv.proto; - -// Query message sent to worker -// One of these Task objects should be sent. -message TaskMsg { - // Future: might have multiple db/chunk dependencies. - optional string db = 2; - optional int32 chunkid = 3; - // repeated string scantables = 4; // obsolete - optional string user = 6; - optional int32 scanpriority = 8; - message Subchunk { - optional string database = 1; // database (unused) - repeated DbTbl dbtbl = 2; // subchunked tables - repeated int32 id = 3; // subchunk ids - message DbTbl { - required string db = 1; - required string tbl = 2; - } - } - message Fragment { - // A query fragment without "CREATE or INSERT". - // Worker should synthesize. - repeated string query = 1; - optional string resulttable = 3; - optional Subchunk subchunks = 4; // Only needed with subchunk-ed queries - - // Each fragment may only write results to one table, - // but multiple fragments may write to the same table, - // in which case the table contains a concatenation of the - // contributing fragments' rows. - } - repeated Fragment fragment = 5; - message ScanTable { - required string db = 1; - required string table = 2; - required bool lockInMemory = 3; - required int32 scanRating = 4; - } - repeated ScanTable scantable = 9; - optional uint64 queryid = 10; - optional int32 jobid = 11; - optional bool scaninteractive = 12; - optional int32 attemptcount = 13; - optional uint32 czarid = 14; - optional int32 maxtablesize_mb = 15 [default = 0]; -} - -// The file-based result delivery protocol has two kinds of messages. -// -// 1. 
The summary message sent back to Czar over the XROOTD/SSI protocol: -// -// - The length in bytes of the serialized ResponseSummary object (32-bits) -// - The serialized ResponseSummary object -// -// 2. The response data messages serialized and written into the result file -// -// - The length in bytes of the first serialized ResponseData object (32-bits) -// - The serialized first ResponseData object -// [ ... ] -// - The length in bytes of the last serialized ResponseData object (32-bits) -// - The serialized last ResponseData object - -message ResponseSummary { - required string wname = 1; - optional int32 errorcode = 2 [default = 0]; - optional string errormsg = 3 [default = ""]; - required uint64 queryid = 4; - required int32 jobid = 5; - optional uint32 rowcount = 6 [default = 0]; - optional uint64 transmitsize = 7 [default = 0]; - optional int32 attemptcount = 8 [default = 0]; - optional string fileresource_http = 10 [default = ""]; -} - -message RowBundle { - repeated bytes column = 1; // bytes to allow BLOB encoding - repeated bool isnull = 2; // Flag to allow sending nulls. -} - -message ResponseData { - repeated RowBundle row = 1; - required uint32 rowcount = 2; - required uint64 transmitsize = 3; -} - -///////////////////////////////////////////////////////////////// -// Protocol definition for the query management requests. These -// requests do not require any response messages to be explicitly -// sent by workers. -// -// ATTENTION: each message sent to a worker must be preceeded by -// an int32 size (network-byte-ordered) word carrying a size -// of the message. -//////////////////////////////////////////////////////////////// - -// The completion status to be sent back with responses to the query management requests. -message WorkerCommandStatus { - enum Code { - SUCCESS = 1; // The successful completion of a request. - ERROR = 2; // An error occurred during request execution. 
- } - optional Code code = 3 [default = SUCCESS]; - optional string error = 2 [default = ""]; // Optional error message (depends on the code) -} - -message QueryManagement { - enum Operation { - CANCEL_AFTER_RESTART = 1; // Cancel older queries before the specified query (excluding that one). - CANCEL = 2; // Cancel a specific query. - COMPLETE = 3; // Notify workers on the completion of the specified query. - } - required Operation op = 1; - required uint64 czar_id = 3; - required uint64 query_id = 2; -} diff --git a/src/protojson/CMakeLists.txt b/src/protojson/CMakeLists.txt new file mode 100644 index 0000000000..5ee5434b81 --- /dev/null +++ b/src/protojson/CMakeLists.txt @@ -0,0 +1,56 @@ +add_library(protojson SHARED) + +target_sources(protojson PRIVATE + ChunkUseCountAnswerMsg.cc + PwHideJson.cc + ResponseMsg.cc + ScanTableInfo.cc + UberJobErrorMsg.cc + UberJobReadyMsg.cc + UberJobMsg.cc + WorkerCzarComIssue.cc + WorkerQueryStatusData.cc +) + +target_link_libraries(protojson PUBLIC + curl + http + log + qhttp + util + Boost::filesystem + Boost::regex + Boost::system + cpp-httplib +) + +install(TARGETS protojson) + +function(PROTOJSON_TESTS) + foreach(TEST IN ITEMS ${ARGV}) + add_executable(${TEST} ${TEST}.cc) + target_link_libraries(${TEST} PUBLIC + global + http + protojson + mysql + sql + wbase + wconfig + wcontrol + wdb + wpublish + wsched + Boost::unit_test_framework + Threads::Threads + ) + add_test(NAME ${TEST} COMMAND ${TEST}) + endforeach() +endfunction() + +protojson_tests( + testStatusData + testUberJobErrorMsg + testUberJobReadyMsg + testUberJobMsg +) diff --git a/src/protojson/ChunkUseCountAnswerMsg.cc b/src/protojson/ChunkUseCountAnswerMsg.cc new file mode 100644 index 0000000000..2fa59c31dd --- /dev/null +++ b/src/protojson/ChunkUseCountAnswerMsg.cc @@ -0,0 +1,98 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "ChunkUseCountAnswerMsg.h" + +#include + +// Qserv headers +#include "http/RequestBodyJSON.h" +#include "protojson/PwHideJson.h" +#include "wpublish/QueriesAndChunks.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.ChunkUseCountAnswer"); +} // namespace + +namespace lsst::qserv::protojson { + +json ChunkUseCountAnswerMsg::toJson() const { + json jsCounts = json::object(); + for (auto const& [dbName, chunkCountMap] : *_dbchunkCountMap) { + json chunkCountJson; + for (auto const& [chunkId, useCount] : chunkCountMap) { + chunkCountJson[std::to_string(chunkId)] = useCount; + } + jsCounts[dbName] = chunkCountJson; + } + json js; + js["dbChunkUseCount"] = jsCounts; + return js; +} + +ChunkUseCountAnswerMsg::Ptr ChunkUseCountAnswerMsg::createFromJson(nlohmann::json const& jsin) { + DbChunkCountMapPtr dbchunkCountMap = make_shared(); + json jsArray = jsin.at("dbChunkUseCount"); + for (auto const& [dbName, chunkCountJson] : jsArray.items()) { + for (auto const& [chunkIdStr, useCountJson] : chunkCountJson.items()) { + int chunkId = std::stoi(chunkIdStr); + int useCount = useCountJson.get(); + // chunkUseCountAnswerMsg->(*_dbchunkCountMap)[dbName][chunkId] = useCount; + 
(*dbchunkCountMap)[dbName][chunkId] = useCount; + } + } + return ChunkUseCountAnswerMsg::create(dbchunkCountMap); +} + +bool ChunkUseCountAnswerMsg::equal(ChunkUseCountAnswerMsg const& other) const { + if (_dbchunkCountMap->size() != other._dbchunkCountMap->size()) { + return false; + } + for (auto const& [dbName, chunkCountMap] : *_dbchunkCountMap) { + auto iter = other._dbchunkCountMap->find(dbName); + if (iter == other._dbchunkCountMap->end()) { + return false; + } + auto const& otherChunkCountMap = iter->second; + if (chunkCountMap.size() != otherChunkCountMap.size()) { + return false; + } + for (auto const& [chunkId, useCount] : chunkCountMap) { + auto iter2 = otherChunkCountMap.find(chunkId); + if (iter2 == otherChunkCountMap.end()) { + return false; + } + if (useCount != iter2->second) { + return false; + } + } + } + return true; +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/ChunkUseCountAnswerMsg.h b/src/protojson/ChunkUseCountAnswerMsg.h new file mode 100644 index 0000000000..3a4598cd64 --- /dev/null +++ b/src/protojson/ChunkUseCountAnswerMsg.h @@ -0,0 +1,72 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_PROTOJSON_CHUNKUSECOUNTANSWER_H +#define LSST_QSERV_PROTOJSON_CHUNKUSECOUNTANSWER_H + +// System headers +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" + +namespace lsst::qserv::wpublish { +class ChunkStatistics; +} + +namespace lsst::qserv::protojson { + +/// This class is used to handle the chunk use request messages. It converts json messages +/// to and from DbChunkCountMapPtr objects, which are generated by workers using the +/// QueriesAndChunks class. +class ChunkUseCountAnswerMsg { +public: + using Ptr = std::shared_ptr; + typedef std::map ChunkCountMap; ///< Map of chunkId to useCount for a specific database. + typedef std::map DbChunkCountMap; + typedef std::shared_ptr DbChunkCountMapPtr; + + ChunkUseCountAnswerMsg() = delete; + + static Ptr create(DbChunkCountMapPtr const& chunkStatsMap_) { + return Ptr(new ChunkUseCountAnswerMsg(chunkStatsMap_)); + } + + static Ptr createFromJson(nlohmann::json const& json); + + nlohmann::json toJson() const; + bool equal(ChunkUseCountAnswerMsg const& other) const; + +private: + ChunkUseCountAnswerMsg(DbChunkCountMapPtr const& chunkStatsMap_) : _dbchunkCountMap(chunkStatsMap_) {} + + DbChunkCountMapPtr _dbchunkCountMap; ///< Map of database name to map of chunkId to useCount. +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_CHUNKUSECOUNTANSWER_H diff --git a/src/protojson/PwHideJson.cc b/src/protojson/PwHideJson.cc new file mode 100644 index 0000000000..dbef393c5f --- /dev/null +++ b/src/protojson/PwHideJson.cc @@ -0,0 +1,62 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "protojson/PwHideJson.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.PwHideJson"); +} // namespace + +namespace lsst::qserv::protojson { + +// TODO Really need to make this recursive. +nlohmann::json PwHideJson::hide(nlohmann::json const& in) const { + try { + nlohmann::json js(in); + for (auto const& key : keySet) { + auto iter = js.find(key); + if (iter != js.end()) { + *iter = mask; + } + } + return js; + } catch (...) { + /// This should never happen, but this function is only expected to + /// be used in rare errors. It just shouldn't crash the program + /// under any circumstances. 
+ nlohmann::json jsthrew({"PwHideJson::hide threw something", 0}); + return jsthrew; + } +} + +nlohmann::json pwHide(nlohmann::json const& in) { + PwHideJson phj; + return phj.hide(in); +} + +} // namespace lsst::qserv::protojson diff --git a/src/proto/ProtoHeaderWrap.cc b/src/protojson/PwHideJson.h similarity index 50% rename from src/proto/ProtoHeaderWrap.cc rename to src/protojson/PwHideJson.h index 8b0d496856..19c6cd2bdd 100644 --- a/src/proto/ProtoHeaderWrap.cc +++ b/src/protojson/PwHideJson.h @@ -1,7 +1,5 @@ -// -*- LSST-C++ -*- /* * LSST Data Management System - * Copyright 2015-2016 LSST Corporation. * * This product includes software developed by the * LSST Project (http://www.lsst.org/). @@ -20,26 +18,36 @@ * the GNU General Public License along with this program. If not, * see . */ +#ifndef LSST_QSERV_PROTOJSON_PWHIDEJSON_H +#define LSST_QSERV_PROTOJSON_PWHIDEJSON_H // System headers +#include +#include -// LSST headers -#include "lsst/log/Log.h" +// Third party headers +#include "nlohmann/json.hpp" -// Qserv headers -#include "proto/ProtoHeaderWrap.h" -#include "util/common.h" +// qserv headers -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.parser.ProtoHeaderWrap"); -} +namespace lsst::qserv::protojson { -namespace lsst::qserv::proto { +/// Return a new json object where the top level values of keys in `keySet` are +/// replaced with the `mask` (default "-"). +class PwHideJson { +public: + PwHideJson() = default; -// Google protobuffers are more efficient below 2MB, but xrootd is faster with larger limits. -// Reducing max to 2MB as it reduces the probablity of running out of memory. -const size_t ProtoHeaderWrap::PROTOBUFFER_DESIRED_LIMIT = 2000000; -// A single Google protobuffer can't be larger than this. -const size_t ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT = 64000000; + /// Return a copy of `in` where top level secret keys are set to mask. 
+ /// TODO: make recursive + nlohmann::json hide(nlohmann::json const& in) const; -} // namespace lsst::qserv::proto + std::set keySet{"auth_key", "password", "pw", "passwd", "admin_auth_key"}; + std::string mask{"-"}; +}; + +nlohmann::json pwHide(nlohmann::json const& in); + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_PWHIDEJSON_H diff --git a/src/protojson/ResponseMsg.cc b/src/protojson/ResponseMsg.cc new file mode 100644 index 0000000000..576f2fd541 --- /dev/null +++ b/src/protojson/ResponseMsg.cc @@ -0,0 +1,180 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "protojson/ResponseMsg.h" + +#include + +// Qserv headers +#include "http/RequestBodyJSON.h" +#include "protojson/PwHideJson.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.ResponseMsg"); +} // namespace + +namespace lsst::qserv::protojson { + +ResponseMsg::ResponseMsg(bool success_, string const& errorType_, string const& note_) + : success(success_), errorType(errorType_), note(note_) {} + +json ResponseMsg::toJson() const { + json js; + int su = success ? 
1 : 0; + js["success"] = su; + js["errortype"] = errorType; + js["note"] = note; + return js; +} + +ResponseMsg::Ptr ResponseMsg::createFromJson(nlohmann::json const& jsRespMsg) { + auto success_ = (0 != http::RequestBodyJSON::required(jsRespMsg, "success")); + auto errorType_ = http::RequestBodyJSON::required(jsRespMsg, "errortype"); + auto note_ = http::RequestBodyJSON::required(jsRespMsg, "note"); + return create(success_, errorType_, note_); +} + +bool ResponseMsg::equal(ResponseMsg const& other) const { + return (success == other.success) && (errorType == other.errorType) && (note == other.note); +} + +string ResponseMsg::dump() const { + ostringstream os; + dumpOs(os); + return os.str(); +} + +ostream& ResponseMsg::dumpOs(ostream& os) const { + os << "protojson::ResponseMsg success=" << success << " errorType=" << errorType << " note=" << note; + return os; +} + +ostream& operator<<(ostream& os, ResponseMsg const& cmd) { + cmd.dumpOs(os); + return os; +} + +ExecutiveRespMsg::ExecutiveRespMsg(bool success_, bool dataObsolete_, QueryId qId_, UberJobId ujId_, + CzarId czId_, std::string const& errorType_, std::string const& note_) + : ResponseMsg(success_, errorType_, note_), + dataObsolete(dataObsolete_), + qId(qId_), + ujId(ujId_), + czId(czId_) {} + +ExecutiveRespMsg::Ptr ExecutiveRespMsg::createFromJson(nlohmann::json const& respJson) { + auto basePtr = ResponseMsg::createFromJson(respJson); + auto success_ = basePtr->success; + auto errorType_ = basePtr->errorType; + auto note_ = basePtr->note; + + auto dataObsolete_ = http::RequestBodyJSON::required(respJson, "dataObsolete"); + auto qId_ = http::RequestBodyJSON::required(respJson, "qId"); + auto ujId_ = http::RequestBodyJSON::required(respJson, "ujId"); + auto czId_ = http::RequestBodyJSON::required(respJson, "czId"); + + return ExecutiveRespMsg::create(success_, dataObsolete_, qId_, ujId_, czId_, errorType_, note_); +} + +json ExecutiveRespMsg::toJson() const { + json js = ResponseMsg::toJson(); + 
js["dataObsolete"] = dataObsolete; + js["qId"] = qId; + js["ujId"] = ujId; + js["czId"] = czId; + return js; +} + +std::ostream& ExecutiveRespMsg::dumpOs(std::ostream& os) const { + ResponseMsg::dumpOs(os); + os << "(ExecutiveRespMsg "; + os << " qId=" << qId; + os << " ujId=" << ujId; + os << " czId=" << czId; + os << " dataObsolete=" << dataObsolete; + os << ")"; + return os; +} + +WorkerCzarComRespMsg::Ptr WorkerCzarComRespMsg::createFromJson(nlohmann::json const& inJson) { + auto basePtr = ResponseMsg::createFromJson(inJson); + auto success_ = basePtr->success; + auto errorType_ = basePtr->errorType; + auto note_ = basePtr->note; + + auto thoughtCzarWasDeadTime_ = + http::RequestBodyJSON::required(inJson, "thoughtCzarWasDeadTime"); + auto execRespMsgs_ = json::array(); + if (inJson.contains("execRespMsgs")) { + execRespMsgs_ = inJson["execRespMsgs"]; + } + vector execRMsgs; + for (auto const& jsExecRespMsg : execRespMsgs_) { + try { + auto execRespMsg = ExecutiveRespMsg::createFromJson(jsExecRespMsg); + execRMsgs.emplace_back(execRespMsg); + } catch (std::invalid_argument const& ex) { + // Can anything be done beyond logging the error? + // The worker is probably going to send this until the qId/ujId is killed. + // This error message should never show up, but good to know if it happens. 
+ LOGS(_log, LOG_LVL_WARN, + "WorkerCzarComRespMsg::createFromJson failed to read execRespMsg:" + << protojson::pwHide(jsExecRespMsg) << " exception: " << ex.what()); + } + } + auto wccRespMsg = WorkerCzarComRespMsg::create(success_, thoughtCzarWasDeadTime_, errorType_, note_); + wccRespMsg->execRespMsgs = execRMsgs; + return wccRespMsg; +} + +json WorkerCzarComRespMsg::toJson() const { + json js = ResponseMsg::toJson(); + js["thoughtCzarWasDeadTime"] = thoughtCzarWasDeadTime; + + json jsExecRespMsgs = json::array(); + for (auto const& erMsg : execRespMsgs) { + jsExecRespMsgs.emplace_back(erMsg->toJson()); + } + js["execRespMsgs"] = jsExecRespMsgs; + return js; +} + +std::ostream& WorkerCzarComRespMsg::dumpOs(std::ostream& os) const { + ResponseMsg::dumpOs(os); + os << "(WorkerCzarComRespMsg czarDeadTime=" << thoughtCzarWasDeadTime; + os << " execRespMsgs("; + for (auto const& msg : execRespMsgs) { + os << "("; + msg->dumpOs(os); + os << ")"; + } + os << "))"; + return os; +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/ResponseMsg.h b/src/protojson/ResponseMsg.h new file mode 100644 index 0000000000..6f9b3b0d81 --- /dev/null +++ b/src/protojson/ResponseMsg.h @@ -0,0 +1,158 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. 
If not, + * see . + */ +#ifndef LSST_QSERV_PROTOJSON_RESPONSEMSG_H +#define LSST_QSERV_PROTOJSON_RESPONSEMSG_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" + +namespace lsst::qserv::qdisp { +class UberJob; +} + +// This header declarations +namespace lsst::qserv::protojson { + +/// This class handles the message used for most success/fail responses. +class ResponseMsg { +public: + using Ptr = std::shared_ptr; + + ResponseMsg(bool success_, std::string const& errorType_ = "none", + std::string const& note_ = std::string()); + + ResponseMsg() = delete; + ResponseMsg(ResponseMsg const&) = delete; + ResponseMsg& operator=(ResponseMsg const&) = delete; + + bool equal(ResponseMsg const& other) const; + + static Ptr create(bool success_, std::string const& errorType_ = "none", + std::string const& note_ = std::string()) { + return Ptr(new ResponseMsg(success_, errorType_, note_)); + } + + /// This function creates ResponseMessage from respJson, if reasonable. + static Ptr createFromJson(nlohmann::json const& respJson); + + virtual ~ResponseMsg() = default; + + /// Action for worker to take if its message to the czar returned failed. + /// In most cases, nothing needs to be done. + virtual void failedUpdateUberJobData(CzarId czarId, QueryId queryId, UberJobId ujId) {} + + /// Action for czar to take if its message to the worker returned failed. + /// In most cases, nothing needs to be done. + virtual void failedUpdateUberJob(std::shared_ptr) {} + + /// Return a json version of this object. + virtual nlohmann::json toJson() const; + + /// class name for log, fName is expected to be __func__. + std::string cName(const char* fName) const { return std::string("ResponseMsg::") + fName; } + + /// Returns a string for logging. 
+ /// Note: Naming dumpOs just dump should work, but it causes gcc to fail when child classes in + /// unit tests try to call dump() and the base class version is not found. + virtual std::ostream& dumpOs(std::ostream& os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream& os, ResponseMsg const& cmd); + + bool success; + std::string errorType; + std::string note; +}; + +class ExecutiveRespMsg : public ResponseMsg { +public: + using Ptr = std::shared_ptr; + + ExecutiveRespMsg(bool success_, bool dataObsolete_, QueryId qId_, UberJobId ujId_, CzarId czId_, + std::string const& errorType_ = "none", std::string const& note_ = std::string()); + + virtual ~ExecutiveRespMsg() = default; + + static Ptr create(bool success_, bool dataObsolete_, QueryId qId_, UberJobId ujId_, CzarId czId_, + std::string const& errorType_ = "none", std::string const& note_ = std::string()) { + return Ptr(new ExecutiveRespMsg(success_, dataObsolete_, qId_, ujId_, czId_, errorType_, note_)); + } + + /// This function creates ExecutiveRespMsg from respJson, if reasonable. + static Ptr createFromJson(nlohmann::json const& respJson); + + std::string cName(const char* fName) const { return std::string("ExecutiveRespMsg::") + fName; } + + nlohmann::json toJson() const override; + + std::ostream& dumpOs(std::ostream& os) const override; + + bool dataObsolete; ///< Indicates that the result data the worker has is obsolete and may be deleted. + QueryId qId; ///< The query id for the data, if applicable. + UberJobId ujId; ///< The uberjob id for the data, if applicable. + CzarId czId; ///< The czar id for the data, if applicable. 
+}; + +class WorkerCzarComRespMsg : public ResponseMsg { +public: + using Ptr = std::shared_ptr; + + WorkerCzarComRespMsg(bool success_, uint64_t thoughtCzarWasDeadTime_, + std::string const& errorType_ = "none", std::string const& note_ = std::string()) + : ResponseMsg(success_, errorType_, note_), thoughtCzarWasDeadTime(thoughtCzarWasDeadTime_) {} + + virtual ~WorkerCzarComRespMsg() = default; + + static Ptr create(bool success_, uint64_t thoughtCzarWasDeadTime_, std::string const& errorType_ = "none", + std::string const& note_ = std::string()) { + return Ptr(new WorkerCzarComRespMsg(success_, thoughtCzarWasDeadTime_, errorType_, note_)); + } + + /// This function creates WorkerCzarComRespMsg from inJson, if reasonable. + static Ptr createFromJson(nlohmann::json const& inJson); + + std::string cName(const char* fName) const { return std::string("WorkerCzarComRespMsg::") + fName; } + + nlohmann::json toJson() const override; + + std::ostream& dumpOs(std::ostream& os) const override; + + /// Indicates the `thoughtCzarWasDeadTime` sent by the worker (normally 0) + uint64_t thoughtCzarWasDeadTime; + + /// List of ExecutiveRespMsg objects for the UberJobs that were + /// in the originating WorkerCzarComIssue message. 
+ std::vector execRespMsgs; +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_RESPONSEMSG_H diff --git a/src/proto/ScanTableInfo.cc b/src/protojson/ScanTableInfo.cc similarity index 66% rename from src/proto/ScanTableInfo.cc rename to src/protojson/ScanTableInfo.cc index 101e1a8d77..c113a55289 100644 --- a/src/proto/ScanTableInfo.cc +++ b/src/protojson/ScanTableInfo.cc @@ -22,16 +22,27 @@ */ // Class header -#include "proto/ScanTableInfo.h" +#include "protojson/ScanTableInfo.h" // System headers #include #include // Qserv headers +#include "http/RequestBodyJSON.h" #include "util/IterableFormatter.h" -namespace lsst::qserv::proto { +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.ScanTableInfo"); +} // namespace + +using namespace std; +using namespace nlohmann; + +namespace lsst::qserv::protojson { /// @return 0 if equal, -1 if this < rhs, 1 if this > rhs int ScanTableInfo::compare(ScanTableInfo const& rhs) const { @@ -87,15 +98,54 @@ void ScanInfo::sortTablesSlowestFirst() { std::sort(infoTables.begin(), infoTables.end(), func); } +nlohmann::json ScanInfo::toJson() const { + auto jsScanInfo = json({{"infoscanrating", scanRating}, {"infotables", json::array()}}); + + auto& jsInfoTables = jsScanInfo["infotables"]; + for (auto const& tInfo : infoTables) { + json jsTInfo = json({{"sidb", tInfo.db}, + {"sitable", tInfo.table}, + {"sirating", tInfo.scanRating}, + {"silockinmem", tInfo.lockInMemory}}); + + jsInfoTables.push_back(jsTInfo); + } + + return jsScanInfo; +} + +ScanInfo::Ptr ScanInfo::createFromJson(nlohmann::json const& siJson) { + Ptr siPtr = create(); + auto& iTbls = siPtr->infoTables; + + siPtr->scanRating = http::RequestBodyJSON::required(siJson, "infoscanrating"); + json const& jsTbls = http::RequestBodyJSON::required(siJson, "infotables"); + for (auto const& jsElem : jsTbls) { + auto db = http::RequestBodyJSON::required(jsElem, "sidb"); + auto table = 
http::RequestBodyJSON::required(jsElem, "sitable"); + auto sRating = http::RequestBodyJSON::required(jsElem, "sirating"); + auto lockInMem = http::RequestBodyJSON::required(jsElem, "silockinmem"); + iTbls.emplace_back(db, table, lockInMem, sRating); + } + siPtr->sortTablesSlowestFirst(); + + return siPtr; +} + std::ostream& operator<<(std::ostream& os, ScanTableInfo const& tbl) { os << "(db=" << tbl.db << " table=" << tbl.table; os << " lockInMemory=" << tbl.lockInMemory << " scanRating=" << tbl.scanRating << ")"; return os; } +std::ostream& ScanInfo::dump(std::ostream& os) const { + os << "ScanInfo{speed=" << scanRating << " tables: " << util::printable(infoTables) << "}"; + return os; +} + std::ostream& operator<<(std::ostream& os, ScanInfo const& info) { - os << "ScanInfo{speed=" << info.scanRating << " tables: " << util::printable(info.infoTables) << "}"; + info.dump(os); return os; } -} // namespace lsst::qserv::proto +} // namespace lsst::qserv::protojson diff --git a/src/proto/ScanTableInfo.h b/src/protojson/ScanTableInfo.h similarity index 70% rename from src/proto/ScanTableInfo.h rename to src/protojson/ScanTableInfo.h index 634953e656..4f1b3e8ccf 100644 --- a/src/proto/ScanTableInfo.h +++ b/src/protojson/ScanTableInfo.h @@ -21,39 +21,30 @@ * see . */ -#ifndef LSST_QSERV_PROTO_SCANTABLEINFO_H -#define LSST_QSERV_PROTO_SCANTABLEINFO_H +#ifndef LSST_QSERV_PROTOJSON_SCANTABLEINFO_H +#define LSST_QSERV_PROTOJSON_SCANTABLEINFO_H // System headers +#include #include #include -// Qserv headers -#include "proto/worker.pb.h" +// Third party headers +#include "nlohmann/json.hpp" -namespace lsst::qserv::proto { +namespace lsst::qserv::protojson { /// Structure to store shared scan information for a single table. 
/// struct ScanTableInfo { using ListOf = std::vector; + ScanTableInfo() = default; ScanTableInfo(std::string const& db_, std::string const& table_) : db(db_), table(table_) {} ScanTableInfo(std::string const& db_, std::string const& table_, bool lockInMemory_, int scanRating_) : db{db_}, table{table_}, lockInMemory{lockInMemory_}, scanRating{scanRating_} {} - ScanTableInfo(TaskMsg_ScanTable const& scanTbl) - : db{scanTbl.db()}, - table{scanTbl.table()}, - lockInMemory{scanTbl.lockinmemory()}, - scanRating{scanTbl.scanrating()} {} - - /// Copy contents of this object into a TaskMsg_ScanTable object. - void copyToScanTable(TaskMsg_ScanTable* msgScanTbl) const { - msgScanTbl->set_db(db); - msgScanTbl->set_table(table); - msgScanTbl->set_lockinmemory(lockInMemory); - msgScanTbl->set_scanrating(scanRating); - } + + ScanTableInfo(ScanTableInfo const&) = default; int compare(ScanTableInfo const& rhs) const; @@ -63,20 +54,37 @@ struct ScanTableInfo { int scanRating{0}; }; -struct ScanInfo { +/// This class stores information about database table ratings for +/// a user query. +class ScanInfo { +public: + using Ptr = std::shared_ptr; + /// Threshold priority values. Scan priorities are not limited to these values. enum Rating { FASTEST = 0, FAST = 10, MEDIUM = 20, SLOW = 30, SLOWEST = 100 }; + ScanInfo() = default; + ScanInfo(ScanInfo const&) = default; + + static Ptr create() { return Ptr(new ScanInfo()); } + + static Ptr createFromJson(nlohmann::json const& ujJson); + + /// Return a json version of the contents of this class. 
+ nlohmann::json toJson() const; + void sortTablesSlowestFirst(); int compareTables(ScanInfo const& rhs); ScanTableInfo::ListOf infoTables; int scanRating{Rating::FASTEST}; + + std::ostream& dump(std::ostream& os) const; }; std::ostream& operator<<(std::ostream& os, ScanTableInfo const& tbl); std::ostream& operator<<(std::ostream& os, ScanInfo const& info); -} // namespace lsst::qserv::proto +} // namespace lsst::qserv::protojson -#endif // LSST_QSERV_PROTO_SCANTABLEINFO_H +#endif // LSST_QSERV_PROTOJSON_SCANTABLEINFO_H diff --git a/src/protojson/UberJobErrorMsg.cc b/src/protojson/UberJobErrorMsg.cc new file mode 100644 index 0000000000..a79674c49a --- /dev/null +++ b/src/protojson/UberJobErrorMsg.cc @@ -0,0 +1,163 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "protojson/UberJobErrorMsg.h" + +#include + +// Qserv headers +#include "http/Client.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "util/common.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.UberJobErrorMsg"); +} // namespace + +namespace lsst::qserv::protojson { + +string UberJobErrorMsg::cName(const char* fName) const { + return string("UberJobErrorMsg::") + fName + " qId=" + to_string(queryId) + + " ujId=" + to_string(uberJobId); +} + +UberJobErrorMsg::Ptr UberJobErrorMsg::create(AuthContext const& authContext_, unsigned int version_, + string const& workerIdStr_, string const& czarName_, + CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + util::MultiError const& multiErr_) { + Ptr jrMsg = Ptr(new UberJobErrorMsg(authContext_, version_, workerIdStr_, czarName_, czarId_, queryId_, + uberJobId_, multiErr_)); + return jrMsg; +} + +UberJobErrorMsg::Ptr UberJobErrorMsg::createFromJson(nlohmann::json const& jsWReq) { + string const fName("UberJobErrorMsg::createFromJson"); + LOGS(_log, LOG_LVL_DEBUG, fName); + try { + AuthContext const authContext_(http::RequestBodyJSON::required(jsWReq, "instance_id"), + http::RequestBodyJSON::required(jsWReq, "auth_key")); + + util::MultiError multiErr_ = multiErrorFromJson(jsWReq["multiError"]); + + Ptr jrMsg = Ptr(new UberJobErrorMsg( + authContext_, http::RequestBodyJSON::required(jsWReq, "version"), + http::RequestBodyJSON::required(jsWReq, "workerid"), + http::RequestBodyJSON::required(jsWReq, "czar"), + http::RequestBodyJSON::required(jsWReq, "czarid"), + http::RequestBodyJSON::required(jsWReq, "queryid"), + http::RequestBodyJSON::required(jsWReq, "uberjobid"), multiErr_)); + return jrMsg; + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, string("UberJobErrorMsg::createJson invalid ") << 
exc.what()); + } + return nullptr; +} + +util::MultiError UberJobErrorMsg::multiErrorFromJson(nlohmann::json const& jsMErr) { + util::MultiError multiErr_; + if (!jsMErr.is_array()) { + throw std::invalid_argument("UberJobErrorMsg::multiErrorFromJson MultiError is not a json::array"); + } + // Fill in multiErr_ with the values in jsMErr + for (auto const& jsElem : jsMErr) { + try { + auto const errCode = http::RequestBodyJSON::required(jsElem, "eCode"); + auto const subCode = http::RequestBodyJSON::required(jsElem, "subCode"); + auto const count = http::RequestBodyJSON::required(jsElem, "count"); + auto const eMsg = http::RequestBodyJSON::required(jsElem, "eMsg"); + auto const& chunkIdsArray = jsElem["chunkIds"]; + set chunkIds(chunkIdsArray.begin(), chunkIdsArray.end()); + auto const& jobIdsArray = jsElem["jobIds"]; + set jobIds(jobIdsArray.begin(), jobIdsArray.end()); + util::Error err(errCode, subCode, chunkIds, jobIds, eMsg, count); + multiErr_.insert(err); + } catch (invalid_argument const& ex) { + // skip to next element + LOGS(_log, LOG_LVL_WARN, "UberJobErrorMsg::multiErrorFromJson failed to read Error:" << jsElem); + } + } + return multiErr_; +} + +bool UberJobErrorMsg::equals(UberJobStatusMsg const& other) const { + try { + UberJobErrorMsg const& otherError = dynamic_cast(other); + if (multiError == otherError.multiError) { + return equalsBase(other); + } + } catch (std::bad_cast& ex) { + } + // different type + return false; +} + +std::ostream& UberJobErrorMsg::dump(std::ostream& os) const { + os << "{UberJobErrorMsg:"; + UberJobStatusMsg::dump(os); + os << " multiError=" << multiError << "}"; + return os; +} + +UberJobErrorMsg::UberJobErrorMsg(AuthContext const& authContext_, unsigned int version_, + string const& workerId_, string const& czarName_, CzarId czarId_, + QueryId queryId_, UberJobId uberJobId_, util::MultiError const& multiErr_) + : UberJobStatusMsg(authContext_, version_, workerId_, czarName_, czarId_, queryId_, uberJobId_), + 
multiError(multiErr_) {} + +json UberJobErrorMsg::toJson() const { + json jsJr; + + // These need to match what http::BaseModule::enforceInstanceId() + // and http::BaseModule::enforceAuthorization() are looking for. + jsJr["instance_id"] = authContext.replicationInstanceId; + jsJr["auth_key"] = authContext.replicationAuthKey; + jsJr["version"] = version; + + jsJr["workerid"] = workerId; + jsJr["czar"] = czarName; + jsJr["czarid"] = czarId; + jsJr["queryid"] = queryId; + jsJr["uberjobid"] = uberJobId; + jsJr["multiError"] = json::array(); + auto& jsMultiE = jsJr["multiError"]; + auto errVect = multiError.getVector(); + for (auto const& err : errVect) { + jsMultiE.push_back(json::object({{"count", err.getCount()}, + {"eCode", err.getCode()}, + {"subCode", err.getSubCode()}, + {"eMsg", err.getMsg()}, + {"chunkIds", err.getChunkIdsVect()}, + {"jobIds", err.getJobIdsVect()}})); + } + + return jsJr; +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/UberJobErrorMsg.h b/src/protojson/UberJobErrorMsg.h new file mode 100644 index 0000000000..57bbcec2c4 --- /dev/null +++ b/src/protojson/UberJobErrorMsg.h @@ -0,0 +1,82 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_PROTOJSON_UBERJOBERRORMSG_H +#define LSST_QSERV_PROTOJSON_UBERJOBERRORMSG_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" +#include "protojson/WorkerQueryStatusData.h" +#include "protojson/UberJobReadyMsg.h" +#include "util/MultiError.h" + +// This header declarations +namespace lsst::qserv::protojson { + +/// This class handles the message used to inform the czar that there has +/// been a problem with an UberJob. +class UberJobErrorMsg : public UberJobStatusMsg { +public: + using Ptr = std::shared_ptr; + /// class name for log, fName is expected to be __func__. + std::string cName(const char* fName) const override; + + UberJobErrorMsg() = delete; + UberJobErrorMsg(UberJobErrorMsg const&) = delete; + UberJobErrorMsg& operator=(UberJobErrorMsg const&) = delete; + + static Ptr create(AuthContext const& authContext_, unsigned int version_, std::string const& workerIdStr_, + std::string const& czarName_, CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + util::MultiError const& multiErr_); + + /// This function creates a UberJobErrorMsg object from the worker json `czarJson`. + static Ptr createFromJson(nlohmann::json const& czarJson); + static util::MultiError multiErrorFromJson(nlohmann::json const& czarJson); + + ~UberJobErrorMsg() = default; + + bool equals(UberJobStatusMsg const& other) const override; + + /// Return a json object with data for collection of the UberJob result file. 
+ nlohmann::json toJson() const override; + std::ostream& dump(std::ostream& os) const override; + + util::MultiError multiError; + +private: + UberJobErrorMsg(AuthContext const& authContext_, unsigned int version_, std::string const& workerId_, + std::string const& czarName_, CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + util::MultiError const& multiErr_); +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_UBERJOBERRORMSG_H diff --git a/src/protojson/UberJobMsg.cc b/src/protojson/UberJobMsg.cc new file mode 100644 index 0000000000..346cdab895 --- /dev/null +++ b/src/protojson/UberJobMsg.cc @@ -0,0 +1,442 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "protojson/UberJobMsg.h" + +#include + +// Qserv headers +#include "http/Client.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "qdisp/JobQuery.h" +#include "qdisp/JobDescription.h" +#include "qproc/ChunkQuerySpec.h" +#include "util/common.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.UberJobMsg"); +} // namespace + +namespace lsst::qserv::protojson { + +UberJobMsg::UberJobMsg(unsigned int metaVersion, std::string const& replicationInstanceId, + std::string const& replicationAuthKey, CzarContactInfo::Ptr const& czInfo, + string const& workerId, QueryId qId, UberJobId ujId, int rowLimit, int maxTableSizeMB, + ScanInfo::Ptr const& scanInfo_, bool scanInteractive_, + std::vector> const& jobs) + : _metaVersion(metaVersion), + _replicationInstanceId(replicationInstanceId), + _replicationAuthKey(replicationAuthKey), + _czInfo(czInfo), + _workerId(workerId), + _qId(qId), + _ujId(ujId), + _rowLimit(rowLimit), + _maxTableSizeMB(maxTableSizeMB), + _scanInfo(scanInfo_), + _scanInteractive(scanInteractive_), + _idStr("QID=" + to_string(_qId) + "_ujId=" + to_string(_ujId)) { + for (auto& jobPtr : jobs) { + // This creates the JobMsg objects for all relates jobs and their fragments. 
+ auto jobMsg = JobMsg::create(jobPtr, _jobSubQueryTempMap, _jobDbTablesMap); + _jobMsgVect->push_back(jobMsg); + } +} + +json UberJobMsg::toJson() const { + json ujmJson = {{"version", _metaVersion}, + {"instance_id", _replicationInstanceId}, + {"auth_key", _replicationAuthKey}, + {"worker", _workerId}, + {"queryid", _qId}, + {"uberjobid", _ujId}, + {"czarinfo", _czInfo->toJson()}, + {"rowlimit", _rowLimit}, + {"subqueries_map", _jobSubQueryTempMap->toJson()}, + {"dbtables_map", _jobDbTablesMap->toJson()}, + {"maxtablesizemb", _maxTableSizeMB}, + {"scaninfo", _scanInfo->toJson()}, + {"scaninteractive", _scanInteractive}, + {"jobs", json::array()}}; + + auto& jsJobs = ujmJson["jobs"]; + for (auto const& jbMsg : *_jobMsgVect) { + jsJobs.emplace_back(jbMsg->toJson()); + } + + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " ujmJson=" << ujmJson); + return ujmJson; +} + +UberJobMsg::Ptr UberJobMsg::createFromJson(nlohmann::json const& ujmJson) { + LOGS(_log, LOG_LVL_TRACE, "UberJobMsg::createFromJson ujmJson=" << ujmJson); + try { + if (ujmJson["version"] != http::MetaModule::version) { + LOGS(_log, LOG_LVL_ERROR, "UberJobMsg::createFromJson bad version " << ujmJson["version"]); + return nullptr; + } + + auto czInfo_ = CzarContactInfo::createFromJson(ujmJson["czarinfo"]); + if (czInfo_ == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "UberJobMsg::createFromJson czar could not be parsed in " << ujmJson); + return nullptr; + } + + auto scanInfo_ = ScanInfo::createFromJson(ujmJson["scaninfo"]); + if (scanInfo_ == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + "UberJobMsg::createFromJson scanInfo could not be parsed in " << ujmJson); + return nullptr; + } + + auto metaVersion = http::RequestBodyJSON::required(ujmJson, "version"); + auto replicationInstanceId = http::RequestBodyJSON::required(ujmJson, "instance_id"); + auto replicationAuthKey = http::RequestBodyJSON::required(ujmJson, "auth_key"); + auto workerId = http::RequestBodyJSON::required(ujmJson, "worker"); + auto qId = 
http::RequestBodyJSON::required(ujmJson, "uberjobid"); + auto rowLimit = http::RequestBodyJSON::required(ujmJson, "rowlimit"); + auto maxTableSizeMB = http::RequestBodyJSON::required(ujmJson, "maxtablesizemb"); + // Reuse the CzarContactInfo that was parsed and null-checked above rather than + // parsing "czarinfo" a second time and passing on an unchecked result. + auto const& czInfo = czInfo_; + auto scanInteractive_ = http::RequestBodyJSON::required(ujmJson, "scaninteractive"); + auto jsUjJobs = http::RequestBodyJSON::required(ujmJson, "jobs"); + + std::vector> emptyJobs; + + Ptr ujmPtr = Ptr(new UberJobMsg(metaVersion, replicationInstanceId, replicationAuthKey, czInfo, + workerId, qId, ujId, rowLimit, maxTableSizeMB, scanInfo_, + scanInteractive_, emptyJobs)); + + auto const& jsSubQueriesMap = http::RequestBodyJSON::required(ujmJson, "subqueries_map"); + ujmPtr->_jobSubQueryTempMap = JobSubQueryTempMap::createFromJson(jsSubQueriesMap); + + auto jsDbTablesMap = http::RequestBodyJSON::required(ujmJson, "dbtables_map"); + ujmPtr->_jobDbTablesMap = JobDbTableMap::createFromJson(jsDbTablesMap); + + for (auto const& jsUjJob : jsUjJobs) { + JobMsg::Ptr jobMsgPtr = + JobMsg::createFromJson(jsUjJob, ujmPtr->_jobSubQueryTempMap, ujmPtr->_jobDbTablesMap); + ujmPtr->_jobMsgVect->push_back(jobMsgPtr); + } + return ujmPtr; + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, "UberJobMsg::createFromJson invalid " << exc.what() << " json=" << ujmJson); + } + return nullptr; +} + +JobMsg::Ptr JobMsg::create(std::shared_ptr const& jobPtr, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap) { + auto jMsg = Ptr(new JobMsg(jobPtr, jobSubQueryTempMap, jobDbTablesMap)); + return jMsg; +} + +JobMsg::JobMsg(std::shared_ptr const& jobPtr, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, JobDbTableMap::Ptr const& jobDbTablesMap) + : _jobSubQueryTempMap(jobSubQueryTempMap), _jobDbTablesMap(jobDbTablesMap) { + auto const descr = jobPtr->getDescription(); + if (descr ==
nullptr) { + throw util::Bug(ERR_LOC, cName(__func__) + " description=null for job=" + jobPtr->getIdStr()); + } + auto chunkQuerySpec = descr->getChunkQuerySpec(); + _jobId = descr->id(); + _attemptCount = descr->getAttemptCount(); + _chunkQuerySpecDb = chunkQuerySpec->db; + _chunkId = chunkQuerySpec->chunkId; + + // Add fragments + _jobFragments = JobFragment::createVect(*chunkQuerySpec, jobSubQueryTempMap, jobDbTablesMap); +} + +nlohmann::json JobMsg::toJson() const { + auto jsJobMsg = nlohmann::json({{"jobId", _jobId}, + {"attemptCount", _attemptCount}, + {"querySpecDb", _chunkQuerySpecDb}, + {"chunkId", _chunkId}, + {"queryFragments", json::array()}}); + + auto& jsqFrags = jsJobMsg["queryFragments"]; + for (auto& jFrag : *_jobFragments) { + jsqFrags.emplace_back(jFrag->toJson()); + } + + return jsJobMsg; +} + +JobMsg::JobMsg(JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, JobDbTableMap::Ptr const& jobDbTablesMap, + JobId jobId, int attemptCount, std::string const& chunkQuerySpecDb, int chunkId) + : _jobId(jobId), + _attemptCount(attemptCount), + _chunkQuerySpecDb(chunkQuerySpecDb), + _chunkId(chunkId), + _jobSubQueryTempMap(jobSubQueryTempMap), + _jobDbTablesMap(jobDbTablesMap) {} + +JobMsg::Ptr JobMsg::createFromJson(nlohmann::json const& ujJson, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap) { + JobId jobId = http::RequestBodyJSON::required(ujJson, "jobId"); + int attemptCount = http::RequestBodyJSON::required(ujJson, "attemptCount"); + string chunkQuerySpecDb = http::RequestBodyJSON::required(ujJson, "querySpecDb"); + int chunkId = http::RequestBodyJSON::required(ujJson, "chunkId"); + + json jsQFrags = http::RequestBodyJSON::required(ujJson, "queryFragments"); + + Ptr jMsgPtr = Ptr( + new JobMsg(jobSubQueryTempMap, jobDbTablesMap, jobId, attemptCount, chunkQuerySpecDb, chunkId)); + jMsgPtr->_jobFragments = + JobFragment::createVectFromJson(jsQFrags, jMsgPtr->_jobSubQueryTempMap, jMsgPtr->_jobDbTablesMap); 
+ return jMsgPtr; +} + +json JobSubQueryTempMap::toJson() const { + json jsSubQueryTemplateMap = {{"subquerytemplate_map", json::array()}}; + auto& jsSqtMap = jsSubQueryTemplateMap["subquerytemplate_map"]; + for (auto const& [key, templ] : _qTemplateMap) { + json jsElem = {{"index", key}, {"template", templ}}; + jsSqtMap.push_back(jsElem); + } + + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " " << jsSqtMap); + return jsSubQueryTemplateMap; +} + +/// Rebuild the index -> template map from the "subquerytemplate_map" array written by toJson(). +/// @throws std::invalid_argument if the same index appears more than once. +JobSubQueryTempMap::Ptr JobSubQueryTempMap::createFromJson(nlohmann::json const& ujJson) { + Ptr sqtMapPtr = create(); + auto& sqtMap = sqtMapPtr->_qTemplateMap; + LOGS(_log, LOG_LVL_TRACE, "JobSubQueryTempMap::createFromJson " << ujJson); + auto const& jsElements = ujJson["subquerytemplate_map"]; + for (auto const& jsElem : jsElements) { + int index = http::RequestBodyJSON::required(jsElem, "index"); + string templ = http::RequestBodyJSON::required(jsElem, "template"); + auto res = sqtMap.insert(make_pair(index, templ)); + if (!res.second) { + // Leading space keeps the function name separate from "index=", matching JobDbTableMap::createFromJson. + throw invalid_argument(sqtMapPtr->cName(__func__) + " index=" + to_string(index) + "=" + templ + + " index already found in " + to_string(ujJson)); + } + } + return sqtMapPtr; +} + +int JobSubQueryTempMap::findSubQueryTemp(string const& qTemp) { + // The expected number of templates is expected to be small, less than 4, + // so this shouldn't be horribly expensive. + for (auto const& [key, temp] : _qTemplateMap) { + if (temp == qTemp) { + return key; + } + } + + // Need to insert + int index = _qTemplateMap.size(); + _qTemplateMap[index] = qTemp; + return index; +} + +int JobDbTableMap::findDbTable(pair const& dbTablePair) { + // The expected number of templates is expected to be small, less than 4, + // so this shouldn't be horribly expensive.
+ for (auto const& [key, dbTbl] : _dbTableMap) { + if (dbTablePair == dbTbl) { + return key; + } + } + + // Need to insert + int index = _dbTableMap.size(); + _dbTableMap[index] = dbTablePair; + return index; +} + +json JobDbTableMap::toJson() const { + auto jsDbTblMap = json::array(); + for (auto const& [key, valPair] : _dbTableMap) { + json jsDbTbl = {{"index", key}, {"db", valPair.first}, {"table", valPair.second}}; + jsDbTblMap.push_back(jsDbTbl); + } + + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " " << jsDbTblMap); + return jsDbTblMap; +} + +JobDbTableMap::Ptr JobDbTableMap::createFromJson(nlohmann::json const& ujJson) { + Ptr dbTablesMapPtr = create(); + auto& dbTblMap = dbTablesMapPtr->_dbTableMap; + + LOGS(_log, LOG_LVL_TRACE, "JobDbTableMap::createFromJson " << ujJson); + + for (auto const& jsElem : ujJson) { + int index = http::RequestBodyJSON::required(jsElem, "index"); + string db = http::RequestBodyJSON::required(jsElem, "db"); + string tbl = http::RequestBodyJSON::required(jsElem, "table"); + auto res = dbTblMap.insert(make_pair(index, make_pair(db, tbl))); + if (!res.second) { + throw invalid_argument(dbTablesMapPtr->cName(__func__) + " index=" + to_string(index) + "=" + db + + +"." 
+ tbl + " index already found in " + to_string(ujJson)); + } + } + + return dbTablesMapPtr; +} + +JobFragment::JobFragment(JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap) + : _jobSubQueryTempMap(jobSubQueryTempMap), _jobDbTablesMap(jobDbTablesMap) {} + +JobFragment::VectPtr JobFragment::createVect(qproc::ChunkQuerySpec const& chunkQuerySpec, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap) { + VectPtr jFragments{new Vect()}; + if (chunkQuerySpec.nextFragment.get()) { + qproc::ChunkQuerySpec const* sPtr = &chunkQuerySpec; + while (sPtr) { + LOGS(_log, LOG_LVL_TRACE, "nextFragment"); + // Linked fragments will not have valid subChunkTables vectors, + // So, we reuse the root fragment's vector. + _addFragment(*jFragments, chunkQuerySpec.subChunkTables, sPtr->subChunkIds, sPtr->queries, + jobSubQueryTempMap, jobDbTablesMap); + sPtr = sPtr->nextFragment.get(); + } + } else { + LOGS(_log, LOG_LVL_TRACE, "no nextFragment"); + _addFragment(*jFragments, chunkQuerySpec.subChunkTables, chunkQuerySpec.subChunkIds, + chunkQuerySpec.queries, jobSubQueryTempMap, jobDbTablesMap); + } + + return jFragments; +} + +void JobFragment::_addFragment(std::vector& jFragments, DbTableSet const& subChunkTables, + std::vector const& subchunkIds, std::vector const& queries, + JobSubQueryTempMap::Ptr const& subQueryTemplates, + JobDbTableMap::Ptr const& dbTablesMap) { + LOGS(_log, LOG_LVL_TRACE, "JobFragment::_addFragment start"); + Ptr jFrag = Ptr(new JobFragment(subQueryTemplates, dbTablesMap)); + + // queries: The query string is stored in `_jobSubQueryTempMap` and the list of + // integer indexes, `_subQueryTempIndexes`, points back to the specific template. 
+ for (auto& qry : queries) { + int index = jFrag->_jobSubQueryTempMap->findSubQueryTemp(qry); + jFrag->_jobSubQueryTempIndexes.push_back(index); + LOGS(_log, LOG_LVL_TRACE, jFrag->cName(__func__) << " added frag=" << qry << " index=" << index); + } + + // Add the db+table pairs to the subchunks for the fragment. + for (auto& tbl : subChunkTables) { + int index = jFrag->_jobDbTablesMap->findDbTable(make_pair(tbl.db, tbl.table)); + jFrag->_jobDbTablesIndexes.push_back(index); + LOGS(_log, LOG_LVL_TRACE, + jFrag->cName(__func__) << " added dbtbl=" << tbl.db << "." << tbl.table << " index=" << index); + } + + // Add subchunk id numbers + for (auto& subchunkId : subchunkIds) { + jFrag->_subchunkIds.push_back(subchunkId); + LOGS(_log, LOG_LVL_TRACE, jFrag->cName(__func__) << " added subchunkId=" << subchunkId); + } + + jFragments.push_back(move(jFrag)); +} + +/// Return a one-line string listing this fragment's template indexes, subchunk ids +/// and db/table indexes, each as a comma separated list. +string JobFragment::dump() const { + stringstream os; + os << " templateIndexes={"; + for (int j : _jobSubQueryTempIndexes) { + os << j << ", "; + } + os << "} subchunkIds={"; + for (int j : _subchunkIds) { + os << j << ", "; + } + os << "} dbtbl={"; + // Print the db/table indexes here; the subchunk ids were already printed above. + for (int j : _jobDbTablesIndexes) { + os << j << ", "; + } + os << "}"; + return os.str(); +} + +nlohmann::json JobFragment::toJson() const { + json jsFragment = {{"subquerytemplate_indexes", _jobSubQueryTempIndexes}, + {"dbtables_indexes", _jobDbTablesIndexes}, + {"subchunkids", _subchunkIds}}; + + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " " << jsFragment); + return jsFragment; +} + +JobFragment::VectPtr JobFragment::createVectFromJson(nlohmann::json const& jsFrags, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& dbTablesMap) { + LOGS(_log, LOG_LVL_TRACE, "JobFragment::createVectFromJson " << jsFrags); + + JobFragment::VectPtr jobFragments{new JobFragment::Vect()}; + + for (auto const& jsFrag : jsFrags) { + Ptr jobFrag = Ptr(new JobFragment(jobSubQueryTempMap, dbTablesMap)); + + jobFrag->_jobSubQueryTempIndexes =
jsFrag["subquerytemplate_indexes"].get>(); + for (int j : jobFrag->_jobSubQueryTempIndexes) { + try { + string tem = jobSubQueryTempMap->getSubQueryTemp(j); + LOGS(_log, LOG_LVL_TRACE, jobFrag->cName(__func__) << " j=" << j << " =" << tem); + } catch (std::out_of_range const& ex) { + LOGS(_log, LOG_LVL_ERROR, + jobFrag->cName(__func__) << " index=" << j << " not found in template map " << jsFrag); + // rethrow as something callers expect. + throw std::invalid_argument(jobFrag->cName(__func__) + " template index=" + to_string(j) + + " " + ex.what()); + } + } + + jobFrag->_jobDbTablesIndexes = jsFrag["dbtables_indexes"].get>(); + for (int j : jobFrag->_jobDbTablesIndexes) { + try { + auto dbTblPr = dbTablesMap->getDbTable(j); + LOGS(_log, LOG_LVL_TRACE, + jobFrag->cName(__func__) + << " j=" << j << " =" << dbTblPr.first << "." << dbTblPr.second); + } catch (std::out_of_range const& ex) { + LOGS(_log, LOG_LVL_ERROR, + jobFrag->cName(__func__) << " index=" << j << " not found in dbTable map " << jsFrag); + // rethrow as something callers expect. + throw std::invalid_argument(jobFrag->cName(__func__) + " dbtable index=" + to_string(j) + + " " + ex.what()); + } + } + + jobFrag->_subchunkIds = jsFrag["subchunkids"].get>(); + jobFragments->push_back(jobFrag); + } + return jobFragments; +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/UberJobMsg.h b/src/protojson/UberJobMsg.h new file mode 100644 index 0000000000..2b693cd01e --- /dev/null +++ b/src/protojson/UberJobMsg.h @@ -0,0 +1,303 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_PROTOJSON_UBERJOBMSG_H +#define LSST_QSERV_PROTOJSON_UBERJOBMSG_H + +// System headers +#include +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/DbTable.h" +#include "global/intTypes.h" +#include "protojson/ScanTableInfo.h" +#include "protojson/WorkerQueryStatusData.h" + +namespace lsst::qserv::qdisp { +class JobQuery; +} + +namespace lsst::qserv::qproc { +class ChunkQuerySpec; +} + +// This header declarations +namespace lsst::qserv::protojson { + +/// This class is used to store query template strings names in a reasonably +/// concise fashion. +/// The same templates recur frequently, so the individual occurrences +/// will be replaced with an integer index and use this class to recover the +/// original template. +class JobSubQueryTempMap { +public: + using Ptr = std::shared_ptr; + + std::string cName(const char* fName) const { return std::string("JobSubQueryTempMap::") + fName; } + + JobSubQueryTempMap(JobSubQueryTempMap const&) = delete; + + static Ptr create() { return Ptr(new JobSubQueryTempMap()); } + + /// Create JobSubQueryTempMap from result of toJson(). + static Ptr createFromJson(nlohmann::json const& ujJson); + + /// Find or insert qTemp into the map and return its index. + int findSubQueryTemp(std::string const& qTemp); + + /// Return the SubQueryTemp string at `index`. 
+ /// @throws std::out_of_range + std::string getSubQueryTemp(int index) { return _qTemplateMap.at(index); } + + nlohmann::json toJson() const; + +private: + JobSubQueryTempMap() = default; + + std::map _qTemplateMap; +}; + +/// This class is used to store db.table names in a reasonably concise fashion. +/// The same db+table name pairs recur frequently, so the individual occurrences +/// will be replaced with an integer index and use this class to recover the +/// complete names. +class JobDbTableMap { +public: + using Ptr = std::shared_ptr; + + std::string cName(const char* fName) const { return std::string("JobDbTableMap::") + fName; } + + JobDbTableMap(JobDbTableMap const&) = delete; + + static Ptr create() { return Ptr(new JobDbTableMap()); } + + /// Create JobDbTableMap from result of toJson(). + static Ptr createFromJson(nlohmann::json const& ujJson); + + /// Find or insert the db.table pair into the map and return its index. + int findDbTable(std::pair const& dbTablePair); + + /// Return the db.table pair at `index`. + /// @throws std::out_of_range + std::pair getDbTable(int index) { return _dbTableMap.at(index); } + + nlohmann::json toJson() const; + +private: + JobDbTableMap() = default; + + /// Map of db name and table name pairs: db first, table second. + /// The order in the map is arbitrary, but must be consistent + /// so that lookups using the int index always return the same pair. + std::map> _dbTableMap; +}; + +/// This class stores the contents of a query fragment, which will be reconstructed +/// and run on a worker to help answer a user query. 
+class JobFragment { +public: + using Ptr = std::shared_ptr; + using Vect = std::vector; + using VectPtr = std::shared_ptr; + + std::string cName(const char* fName) const { return std::string("JobFragment::") + fName; } + + JobFragment() = delete; + JobFragment(JobFragment const&) = delete; + + static VectPtr createVect(qproc::ChunkQuerySpec const& chunkQuerySpec, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& dbTablesMap); + + /// Create JobFragment from the toJson() result. + static VectPtr createVectFromJson(nlohmann::json const& ujJson, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& dbTablesMap); + + /// Return a json version of the contents of this class. + nlohmann::json toJson() const; + + std::vector const& getJobSubQueryTempIndexes() const { return _jobSubQueryTempIndexes; } + std::vector const& getJobDbTablesIndexes() const { return _jobDbTablesIndexes; } + std::vector const& getSubchunkIds() const { return _subchunkIds; } + + std::string dump() const; + +private: + JobFragment(JobSubQueryTempMap::Ptr const& subQueryTemplates, JobDbTableMap::Ptr const& dbTablesMap); + + /// Add the required data for a query fragment. + static void _addFragment(std::vector& jFragments, DbTableSet const& subChunkTables, + std::vector const& subchunkIds, std::vector const& queries, + JobSubQueryTempMap::Ptr const& subQueryTemplates, + JobDbTableMap::Ptr const& dbTablesMap); + + JobSubQueryTempMap::Ptr _jobSubQueryTempMap; ///< Pointer to indexed list of subquery fragments. + std::vector _jobSubQueryTempIndexes; ///< List of subquery template indexes. + + JobDbTableMap::Ptr _jobDbTablesMap; ///< Pointer to the tables map + std::vector _jobDbTablesIndexes; ///< List of tables used. + + std::vector _subchunkIds; ///< List of subchunks for this chunk. 
+}; + +/// This class is used to store the information for a single Job (the queries and metadata +/// required to collect rows from a single chunk) in a reasonable manner. +class JobMsg { +public: + using Ptr = std::shared_ptr; + using Vect = std::vector; + using VectPtr = std::shared_ptr; + std::string cName(const char* fnc) const { return std::string("JobMsg::") + fnc; } + + JobMsg() = delete; + JobMsg(JobMsg const&) = delete; + JobMsg& operator=(JobMsg const&) = delete; + + static Ptr create(std::shared_ptr const& jobs, + JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap); + + /// Create a Job message from the toJson() results. + static Ptr createFromJson(nlohmann::json const& ujJson, JobSubQueryTempMap::Ptr const& subQueryTemplates, + JobDbTableMap::Ptr const& dbTablesMap); + + /// Return a json version of the contents of this class. + nlohmann::json toJson() const; + + JobId getJobId() const { return _jobId; } + int getAttemptCount() const { return _attemptCount; } + std::string getChunkQuerySpecDb() const { return _chunkQuerySpecDb; } + int getChunkId() const { return _chunkId; } + + JobFragment::VectPtr getJobFragments() const { return _jobFragments; } + +private: + JobMsg(std::shared_ptr const& jobPtr, JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, + JobDbTableMap::Ptr const& jobDbTablesMap); + + JobMsg(JobSubQueryTempMap::Ptr const& jobSubQueryTempMap, JobDbTableMap::Ptr const& jobDbTablesMap, + JobId jobId, int attemptCount, std::string const& chunkQuerySpecDb, int chunkId); + + JobId _jobId; + int _attemptCount; + std::string _chunkQuerySpecDb; + + int _chunkId; + JobFragment::VectPtr _jobFragments{new JobFragment::Vect()}; + + JobSubQueryTempMap::Ptr _jobSubQueryTempMap; ///< Map of all query templates related to this UberJob. + JobDbTableMap::Ptr _jobDbTablesMap; ///< Map of all db.tables related to this UberJob. 
+}; + +/// This class stores an UberJob, a collection of Jobs meant for a +/// specific worker, so it can be converted to and from a json format +/// and sent to a worker. +/// There are several fields which are the same for each job, so these +/// values are stored in maps and the individual Jobs and Fragments +/// use integer indexes to reduce the size of the final message. +class UberJobMsg : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + std::string cName(const char* fnc) const { return std::string("UberJobMsg::") + fnc; } + + UberJobMsg() = delete; + UberJobMsg(UberJobMsg const&) = delete; + UberJobMsg& operator=(UberJobMsg const&) = delete; + + static Ptr create(unsigned int metaVersion, std::string const& replicationInstanceId, + std::string const& replicationAuthKey, CzarContactInfo::Ptr const& czInfo, + WorkerContactInfo::Ptr const& wInfo, QueryId qId, UberJobId ujId, int rowLimit, + int maxTableSizeMB, ScanInfo::Ptr const& scanInfo_, bool scanInteractive_, + std::vector> const& jobs) { + return Ptr(new UberJobMsg(metaVersion, replicationInstanceId, replicationAuthKey, czInfo, wInfo->wId, + qId, ujId, rowLimit, maxTableSizeMB, scanInfo_, scanInteractive_, jobs)); + } + + static Ptr createFromJson(nlohmann::json const& ujJson); + + /// Return a json version of the contents of this class. 
+ nlohmann::json toJson() const; + + QueryId getQueryId() const { return _qId; } + UberJobId getUberJobId() const { return _ujId; } + int getRowLimit() const { return _rowLimit; } + std::string getWorkerId() const { return _workerId; } + int getMaxTableSizeMb() const { return _maxTableSizeMB; } + + CzarContactInfo::Ptr getCzarContactInfo() const { return _czInfo; } + JobSubQueryTempMap::Ptr getJobSubQueryTempMap() const { return _jobSubQueryTempMap; } + JobDbTableMap::Ptr getJobDbTableMap() const { return _jobDbTablesMap; } + + JobMsg::VectPtr getJobMsgVect() const { return _jobMsgVect; } + + ScanInfo::Ptr getScanInfo() const { return _scanInfo; } + + bool getScanInteractive() const { return _scanInteractive; } + + std::string const& getIdStr() const { return _idStr; } + +private: + UberJobMsg(unsigned int metaVersion, std::string const& replicationInstanceId, + std::string const& replicationAuthKey, CzarContactInfo::Ptr const& czInfo, + std::string const& workerId, QueryId qId, UberJobId ujId, int rowLimit, int maxTableSizeMB, + ScanInfo::Ptr const& scanInfo_, bool scanInteractive, + std::vector> const& jobs); + + unsigned int _metaVersion; // "version", http::MetaModule::version + // czar + std::string _replicationInstanceId; // "instance_id", czarConfig->replicationInstanceId() + std::string _replicationAuthKey; //"auth_key", czarConfig->replicationAuthKey() + CzarContactInfo::Ptr _czInfo; + std::string _workerId; // "worker", ciwId + QueryId _qId; // "queryid", _queryId + UberJobId _ujId; // "uberjobid", _uberJobId + int _rowLimit; // "rowlimit", _rowLimit + int _maxTableSizeMB; // + + /// Map of all query templates related to this UberJob. + JobSubQueryTempMap::Ptr _jobSubQueryTempMap{JobSubQueryTempMap::create()}; + + /// Map of all db.tables related to this UberJob. + JobDbTableMap::Ptr _jobDbTablesMap{JobDbTableMap::create()}; + + /// List of all job data in this UberJob. 
"jobs", json::array() + JobMsg::VectPtr _jobMsgVect{new JobMsg::Vect()}; + + ScanInfo::Ptr _scanInfo{ScanInfo::create()}; ///< Information for shared scan rating. + + /// True if the user query has been designated interactive (quick + high priority) + bool _scanInteractive; + + std::string const _idStr; +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_UBERJOBMSG_H diff --git a/src/protojson/UberJobReadyMsg.cc b/src/protojson/UberJobReadyMsg.cc new file mode 100644 index 0000000000..dc12fe5934 --- /dev/null +++ b/src/protojson/UberJobReadyMsg.cc @@ -0,0 +1,170 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "protojson/UberJobReadyMsg.h" + +#include + +// Qserv headers +#include "http/Client.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "util/common.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.UberJobReadyMsg"); +} // namespace + +namespace lsst::qserv::protojson { +UberJobStatusMsg::UberJobStatusMsg(AuthContext const& authContext_, unsigned int version_, + string const& workerId_, string const& czarName_, CzarId czarId_, + QueryId queryId_, UberJobId uberJobId_) + : authContext(authContext_), + version(version_), + workerId(workerId_), + czarName(czarName_), + czarId(czarId_), + queryId(queryId_), + uberJobId(uberJobId_) { + if (version != http::MetaModule::version) { + string eMsg = cName(__func__) + " UberJobStatusMsg constructor bad version " + to_string(version); + LOGS(_log, LOG_LVL_ERROR, eMsg); + throw invalid_argument(eMsg); + } +} + +bool UberJobStatusMsg::equalsBase(UberJobStatusMsg const& other) const { + return ((authContext == other.authContext) && (queryId == other.queryId) && + (uberJobId == other.uberJobId) && (version == other.version) && (workerId == other.workerId) && + (czarName == other.czarName)); +} + +std::ostream& UberJobStatusMsg::dump(std::ostream& os) const { + os << "{UberJobStatusMsg:" << " QID=" << queryId << "_ujId=" << uberJobId << " czId=" << czarId + << " czName=" << czarName << " workerId=" << workerId << " version=" << version << "}"; + return os; +} + +std::string UberJobStatusMsg::dump() const { + std::ostringstream os; + dump(os); + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, UberJobStatusMsg const& ujMsg) { return ujMsg.dump(os); } + +string UberJobReadyMsg::cName(const char* fName) const { + return string("UberJobReadyMsg::") + fName + " QID=" + to_string(queryId) + + "_ujId=" + 
to_string(uberJobId); +} + +UberJobReadyMsg::Ptr UberJobReadyMsg::create(AuthContext const& authContext_, unsigned int version_, + string const& workerIdStr_, string const& czarName_, + CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + FileUrlInfo const& fileUrlInfo_) { + Ptr jrMsg = Ptr(new UberJobReadyMsg(authContext_, version_, workerIdStr_, czarName_, czarId_, queryId_, + uberJobId_, fileUrlInfo_)); + return jrMsg; +} + +bool UberJobReadyMsg::equals(UberJobStatusMsg const& other) const { + try { + UberJobReadyMsg const& otherReady = dynamic_cast(other); + if (fileUrlInfo == otherReady.fileUrlInfo) { + return equalsBase(other); + } + } catch (std::bad_cast& ex) { + } + // different type + return false; +} + +std::ostream& UberJobReadyMsg::dump(std::ostream& os) const { + os << "{UberJobReadyMsg:"; + UberJobStatusMsg::dump(os); + os << fileUrlInfo.dump() << "}"; + return os; +} + +UberJobReadyMsg::Ptr UberJobReadyMsg::createFromJson(json const& jsWReq) { + string const fName("UberJobReadyMsg::createFromJson"); + LOGS(_log, LOG_LVL_DEBUG, fName); + try { + // If replication identifiers were wrong, it wouldn't have gotten this far. 
+ AuthContext authContext_(http::RequestBodyJSON::required(jsWReq, "instance_id"), + http::RequestBodyJSON::required(jsWReq, "auth_key")); + FileUrlInfo fileUrlInfo_(http::RequestBodyJSON::required(jsWReq, "fileUrl"), + http::RequestBodyJSON::required(jsWReq, "rowCount"), + http::RequestBodyJSON::required(jsWReq, "fileSize")); + Ptr jrMsg = Ptr(new UberJobReadyMsg( + authContext_, http::RequestBodyJSON::required(jsWReq, "version"), + http::RequestBodyJSON::required(jsWReq, "workerid"), + http::RequestBodyJSON::required(jsWReq, "czar"), + http::RequestBodyJSON::required(jsWReq, "czarid"), + http::RequestBodyJSON::required(jsWReq, "queryid"), + http::RequestBodyJSON::required(jsWReq, "uberjobid"), fileUrlInfo_)); + return jrMsg; + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, string("UberJobReadyMsg::createJson invalid ") << exc.what()); + } + return nullptr; +} + +UberJobReadyMsg::UberJobReadyMsg(AuthContext const& authContext_, unsigned int version_, + string const& workerId_, string const& czarName_, CzarId czarId_, + QueryId queryId_, UberJobId uberJobId_, FileUrlInfo const& fileUrlInfo_) + : UberJobStatusMsg(authContext_, version_, workerId_, czarName_, czarId_, queryId_, uberJobId_), + fileUrlInfo(fileUrlInfo_) {} + +json UberJobReadyMsg::toJson() const { + json jsJr; + + // These need to match what http::BaseModule::enforceInstanceId() + // and http::BaseModule::enforceAuthorization() are looking for. 
+ jsJr["instance_id"] = authContext.replicationInstanceId; + jsJr["auth_key"] = authContext.replicationAuthKey; + jsJr["version"] = version; + + jsJr["workerid"] = workerId; + jsJr["czar"] = czarName; + jsJr["czarid"] = czarId; + jsJr["queryid"] = queryId; + jsJr["uberjobid"] = uberJobId; + jsJr["fileUrl"] = fileUrlInfo.fileUrl; + jsJr["rowCount"] = fileUrlInfo.rowCount; + jsJr["fileSize"] = fileUrlInfo.fileSize; + return jsJr; +} + +std::string FileUrlInfo::dump() const { + return std::string("{fileUrl=") + fileUrl + " rowCount=" + std::to_string(rowCount) + + " fileSize=" + std::to_string(fileSize) + "}"; +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/UberJobReadyMsg.h b/src/protojson/UberJobReadyMsg.h new file mode 100644 index 0000000000..2a085da659 --- /dev/null +++ b/src/protojson/UberJobReadyMsg.h @@ -0,0 +1,138 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_PROTOJSON_UBERJOBREADYMSG_H +#define LSST_QSERV_PROTOJSON_UBERJOBREADYMSG_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" +#include "protojson/WorkerQueryStatusData.h" + +// This header declarations +namespace lsst::qserv::protojson { + +/// Base class for returning UberJob results on the worker back to the czar. +/// If the worker successful collected results for the UberJob, it sends back +/// an UberJobReadyMsg with information needed to collect the result file. +/// If it failed, it send back an UberJobErrorMsg with information about the +/// error. +class UberJobStatusMsg { +public: + using Ptr = std::shared_ptr; + virtual std::string cName(const char* fName) const { return std::string("UberJobStatusMsg") + fName; } + UberJobStatusMsg() = delete; + UberJobStatusMsg(UberJobStatusMsg const&) = delete; + UberJobStatusMsg& operator=(UberJobStatusMsg const&) = delete; + virtual ~UberJobStatusMsg() = default; + + virtual nlohmann::json toJson() const = 0; + + virtual bool equals(UberJobStatusMsg const& other) const = 0; + + /// Returns a string for logging. + virtual std::ostream& dump(std::ostream& os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream& os, UberJobStatusMsg const& ujMsg); + + AuthContext const authContext; + unsigned int const version; + std::string const workerId; + std::string const czarName; + CzarId const czarId; + QueryId const queryId; + UberJobId const uberJobId; + +protected: + UberJobStatusMsg(AuthContext const& authContext_, unsigned int version_, std::string const& workerId_, + std::string const& czarName_, CzarId czarId_, QueryId queryId_, UberJobId uberJobId_); + bool equalsBase(UberJobStatusMsg const& other) const; +}; + +/// This class stores some information about the result file to be collected by the czar. 
+class FileUrlInfo { +public: + FileUrlInfo() = default; + FileUrlInfo(std::string const& fileUrl_, uint64_t rowCount_, uint64_t fileSize_) + : fileUrl(fileUrl_), rowCount(rowCount_), fileSize(fileSize_) {} + ~FileUrlInfo() = default; + + bool operator==(FileUrlInfo const& other) const { + return (fileUrl == other.fileUrl && rowCount == other.rowCount && fileSize == other.fileSize); + } + + std::string dump() const; + + std::string fileUrl; + uint64_t rowCount = 0; + uint64_t fileSize = 0; +}; + +/// This class handles the message used to inform the czar that a result file +/// for an UberJob is ready. +class UberJobReadyMsg : public UberJobStatusMsg { +public: + using Ptr = std::shared_ptr; + + /// class name for log, fName is expected to be __func__. + std::string cName(const char* fName) const override; + + UberJobReadyMsg() = delete; + UberJobReadyMsg(UberJobReadyMsg const&) = delete; + UberJobReadyMsg& operator=(UberJobReadyMsg const&) = delete; + + static Ptr create(AuthContext const& authContext_, unsigned int version_, std::string const& workerIdStr_, + std::string const& czarName_, CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + FileUrlInfo const& fileUrlInfo_); + + /// This function creates a UberJobReadyMsg object from the worker json `czarJson`, the + /// other parameters are used to verify the json message. + static Ptr createFromJson(nlohmann::json const& czarJson); + + ~UberJobReadyMsg() override = default; + + bool equals(UberJobStatusMsg const& other) const override; + + /// Return a json object with data allowing collection of UberJob result file. 
+ nlohmann::json toJson() const override; + + std::ostream& dump(std::ostream& os) const override; + + FileUrlInfo const fileUrlInfo; + +private: + UberJobReadyMsg(AuthContext const& authContext_, unsigned int version_, std::string const& workerId_, + std::string const& czarName_, CzarId czarId_, QueryId queryId_, UberJobId uberJobId_, + FileUrlInfo const& fileUrlInfo_); +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_UBERJOBREADYMSG_H diff --git a/src/protojson/WorkerCzarComIssue.cc b/src/protojson/WorkerCzarComIssue.cc new file mode 100644 index 0000000000..6bd6772936 --- /dev/null +++ b/src/protojson/WorkerCzarComIssue.cc @@ -0,0 +1,328 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "protojson/WorkerCzarComIssue.h" + +#include + +// Qserv headers +#include "http/Client.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "protojson/PwHideJson.h" +#include "protojson/ResponseMsg.h" +#include "protojson/UberJobErrorMsg.h" +#include "protojson/UberJobReadyMsg.h" +#include "util/common.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.WorkerCzarComIssue"); +} // namespace + +namespace lsst::qserv::protojson { + +std::string WorkerCzarComIssue::wrkCzIdLog() const { + stringstream os; + auto wInf = _wInfo; + auto cInf = _czInfo; + os << " wId=" << (wInf == nullptr ? "null" : wInf->wId) << " czId="; + // `?` doesn't like mixing return types. + if (cInf == nullptr) + os << "null"; + else + os << cInf->czId; + os << " "; + return os.str(); +} + +json WorkerCzarComIssue::toJson() { + json jsCzarR; + lock_guard _lgWciMtx(_wciMtx); + if (_wInfo == nullptr || _czInfo == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " _wInfo or _czInfo was null"); + return jsCzarR; + } + + jsCzarR["version"] = http::MetaModule::version; + jsCzarR["instance_id"] = _authContext.replicationInstanceId; + jsCzarR["auth_key"] = _authContext.replicationAuthKey; + jsCzarR["czarinfo"] = _czInfo->toJson(); + jsCzarR["czar"] = _czInfo->czName; + jsCzarR["workerinfo"] = _wInfo->toJson(); + + jsCzarR["thoughtczarwasdead"] = _thoughtCzarWasDeadTime; + + // List of failed transmits + jsCzarR["failedtransmits"] = json::array(); + auto& jsFts = jsCzarR["failedtransmits"]; + auto iter = _failedTransmits->begin(); + while (iter != _failedTransmits->end()) { + auto const& key = iter->first; + QueryId qId = key.first; + UberJobId ujId = key.second; + UberJobStatusMsg::Ptr ujMsg = iter->second; + auto const resp = ujMsg->toJson(); + json jsF = json{{"qId", qId}, {"ujId", ujId}, 
{"failed", resp}}; + jsFts.push_back(jsF); + ++iter; + } + + return jsCzarR; +} + +WorkerCzarComIssue::Ptr WorkerCzarComIssue::createFromJson(nlohmann::json const& jsCzarReq, + AuthContext const& authContext_) { + string const fName("WorkerCzarComIssue::createFromJson"); + LOGS(_log, LOG_LVL_DEBUG, fName); + try { + if (jsCzarReq["version"] != http::MetaModule::version) { + LOGS(_log, LOG_LVL_ERROR, fName << " bad version"); + return nullptr; + } + + auto czInfo_ = CzarContactInfo::createFromJson(jsCzarReq["czarinfo"]); + auto now = CLOCK::now(); + auto wInfo_ = WorkerContactInfo::createFromJsonWorker(jsCzarReq["workerinfo"], now); + if (czInfo_ == nullptr || wInfo_ == nullptr) { + // Without both contact records the message cannot be tied to a czar/worker pair, + // so report failure the same way a bad version does. + LOGS(_log, LOG_LVL_ERROR, fName << " czar or worker info could not be parsed in " << jsCzarReq); + return nullptr; + } + auto wccIssue = create(authContext_); + wccIssue->setContactInfo(wInfo_, czInfo_); + wccIssue->_thoughtCzarWasDeadTime = + http::RequestBodyJSON::required(jsCzarReq, "thoughtczarwasdead"); + json fTransmits = json::array(); + fTransmits = jsCzarReq.at("failedtransmits"); + if (!fTransmits.is_array()) { + throw std::invalid_argument(fName + " failedtransmits is not a json::array"); + } + + // Fill in _failedTransmits with the values in fTransmits + for (auto const& jsElem : fTransmits) { + try { + auto const qId = http::RequestBodyJSON::required(jsElem, "qId"); + auto const ujId = http::RequestBodyJSON::required(jsElem, "ujId"); + json jsFt = jsElem["failed"]; + UberJobStatusMsg::Ptr ujMsg; + bool isReadyMsg = jsFt.contains("fileUrl"); + if (isReadyMsg) { + ujMsg = UberJobReadyMsg::createFromJson(jsFt); + } else { + ujMsg = UberJobErrorMsg::createFromJson(jsFt); + } + if (ujMsg == nullptr) { + // A null entry would be dereferenced later by toJson() and operator==, so skip it. + LOGS(_log, LOG_LVL_WARN, fName << " failed to read failedTransmit:" << jsElem); + continue; + } + wccIssue->addFailedTransmit(qId, ujId, ujMsg); + } catch (std::invalid_argument const& ex) { + // skip to next element + LOGS(_log, LOG_LVL_WARN, fName << " failed to read failedTransmit:" << jsElem); + } + } + return wccIssue; + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR,
string("WorkerQueryStatusData::createJson invalid ") << exc.what()); + } + return nullptr; +} + +bool WorkerCzarComIssue::operator==(WorkerCzarComIssue const& other) const { + if ((*_wInfo != *other._wInfo) || (*_czInfo != *other._czInfo) || (_authContext != other._authContext) || + (_thoughtCzarWasDeadTime != other._thoughtCzarWasDeadTime)) { + return false; + } + + if (_failedTransmits->size() != other._failedTransmits->size()) { + return false; + } + auto iterThis = _failedTransmits->begin(); + auto iterOther = other._failedTransmits->begin(); + for (; iterThis != _failedTransmits->end() && iterOther != other._failedTransmits->end(); + ++iterThis, ++iterOther) { + if (iterThis->first != iterOther->first) { + return false; + } + + auto const& ftThis = iterThis->second; + auto const& ftOther = iterOther->second; + if (!(ftThis->equals(*ftOther))) { + return false; + } + } + return true; +} + +void WorkerCzarComIssue::addFailedTransmit(QueryId qId, UberJobId ujId, + std::shared_ptr const& ujMsg) { + lock_guard _lgWciMtx(_wciMtx); + _addFailedTransmit(qId, ujId, ujMsg); +} + +void WorkerCzarComIssue::_addFailedTransmit(QueryId qId, UberJobId ujId, + std::shared_ptr const& ujMsg) { + auto key = make_pair(qId, ujId); + (*_failedTransmits)[key] = ujMsg; + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " failedTransmits sz=" << _failedTransmits->size()); +} + +json WorkerCzarComIssue::responseToJson(uint64_t msgThoughtCzarWasDeadTime, + vector const& execRespMsgs) const { + lock_guard _lgWciMtx(_wciMtx); + return _responseToJson(msgThoughtCzarWasDeadTime, execRespMsgs); +} + +std::shared_ptr WorkerCzarComIssue::takeFailedTransmitsMap() { + lock_guard _lgWciMtx(_wciMtx); + auto res = _failedTransmits; + _failedTransmits = make_shared(); + return res; +} + +tuple, vector> WorkerCzarComIssue::clearMapEntries( + nlohmann::json const& response) { + vector ujDataObsoleteList; + vector ujParseErrorList; + size_t count = 0; + if (!response.contains("execRespMsgs")) { + LOGS(_log, 
LOG_LVL_WARN, + cName(__func__) << " response did not have 'execRespMsgs' " << pwHide(response)); + return {count, ujDataObsoleteList, ujParseErrorList}; + } + auto const& jsExecRespMsgs = response["execRespMsgs"]; + if (!jsExecRespMsgs.is_array()) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " response 'execRespMsgs' is not array " << pwHide(response)); + return {count, ujDataObsoleteList, ujParseErrorList}; + } + + lock_guard _lgWciMtx(_wciMtx); + for (auto const& elem : jsExecRespMsgs) { + if (!(elem.contains("qId") && elem.contains("ujId"))) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << "elem missing qId or ujId elem=" << elem); + continue; + } + QueryId qId = elem["qId"]; + UberJobId ujId = elem["ujId"]; + + try { + auto execRespMsg = ExecutiveRespMsg::createFromJson(elem); + if (execRespMsg == nullptr) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " failed to parse execRespMsg elem=" << pwHide(elem)); + continue; + } + // Find the appropriate Task and update it if needed. + if (execRespMsg->success) { + // Nothing needs to be done if the UberJob is not obsolete. If it + // is obsolete, it could be useful to delete the result file. + if (execRespMsg->dataObsolete) { + ujDataObsoleteList.emplace_back(_czInfo, qId, ujId); + } + } else { + // The czar couldn't parse this for some reason, leaving no way to get the result + // file back to the czar. Delete the result file. + ujParseErrorList.emplace_back(_czInfo, qId, ujId); + } + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " removing qId=" << qId << "_ujId=" << ujId << " from map"); + _failedTransmits->erase(make_pair(qId, ujId)); + count++; + } catch (std::invalid_argument const& ex) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " failed to parse execRespMsg elem=" << elem + << " exception: " << ex.what()); + // This should never happen as all messages should parse. + // There's probably nothing else that can be done at this point. 
Hopefully, other + // cleanup mechanisms will be enough to prevent this from causing problems. + continue; + } + } + return {count, ujDataObsoleteList, ujParseErrorList}; +} + +void WorkerCzarComIssue::clearFailedTransmitsForQids(std::map const& qIdMap) { + size_t startSize = 0; + size_t endSize = 0; + { + std::lock_guard lg(_wciMtx); + // Normally empty. + startSize = _failedTransmits->size(); + if (startSize == 0) { + return; + } + // Make a qId set for faster lookup. + std::set qIdSet; + for (auto const& [qId, tm] : qIdMap) { + qIdSet.insert(qId); + } + for (auto iter = _failedTransmits->begin(); iter != _failedTransmits->end();) { + QueryId qIdFailed = iter->first.first; + if (qIdSet.find(qIdFailed) != qIdSet.end()) { + iter = _failedTransmits->erase(iter); + } else { + ++iter; + } + } + endSize = _failedTransmits->size(); + } + if (startSize > 0) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " startSize=" << startSize << " endSize=" << endSize); + } +} + +json WorkerCzarComIssue::_responseToJson( + uint64_t msgThoughtCzarWasDeadTime, + vector const& execRespMsgs_) const { + protojson::WorkerCzarComRespMsg wccRespMsg(true, msgThoughtCzarWasDeadTime); + wccRespMsg.execRespMsgs = execRespMsgs_; + return wccRespMsg.toJson(); +} + +string WorkerCzarComIssue::dump() const { + lock_guard _lgWciMtx(_wciMtx); + return _dump(); +} + +string WorkerCzarComIssue::_dump() const { + stringstream os; + os << "WorkerCzarComIssue wInfo=" << ((_wInfo == nullptr) ? "?" 
: _wInfo->dump()); + os << " czInfo=" << _czInfo->dump(); + os << " thoughtCzarWasDeadTime=" << _thoughtCzarWasDeadTime; + os << " failedTransmits["; + for (auto const& [key, ft] : *_failedTransmits) { + os << "{qId=" << key.first << " ujId=" << key.second << "{"; + auto ujMsg = ft; + if (ujMsg == nullptr) { + os << " ujMsg=nullptr"; + } else { + os << " ujMsg=" << ujMsg->dump(); + } + os << "}}"; + } + os << "]"; + return os.str(); +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/WorkerCzarComIssue.h b/src/protojson/WorkerCzarComIssue.h new file mode 100644 index 0000000000..b40ec3d5d1 --- /dev/null +++ b/src/protojson/WorkerCzarComIssue.h @@ -0,0 +1,189 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_PROTOJSON_WORKERCZARCOMISSUE_H +#define LSST_QSERV_PROTOJSON_WORKERCZARCOMISSUE_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "protojson/ResponseMsg.h" +#include "protojson/WorkerQueryStatusData.h" +#include "wpublish/QueriesAndChunks.h" + +// This header declarations +namespace lsst::qserv::protojson { + +class UberJobStatusMsg; + +typedef std::shared_ptr FailedTransmitType; +typedef std::map, FailedTransmitType> FailedTransmitsMap; + +/// This class is used to send/receive a message from the worker to a specific +/// czar. It is used when there has been a communication issue with the worker +/// sending UberJob file ready messages. If there have been timeouts, the worker +/// will send this message to the czar immediately after the worker receives a +/// WorkerQueryStatusData message from the czar. Receiving that message indicates +/// that czar is once again capable of communicating. +/// +/// If communication with the czar has failed for a long time, the worker +/// will set "_thoughtCzarWasDead" and delete all incomplete work associated +/// with that czar. Result files will remain until garbage cleanup or the czar +/// calls for their removal. +/// +/// UberJob file ready messages that failed to be sent to the czar will be +/// added to this message via the `_failtedTransmit` map. The czar response to +/// this will include a list of QueryId + UberJobId values, which will be +/// cleared from `_failtedTransmit`. +/// +/// Since QueryId + UberJobId is unique, the czar ignores all calls after the +/// first one to collect the worker's file, but attempts are made to minimize +/// duplicate calls. +/// +/// This message is expected to rarely be needed. 
+class WorkerCzarComIssue { +public: + using Ptr = std::shared_ptr; + + WorkerCzarComIssue() = delete; + ~WorkerCzarComIssue() = default; + + bool operator==(WorkerCzarComIssue const& other) const; + + std::string cName(const char* funcN) const { + return std::string("WorkerCzarComIssue") + funcN + wrkCzIdLog(); + } + + static Ptr create(AuthContext const& authContext_) { return Ptr(new WorkerCzarComIssue(authContext_)); } + + static Ptr createFromJson(nlohmann::json const& workerJson, AuthContext const& authContext_); + + void setThoughtCzarWasDeadTime(uint64_t msDeadNowAliveTime) { + std::lock_guard lg(_wciMtx); + _thoughtCzarWasDeadTime = msDeadNowAliveTime; + } + + /// Go through the list of QueryId + UberJobId values in the response and clear those entries from the + /// failedTransmits map. + /// @return - the number of entries cleared, + /// - a vector of obsolete UberJob identifiers + /// - a vector of UberJob identifier that the czar could not parse + /// Nothing really needs to be done with the vector of obsolete UberJob identifiers, but deleting them + /// will conserve resources. The vector of UberJob identifiers that the czar could not parse is a problem. + /// An error message should be sent back to the czar for each of those in an attempt to maintain system + /// stability, but they really should never have happened in the first place. + std::tuple, std::vector> clearMapEntries( + nlohmann::json const& response); + + /// Remove all entries from the failedTransmits map with QueryId `qId`. + /// The czar is done with these queries. + /// @param qIdMap - map of dead queries and times. The times aren't needed + /// here, but using the existing map is more efficient. + /// Note: _failedTransmits is normally empty. + void clearFailedTransmitsForQids(std::map const& qIdMap); + + uint64_t getThoughtCzarWasDeadTime() const { return _thoughtCzarWasDeadTime; } + + /// Return true if there is a reason this WorkerCzarComIssue should be sent to this czar. 
+ bool needToSend() const { + std::lock_guard lg(_wciMtx); + return (_thoughtCzarWasDeadTime > 0 || _failedTransmits->size() > 0); + } + + /// Set the contact information for the appropriate czar and worker. + void setContactInfo(WorkerContactInfo::Ptr const& wInfo_, CzarContactInfo::Ptr const& czInfo_) { + std::lock_guard lgWci(_wciMtx); + if (_wInfo == nullptr && wInfo_ != nullptr) _wInfo = wInfo_; + if (_czInfo == nullptr && czInfo_ != nullptr) _czInfo = czInfo_; + } + + CzarContactInfo::Ptr getCzarInfo() const { + std::lock_guard lgWci(_wciMtx); + return _czInfo; + } + + WorkerContactInfo::Ptr getWorkerInfo() const { + std::lock_guard lgWci(_wciMtx); + return _wInfo; + } + + /// The `request` may indicate success or failure + void addFailedTransmit(QueryId qId, UberJobId ujId, + std::shared_ptr const& ujMsg); + + /// Return a json version of the contents of this class. + nlohmann::json toJson(); + + /// Return a json object indicating the status of the message for the + /// original requester. + nlohmann::json responseToJson(uint64_t msgThoughtCzarWasDeadTime, + std::vector const& execRespMsgs) const; + + /// Take the failedTransmitsMap and make an empty one to take its place. + std::shared_ptr takeFailedTransmitsMap(); + + /// Return a short string with worker and czar IDs for logging. + std::string wrkCzIdLog() const; + + std::string dump() const; + +private: + WorkerCzarComIssue(AuthContext const& authContext_) : _authContext(authContext_) {} + + /// The `request` may indicate success or failure + void _addFailedTransmit(QueryId qId, UberJobId ujId, + std::shared_ptr const& ujMsg); + + /// Return a json object indicating the status of the message for the + /// original requester. 
+ nlohmann::json _responseToJson(uint64_t thoughtCzarWasDeadTime, + std::vector const& execRespMsgs_) const; + + std::string _dump() const; + + WorkerContactInfo::Ptr _wInfo; + CzarContactInfo::Ptr _czInfo; + AuthContext _authContext; + + /// If the worker thought the czar was dead, this is the time in milliseconds that the worker + /// thought the czar came back to life. This is passed to the czar so the czar knows + /// the worker has killed all related UberJobs. The czar sends this value back to + /// the worker in the response to avoid race conditions. If the returned value matches + /// or is greater than what is stored, it is certain that there have not been any + /// dead/alive cycles since the czar received the message. + uint64_t _thoughtCzarWasDeadTime = 0; + + /// Map of failed transmits using QueryId + UberJobId for the key + std::shared_ptr _failedTransmits{new FailedTransmitsMap}; + + mutable MUTEX _wciMtx; ///< protects all members. +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_WORKERCZARCOMISSUE_H diff --git a/src/protojson/WorkerQueryStatusData.cc b/src/protojson/WorkerQueryStatusData.cc new file mode 100644 index 0000000000..97438a36e4 --- /dev/null +++ b/src/protojson/WorkerQueryStatusData.cc @@ -0,0 +1,452 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "protojson/WorkerQueryStatusData.h" + +#include + +// Qserv headers +#include "http/Client.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "util/common.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.WorkerQueryStatusData"); +} // namespace + +namespace lsst::qserv::protojson { + +json CzarContactInfo::toJson() const { + json jsCzar; + jsCzar["name"] = czName; + jsCzar["id"] = czId; + jsCzar["management-port"] = czPort; + jsCzar["management-host-name"] = czHostName; + jsCzar["czar-startup-time"] = czStartupTime; + return jsCzar; +} + +CzarContactInfo::Ptr CzarContactInfo::createFromJson(nlohmann::json const& czJson) { + try { + auto czName_ = http::RequestBodyJSON::required(czJson, "name"); + auto czId_ = http::RequestBodyJSON::required(czJson, "id"); + auto czPort_ = http::RequestBodyJSON::required(czJson, "management-port"); + auto czHostName_ = http::RequestBodyJSON::required(czJson, "management-host-name"); + auto czStartupTime_ = http::RequestBodyJSON::required(czJson, "czar-startup-time"); + return create(czName_, czId_, czPort_, czHostName_, czStartupTime_); + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, string("CzarContactInfo::createJson invalid ") << exc.what()); + } + return nullptr; +} + +bool CzarContactInfo::compare(CzarContactInfo const& other) const { + return (czName == other.czName && czId == other.czId && czPort == other.czPort && + czHostName == other.czHostName); +} + +std::string CzarContactInfo::dump() const { + stringstream os; + os << "czName=" << czName << " czId=" << czId << " czPort=" << czPort << " czHostName=" << czHostName + << " czStartupTime=" << 
czStartupTime; + return os.str(); +} + +json WorkerContactInfo::toJson() const { + lock_guard lg(_rMtx); + return _toJson(); +} + +json WorkerContactInfo::_toJson() const { + json jsWorker; + jsWorker["id"] = wId; + jsWorker["host"] = _wHostAddr; + jsWorker["management-host-name"] = _wHostName; + jsWorker["management-port"] = _wPort; + jsWorker["w-startup-time"] = _wStartupTime; + return jsWorker; +} + +WorkerContactInfo::Ptr WorkerContactInfo::createFromJsonRegistry(string const& wId_, + nlohmann::json const& regJson) { + try { + auto wHost_ = http::RequestBodyJSON::required(regJson, "host-addr"); + auto wManagementHost_ = http::RequestBodyJSON::required(regJson, "host-name"); + auto wPort_ = http::RequestBodyJSON::required(regJson, "management-port"); + auto updateTimeInt = http::RequestBodyJSON::required(regJson, "update-time-ms"); + TIMEPOINT updateTime_ = TIMEPOINT(chrono::milliseconds(updateTimeInt)); + + return create(wId_, wHost_, wManagementHost_, wPort_, updateTime_); + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, string("CWorkerContactInfo::createJson invalid ") << exc.what()); + } + return nullptr; +} + +WorkerContactInfo::Ptr WorkerContactInfo::createFromJsonWorker(nlohmann::json const& wJson, + TIMEPOINT updateTime_) { + try { + auto wId_ = http::RequestBodyJSON::required(wJson, "id"); + auto wHost_ = http::RequestBodyJSON::required(wJson, "host"); + auto wManagementHost_ = http::RequestBodyJSON::required(wJson, "management-host-name"); + auto wPort_ = http::RequestBodyJSON::required(wJson, "management-port"); + + return create(wId_, wHost_, wManagementHost_, wPort_, updateTime_); + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, string("CWorkerContactInfo::createJson invalid ") << exc.what()); + } + return nullptr; +} + +bool WorkerContactInfo::operator==(WorkerContactInfo const& other) const { + return ((wId == other.wId) && (_wHostAddr == other._wHostAddr) && (_wHostName == other._wHostName) && + (_wPort 
== other._wPort) && (_wStartupTime == other._wStartupTime)); +} + +void WorkerContactInfo::setRegUpdateTime(TIMEPOINT updateTime) { + std::lock_guard lg(_rMtx); + _regUpdateTime = updateTime; + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " " << _dump()); +} + +string WorkerContactInfo::dump() const { + lock_guard lg(_rMtx); + return _dump(); +} + +string WorkerContactInfo::_dump() const { + stringstream os; + os << "workerContactInfo{" + << "id=" << wId << " hostAddr=" << _wHostAddr << " hostName=" << _wHostName << " port=" << _wPort + << " update=" << util::TimeUtils::timePointToDateTimeString(_regUpdateTime) << "}"; + return os.str(); +} + +shared_ptr WorkerQueryStatusData::toJson(double maxLifetime) { + // Go through the _qIdDoneKeepFiles, _qIdDoneDeleteFiles, and _qIdDeadUberJobs lists to build a + // message to send to the worker. + auto now = CLOCK::now(); + shared_ptr jsWorkerReqPtr = make_shared(); + json& jsWorkerR = *jsWorkerReqPtr; + jsWorkerR["version"] = http::MetaModule::version; + jsWorkerR["instance_id"] = _authContext.replicationInstanceId; + jsWorkerR["auth_key"] = _authContext.replicationAuthKey; + jsWorkerR["czarinfo"] = _czInfo->toJson(); + { + lock_guard lgI(_infoMtx); + if (_wInfo != nullptr) { + jsWorkerR["workerinfo"] = _wInfo->toJson(); + jsWorkerR["worker"] = _wInfo->wId; + } else { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " wInfo is null"); + } + } + + // Note, old elements in the maps will be deleted after being added to the message + // to keep the czar from keeping track of these forever. 
+ addListsToJson(jsWorkerR, now, maxLifetime); + if (czarCancelAfterRestart) { + jsWorkerR["czarrestart"] = true; + lock_guard mapLg(mapMtx); + jsWorkerR["czarrestartcancelczid"] = czarCancelAfterRestartCzId; + jsWorkerR["czarrestartcancelqid"] = czarCancelAfterRestartQId; + } else { + jsWorkerR["czarrestart"] = false; + } + + return jsWorkerReqPtr; +} + +void WorkerQueryStatusData::addListsToJson(json& jsWR, TIMEPOINT tmMark, double maxLifetime) { + jsWR["qiddonekeepfiles"] = json::array(); + jsWR["qiddonedeletefiles"] = json::array(); + jsWR["qiddeaduberjobs"] = json::array(); + lock_guard mapLg(mapMtx); + { + auto& jsDoneKeep = jsWR["qiddonekeepfiles"]; + auto iterDoneKeep = qIdDoneKeepFiles.begin(); + while (iterDoneKeep != qIdDoneKeepFiles.end()) { + auto qId = iterDoneKeep->first; + jsDoneKeep.push_back(qId); + auto tmTouched = iterDoneKeep->second; + double ageSecs = std::chrono::duration(tmMark - tmTouched).count(); + if (ageSecs > maxLifetime) { + iterDoneKeep = qIdDoneKeepFiles.erase(iterDoneKeep); + } else { + ++iterDoneKeep; + } + } + } + { + auto& jsDoneDelete = jsWR["qiddonedeletefiles"]; + auto iterDoneDelete = qIdDoneDeleteFiles.begin(); + while (iterDoneDelete != qIdDoneDeleteFiles.end()) { + auto qId = iterDoneDelete->first; + jsDoneDelete.push_back(qId); + auto tmStamp = iterDoneDelete->second; + double ageSecs = std::chrono::duration(tmMark - tmStamp).count(); + if (ageSecs > maxLifetime) { + iterDoneDelete = qIdDoneDeleteFiles.erase(iterDoneDelete); + } else { + ++iterDoneDelete; + } + } + } + { + auto& jsDeadUj = jsWR["qiddeaduberjobs"]; + auto iterDeadUjQid = qIdDeadUberJobs.begin(); + while (iterDeadUjQid != qIdDeadUberJobs.end()) { + TIMEPOINT youngestTm = TIMEPOINT::max(); // need to find the youngest + auto qId = iterDeadUjQid->first; + auto& ujIdMap = iterDeadUjQid->second; + + json jsQidUj = {{"qid", qId}, {"ujids", json::array()}}; + auto& jsUjIds = jsQidUj["ujids"]; + + auto iterUjId = ujIdMap.begin(); + bool addedUjId = false; + + 
while (iterUjId != ujIdMap.end()) { + UberJobId ujId = iterUjId->first; + auto tmStamp = iterUjId->second; + if (tmStamp < youngestTm) { + youngestTm = tmStamp; + } + + jsUjIds.push_back(ujId); + addedUjId = true; + double ageSecs = std::chrono::duration(tmMark - tmStamp).count(); + if (ageSecs > maxLifetime) { + iterUjId = ujIdMap.erase(iterUjId); + } else { + ++iterUjId; + } + } + + if (addedUjId) { + jsDeadUj.push_back(jsQidUj); + } + + // If the youngest element was too old, delete the map. + if (ujIdMap.empty() || std::chrono::duration(tmMark - youngestTm).count() > maxLifetime) { + iterDeadUjQid = qIdDeadUberJobs.erase(iterDeadUjQid); + } else { + ++iterDeadUjQid; + } + } + } +} + +WorkerQueryStatusData::Ptr WorkerQueryStatusData::createFromJson(nlohmann::json const& jsWorkerReq, + AuthContext const& authContext_, + TIMEPOINT updateTm_) { + try { + if (jsWorkerReq["version"] != http::MetaModule::version) { + LOGS(_log, LOG_LVL_ERROR, "WorkerQueryStatusData::createJson bad version"); + return nullptr; + } + + auto czInfo_ = CzarContactInfo::createFromJson(jsWorkerReq["czarinfo"]); + auto wInfo_ = WorkerContactInfo::createFromJsonWorker(jsWorkerReq["workerinfo"], updateTm_); + if (czInfo_ == nullptr || wInfo_ == nullptr) { + LOGS(_log, LOG_LVL_ERROR, + "WorkerQueryStatusData::createJson czar or worker info could not be parsed in " + << jsWorkerReq); + return nullptr; + } + + auto wqsData = WorkerQueryStatusData::create(wInfo_, czInfo_, authContext_); + wqsData->parseLists(jsWorkerReq, updateTm_); + + bool czarRestart = http::RequestBodyJSON::required(jsWorkerReq, "czarrestart"); + if (czarRestart) { + auto restartCzarId = + http::RequestBodyJSON::required(jsWorkerReq, "czarrestartcancelczid"); + auto restartQueryId = + http::RequestBodyJSON::required(jsWorkerReq, "czarrestartcancelqid"); + wqsData->setCzarCancelAfterRestart(restartCzarId, restartQueryId); + } + return wqsData; + } catch (invalid_argument const& exc) { + LOGS(_log, LOG_LVL_ERROR, 
string("WorkerQueryStatusData::createJson invalid ") << exc.what()); + } + return nullptr; +} + +void WorkerQueryStatusData::parseLists(nlohmann::json const& jsWR, TIMEPOINT updateTm) { + lock_guard mapLg(mapMtx); + parseListsInto(jsWR, updateTm, qIdDoneKeepFiles, qIdDoneDeleteFiles, qIdDeadUberJobs); +} + +void WorkerQueryStatusData::parseListsInto(nlohmann::json const& jsWR, TIMEPOINT updateTm, + std::map& doneKeepF, + std::map& doneDeleteF, + std::map>& deadUberJobs) { + auto& jsQIdDoneKeepFiles = jsWR["qiddonekeepfiles"]; + for (auto const& qidKeep : jsQIdDoneKeepFiles) { + doneKeepF[qidKeep] = updateTm; + } + + auto& jsQIdDoneDeleteFiles = jsWR["qiddonedeletefiles"]; + for (auto const& qidDelete : jsQIdDoneDeleteFiles) { + doneDeleteF[qidDelete] = updateTm; + } + + auto& jsQIdDeadUberJobs = jsWR["qiddeaduberjobs"]; + // Interestingly, !jsQIdDeadUberJobs.empty() doesn't work, but .size() > 0 does. + // Not having the size() check causes issues with the for loop trying to read the + // first element of an empty list, which goes badly. + if (jsQIdDeadUberJobs.size() > 0) { + for (auto const& qDeadUjs : jsQIdDeadUberJobs) { + QueryId qId = qDeadUjs["qid"]; + auto const& ujIds = qDeadUjs["ujids"]; + auto& mapOfUj = deadUberJobs[qId]; + for (auto const& ujId : ujIds) { + mapOfUj[ujId] = updateTm; + } + } + } +} + +void WorkerQueryStatusData::addDeadUberJobs(QueryId qId, std::vector ujIds, TIMEPOINT tm) { + lock_guard mapLg(mapMtx); + auto& ujMap = qIdDeadUberJobs[qId]; + for (auto const ujId : ujIds) { + ujMap[ujId] = tm; + } +} + +void WorkerQueryStatusData::setWInfo(WorkerContactInfo::Ptr const& wInfo_) { + std::lock_guard lgI(_infoMtx); + if (_wInfo == nullptr) { + _wInfo = wInfo_; + return; + } + if (wInfo_ != nullptr) { + // This only changes host and port values of _wInfo. 
+ _wInfo->changeBaseInfo(*wInfo_); + } + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " " << _wInfo->dump()); +} + +void WorkerQueryStatusData::addDeadUberJob(QueryId qId, UberJobId ujId, TIMEPOINT tm) { + lock_guard mapLg(mapMtx); + auto& ujMap = qIdDeadUberJobs[qId]; + ujMap[ujId] = tm; +} + +void WorkerQueryStatusData::addToDoneDeleteFiles(QueryId qId) { + lock_guard mapLg(mapMtx); + qIdDoneDeleteFiles[qId] = CLOCK::now(); +} + +void WorkerQueryStatusData::addToDoneKeepFiles(QueryId qId) { + lock_guard mapLg(mapMtx); + qIdDoneKeepFiles[qId] = CLOCK::now(); +} + +void WorkerQueryStatusData::removeDeadUberJobsFor(QueryId qId) { + lock_guard mapLg(mapMtx); + qIdDeadUberJobs.erase(qId); +} + +json WorkerQueryStatusData::buildResponseJson(uint64_t workerStartupTime) { + // Go through the _qIdDoneKeepFiles, _qIdDoneDeleteFiles, and _qIdDeadUberJobs lists to build a + // response. Nothing should be deleted and time is irrelevant for this, so maxLifetime is enormous + // and any time could be used for last contact, but now() is easy. + // This is only called by the worker. As such, nothing should be deleted here as the lifetime of + // these elements is determined by the lifetime of the owning UserQueryInfo instance. 
+ // See + double maxLifetime = std::numeric_limits::max(); + auto now = CLOCK::now(); + ResponseMsg respMsg(true); + auto jsResp = respMsg.toJson(); + jsResp["w-startup-time"] = workerStartupTime; + addListsToJson(jsResp, now, maxLifetime); + return jsResp; +} + +bool WorkerQueryStatusData::handleResponseJson(nlohmann::json const& jsResp) { + auto now = CLOCK::now(); + std::map doneKeepF; + std::map doneDeleteF; + std::map> deadUberJobs; + parseListsInto(jsResp, now, doneKeepF, doneDeleteF, deadUberJobs); + + lock_guard mapLg(mapMtx); + // Remove entries from _qIdDoneKeepFiles + for (auto const& [qId, tm] : doneKeepF) { + qIdDoneKeepFiles.erase(qId); + } + + // Remove entries from _qIdDoneDeleteFiles + for (auto const& [qId, tm] : doneDeleteF) { + qIdDoneDeleteFiles.erase(qId); + } + + // Remove entries from _qIdDeadUberJobs + for (auto const& [qId, ujMap] : deadUberJobs) { + auto iter = qIdDeadUberJobs.find(qId); + if (iter != qIdDeadUberJobs.end()) { + auto& deadMap = iter->second; + for (auto const& [ujId, tm] : ujMap) { + deadMap.erase(ujId); + } + if (deadMap.empty()) { + qIdDeadUberJobs.erase(iter); + } + } + } + + bool workerRestarted = false; + auto workerStartupTime = http::RequestBodyJSON::required(jsResp, "w-startup-time"); + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " workerStartupTime=" << workerStartupTime); + if (!_wInfo->checkWStartupTime(workerStartupTime)) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " startup time for worker=" << _wInfo->dump() + << " changed to=" << workerStartupTime << " Assuming worker restarted"); + workerRestarted = true; + } + return workerRestarted; +} + +string WorkerQueryStatusData::dump() const { + lock_guard lgI(_infoMtx); + return _dump(); +} + +string WorkerQueryStatusData::_dump() const { + VMUTEX_HELD(_infoMtx); + stringstream os; + os << "ActiveWorker " << ((_wInfo == nullptr) ? "?" 
: _wInfo->dump()); + return os.str(); +} + +} // namespace lsst::qserv::protojson diff --git a/src/protojson/WorkerQueryStatusData.h b/src/protojson/WorkerQueryStatusData.h new file mode 100644 index 0000000000..69e1b4fb68 --- /dev/null +++ b/src/protojson/WorkerQueryStatusData.h @@ -0,0 +1,388 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_PROTOJSON_WORKERQUERYSTATUSDATA_H +#define LSST_QSERV_PROTOJSON_WORKERQUERYSTATUSDATA_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" +#include "protojson/ResponseMsg.h" +#include "util/Mutex.h" + +// This header declarations +namespace lsst::qserv::protojson { + +class AuthContext { +public: + AuthContext() = default; + AuthContext(std::string const& replicationInstanceId_, std::string const& replicationAuthKey_) + : replicationInstanceId(replicationInstanceId_), replicationAuthKey(replicationAuthKey_) {} + ~AuthContext() = default; + + bool operator==(AuthContext const& other) const { + return (replicationInstanceId == other.replicationInstanceId) && + (replicationAuthKey == other.replicationAuthKey); + } + bool operator!=(AuthContext const& other) const { return !(*this == other); } + + std::string replicationInstanceId; + std::string replicationAuthKey; +}; + +/// This class just contains the czar id and network contact information. +class CzarContactInfo : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + std::string cName(const char* fnc) const { return std::string("CzarContactInfo") + fnc; } + + CzarContactInfo() = delete; + CzarContactInfo(CzarContactInfo const&) = default; + CzarContactInfo& operator=(CzarContactInfo const&) = default; + + /// Return true is elements, other than czStartupTime, are the same. 
+ bool compare(CzarContactInfo const& other) const; + + bool operator==(CzarContactInfo const& other) const { return compare(other); } + bool operator!=(CzarContactInfo const& other) const { return !(*this == other); } + + static Ptr create(std::string const& czName_, CzarId czId_, int czPort_, std::string const& czHostName_, + uint64_t czStartupTime_) { + return Ptr(new CzarContactInfo(czName_, czId_, czPort_, czHostName_, czStartupTime_)); + } + + static Ptr createFromJson(nlohmann::json const& czarJson); + + std::string const czName; ///< czar "name" + CzarId const czId; ///< czar "id" + int const czPort; ///< czar "management-port" + std::string const czHostName; ///< czar "management-host-name" + uint64_t const czStartupTime; ///< czar startup time + + /// Return a json version of the contents of this class. + nlohmann::json toJson() const; + + std::string dump() const; + +private: + CzarContactInfo(std::string const& czName_, CzarId czId_, int czPort_, std::string const& czHostName_, + uint64_t czStartupTime_) + : czName(czName_), + czId(czId_), + czPort(czPort_), + czHostName(czHostName_), + czStartupTime(czStartupTime_) {} +}; + +class UberJobIdentType { +public: + UberJobIdentType() = default; + UberJobIdentType(CzarContactInfo::Ptr const& czInfo_, QueryId qId_, UberJobId ujId_) + : czInfo(czInfo_), qId(qId_), ujId(ujId_) {} + + CzarContactInfo::Ptr czInfo; + QueryId qId; + UberJobId ujId; +}; + +/// This class just contains the worker id and network communication information. +class WorkerContactInfo { +public: + using Ptr = std::shared_ptr; + + using WCMap = std::unordered_map; + using WCMapPtr = std::shared_ptr; + + static Ptr create(std::string const& wId_, std::string const& wHostAddr_, std::string const& wHostName_, + int wPort_, TIMEPOINT updateTime_) { + return Ptr(new WorkerContactInfo(wId_, wHostAddr_, wHostName_, wPort_, updateTime_)); + } + + /// Ignores _registryUpdateTime as that is not set to or from json. 
+ bool operator==(WorkerContactInfo const& other) const; + bool operator!=(WorkerContactInfo const& other) const { return !(*this == other); } + + /// This function creates a WorkerQueryStatusData object from a registry json message, + /// which is provided by the system registry. + static Ptr createFromJsonRegistry(std::string const& wId_, nlohmann::json const& regJson); + + /// This function creates a WorkerQueryStatusData object from a worker json message. + static Ptr createFromJsonWorker(nlohmann::json const& workerJson, TIMEPOINT updateTime); + + /// Return a json version of the contents of this object. + nlohmann::json toJson() const; + + std::string cName(const char* fn) { return std::string("WorkerContactInfo::") + fn; } + + std::string const wId; ///< key, this is the one thing that cannot change. + + /// Change host and port info to those provided in `other`. + void changeBaseInfo(WorkerContactInfo const& other) { + auto [oWId, oWHostAddr, oWHostName, oWPort] = other.getAll(); + std::lock_guard lg(_rMtx); + _wHostAddr = oWHostAddr; + _wHostName = oWHostName; + _wPort = oWPort; + } + + /// @return wId - workerId + /// @return _wHost - worker host + /// @return _wManagementHost - management host + /// @return _wPort - worker port + std::tuple getAll() const { + std::lock_guard lg(_rMtx); + return {wId, _wHostAddr, _wHostName, _wPort}; + } + + /// Return true if communication related items are the same. 
+ bool isSameContactInfo(WorkerContactInfo const& other) const { + auto [oWId, oWHost, oWManagementHost, oWPort] = other.getAll(); + std::lock_guard lg(_rMtx); + return (wId == oWId && _wHostAddr == oWHost && _wHostName == oWManagementHost && _wPort == oWPort); + } + + void setRegUpdateTime(TIMEPOINT updateTime); + + double timeSinceRegUpdateSeconds() const { + std::lock_guard lg(_rMtx); + double secs = std::chrono::duration(CLOCK::now() - _regUpdateTime).count(); + return secs; + } + + TIMEPOINT getRegUpdateTime() const { + std::lock_guard lg(_rMtx); + return _regUpdateTime; + } + + /// @return true if startupTime equals _wStartupTime or _wStartupTime was never set, + /// if _wStartupTime was never set, it is set to startupTime. + /// @return false indicates the worker was restarted and all associated jobs need + /// re-assignment. + bool checkWStartupTime(uint64_t startupTime) { + std::lock_guard lg(_rMtx); + if (_wStartupTime == startupTime) { + return true; + } + if (_wStartupTime == 0) { + _wStartupTime = startupTime; + return true; + } + _wStartupTime = startupTime; + return false; + } + + uint64_t getWStartupTime() const { + std::lock_guard lg(_rMtx); + return _wStartupTime; + } + + std::string dump() const; + +private: + WorkerContactInfo(std::string const& wId_, std::string const& wHost_, std::string const& wHostName_, + int wPort_, TIMEPOINT updateTime_) + : wId(wId_), _wHostAddr(wHost_), _wHostName(wHostName_), _wPort(wPort_) { + setRegUpdateTime(updateTime_); + } + + // _rMtx must be locked before calling + std::string _dump() const; + + // _rMtx must be locked before calling + nlohmann::json _toJson() const; + + std::string _wHostAddr; ///< "host-addr" entry (like 10.0.0.1) + std::string _wHostName; ///< "management-host-name" entry (FQDN like blah.edu) + int _wPort; ///< "management-port" entry. + + /// Last time the registry heard from this worker. The ActiveWorker class + /// will use this to determine the worker's state (alive/dead). 
+ TIMEPOINT _regUpdateTime; + + /// "w-startup-time", its value is set to zero until the real value is + /// received from the worker. Once it is non-zero, any change indicates + /// the worker was restarted and all UberJobs that were assigned there + /// need to be unassigned. On the worker, this should always be set from + /// foreman()->getStartupTime(); + uint64_t _wStartupTime = 0; + + mutable MUTEX _rMtx; ///< protects _regUpdateTime, _wStartupTime, and the host/port fields +}; + +/// This class's purpose is to be a structure to store and transfer information +/// about which queries have been completed or cancelled on the worker. This +/// class contains the functions that encode and decode the data they contain +/// to and from a json format. +class WorkerQueryStatusData { +public: + using Ptr = std::shared_ptr; + + WorkerQueryStatusData() = delete; + WorkerQueryStatusData(WorkerQueryStatusData const&) = delete; + WorkerQueryStatusData& operator=(WorkerQueryStatusData const&) = delete; + + std::string cName(const char* fName) { return std::string("WorkerQueryStatusData::") + fName; } + + static Ptr create(WorkerContactInfo::Ptr const& wInfo_, CzarContactInfo::Ptr const& czInfo_, + AuthContext const& authContext_) { + return Ptr(new WorkerQueryStatusData(wInfo_, czInfo_, authContext_)); + } + + /// This function creates a WorkerQueryStatusData object from the worker json `czarJson`, the + /// other parameters are used to verify the json message. + static Ptr createFromJson(nlohmann::json const& czarJson, AuthContext const& authContext_, + TIMEPOINT updateTm_); + + ~WorkerQueryStatusData() = default; + + void setWInfo(WorkerContactInfo::Ptr const& wInfo_); + + WorkerContactInfo::Ptr getWInfo() const { + std::lock_guard lgI(_infoMtx); + return _wInfo; + } + CzarContactInfo::Ptr getCzInfo() const { return _czInfo; } + + /// `qId` and `ujId` identify a dead UberJob which is added to the list + /// of dead UberJobs for this worker.
+ void addDeadUberJob(QueryId qId, UberJobId ujId, TIMEPOINT tm); + + /// Add multiple UberJobIds for `qId` to the list of dead UberJobs for + /// this worker. + void addDeadUberJobs(QueryId qId, std::vector ujIds, TIMEPOINT tm); + + /// Add `qId` to the list of user queries where all Tasks can be stopped + /// and result files can be deleted. + void addToDoneDeleteFiles(QueryId qId); + + /// Add `qId` to the list of user queries where all Tasks can be stopped + /// but result files should be kept. + void addToDoneKeepFiles(QueryId qId); + + /// Remove all UberJobs from the list of dead UberJobs with QueryId `qId`. + /// There's no point in tracking individual UberJobs once the entire + /// user query is finished or cancelled as they will all be deleted by + /// `addToDoneDeleteFiles`. + void removeDeadUberJobsFor(QueryId qId); + + void setCzarCancelAfterRestart(CzarId czId, QueryId lastQId) { + std::lock_guard mapLg(mapMtx); + czarCancelAfterRestart = true; + czarCancelAfterRestartCzId = czId; + czarCancelAfterRestartQId = lastQId; + } + + bool isCzarRestart() const { return czarCancelAfterRestart; } + CzarId getCzarRestartCzarId() const { return czarCancelAfterRestartCzId; } + QueryId getCzarRestartQueryId() const { return czarCancelAfterRestartQId; } + + /// Create a json object held by a shared pointer to use as a message. + /// Old objects in this instance will be removed after being added to the + /// json message. + std::shared_ptr toJson(double maxLifetime); + + /// Add contents of qIdDoneKeepFiles, qIdDoneDeleteFiles, and qIdDeadUberJobs to `jsWR`, + /// and remove map elements that have an age (tmMark - element.touchTime) greater + /// than maxLifetime. + void addListsToJson(nlohmann::json& jsWR, TIMEPOINT tmMark, double maxLifetime); + + /// Parse the lists in `jsWR` to populate the lists for qIdDoneKeepFiles, + /// qIdDoneDeleteFiles, and qIdDeadUberJobs.
+ /// @throws std::invalid_argument + void parseLists(nlohmann::json const& jsWR, TIMEPOINT updateTm); + + /// Return a json object indicating the status of the message for the + /// original requester. + nlohmann::json buildResponseJson(uint64_t workerStartupTime); + + /// Use the worker's response, `jsResp`, to update the status of this object. + /// The worker's response contains lists indicating what the worker + /// received from the czar's json message created with `serializeResponseJson`. + /// The czar can remove the ids from the lists once the worker has + /// verified them. + /// @return transmitSuccess - true if the message was parsed successfully. + /// @return workerRestarted - true if `workerStartupTime` doesn't match, + /// indicating the worker has been restarted and the czar should + /// invalidate and re-assign all UberJobs associated with this + /// worker. NOTE(review): only a single bool is returned; confirm which of the two it is. + /// @throw invalid_argument if there are problems with json parsing. + bool handleResponseJson(nlohmann::json const& jsResp); + + /// Parse the contents of `jsWR` to fill the maps `doneKeepF`, `doneDeleteF`, + /// and `deadUberJobs`. + static void parseListsInto(nlohmann::json const& jsWR, TIMEPOINT updateTm, + std::map& doneKeepF, + std::map& doneDeleteF, + std::map>& deadUberJobs); + + std::string dump() const; + + // Making these private requires member functions to be written + // that cause issues with linking. All of the workarounds are ugly. + /// Map of QueryIds where the LIMIT clause has been satisfied so + /// that Tasks can be stopped but result files need to be kept. + std::map qIdDoneKeepFiles; + + /// Map of QueryIds where Tasks can be stopped and files deleted, which is + /// used when user queries are cancelled or finished. + std::map qIdDoneDeleteFiles; + + /// Map used to indicate the specific UberJobs that need to be killed.
+ std::map> qIdDeadUberJobs; + + /// If true, this indicates that this is a newly started czar and + /// the worker should stop all previous work associated with this + /// CzarId. + std::atomic czarCancelAfterRestart = false; + CzarId czarCancelAfterRestartCzId = 0; + QueryId czarCancelAfterRestartQId = 0; + + /// Protects qIdDoneKeepFiles, qIdDoneDeleteFiles, qIdDeadUberJobs, + /// and the czarCancelAfterRestart* variables (NOTE(review): the two id getters do not lock it). + mutable MUTEX mapMtx; + +private: + WorkerQueryStatusData(WorkerContactInfo::Ptr const& wInfo_, CzarContactInfo::Ptr const& czInfo_, + AuthContext const& authContext_) + : _wInfo(wInfo_), _czInfo(czInfo_), _authContext(authContext_) {} + + WorkerContactInfo::Ptr _wInfo; ///< Information needed to contact the worker. + CzarContactInfo::Ptr const _czInfo; ///< Information needed to contact the czar. + mutable MUTEX _infoMtx; ///< protects _wInfo + + AuthContext const _authContext; ///< Used for message verification. + + /// _infoMtx must be locked before calling. + std::string _dump() const; +}; + +} // namespace lsst::qserv::protojson + +#endif // LSST_QSERV_PROTOJSON_WORKERQUERYSTATUSDATA_H diff --git a/src/protojson/testStatusData.cc b/src/protojson/testStatusData.cc new file mode 100644 index 0000000000..3c182b4e6f --- /dev/null +++ b/src/protojson/testStatusData.cc @@ -0,0 +1,432 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// System headers +#include +#include +#include +#include +#include + +// Qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" +#include "lsst/log/Log.h" +#include "protojson/ResponseMsg.h" +#include "protojson/ScanTableInfo.h" +#include "protojson/UberJobErrorMsg.h" +#include "protojson/UberJobReadyMsg.h" +#include "protojson/WorkerCzarComIssue.h" +#include "protojson/WorkerQueryStatusData.h" +#include "util/Error.h" +#include "util/MultiError.h" +#include "wbase/UberJobData.h" +#include "wconfig/WorkerConfig.h" +#include "wpublish/QueriesAndChunks.h" + +// Boost unit test header +#define BOOST_TEST_MODULE RequestQuery +#include + +using namespace std; +namespace test = boost::test_tools; +using namespace lsst::qserv::protojson; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.testStatusData"); +} + +BOOST_AUTO_TEST_SUITE(Suite) + +auto workerCfg = lsst::qserv::wconfig::WorkerConfig::create(); +/// This is basically a dummy object that needs to be available for UberJobData objects in the test. 
+auto queriesAndChunksG = lsst::qserv::wpublish::QueriesAndChunks::setupGlobal(2s, 1s, 10, 100, false); + +BOOST_AUTO_TEST_CASE(WorkerQueryStatusData) { + lsst::qserv::protojson::AuthContext authContext_("repliInstId", "repliIAuthKey"); + + uint64_t cxrStartTime = lsst::qserv::millisecSinceEpoch(lsst::qserv::CLOCK::now() - 5s); + uint64_t wkrStartTime = lsst::qserv::millisecSinceEpoch(lsst::qserv::CLOCK::now() - 10s); + + string const czrName("czar_name"); + lsst::qserv::CzarId const czrId = 32; + int czrPort = 2022; + string const czrHost("cz_host"); + + auto czarA = + lsst::qserv::protojson::CzarContactInfo::create(czrName, czrId, czrPort, czrHost, cxrStartTime); + + auto czarAJs = czarA->toJson(); + + auto czarB = lsst::qserv::protojson::CzarContactInfo::createFromJson(czarAJs); + BOOST_REQUIRE(czarA->compare(*czarB)); + + auto czarC = lsst::qserv::protojson::CzarContactInfo::create("different", czrId, czrPort, czrHost, + cxrStartTime); + BOOST_REQUIRE(!czarA->compare(*czarC)); + + auto start = lsst::qserv::CLOCK::now(); + auto workerA = WorkerContactInfo::create("sd_workerA", "host_w1", "mgmhost_a", 3421, start); + + auto workerB = WorkerContactInfo::create("sd_workerB", "host_w2", "mgmhost_a", 3421, start); + auto workerC = WorkerContactInfo::create("sd_workerC", "host_w3", "mgmhost_b", 3422, start); + + auto jsWorkerA = workerA->toJson(); + auto start1Sec = start + 1s; + auto workerA1 = WorkerContactInfo::createFromJsonWorker(jsWorkerA, start1Sec); + BOOST_REQUIRE(workerA->isSameContactInfo(*workerA1)); + + // WorkerQueryStatusData + auto wqsdA = lsst::qserv::protojson::WorkerQueryStatusData::create(workerA, czarA, authContext_); + + double maxLifetime = 300.0; + auto jsDataA = wqsdA->toJson(maxLifetime); + + // Check that empty lists work. 
+ auto wqsdA1 = + lsst::qserv::protojson::WorkerQueryStatusData::createFromJson(*jsDataA, authContext_, start1Sec); + + auto jsDataA1 = wqsdA1->toJson(maxLifetime); + BOOST_REQUIRE(*jsDataA == *jsDataA1); + + vector qIdsDelFiles = {7, 8, 9, 15, 25, 26, 27, 30}; + vector qIdsKeepFiles = {1, 2, 3, 4, 6, 10, 13, 19, 33}; + for (auto const qIdDF : qIdsDelFiles) { + wqsdA->qIdDoneDeleteFiles[qIdDF] = start; + } + + jsDataA = wqsdA->toJson(maxLifetime); + BOOST_REQUIRE(*jsDataA != *jsDataA1); + + for (auto const qIdKF : qIdsKeepFiles) { + wqsdA->qIdDoneKeepFiles[qIdKF] = start; + } + + wqsdA->addDeadUberJobs(12, {1, 3}, start); + + jsDataA = wqsdA->toJson(maxLifetime); + + auto start5Sec = start + 5s; + auto workerAFromJson = + lsst::qserv::protojson::WorkerQueryStatusData::createFromJson(*jsDataA, authContext_, start5Sec); + auto jsWorkerAFromJson = workerAFromJson->toJson(maxLifetime); + BOOST_REQUIRE(*jsDataA == *jsWorkerAFromJson); + + wqsdA->addDeadUberJobs(12, {34}, start5Sec); + wqsdA->addDeadUberJobs(91, {77}, start5Sec); + wqsdA->addDeadUberJobs(1059, {1, 4, 6, 7, 8, 10, 3, 22, 93}, start5Sec); + + jsDataA = wqsdA->toJson(maxLifetime); + BOOST_REQUIRE(*jsDataA != *jsWorkerAFromJson); + + workerAFromJson = + lsst::qserv::protojson::WorkerQueryStatusData::createFromJson(*jsDataA, authContext_, start5Sec); + jsWorkerAFromJson = workerAFromJson->toJson(maxLifetime); + BOOST_REQUIRE(*jsDataA == *jsWorkerAFromJson); + + // Make the response, which contains lists of the items handled by the workers. + auto jsWorkerResp = workerAFromJson->buildResponseJson(wkrStartTime); + + // test removal of elements after response. 
+ BOOST_REQUIRE(!wqsdA->qIdDoneDeleteFiles.empty()); + BOOST_REQUIRE(!wqsdA->qIdDoneKeepFiles.empty()); + BOOST_REQUIRE(!wqsdA->qIdDeadUberJobs.empty()); + + wqsdA->handleResponseJson(jsWorkerResp); /* NOTE(review): result discarded and the same call is repeated on the next line - confirm the repeat is intentional. */ + auto workerRestarted = wqsdA->handleResponseJson(jsWorkerResp); + BOOST_REQUIRE(workerRestarted == false); + + BOOST_REQUIRE(wqsdA->qIdDoneDeleteFiles.empty()); + BOOST_REQUIRE(wqsdA->qIdDoneKeepFiles.empty()); + BOOST_REQUIRE(wqsdA->qIdDeadUberJobs.empty()); +} + +BOOST_AUTO_TEST_CASE(WorkerCzarComIssue) { /* Round-trips WorkerCzarComIssue through json and checks failed-transmit bookkeeping. */ + AuthContext authContext_("repliInstId", "repliIAuthKey"); + + uint64_t cxrStartTime = lsst::qserv::millisecSinceEpoch(lsst::qserv::CLOCK::now() - 5s); + + string const czrName("czar_name"); + lsst::qserv::CzarId const czrId = 32; + int czrPort = 2022; + string const czrHost("cz_host"); + + auto czarA = + lsst::qserv::protojson::CzarContactInfo::create(czrName, czrId, czrPort, czrHost, cxrStartTime); + auto czarAJs = czarA->toJson(); + + auto start = lsst::qserv::CLOCK::now(); + auto workerA = WorkerContactInfo::create("sd_workerA", "host_w1", "mgmhost_a", 3421, start); + auto jsWorkerA = workerA->toJson(); + + // WorkerCzarComIssue, thought czar was dead test. + auto wccIssueA = lsst::qserv::protojson::WorkerCzarComIssue::create(authContext_); + wccIssueA->setContactInfo(workerA, czarA); + BOOST_REQUIRE(wccIssueA->needToSend() == false); + wccIssueA->setThoughtCzarWasDeadTime(3452987); + BOOST_REQUIRE(wccIssueA->needToSend() == true); + + auto jsIssueA = wccIssueA->toJson(); + + // The source WorkerCzarComIssue for failed transmit tests. + auto wccIssueA1 = lsst::qserv::protojson::WorkerCzarComIssue::createFromJson(jsIssueA, authContext_); + auto jsIssueA1 = wccIssueA1->toJson(); + BOOST_REQUIRE(jsIssueA == jsIssueA1); + + // A vector of the expected responses from the czar from UberJobData responses + // added to wccIssueA1 as failed transmits. + std::vector czarResponseMsgs; + + // Test a list of failed messages.
+ string const czarHost = "czarHost"; + int const czarPort = 234; + string const czarName = "czar1"; + lsst::qserv::CzarId const czarId = 1; + string const workerId1 = "wrkr1"; + int const resultPort = 436; + int const rowlimit = 0; + int const maxTableBytes = 1'000'000; + + lsst::qserv::UberJobId const ujId1 = 1; + lsst::qserv::QueryId const qId1 = 722234; + bool const scaninteractive1 = true; + auto scanInfo1 = lsst::qserv::protojson::ScanInfo::create(); + uint64_t const rowCount1 = 81; + uint64_t const fileSize1 = 1240; + FileUrlInfo fileInf1("http://test/ulr1/fn", rowCount1, fileSize1); + auto ujData1 = lsst::qserv::wbase::UberJobData::create( + ujId1, czarName, czarId, czarHost, czarPort, qId1, rowlimit, maxTableBytes, scanInfo1, + scaninteractive1, workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, + resultPort); + auto ujResponse1 = ujData1->responseFileReadyBuild(fileInf1, authContext_); + wccIssueA1->addFailedTransmit(qId1, ujId1, ujResponse1); + auto execRespMsg1 = ExecutiveRespMsg::create(true, false, qId1, ujId1, czarId); + czarResponseMsgs.push_back(execRespMsg1); + + auto jsWcA1 = wccIssueA1->toJson(); + // parse jsWcA1 and check if the answer is correct + auto wccIssueA1Out1 = lsst::qserv::protojson::WorkerCzarComIssue::createFromJson(jsWcA1, authContext_); + BOOST_REQUIRE(*wccIssueA1 == *wccIssueA1Out1); + + lsst::qserv::QueryId const qId1a = qId1; + lsst::qserv::UberJobId const ujId1a = 9; + lsst::qserv::protojson::FileUrlInfo fileInf1a("http://test/ulr1/fna", 36, 12400); + auto ujData1a = lsst::qserv::wbase::UberJobData::create( + ujId1a, czarName, czarId, czarHost, czarPort, qId1, rowlimit, maxTableBytes, scanInfo1, + scaninteractive1, workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, + resultPort); + auto ujResponse1a = ujData1->responseFileReadyBuild(fileInf1a, authContext_); /* NOTE(review): built from ujData1 although ujData1a was just created and is otherwise unused - confirm which was intended. */ + wccIssueA1->addFailedTransmit(qId1a, ujId1a, ujResponse1a); + auto execRespMsg1a = ExecutiveRespMsg::create(true, true, qId1a,
ujId1a, czarId); + czarResponseMsgs.push_back(execRespMsg1a); + + auto jsWcA1a = wccIssueA1->toJson(); + // parse jsWcA1a and check if the answer is correct + auto wccIssueA1aOut1 = lsst::qserv::protojson::WorkerCzarComIssue::createFromJson(jsWcA1a, authContext_); + BOOST_REQUIRE(*wccIssueA1 == *wccIssueA1aOut1); + BOOST_REQUIRE(*wccIssueA1 != *wccIssueA1Out1); + + lsst::qserv::UberJobId const ujId2 = 333; + lsst::qserv::QueryId qId2 = 722237; + bool scaninteractive2 = false; + lsst::qserv::protojson::FileUrlInfo fileInf2("http://test/ulr2/fn", 456, 424000); + auto scanInfo2 = lsst::qserv::protojson::ScanInfo::create(); + auto ujData2 = lsst::qserv::wbase::UberJobData::create( + ujId2, czarName, czarId, czarHost, czarPort, qId2, rowlimit, maxTableBytes, scanInfo2, + scaninteractive2, workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, + resultPort); + auto ujResponse2 = ujData2->responseFileReadyBuild(fileInf2, authContext_); + wccIssueA1->addFailedTransmit(qId2, ujId2, ujResponse2); + auto execRespMsg2 = ExecutiveRespMsg::create(true, false, qId2, ujId2, czarId); + czarResponseMsgs.push_back(execRespMsg2); + + auto jsWcA2 = wccIssueA1->toJson(); + // parse jsWcA2 and check if the answer is correct + auto wccIssueA2Out1 = lsst::qserv::protojson::WorkerCzarComIssue::createFromJson(jsWcA2, authContext_); + BOOST_REQUIRE(*wccIssueA1 == *wccIssueA2Out1); + + lsst::qserv::UberJobId const ujId3 = 8; + lsst::qserv::QueryId qId3 = 722240; + int const chunkId3 = 471; + bool const cancelled3 = true; + lsst::qserv::util::MultiError multiErr; + lsst::qserv::util::Error err(105423, 3, "Some random error."); + multiErr.insert(err); + auto ujData3 = lsst::qserv::wbase::UberJobData::create( + ujId3, czarName, czarId, czarHost, czarPort, qId3, rowlimit, maxTableBytes, scanInfo2, + scaninteractive2, workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, + resultPort); + auto ujResponse3 = + ujData3->responseErrorBuild(multiErr, chunkId3,
cancelled3, LOG_LVL_DEBUG, authContext_); + wccIssueA1->addFailedTransmit(qId3, ujId3, ujResponse3); + auto execRespMsg3 = ExecutiveRespMsg::create(true, true, qId3, ujId3, czarId); + czarResponseMsgs.push_back(execRespMsg3); + + auto jsWcA3 = wccIssueA1->toJson(); + // parse jsWcA3 and check if the answer is correct + auto wccIssueA3Out1 = lsst::qserv::protojson::WorkerCzarComIssue::createFromJson(jsWcA3, authContext_); + LOGS(_log, LOG_LVL_INFO, "wccIssueA1=" << wccIssueA1->dump()); + LOGS(_log, LOG_LVL_INFO, "wccIssueA3Out1=" << wccIssueA3Out1->dump()); + BOOST_REQUIRE(*wccIssueA1 != *wccIssueA2Out1); + BOOST_REQUIRE(*wccIssueA1 == *wccIssueA3Out1); + + // Add the ExecutiveRespMsg messages that correlate to the failed transmits to + // the czar response message so they can be used to clear the map entries. + auto czarRespMsgJson = + wccIssueA3Out1->responseToJson(wccIssueA3Out1->getThoughtCzarWasDeadTime(), czarResponseMsgs); + LOGS(_log, LOG_LVL_INFO, "czarRespMsgJson=" << czarRespMsgJson); + + // Parse the response and remove the appropriate entries from wccIssueA1. + auto czRespMsg = lsst::qserv::protojson::WorkerCzarComRespMsg::createFromJson(czarRespMsgJson); + LOGS(_log, LOG_LVL_INFO, "czRespMsg=" << czRespMsg->dump()); + BOOST_REQUIRE(czRespMsg->success == true); + + // Use the response to clear the original transmit failure messages from the originating worker.
+ auto [countA3Out1, obsoleteList, parseErrorList] = wccIssueA1->clearMapEntries(czarRespMsgJson); + LOGS(_log, LOG_LVL_INFO, + "countA3Out1=" << countA3Out1 << " obsoleteSz=" << obsoleteList.size() + << " parseErrorSz=" << parseErrorList.size()); + BOOST_REQUIRE(countA3Out1 == 4); + BOOST_REQUIRE(obsoleteList.size() == 2); + BOOST_REQUIRE(parseErrorList.size() == 0); + + auto ftMap = wccIssueA1->takeFailedTransmitsMap(); + BOOST_REQUIRE(ftMap->size() == 0); +} + +BOOST_AUTO_TEST_CASE(WorkerCzarComIssueClearFailedTransmitsForQids) { + AuthContext authContext_("repliInstId", "repliIAuthKey"); + + uint64_t czStartTime = lsst::qserv::millisecSinceEpoch(lsst::qserv::CLOCK::now() - 5s); + string const czName("czar_name"); + lsst::qserv::CzarId const czId = 32; + int czPort = 2022; + string const czHost("cz_host"); + + auto czarA = lsst::qserv::protojson::CzarContactInfo::create(czName, czId, czPort, czHost, czStartTime); + + auto start = lsst::qserv::CLOCK::now(); + auto workerA = WorkerContactInfo::create("sd_workerA", "host_w1", "mgmhost_a", 3421, start); + + // Create the WorkerCzarComIssue and set contact info + auto wcc = lsst::qserv::protojson::WorkerCzarComIssue::create(authContext_); + wcc->setContactInfo(workerA, czarA); + + // Build three failed transmit messages: + // - qIdA has two UberJobs (uj 1 and uj 2) + // - qIdB has one UberJob (uj 3) + lsst::qserv::QueryId const qIdA = 1001; + lsst::qserv::QueryId const qIdB = 2002; + lsst::qserv::UberJobId const ujA1 = 1; + lsst::qserv::UberJobId const ujA2 = 2; + lsst::qserv::UberJobId const ujB1 = 3; + + string const workerId1 = "wrkr1"; + int const resultPort = 436; + int const rowlimit = 0; + int const maxTableBytes = 1'000'000; + bool const scaninteractive = true; + auto scanInfo = lsst::qserv::protojson::ScanInfo::create(); + + // create UberJobData for qIdA ujA1 + lsst::qserv::protojson::FileUrlInfo fileInf1("http://test/fn1", 10, 100); + auto ujDataA1 = lsst::qserv::wbase::UberJobData::create( + ujA1, czName,
czId, czHost, czPort, qIdA, rowlimit, maxTableBytes, scanInfo, scaninteractive, + workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, resultPort); + auto ujRespA1 = ujDataA1->responseFileReadyBuild(fileInf1, authContext_); + wcc->addFailedTransmit(qIdA, ujA1, ujRespA1); + + // create UberJobData for qIdA ujA2 + lsst::qserv::protojson::FileUrlInfo fileInf2("http://test/fn2", 20, 200); + auto ujDataA2 = lsst::qserv::wbase::UberJobData::create( + ujA2, czName, czId, czHost, czPort, qIdA, rowlimit, maxTableBytes, scanInfo, scaninteractive, + workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, resultPort); + auto ujRespA2 = ujDataA2->responseFileReadyBuild(fileInf2, authContext_); + wcc->addFailedTransmit(qIdA, ujA2, ujRespA2); + + // create UberJobData for qIdB ujB1 + lsst::qserv::protojson::FileUrlInfo fileInf3("http://test/fn3", 30, 300); + auto ujDataB1 = lsst::qserv::wbase::UberJobData::create( + ujB1, czName, czId, czHost, czPort, qIdB, rowlimit, maxTableBytes, scanInfo, scaninteractive, + workerId1, nullptr, queriesAndChunksG, authContext_.replicationAuthKey, resultPort); + auto ujRespB1 = ujDataB1->responseFileReadyBuild(fileInf3, authContext_); + wcc->addFailedTransmit(qIdB, ujB1, ujRespB1); + + // Confirm three failed transmits exist via JSON representation + auto jsBefore = wcc->toJson(); + BOOST_REQUIRE(jsBefore.contains("failedtransmits")); + auto const& arrBefore = jsBefore["failedtransmits"]; + BOOST_REQUIRE(arrBefore.is_array()); + BOOST_REQUIRE(arrBefore.size() == 3); + + // Prepare a map with qIdA to be cleared + std::map qIdMap; + qIdMap[qIdA] = lsst::qserv::CLOCK::now(); + + // Call clearFailedTransmitsForQids + wcc->clearFailedTransmitsForQids(qIdMap); + + // Verify only the qIdB entry remains + auto jsAfter = wcc->toJson(); + BOOST_REQUIRE(jsAfter.contains("failedtransmits")); + auto const& arrAfter = jsAfter["failedtransmits"]; + BOOST_REQUIRE(arrAfter.is_array()); + BOOST_REQUIRE(arrAfter.size() == 1); + // 
remaining element should have qId == qIdB + auto rem = arrAfter[0]; + BOOST_REQUIRE(rem.contains("qId")); + lsst::qserv::QueryId remQ = rem["qId"]; + BOOST_REQUIRE(remQ == qIdB); +} + +BOOST_AUTO_TEST_CASE(ResponseMsg) { + auto respMsgA = lsst::qserv::protojson::ResponseMsg::create(true); + auto jsA = respMsgA->toJson(); + auto respMsgAOut = lsst::qserv::protojson::ResponseMsg::createFromJson(jsA); + BOOST_REQUIRE(respMsgA->equal(*respMsgAOut)); + + auto respMsgB = lsst::qserv::protojson::ResponseMsg::create(false, "asdrewjgfay523yuq@", "junk msg"); + auto respMsgC = lsst::qserv::protojson::ResponseMsg::create(false, "asd", "junk msg"); + auto respMsgD = lsst::qserv::protojson::ResponseMsg::create(false, "asdrewjgfay523yuq@", "junkmsg"); + auto jsB = respMsgB->toJson(); + auto respMsgBOut = lsst::qserv::protojson::ResponseMsg::createFromJson(jsB); + BOOST_REQUIRE(respMsgB->equal(*respMsgBOut)); + BOOST_REQUIRE(!respMsgA->equal(*respMsgBOut)); + BOOST_REQUIRE(!respMsgB->equal(*respMsgC)); + BOOST_REQUIRE(!respMsgD->equal(*respMsgC)); +} + +BOOST_AUTO_TEST_CASE(ExecutiveRespMsg) { + auto respMsgA = lsst::qserv::protojson::ExecutiveRespMsg::create(true, false, 123, 456, 9, "allGood", + "just a test"); + auto jsA = respMsgA->toJson(); + auto respMsgAOut = lsst::qserv::protojson::ExecutiveRespMsg::createFromJson(jsA); + BOOST_REQUIRE(respMsgA->equal(*respMsgAOut)); +} + +BOOST_AUTO_TEST_CASE(WorkerCzarComRespMsg) { + auto respMsgA = lsst::qserv::protojson::WorkerCzarComRespMsg::create(true, 73); + auto jsA = respMsgA->toJson(); + auto respMsgAOut = lsst::qserv::protojson::WorkerCzarComRespMsg::createFromJson(jsA); + BOOST_REQUIRE(respMsgA->equal(*respMsgAOut)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/protojson/testUberJobErrorMsg.cc b/src/protojson/testUberJobErrorMsg.cc new file mode 100644 index 0000000000..239805246f --- /dev/null +++ b/src/protojson/testUberJobErrorMsg.cc @@ -0,0 +1,123 @@ +/* + * LSST Data Management System + * + * This product includes 
software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// System headers +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + +// Qserv headers +#include "global/clock_defs.h" +#include "http/MetaModule.h" +#include "lsst/log/Log.h" +#include "protojson/PwHideJson.h" +#include "protojson/UberJobErrorMsg.h" + +// Boost unit test header +#define BOOST_TEST_MODULE RequestQuery +#include + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.testUberJobErrorMsg"); +} + +using namespace std; +namespace test = boost::test_tools; +using namespace lsst::qserv::protojson; + +AuthContext const authContext_("repliInstId", "repliIAuthKey"); +unsigned int const version = lsst::qserv::http::MetaModule::version; + +BOOST_AUTO_TEST_SUITE(Suite) + +bool parseSerializeReparseCheck(string const& jsStr, string const& note) { + string fName("parseSerialize "); + fName += note + " "; + LOGS(_log, LOG_LVL_INFO, fName << " start " << jsStr); + nlohmann::json js = nlohmann::json::parse(jsStr); + LOGS(_log, LOG_LVL_INFO, fName << " parse 1"); + + UberJobErrorMsg::Ptr jrm = UberJobErrorMsg::createFromJson(js); + BOOST_REQUIRE(jrm != nullptr); + + auto jsJrm = jrm->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " serialized jsJrm=" << jsJrm); + + UberJobErrorMsg::Ptr jrmCreated = 
UberJobErrorMsg::createFromJson(jsJrm); + LOGS(_log, LOG_LVL_INFO, fName << " created"); + auto jsJrmCreated = jrmCreated->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " created->serialized"); + + bool createdMatchesOriginal = jsJrm == jsJrmCreated; + if (createdMatchesOriginal) { + LOGS(_log, LOG_LVL_INFO, fName << "created matches original"); + } else { + LOGS(_log, LOG_LVL_ERROR, "jsJrm != jsJrmCreated"); + LOGS(_log, LOG_LVL_ERROR, "jsJrm=" << jsJrm); + LOGS(_log, LOG_LVL_ERROR, "jsJrmCreated=" << jsJrmCreated); + } + BOOST_REQUIRE(createdMatchesOriginal); + return createdMatchesOriginal; +} + +BOOST_AUTO_TEST_CASE(WorkerQueryStatusData) { + LOGS(_log, LOG_LVL_INFO, "testJRM start"); + + string const workerIdStr("wrker72"); + string const czarName("cz4242"); + lsst::qserv::CzarId const czarId = 745; + lsst::qserv::QueryId const queryId = 986532; + lsst::qserv::UberJobId const uberJobId = 14578; + lsst::qserv::util::Error err1(-3, 8, {3, 5, 11}, {1, 2, 3}, "something went wrong", true); + lsst::qserv::util::Error err2(7, 13, "oops another", true); + lsst::qserv::util::MultiError multiE; + multiE.insert(err1); + multiE.insert(err2); + auto jrm = UberJobErrorMsg::create(authContext_, version, workerIdStr, czarName, czarId, queryId, + uberJobId, multiE); + + auto jsJrm = jrm->toJson(); + string const strJrm = to_string(jsJrm); + LOGS(_log, LOG_LVL_INFO, "stdJrm=" << strJrm); + + BOOST_REQUIRE(parseSerializeReparseCheck(strJrm, "A")); +} + +BOOST_AUTO_TEST_CASE(PwHideJson) { + LOGS(_log, LOG_LVL_INFO, "testPwHideJson start"); + + nlohmann::json tst1({{"a", 36}, {"pw", {5, 8, 9}}, {"auth_key", "jsonauth"}, {"password", 7}}); + std::cout << "tst1=" << tst1 << endl; + nlohmann::json expected1({{"a", 36}, {"pw", "-"}, {"auth_key", "-"}, {"password", "-"}}); + std::cout << "expected1=" << expected1 << endl; + + lsst::qserv::protojson::PwHideJson pwHide; + auto out1 = pwHide.hide(tst1); + std::cout << "out1=" << out1 << endl; + + BOOST_REQUIRE(out1 == expected1); +} + 
+BOOST_AUTO_TEST_SUITE_END() diff --git a/src/protojson/testUberJobMsg.cc b/src/protojson/testUberJobMsg.cc new file mode 100644 index 0000000000..26dc8457aa --- /dev/null +++ b/src/protojson/testUberJobMsg.cc @@ -0,0 +1,123 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// System headers +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + +// Qserv headers +#include "global/clock_defs.h" +#include "lsst/log/Log.h" +#include "protojson/UberJobMsg.h" + +// Boost unit test header +#define BOOST_TEST_MODULE RequestQuery +#include + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.testUberJobMsg"); +} + +using namespace std; +namespace test = boost::test_tools; +using namespace lsst::qserv::protojson; + +BOOST_AUTO_TEST_SUITE(Suite) + +string testA() { + string ta = + R"({"maxtablesizemb":5432,"scaninteractive":true,"auth_key":"replauthkey","czarinfo":{"czar-startup-time":1732658208085,"id":1,"management-host-name":"3a8b68cf9b67","management-port":40865,"name":"proxy"},"dbtables_map":[],"scaninfo":{"infoscanrating":0,"infotables":[]},"instance_id":"qserv_proj","jobs":[{"attemptCount":0,"chunkId":1234567890,"chunkresultname":"r_1_a0d45001254932466b784acf90323565_1234567890_0","jobId":0,"queryFragments":[{"dbtables_indexes":[],"resulttblname":"r_1_a0d45001254932466b784acf90323565_1234567890_0","subchunkids":[],"subquerytemplate_indexes":[0]}],"querySpecDb":"qcase01","scanInteractive":true,"scanPriority":0}],"queryid":1,"rowlimit":0,"subqueries_map":{"subquerytemplate_map":[{"index":0,"template":"SELECT `qcase01.Filter`.`filterId` AS `filterId`,`qcase01.Filter`.`filterName` AS `filterName`,`qcase01.Filter`.`photClam` AS `photClam`,`qcase01.Filter`.`photBW` AS `photBW` FROM `qcase01`.`Filter`AS`qcase01.Filter` WHERE (`qcase01.Filter`.`filterId`<<1)=2"}]},"uberjobid":2,"version":55,"worker":"6c56ba9b-ac40-11ef-acb7-0242c0a8030a"})"; + return ta; +} + +string testB() { + string tb = + 
R"({"auth_key":"slac6dev:kukara4a","czarinfo":{"czar-startup-time":1733499789161,"id":7,"management-host-name":"sdfqserv001.sdf.slac.stanford.edu","management-port":41923,"name":"proxy"},"dbtables_map":[{"db":"dp02_dc2_catalogs","index":0,"table":"Object"}],"instance_id":"slac6dev","jobs":[{"attemptCount":0,"chunkId":79680,"chunkresultname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_79680_0","jobId":1398,"queryFragments":[{"dbtables_indexes":[],"resulttblname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_79680_0","subchunkids":[],"subquerytemplate_indexes":[0]}],"querySpecDb":"dp02_dc2_catalogs","scanInteractive":false,"scanPriority":1},{"attemptCount":0,"chunkId":80358,"chunkresultname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_80358_0","chunkscantables_indexes":[0],"jobId":1435,"queryFragments":[{"dbtables_indexes":[],"resulttblname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_80358_0","subchunkids":[],"subquerytemplate_indexes":[1]}],"querySpecDb":"dp02_dc2_catalogs","scanInteractive":false,"scanPriority":1},{"attemptCount":0,"chunkId":81017,"chunkresultname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_81017_0","chunkscantables_indexes":[0],"jobId":1452,"queryFragments":[{"dbtables_indexes":[],"resulttblname":"r_280607_e6eac6bb53b0f8505ed36bf82a4d93f1_81017_0","subchunkids":[],"subquerytemplate_indexes":[2]}],"querySpecDb":"dp02_dc2_catalogs","scanInteractive":false,"scanPriority":1}],"maxtablesizemb":5100,"scaninteractive":false,"queryid":280607,"rowlimit":0,"scaninfo":{"infoscanrating":1,"infotables":[{"sidb":"dp02_dc2_catalogs","silockinmem":true,"sirating":1,"sitable":"Object"}]},"subqueries_map":{"subquerytemplate_map":[{"index":0,"template":"SELECT COUNT(`obj`.`g_ap12Flux`) AS `QS1_COUNT`,SUM(`obj`.`g_ap12Flux`) AS `QS2_SUM`,MIN(`obj`.`g_ap12Flux`) AS `QS3_MIN`,MAX(`obj`.`g_ap12Flux`) AS `QS4_MAX`,COUNT(`obj`.`g_ap12FluxErr`) AS `QS5_COUNT`,SUM(`obj`.`g_ap12FluxErr`) AS `QS6_SUM`,MIN(`obj`.`g_ap12FluxErr`) AS `QS7_MIN`,MAX(`obj`.`g_ap12FluxErr`) AS 
`QS8_MAX`,COUNT(`obj`.`g_ap25Flux`) AS `QS9_COUNT`,SUM(`obj`.`g_ap25Flux`) AS `QS10_SUM`,MIN(`obj`.`g_ap25Flux`) AS `QS11_MIN`,MAX(`obj`.`g_ap25Flux`) AS `QS12_MAX`,COUNT(`obj`.`g_ap25FluxErr`) AS `QS13_COUNT`,SUM(`obj`.`g_ap25FluxErr`) AS `QS14_SUM`,MIN(`obj`.`g_ap25FluxErr`) AS `QS15_MIN`,MAX(`obj`.`g_ap25FluxErr`) AS `QS16_MAX` FROM `dp02_dc2_catalogs`.`Object_79680` AS `obj`"},{"index":1,"template":"SELECT COUNT(`obj`.`g_ap12Flux`) AS `QS1_COUNT`,SUM(`obj`.`g_ap12Flux`) AS `QS2_SUM`,MIN(`obj`.`g_ap12Flux`) AS `QS3_MIN`,MAX(`obj`.`g_ap12Flux`) AS `QS4_MAX`,COUNT(`obj`.`g_ap12FluxErr`) AS `QS5_COUNT`,SUM(`obj`.`g_ap12FluxErr`) AS `QS6_SUM`,MIN(`obj`.`g_ap12FluxErr`) AS `QS7_MIN`,MAX(`obj`.`g_ap12FluxErr`) AS `QS8_MAX`,COUNT(`obj`.`g_ap25Flux`) AS `QS9_COUNT`,SUM(`obj`.`g_ap25Flux`) AS `QS10_SUM`,MIN(`obj`.`g_ap25Flux`) AS `QS11_MIN`,MAX(`obj`.`g_ap25Flux`) AS `QS12_MAX`,COUNT(`obj`.`g_ap25FluxErr`) AS `QS13_COUNT`,SUM(`obj`.`g_ap25FluxErr`) AS `QS14_SUM`,MIN(`obj`.`g_ap25FluxErr`) AS `QS15_MIN`,MAX(`obj`.`g_ap25FluxErr`) AS `QS16_MAX` FROM `dp02_dc2_catalogs`.`Object_80358` AS `obj`"},{"index":2,"template":"SELECT COUNT(`obj`.`g_ap12Flux`) AS `QS1_COUNT`,SUM(`obj`.`g_ap12Flux`) AS `QS2_SUM`,MIN(`obj`.`g_ap12Flux`) AS `QS3_MIN`,MAX(`obj`.`g_ap12Flux`) AS `QS4_MAX`,COUNT(`obj`.`g_ap12FluxErr`) AS `QS5_COUNT`,SUM(`obj`.`g_ap12FluxErr`) AS `QS6_SUM`,MIN(`obj`.`g_ap12FluxErr`) AS `QS7_MIN`,MAX(`obj`.`g_ap12FluxErr`) AS `QS8_MAX`,COUNT(`obj`.`g_ap25Flux`) AS `QS9_COUNT`,SUM(`obj`.`g_ap25Flux`) AS `QS10_SUM`,MIN(`obj`.`g_ap25Flux`) AS `QS11_MIN`,MAX(`obj`.`g_ap25Flux`) AS `QS12_MAX`,COUNT(`obj`.`g_ap25FluxErr`) AS `QS13_COUNT`,SUM(`obj`.`g_ap25FluxErr`) AS `QS14_SUM`,MIN(`obj`.`g_ap25FluxErr`) AS `QS15_MIN`,MAX(`obj`.`g_ap25FluxErr`) AS `QS16_MAX` FROM `dp02_dc2_catalogs`.`Object_81017` AS `obj`"}]},"uberjobid":147,"version":55,"worker":"db04"})"; + return tb; +} + +bool parseSerializeReparseCheck(string const& jsStr, string const& note) { + string fName("parseSerialize 
"); + fName += note + " "; + LOGS(_log, LOG_LVL_INFO, fName << " start " << jsStr); + nlohmann::json js = nlohmann::json::parse(jsStr); + LOGS(_log, LOG_LVL_INFO, fName << " parse 1"); + + UberJobMsg::Ptr ujm = UberJobMsg::createFromJson(js); + BOOST_REQUIRE(ujm != nullptr); + + nlohmann::json jsUjm = ujm->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " serialized jsUjm=" << jsUjm); + + UberJobMsg::Ptr ujmCreated = UberJobMsg::createFromJson(jsUjm); + LOGS(_log, LOG_LVL_INFO, fName << " created"); + nlohmann::json jsUjmCreated = ujmCreated->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " created->serialized"); + + bool createdMatchesOriginal = jsUjm == jsUjmCreated; + if (createdMatchesOriginal) { + LOGS(_log, LOG_LVL_INFO, fName << "created matches original"); + } else { + LOGS(_log, LOG_LVL_ERROR, "jsUjm != jsUjmCreated"); + LOGS(_log, LOG_LVL_ERROR, "jsUjm=" << jsUjm); + LOGS(_log, LOG_LVL_ERROR, "jsUjmCreated=" << jsUjmCreated); + } + BOOST_REQUIRE(createdMatchesOriginal); + return createdMatchesOriginal; +} + +BOOST_AUTO_TEST_CASE(WorkerQueryStatusData) { + string const replicationInstanceId = "repliInstId"; + string const replicationAuthKey = "repliIAuthKey"; + + LOGS(_log, LOG_LVL_INFO, "testUJM start"); + string jsStr = testA(); + nlohmann::json js = nlohmann::json::parse(jsStr); + UberJobMsg::Ptr ujm = UberJobMsg::createFromJson(js); + BOOST_REQUIRE(ujm != nullptr); + + nlohmann::json jsUjm = ujm->toJson(); + + LOGS(_log, LOG_LVL_INFO, "js=" << js); + LOGS(_log, LOG_LVL_INFO, "jsUjm=" << jsUjm); + + UberJobMsg::Ptr ujmCreated = UberJobMsg::createFromJson(jsUjm); + LOGS(_log, LOG_LVL_INFO, "ujmCreated=" << ujmCreated); + nlohmann::json jsUjmCreated = ujmCreated->toJson(); + + bool createdMatchesOriginal = jsUjm == jsUjmCreated; + if (!createdMatchesOriginal) { + LOGS(_log, LOG_LVL_ERROR, "jsUjm != jsUjmCreated"); + LOGS(_log, LOG_LVL_ERROR, "jsUjm=" << jsUjm); + LOGS(_log, LOG_LVL_ERROR, "jsUjmCreated=" << jsUjmCreated); + } + 
BOOST_REQUIRE(createdMatchesOriginal); + + BOOST_REQUIRE(parseSerializeReparseCheck(testA(), "A")); + BOOST_REQUIRE(parseSerializeReparseCheck(testB(), "B")); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/protojson/testUberJobReadyMsg.cc b/src/protojson/testUberJobReadyMsg.cc new file mode 100644 index 0000000000..5345539ac3 --- /dev/null +++ b/src/protojson/testUberJobReadyMsg.cc @@ -0,0 +1,136 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// System headers +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + +// Qserv headers +#include "global/clock_defs.h" +#include "http/MetaModule.h" +#include "lsst/log/Log.h" +#include "protojson/ChunkUseCountAnswerMsg.h" +#include "protojson/UberJobReadyMsg.h" + +// Boost unit test header +#define BOOST_TEST_MODULE RequestQuery +#include + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.protojson.testUberJobReadyMsg"); +} + +using namespace std; +namespace test = boost::test_tools; +using namespace lsst::qserv::protojson; + +AuthContext const authContext_("repliInstId", "repliIAuthKey"); +unsigned int const version = lsst::qserv::http::MetaModule::version; + +BOOST_AUTO_TEST_SUITE(Suite) + +bool parseSerializeReparseCheck(string const& jsStr, string const& note) { + string fName("parseSerialize "); + fName += note + " "; + LOGS(_log, LOG_LVL_INFO, fName << " start " << jsStr); + nlohmann::json js = nlohmann::json::parse(jsStr); + LOGS(_log, LOG_LVL_INFO, fName << " parse 1"); + + UberJobReadyMsg::Ptr jrm = UberJobReadyMsg::createFromJson(js); + BOOST_REQUIRE(jrm != nullptr); + + auto jsJrm = jrm->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " serialized jsJrm=" << jsJrm); + + UberJobReadyMsg::Ptr jrmCreated = UberJobReadyMsg::createFromJson(jsJrm); + LOGS(_log, LOG_LVL_INFO, fName << " created"); + auto jsJrmCreated = jrmCreated->toJson(); + LOGS(_log, LOG_LVL_INFO, fName << " created->serialized"); + + bool createdMatchesOriginal = jsJrm == jsJrmCreated; + if (createdMatchesOriginal) { + LOGS(_log, LOG_LVL_INFO, fName << "created matches original"); + } else { + LOGS(_log, LOG_LVL_ERROR, "jsJrm != jsJrmCreated"); + LOGS(_log, LOG_LVL_ERROR, "jsJrm=" << jsJrm); + LOGS(_log, LOG_LVL_ERROR, "jsJrmCreated=" << jsJrmCreated); + } + BOOST_REQUIRE(createdMatchesOriginal); + return createdMatchesOriginal; +} + +BOOST_AUTO_TEST_CASE(WorkerQueryStatusData) { + LOGS(_log, LOG_LVL_INFO, "testJRM start"); + + string const 
workerIdStr("wrker72"); + string const czarName("cz4242"); + lsst::qserv::CzarId const czarId = 745; + lsst::qserv::QueryId const queryId = 986532; + lsst::qserv::UberJobId const uberJobId = 14578; + uint64_t const rowCount = 391; + uint64_t const fileSize = 5623; + FileUrlInfo fileUrlInfo_("ht.qwrk/some/dir/fil.txt", rowCount, fileSize); + + auto jrm = UberJobReadyMsg::create(authContext_, version, workerIdStr, czarName, czarId, queryId, + uberJobId, fileUrlInfo_); + + auto jsJrm = jrm->toJson(); + string const strJrm = to_string(jsJrm); + LOGS(_log, LOG_LVL_INFO, "stdJrm=" << strJrm); + + BOOST_REQUIRE(parseSerializeReparseCheck(strJrm, "A")); +} + +BOOST_AUTO_TEST_CASE(testChunkUseCountAnswerMsg) { + LOGS(_log, LOG_LVL_INFO, "test ChunkUseCountAnswerMsg start"); + + ChunkUseCountAnswerMsg::DbChunkCountMapPtr dbChunkCountMap1 = + make_shared(); + (*dbChunkCountMap1)["db1"] = {{1, 3}, {8, 5}, {9, 1153}}; + (*dbChunkCountMap1)["db2"] = {{1, 7}, {8, 11}, {9, 200}}; + + auto cAnswerMsgA = ChunkUseCountAnswerMsg::create(dbChunkCountMap1); + auto jsMsgA = cAnswerMsgA->toJson(); + LOGS(_log, LOG_LVL_INFO, "jsMsgA=" << jsMsgA); + + auto cAnswerMsgB = ChunkUseCountAnswerMsg::createFromJson(jsMsgA); + auto jsMsgB = cAnswerMsgB->toJson(); + LOGS(_log, LOG_LVL_INFO, "jsMsgB=" << jsMsgB); + + BOOST_REQUIRE(jsMsgA == jsMsgB); + BOOST_REQUIRE(cAnswerMsgA->equal(*cAnswerMsgB)); + + ChunkUseCountAnswerMsg::DbChunkCountMapPtr dbChunkCountMap2 = + make_shared(); + (*dbChunkCountMap2)["db1"] = {{1, 3}, {8, 5}, {9, 1153}}; + (*dbChunkCountMap2)["db2"] = {{1, 7}, {8, 12}, {9, 200}}; + auto cAnswerMsg2 = ChunkUseCountAnswerMsg::create(dbChunkCountMap2); + auto jsMsg2 = cAnswerMsg2->toJson(); + BOOST_REQUIRE(jsMsgA != jsMsg2); + BOOST_REQUIRE(!cAnswerMsgA->equal(*cAnswerMsg2)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/proxy/CMakeLists.txt b/src/proxy/CMakeLists.txt index 5aa1ca3e2c..03dd2fe1f2 100644 --- a/src/proxy/CMakeLists.txt +++ b/src/proxy/CMakeLists.txt @@ -27,7 +27,7 
@@ target_link_libraries(czarProxy PRIVATE rproc css qmeta - xrdreq) +) install(TARGETS czarProxy DESTINATION lua/qserv/lib) install(FILES mysqlProxy.lua DESTINATION lua/qserv/scripts) diff --git a/src/qana/CMakeLists.txt b/src/qana/CMakeLists.txt index a49dd9e9bb..ace1a22f03 100644 --- a/src/qana/CMakeLists.txt +++ b/src/qana/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(qana SHARED) -add_dependencies(qana proto) target_sources(qana PRIVATE AggregatePlugin.cc @@ -21,9 +20,7 @@ target_link_libraries(qana PUBLIC log ) -install ( - TARGETS qana -) +install (TARGETS qana) FUNCTION(qana_tests) FOREACH(TEST IN ITEMS ${ARGV}) @@ -31,6 +28,8 @@ FUNCTION(qana_tests) target_link_libraries(${TEST} PUBLIC cconfig ccontrol + czar + global parser qana qdisp @@ -39,7 +38,6 @@ FUNCTION(qana_tests) css qmeta rproc - xrdreq Boost::unit_test_framework Threads::Threads ) diff --git a/src/qana/DuplSelectExprPlugin.cc b/src/qana/DuplSelectExprPlugin.cc index 1c0183866f..79b5b8f8e9 100644 --- a/src/qana/DuplSelectExprPlugin.cc +++ b/src/qana/DuplSelectExprPlugin.cc @@ -91,8 +91,8 @@ util::MultiError DuplSelectExprPlugin::getDuplicateAndPosition(StringVector cons boost::format err_msg = boost::format(ERR_MSG) % key % os.str(); - util::Error error(util::ErrorCode::DUPLICATE_SELECT_EXPR, err_msg.str()); - multiError.push_back(error); + util::Error error(util::Error::DUPLICATE_SELECT_EXPR, util::Error::NONE, err_msg.str()); + multiError.insert(error); } } diff --git a/src/qana/ScanTablePlugin.cc b/src/qana/ScanTablePlugin.cc index 0b4b197f1d..136bc27eaf 100644 --- a/src/qana/ScanTablePlugin.cc +++ b/src/qana/ScanTablePlugin.cc @@ -41,7 +41,6 @@ // Qserv headers #include "global/stringTypes.h" -#include "proto/ScanTableInfo.h" #include "query/ColumnRef.h" #include "query/FromList.h" #include "query/QueryContext.h" @@ -66,8 +65,8 @@ void ScanTablePlugin::applyLogical(query::SelectStmt& stmt, query::QueryContext& void ScanTablePlugin::applyFinal(query::QueryContext& context) { int const 
scanThreshold = _interactiveChunkLimit; if (context.chunkCount < scanThreshold) { - context.scanInfo.infoTables.clear(); - context.scanInfo.scanRating = 0; + context.scanInfo->infoTables.clear(); + context.scanInfo->scanRating = 0; LOGS(_log, LOG_LVL_INFO, "ScanInfo Squash full table scan tables: <" << scanThreshold << " chunks."); } } @@ -94,7 +93,8 @@ StringPairVector filterPartitioned(query::TableRefList const& tList) { return vector; } -proto::ScanInfo ScanTablePlugin::_findScanTables(query::SelectStmt& stmt, query::QueryContext& context) { +protojson::ScanInfo::Ptr ScanTablePlugin::_findScanTables(query::SelectStmt& stmt, + query::QueryContext& context) { // Might be better as a separate plugin // All tables of a query are scan tables if the statement both: @@ -191,15 +191,15 @@ proto::ScanInfo ScanTablePlugin::_findScanTables(query::SelectStmt& stmt, query: // Ask css if any of the tables should be locked in memory and their scan rating. // Use this information to determine scanPriority. - proto::ScanInfo scanInfo; + auto scanInfo = protojson::ScanInfo::create(); for (auto& pair : scanTables) { - proto::ScanTableInfo info(pair.first, pair.second); + protojson::ScanTableInfo info(pair.first, pair.second); css::ScanTableParams const params = context.css->getScanTableParams(info.db, info.table); info.lockInMemory = params.lockInMem; info.scanRating = params.scanRating; - scanInfo.infoTables.push_back(info); - scanInfo.scanRating = std::max(scanInfo.scanRating, info.scanRating); - scanInfo.scanRating = std::min(scanInfo.scanRating, static_cast(proto::ScanInfo::SLOWEST)); + scanInfo->infoTables.push_back(info); + scanInfo->scanRating = std::max(scanInfo->scanRating, info.scanRating); + scanInfo->scanRating = std::min(scanInfo->scanRating, static_cast(protojson::ScanInfo::SLOWEST)); LOGS(_log, LOG_LVL_INFO, "ScanInfo " << info.db << "." 
<< info.table << " lockInMemory=" << info.lockInMemory << " rating=" << info.scanRating); diff --git a/src/qana/ScanTablePlugin.h b/src/qana/ScanTablePlugin.h index 145424852f..aa069710c0 100644 --- a/src/qana/ScanTablePlugin.h +++ b/src/qana/ScanTablePlugin.h @@ -27,7 +27,7 @@ #include "qana/QueryPlugin.h" // Qserv headers -#include "proto/ScanTableInfo.h" +#include "protojson/ScanTableInfo.h" namespace lsst::qserv::qana { @@ -55,8 +55,8 @@ class ScanTablePlugin : public QueryPlugin { std::string name() const override { return "ScanTablePlugin"; } private: - proto::ScanInfo _findScanTables(query::SelectStmt& stmt, query::QueryContext& context); - proto::ScanInfo _scanInfo; + protojson::ScanInfo::Ptr _findScanTables(query::SelectStmt& stmt, query::QueryContext& context); + protojson::ScanInfo::Ptr _scanInfo; int _interactiveChunkLimit; }; diff --git a/src/qana/testDuplSelectExprPlugin.cc b/src/qana/testDuplSelectExprPlugin.cc index aa63caabe5..b074170791 100644 --- a/src/qana/testDuplSelectExprPlugin.cc +++ b/src/qana/testDuplSelectExprPlugin.cc @@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE(getDuplicateAndPosition) { util::MultiError errors = testPlugin.getDuplicateAndPosition(v); std::stringstream sstm; - sstm << "[" << util::ErrorCode::DUPLICATE_SELECT_EXPR << "] " << DuplSelectExprPlugin::ERR_MSG; + sstm << "[count=1][code=" << util::Error::DUPLICATE_SELECT_EXPR << "] " << DuplSelectExprPlugin::ERR_MSG; std::string err_msg_template = sstm.str(); boost::format dupl_field_err_msg = boost::format(err_msg_template) % "f1" % " 2 3"; diff --git a/src/qdisp/CMakeLists.txt b/src/qdisp/CMakeLists.txt index aacde12776..f8e1fff05d 100644 --- a/src/qdisp/CMakeLists.txt +++ b/src/qdisp/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(qdisp SHARED) -add_dependencies(qdisp proto) target_sources(qdisp PRIVATE ChunkMeta.cc @@ -7,38 +6,24 @@ target_sources(qdisp PRIVATE Executive.cc JobDescription.cc JobQuery.cc - JobStatus.cc - MessageStore.cc - QdispPool.cc - QueryRequest.cc - 
XrdSsiMocks.cc -) - -target_include_directories(qdisp PRIVATE - ${XROOTD_INCLUDE_DIRS} + UberJob.cc ) target_link_libraries(qdisp PUBLIC cconfig log - http - XrdSsiLib ) -install( - TARGETS qdisp -) +install(TARGETS qdisp) add_executable(testQDisp testQDisp.cc) -target_include_directories(testQDisp PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - target_link_libraries(testQDisp cconfig ccontrol czar + global + mysql parser qana qdisp @@ -47,12 +32,11 @@ target_link_libraries(testQDisp qmeta query rproc - xrdreq Boost::unit_test_framework Threads::Threads ) # This is failing in github actions CI but not when running locally on my dev machine. -# add_test(NAME testQDisp COMMAND testQDisp) +add_test(NAME testQDisp COMMAND testQDisp) # set_tests_properties(testQDisp PROPERTIES WILL_FAIL 1) diff --git a/src/qdisp/CzarStats.cc b/src/qdisp/CzarStats.cc index 3dd53eceb1..34061fe169 100644 --- a/src/qdisp/CzarStats.cc +++ b/src/qdisp/CzarStats.cc @@ -29,8 +29,8 @@ // Qserv headers #include "cconfig/CzarConfig.h" -#include "qdisp/QdispPool.h" #include "util/Bug.h" +#include "util/QdispPool.h" #include "util/TimeUtils.h" // LSST headers @@ -46,23 +46,23 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarStats"); namespace lsst::qserv::qdisp { CzarStats::Ptr CzarStats::_globalCzarStats; -util::Mutex CzarStats::_globalMtx; +MUTEX CzarStats::_globalMtx; -void CzarStats::setup(qdisp::QdispPool::Ptr const& qdispPool) { - std::lock_guard lg(_globalMtx); +void CzarStats::setup(util::QdispPool::Ptr const& qdispPool) { + std::lock_guard lg(_globalMtx); if (_globalCzarStats != nullptr || qdispPool == nullptr) { throw util::Bug(ERR_LOC, "Error CzarStats::setup called after global pointer set or qdispPool=null."); } _globalCzarStats = Ptr(new CzarStats(qdispPool)); } -CzarStats::CzarStats(qdisp::QdispPool::Ptr const& qdispPool) +CzarStats::CzarStats(util::QdispPool::Ptr const& qdispPool) : _qdispPool(qdispPool), _startTimeMs(util::TimeUtils::now()) { auto bucketValsRates = {128'000.0, 512'000.0, 
1'024'000.0, 16'000'000.0, 128'000'000.0, 256'000'000.0, 512'000'000.0, 768'000'000.0, 1'000'000'000.0, 2'000'000'000.0, 4'000'000'000.0, 8'000'000'000.0}; - _histXRootDSSIRecvRate = util::HistogramRolling::Ptr( - new util::HistogramRolling("XRootDSSIRecvRateBytesPerSec", bucketValsRates, 1h, 10000)); + _histDataRecvRate = util::HistogramRolling::Ptr( + new util::HistogramRolling("DataRecvRateBytesPerSec", bucketValsRates, 1h, 10000)); _histMergeRate = util::HistogramRolling::Ptr( new util::HistogramRolling("MergeRateBytesPerSec", bucketValsRates, 1h, 10000)); _histFileReadRate = util::HistogramRolling::Ptr( @@ -77,7 +77,7 @@ CzarStats::CzarStats(qdisp::QdispPool::Ptr const& qdispPool) } CzarStats::Ptr CzarStats::get() { - std::lock_guard lg(_globalMtx); + std::lock_guard lg(_globalMtx); if (_globalCzarStats == nullptr) { throw util::Bug(ERR_LOC, "Error CzarStats::get called before CzarStats::setup."); } @@ -102,10 +102,10 @@ void CzarStats::endQueryRespConcurrentProcessing(TIMEPOINT start, TIMEPOINT end) _histRespProcessing->addEntry(end, secs.count()); } -void CzarStats::addXRootDSSIRecvRate(double bytesPerSec) { - _histXRootDSSIRecvRate->addEntry(bytesPerSec); +void CzarStats::addDataRecvRate(double bytesPerSec) { + _histDataRecvRate->addEntry(bytesPerSec); LOGS(_log, LOG_LVL_TRACE, - "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histXRootDSSIRecvRate->getString("")); + "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histDataRecvRate->getString("")); } void CzarStats::addMergeRate(double bytesPerSec) { @@ -147,7 +147,7 @@ nlohmann::json CzarStats::getQdispStatsJson() const { nlohmann::json CzarStats::getTransmitStatsJson() const { nlohmann::json result; - result[_histXRootDSSIRecvRate->label()] = _histXRootDSSIRecvRate->getJson(); + result[_histDataRecvRate->label()] = _histDataRecvRate->getJson(); result[_histMergeRate->label()] = _histMergeRate->getJson(); result[_histFileReadRate->label()] = _histFileReadRate->getJson(); return result; 
diff --git a/src/qdisp/CzarStats.h b/src/qdisp/CzarStats.h index d4dccc8ddf..0e8bfb689b 100644 --- a/src/qdisp/CzarStats.h +++ b/src/qdisp/CzarStats.h @@ -43,9 +43,11 @@ // Third party headers #include -namespace lsst::qserv::qdisp { - +namespace lsst::qserv::util { class QdispPool; +} + +namespace lsst::qserv::qdisp { /// This class is used to track statistics for the czar. /// setup() needs to be called before get(). @@ -67,14 +69,14 @@ class CzarStats : std::enable_shared_from_this { /// Setup the global CzarStats instance /// @throws Bug if global has already been set or qdispPool is null. - static void setup(std::shared_ptr const& qdispPool); + static void setup(std::shared_ptr const& qdispPool); /// Return a pointer to the global CzarStats instance. /// @throws Bug if get() is called before setup() static Ptr get(); /// Add a bytes per second entry for query result transmits received over XRootD/SSI - void addXRootDSSIRecvRate(double bytesPerSec); + void addDataRecvRate(double bytesPerSec); /// Add a bytes per second entry for result merges void addMergeRate(double bytesPerSec); @@ -147,19 +149,19 @@ class CzarStats : std::enable_shared_from_this { nlohmann::json getTransmitStatsJson() const; private: - CzarStats(std::shared_ptr const& qdispPool); + CzarStats(std::shared_ptr const& qdispPool); - static Ptr _globalCzarStats; ///< Pointer to the global instance. - static util::Mutex _globalMtx; ///< Protects `_globalCzarStats` + static Ptr _globalCzarStats; ///< Pointer to the global instance. + static MUTEX _globalMtx; ///< Protects `_globalCzarStats` /// Connection to get information about the czar's pool of dispatch threads. - std::shared_ptr _qdispPool; + std::shared_ptr _qdispPool; /// The start up time (milliseconds since the UNIX EPOCH) of the status collector. uint64_t const _startTimeMs = 0; /// Histogram for tracking XROOTD/SSI receive rate in bytes per second. 
- util::HistogramRolling::Ptr _histXRootDSSIRecvRate; + util::HistogramRolling::Ptr _histDataRecvRate; /// Histogram for tracking merge rate in bytes per second. util::HistogramRolling::Ptr _histMergeRate; diff --git a/src/qdisp/Executive.cc b/src/qdisp/Executive.cc index 51b1da226d..bcacc91f86 100644 --- a/src/qdisp/Executive.cc +++ b/src/qdisp/Executive.cc @@ -48,51 +48,42 @@ // Third-party headers #include "boost/format.hpp" -#include "XrdSsi/XrdSsiErrInfo.hh" -#include "XrdSsi/XrdSsiProvider.hh" -#include "XrdSsi/XrdSsiResource.hh" -#include "XrdSsi/XrdSsiService.hh" // LSST headers #include "lsst/log/Log.h" // Qserv headers #include "cconfig/CzarConfig.h" +#include "ccontrol/MergingHandler.h" #include "ccontrol/msgCode.h" +#include "ccontrol/TmpTableName.h" +#include "ccontrol/UserQuerySelect.h" +#include "czar/Czar.h" #include "global/LogContext.h" #include "global/ResourceUnit.h" +#include "protojson/UberJobReadyMsg.h" #include "qdisp/CzarStats.h" #include "qdisp/JobQuery.h" -#include "qdisp/MessageStore.h" -#include "qdisp/QueryRequest.h" #include "qdisp/ResponseHandler.h" -#include "qdisp/XrdSsiMocks.h" #include "query/QueryContext.h" #include "qproc/QuerySession.h" #include "qmeta/Exceptions.h" +#include "qmeta/MessageStore.h" #include "qmeta/QProgress.h" #include "qmeta/QProgressHistory.h" #include "query/SelectStmt.h" +#include "rproc/InfileMerger.h" #include "util/AsyncTimer.h" #include "util/Bug.h" #include "util/EventThread.h" +#include "util/QdispPool.h" using namespace std; -extern XrdSsiProvider* XrdSsiProviderClient; - namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.Executive"); -string getErrorText(XrdSsiErrInfo& e) { - ostringstream os; - int errCode; - os << "XrdSsiError " << e.Get(errCode); - os << " Code=" << errCode; - return os.str(); -} - } // anonymous namespace namespace lsst::qserv::qdisp { @@ -100,28 +91,30 @@ namespace lsst::qserv::qdisp { //////////////////////////////////////////////////////////////////////// // class 
Executive implementation //////////////////////////////////////////////////////////////////////// -Executive::Executive(ExecutiveConfig const& c, shared_ptr const& ms, - SharedResources::Ptr const& sharedResources, - shared_ptr const& queryProgress, +Executive::Executive(int secondsBetweenUpdates, shared_ptr const& ms, + util::QdispPool::Ptr const& qdispPool, shared_ptr const& queryProgress, shared_ptr const& queryProgressHistory, - shared_ptr const& querySession) - : _config(c), - _messageStore(ms), - _qdispPool(sharedResources->getQdispPool()), + shared_ptr const& querySession, unsigned int jobMaxAttempts) + : _messageStore(ms), + _qdispPool(qdispPool), _queryProgress(queryProgress), _queryProgressHistory(queryProgressHistory), - _querySession(querySession) { - _secondsBetweenQMetaUpdates = chrono::seconds(_config.secondsBetweenChunkUpdates); - _setup(); + _secondsBetweenQMetaUpdates(chrono::seconds(secondsBetweenUpdates)), + _querySession(querySession), + _jobMaxAttempts(jobMaxAttempts) { _setupLimit(); qdisp::CzarStats::get()->addQuery(); } Executive::~Executive() { + LOGS(_log, LOG_LVL_DEBUG, "Executive::~Executive() " << getIdStr()); qdisp::CzarStats::get()->deleteQuery(); qdisp::CzarStats::get()->deleteJobs(_incompleteJobs.size()); - // Real XrdSsiService objects are unowned, but mocks are allocated in _setup. - delete dynamic_cast(_xrdSsiService); + // Remove this executive from the map. + auto cz = czar::Czar::getCzar(); // cz can be null in unit tests. 
+ if (cz != nullptr && cz->getExecutiveFromMap(getId()) != nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) + " pointer in map should be invalid QID=" << getId()); + } if (_asyncTimer != nullptr) { _asyncTimer->cancel(); if (_queryProgressHistory != nullptr) { @@ -134,15 +127,17 @@ Executive::~Executive() { } } -Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptr const& ms, - SharedResources::Ptr const& sharedResources, +Executive::Ptr Executive::create(int secsBetweenUpdates, shared_ptr const& ms, + shared_ptr const& qdispPool, shared_ptr const& queryProgress, shared_ptr const& queryProgressHistory, shared_ptr const& querySession, boost::asio::io_service& asioIoService) { LOGS(_log, LOG_LVL_DEBUG, "Executive::" << __func__); - Executive::Ptr ptr( - new Executive(c, ms, sharedResources, queryProgress, queryProgressHistory, querySession)); + + auto czarConfig = cconfig::CzarConfig::instance(); + Executive::Ptr ptr(new Executive(secsBetweenUpdates, ms, qdispPool, queryProgress, queryProgressHistory, + querySession, czarConfig->jobMaxAttempts())); // Start the query progress monitoring timer (if enabled). The query status // will be sampled on each expiration event of the timer. Note that the timer @@ -152,19 +147,22 @@ Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptrczarStatsUpdateIvalSec(); + auto const czarStatsUpdateIvalSec = czarConfig->czarStatsUpdateIvalSec(); if (czarStatsUpdateIvalSec > 0) { + // AsyncTimer has a 'self' keep alive in AsyncTimer::start() that keeps it safe when + // this Executive is deleted. 
ptr->_asyncTimer = util::AsyncTimer::create( asioIoService, std::chrono::milliseconds(czarStatsUpdateIvalSec * 1000), [self = std::weak_ptr(ptr)](auto expirationIvalMs) -> bool { auto ptr = self.lock(); - LOGS(_log, LOG_LVL_DEBUG, - "Executive::" << __func__ << " expirationIvalMs: " << expirationIvalMs.count() - << " ms"); + string const msg = string("Executive::") + __func__ + + " expirationIvalMs: " + to_string(expirationIvalMs.count()) + " ms"; if (ptr != nullptr) { ptr->_updateStats(); + LOGS(_log, LOG_LVL_DEBUG, msg + " " + ptr->getIdStr()); return true; } + LOGS(_log, LOG_LVL_DEBUG, msg); return false; }); ptr->_asyncTimer->start(); @@ -183,8 +181,16 @@ void Executive::_updateStats() const { } void Executive::setQueryId(QueryId id) { + if (_queryIdSet.exchange(true) == true) { + throw util::Bug(ERR_LOC, "Executive::setQueryId called more than once _id=" + to_string(_id) + + " id=" + to_string(id)); + } _id = id; _idStr = QueryIdHelper::makeIdStr(_id); + + // Insert into the global executive map. + czar::Czar::getCzar()->insertExecutive(_id, shared_from_this()); + if (_queryProgressHistory != nullptr) { try { _queryProgressHistory->track(_id); @@ -194,36 +200,48 @@ void Executive::setQueryId(QueryId id) { } } +UberJob::Ptr Executive::findUberJob(UberJobId ujId) const { + lock_guard lgMap(_uberJobsMapMtx); + auto iter = _uberJobsMap.find(ujId); + if (iter == _uberJobsMap.end()) { + return nullptr; + } + return iter->second; +} + /// Add a new job to executive queue, if not already in. Not thread-safe. /// JobQuery::Ptr Executive::add(JobDescription::Ptr const& jobDesc) { JobQuery::Ptr jobQuery; { // Create the JobQuery and put it in the map. 
- JobStatus::Ptr jobStatus = make_shared(); + auto jobStatus = make_shared(); Ptr thisPtr = shared_from_this(); - MarkCompleteFunc::Ptr mcf = make_shared(thisPtr, jobDesc->id()); - jobQuery = JobQuery::create(thisPtr, jobDesc, jobStatus, mcf, _id); + jobQuery = JobQuery::create(thisPtr, jobDesc, jobStatus, _id); - QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getJobId()); { - lock_guard lock(_cancelled.getMutex()); - if (_cancelled) { - LOGS(_log, LOG_LVL_DEBUG, - "Executive already cancelled, ignoring add(" << jobDesc->id() << ")"); - return nullptr; + { + lock_guard lock(_cancelled.getMutex()); + if (_cancelled) { + LOGS(_log, LOG_LVL_DEBUG, + "Executive already cancelled, ignoring add(" << jobDesc->id() << ")"); + return nullptr; + } } - if (!_addJobToMap(jobQuery)) { - LOGS(_log, LOG_LVL_ERROR, "Executive ignoring duplicate job add"); + if (!_track(jobQuery->getJobId(), jobQuery)) { + LOGS(_log, LOG_LVL_ERROR, "Executive ignoring duplicate track add"); return jobQuery; } - if (!_track(jobQuery->getIdInt(), jobQuery)) { - LOGS(_log, LOG_LVL_ERROR, "Executive ignoring duplicate track add"); + if (!_addJobToMap(jobQuery)) { + LOGS(_log, LOG_LVL_ERROR, "Executive ignoring duplicate job add"); return jobQuery; } + + _addToChunkJobMap(jobQuery); } if (_empty.exchange(false)) { @@ -232,17 +250,30 @@ JobQuery::Ptr Executive::add(JobDescription::Ptr const& jobDesc) { ++_requestCount; } - QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getJobId()); - LOGS(_log, LOG_LVL_DEBUG, "Executive::add with path=" << jobDesc->resource().path()); - bool started = jobQuery->runJob(); - if (!started && isLimitRowComplete()) { - markCompleted(jobQuery->getIdInt(), false); - } return jobQuery; } -void Executive::queueJobStart(PriorityCommand::Ptr const& cmd) { +void 
Executive::queueFileCollect(util::PriorityCommand::Ptr const& cmd) { + if (_scanInteractive) { + _qdispPool->queCmd(cmd, 2); + } else { + _qdispPool->queCmd(cmd, 3); + } +} + +void Executive::addAndQueueUberJob(shared_ptr const& uj) { + { + lock_guard lck(_uberJobsMapMtx); + UberJobId ujId = uj->getUjId(); + _uberJobsMap[ujId] = uj; + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " ujId=" << ujId << " uj.sz=" << uj->getJobCount()); + } + + auto runUberJobFunc = [uj](util::CmdData*) { uj->runUberJob(); }; + + auto cmd = util::PriorityCommand::Ptr(new util::PriorityCommand(runUberJobFunc)); _jobStartCmdList.push_back(cmd); if (_scanInteractive) { _qdispPool->queCmd(cmd, 0); @@ -264,42 +295,69 @@ void Executive::waitForAllJobsToStart() { LOGS(_log, LOG_LVL_INFO, "waitForAllJobsToStart done"); } -// If the executive has not been cancelled, then we simply start the query. -// @return true if query was actually started (i.e. we were not cancelled) -// -bool Executive::startQuery(shared_ptr const& jobQuery) { - lock_guard lock(_cancelled.getMutex()); +Executive::ChunkIdJobMapType Executive::unassignedChunksInQuery() { + lock_guard lck(_chunkToJobMapMtx); - // If we have been cancelled, then return false. - // - if (_cancelled) return false; - - // Construct a temporary resource object to pass to ProcessRequest(). - // Interactive Queries should have an Affinity of XrdSsiResource::None or Weak while - // Scans should have an affinity of Strong - XrdSsiResource::Affinity affinity = (_scanInteractive) ? XrdSsiResource::Weak : XrdSsiResource::Strong; - XrdSsiResource jobResource(jobQuery->getDescription()->resource().path(), "", jobQuery->getIdStr(), "", 0, - affinity); - - // Now construct the actual query request and tie it to the jobQuery. The - // shared pointer is used by QueryRequest to keep itself alive, sloppy design. - // Note that JobQuery calls StartQuery that then calls JobQuery, yech! 
- // - QueryRequest::Ptr qr = QueryRequest::create(jobQuery); - jobQuery->setQueryRequest(qr); + ChunkIdJobMapType unassignedMap; + for (auto const& [key, jobPtr] : _chunkToJobMap) { + if (!jobPtr->isInUberJob()) { + unassignedMap[key] = jobPtr; + } + } + return unassignedMap; +} - // Start the query. The rest is magically done in the background. - // - getXrdSsiService()->ProcessRequest(*(qr.get()), jobResource); - return true; +string Executive::dumpUberJobCounts() const { + stringstream os; + os << "exec=" << getIdStr(); + int totalJobs = 0; + { + lock_guard ujmLck(_uberJobsMapMtx); + for (auto const& [ujKey, ujPtr] : _uberJobsMap) { + int jobCount = ujPtr->getJobCount(); + totalJobs += jobCount; + os << "{" << ujKey << ":" << ujPtr->getIdStr() << " jobCount=" << jobCount << "}"; + } + } + { + lock_guard jmLck(_jobMapMtx); + os << " ujTotalJobs=" << totalJobs << " execJobs=" << _jobMap.size(); + } + return os.str(); +} + +void Executive::assignJobsToUberJobs() { + auto uqs = _userQuerySelect.lock(); + if (uqs != nullptr) { + uqs->buildAndSendUberJobs(); + } +} + +void Executive::addMultiError(int errorCode, int subError, std::string const& errorMsg, bool logLvlErr) { + util::Error err(errorCode, subError, errorMsg, logLvlErr); + { + lock_guard lock(_errorsMutex); + _multiError.insert(err); + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) + " multiError:" << _multiError.size() << ":" << _multiError); + } +} + +void Executive::addMultiError(util::MultiError const& multiErr) { + { + lock_guard lock(_errorsMutex); + _multiError.merge(multiErr); + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) + " multiError:" << _multiError.size() << ":" << _multiError); + } } /// Add a JobQuery to this Executive. /// Return true if it was successfully added to the map. 
/// bool Executive::_addJobToMap(JobQuery::Ptr const& job) { - auto entry = pair(job->getIdInt(), job); - lock_guard lockJobMap(_jobMapMtx); + auto entry = pair(job->getJobId(), job); + lock_guard lockJobMap(_jobMapMtx); bool res = _jobMap.insert(entry).second; _totalJobs = _jobMap.size(); return res; @@ -309,24 +367,26 @@ bool Executive::join() { // To join, we make sure that all of the chunks added so far are complete. // Check to see if _requesters is empty, if not, then sleep on a condition. _waitAllUntilEmpty(); + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " wait done"); // Okay to merge. probably not the Executive's responsibility struct successF { static bool func(Executive::JobMap::value_type const& entry) { - JobStatus::Info const& esI = entry.second->getStatus()->getInfo(); + qmeta::JobStatus::Info const& esI = entry.second->getStatus()->getInfo(); LOGS(_log, LOG_LVL_TRACE, "entry state:" << (void*)entry.second.get() << " " << esI); - return (esI.state == JobStatus::RESPONSE_DONE) || (esI.state == JobStatus::COMPLETE); + return (esI.state == qmeta::JobStatus::RESPONSE_DONE) || + (esI.state == qmeta::JobStatus::COMPLETE); } }; int sCount = 0; { - lock_guard lockJobMap(_jobMapMtx); + lock_guard lockJobMap(_jobMapMtx); sCount = count_if(_jobMap.begin(), _jobMap.end(), successF::func); } if (sCount == _requestCount) { LOGS(_log, LOG_LVL_INFO, "Query execution succeeded all: " << _requestCount << " jobs dispatched and completed."); - } else if (isLimitRowComplete()) { + } else if (isRowLimitComplete()) { LOGS(_log, LOG_LVL_INFO, "Query execution succeeded enough (LIMIT): " << sCount << " jobs out of " << _requestCount << " completed."); @@ -337,22 +397,22 @@ bool Executive::join() { } _empty = (sCount == _requestCount); LOGS(_log, LOG_LVL_DEBUG, - "Flag set to _empty=" << _empty << ", sCount=" << sCount << ", requestCount=" << _requestCount); - return _empty || isLimitRowComplete(); + cName(__func__) << " " + << "Flag set to _empty=" << _empty << ", sCount=" 
<< sCount + << ", requestCount=" << _requestCount); + + return _empty || isRowLimitComplete(); } -void Executive::markCompleted(int jobId, bool success) { - ResponseHandler::Error err; +void Executive::markCompleted(JobId jobId, bool success) { + util::Error err; + string errStr; string idStr = QueryIdHelper::makeIdStr(_id, jobId); - LOGS(_log, LOG_LVL_DEBUG, "Executive::markCompleted " << success); - if (!success && !isLimitRowComplete()) { + LOGS(_log, LOG_LVL_TRACE, "Executive::markCompleted " << success); + if (!success && !isRowLimitComplete()) { { lock_guard lock(_incompleteJobsMutex); - auto iter = _incompleteJobs.find(jobId); - if (iter != _incompleteJobs.end()) { - auto jobQuery = iter->second; - err = jobQuery->getDescription()->respHandler()->getError(); - } else { + if (_incompleteJobs.count(jobId) == 0) { string msg = "Executive::markCompleted failed to find TRACKED " + idStr + " size=" + to_string(_incompleteJobs.size()); // If the user query has been cancelled, this is expected for jobs that have not yet @@ -366,82 +426,154 @@ void Executive::markCompleted(int jobId, bool success) { return; } } - LOGS(_log, LOG_LVL_WARN, - "Executive: error executing " << err << " (status: " << err.getStatus() << ")"); + { - lock_guard lockJobMap(_jobMapMtx); + lock_guard lock(_errorsMutex); + err = _multiError.firstError(); + errStr = _multiError.toOneLineString(); + } + + LOGS(_log, LOG_LVL_DEBUG, "Executive: error executing " << err); + { + lock_guard lockJobMap(_jobMapMtx); auto job = _jobMap[jobId]; string id = job->getIdStr() + "<>" + idStr; - auto jState = job->getStatus()->getInfo().state; + // Don't overwrite existing error states. 
- if (jState != JobStatus::CANCEL && jState != JobStatus::RESPONSE_ERROR && - jState != JobStatus::RESULT_ERROR && jState != JobStatus::MERGE_ERROR) { - job->getStatus()->updateInfo(id, JobStatus::RESULT_ERROR, "EXECFAIL", err.getCode(), - err.getMsg()); - } - } - { - lock_guard lock(_errorsMutex); - _multiError.push_back(err); - LOGS(_log, LOG_LVL_TRACE, - "Currently " << _multiError.size() << " registered errors: " << _multiError); + job->getStatus()->updateInfoNoErrorOverwrite(id, qmeta::JobStatus::RESULT_ERROR, "EXECFAIL", + err.getCode(), errStr, MSG_ERROR); } } _unTrack(jobId); - if (!success && !isLimitRowComplete()) { - LOGS(_log, LOG_LVL_ERROR, - "Executive: requesting squash, cause: " << " failed (code=" << err.getCode() << " " - << err.getMsg() << ")"); - squash(); // ask to squash + if (!success && !isRowLimitComplete()) { + squash("markComplete error " + err.dump()); // ask to squash } } -void Executive::squash() { +std::shared_ptr Executive::findJob(int jobId) const { + lock_guard lockJobMap(_jobMapMtx); + auto iter = _jobMap.find(jobId); + if (iter == _jobMap.end()) return nullptr; + return iter->second; +} + +void Executive::squash(string const& note) { bool alreadyCancelled = _cancelled.exchange(true); if (alreadyCancelled) { - LOGS(_log, LOG_LVL_DEBUG, "Executive::squash() already cancelled! refusing."); + LOGS(_log, LOG_LVL_DEBUG, "Executive::squash() already cancelled! refusing. qid=" << getId()); return; } - LOGS(_log, LOG_LVL_INFO, "Executive::squash Trying to cancel all queries..."); + LOGS(_log, LOG_LVL_WARN, + "Executive::squash Trying to cancel all queries... 
qid=" << getId() << " " << note); deque jobsToCancel; { - lock_guard lockJobMap(_jobMapMtx); + lock_guard lockJobMap(_jobMapMtx); for (auto const& jobEntry : _jobMap) { jobsToCancel.push_back(jobEntry.second); } } + int cancelCount = 0; + bool const superfluous = false; + bool const logLvlErr = false; for (auto const& job : jobsToCancel) { - job->cancel(); + job->cancel(superfluous, logLvlErr); + ++cancelCount; } - LOGS(_log, LOG_LVL_DEBUG, "Executive::squash done"); + LOGS(_log, LOG_LVL_ERROR, "Executive::squash cancelled " << cancelCount << " jobs"); + + // Send a message to all workers saying this czarId + queryId is cancelled. + // The workers will just mark all associated tasks as cancelled, and that should be it. + // Any message to this czar about this query should result in an error sent back to + // the worker as soon it can't locate an executive or the executive says it was + // cancelled. + bool const deleteResults = true; + sendWorkersEndMsg(deleteResults); + LOGS(_log, LOG_LVL_DEBUG, "Executive::squash done canceled " << cancelCount << " Jobs"); } void Executive::_squashSuperfluous() { if (_cancelled) { - LOGS(_log, LOG_LVL_INFO, "squashSuperfluous() irrelevant as query already cancelled"); + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " irrelevant as query already cancelled"); + return; + } + + if (_superfluous.exchange(true) == true) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " irrelevant as query already superfluous"); return; } LOGS(_log, LOG_LVL_INFO, "Executive::squashSuperflous Trying to cancel incomplete jobs"); deque jobsToCancel; { - lock_guard lockJobMap(_jobMapMtx); + lock_guard lockJobMap(_jobMapMtx); for (auto const& jobEntry : _jobMap) { JobQuery::Ptr jq = jobEntry.second; // It's important that none of the cancelled queries // try to remove their rows from the result. 
- if (jq->getStatus()->getInfo().state != JobStatus::COMPLETE) { + if (jq->getStatus()->getInfo().state != qmeta::JobStatus::COMPLETE && + jq->getStatus()->getInfo().state != qmeta::JobStatus::CANCEL) { jobsToCancel.push_back(jobEntry.second); } } } + int cancelCount = 0; + bool const superfluous = true; + bool const logLvlErr = false; for (auto const& job : jobsToCancel) { - job->cancel(true); + job->cancel(superfluous, logLvlErr); + ++cancelCount; + } + LOGS(_log, LOG_LVL_ERROR, "Executive::squashSuperfluous cancelled " << cancelCount << " jobs"); + + bool const keepResults = false; + sendWorkersEndMsg(keepResults); + LOGS(_log, LOG_LVL_DEBUG, "Executive::squashSuperfluous done canceled " << cancelCount << " Jobs"); +} + +void Executive::sendWorkersEndMsg(bool deleteResults) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " terminating this query deleteResults=" << deleteResults); + auto cz = czar::Czar::getCzar(); + if (cz != nullptr) { // Possible in unit tests. + cz->getCzarRegistry()->endUserQueryOnWorkers(_id, deleteResults); + } +} + +void Executive::killIncompleteUberJobsOnWorker(std::string const& workerId) { + if (_cancelled) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " irrelevant as query already cancelled"); + return; + } + + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " killing incomplete UberJobs on " << workerId); + deque ujToCancel; + { + lock_guard lockUJMap(_uberJobsMapMtx); + for (auto const& [ujKey, ujPtr] : _uberJobsMap) { + auto ujStatus = ujPtr->getStatus()->getState(); + if (ujStatus != qmeta::JobStatus::RESPONSE_DONE && ujStatus != qmeta::JobStatus::COMPLETE) { + // RESPONSE_DONE indicates the result file has been read by + // the czar, so before that point the worker's data is + // likely destroyed. COMPLETE indicates all jobs in the + // UberJob are complete. 
+ if (ujPtr->getWorkerContactInfo()->wId == workerId) { + ujToCancel.push_back(ujPtr); + } + } + } + } + + for (auto const& uj : ujToCancel) { + if (uj->killUberJob()) { + uj->setStatusIfOk(qmeta::JobStatus::CANCEL, getIdStr() + " killIncomplete on worker=" + workerId); + } else { + // This should be very rare. + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " UberJob could not be cancelled as it was already merging."); + } } - LOGS(_log, LOG_LVL_DEBUG, "Executive::squashSuperfluous done"); } int Executive::getNumInflight() const { @@ -452,7 +584,7 @@ int Executive::getNumInflight() const { string Executive::getProgressDesc() const { ostringstream os; { - lock_guard lockJobMap(_jobMapMtx); + lock_guard lockJobMap(_jobMapMtx); auto first = true; for (auto entry : _jobMap) { JobQuery::Ptr job = entry.second; @@ -468,27 +600,6 @@ string Executive::getProgressDesc() const { return msg_progress; } -void Executive::_setup() { - XrdSsiErrInfo eInfo; - _empty.store(true); - _requestCount = 0; - // If unit testing, load the mock service. - if (_config.serviceUrl.compare(_config.getMockStr()) == 0) { - _xrdSsiService = new XrdSsiServiceMock(this); - } else { - static XrdSsiService* xrdSsiServiceStatic = - XrdSsiProviderClient->GetService(eInfo, _config.serviceUrl); - _xrdSsiService = xrdSsiServiceStatic; - } - if (!_xrdSsiService) { - LOGS(_log, LOG_LVL_DEBUG, - _id << " Error obtaining XrdSsiService in Executive: " - "serviceUrl=" - << _config.serviceUrl << " " << getErrorText(eInfo)); - } - assert(_xrdSsiService); -} - /** Add (jobId,r) entry to _requesters map if not here yet * else leave _requesters untouched. * @@ -538,7 +649,7 @@ void Executive::_unTrack(int jobId) { s = _getIncompleteJobsString(5); } } - bool logDebug = untracked || isLimitRowComplete(); + bool logDebug = untracked || isRowLimitComplete(); LOGS(_log, (logDebug ? LOG_LVL_DEBUG : LOG_LVL_WARN), "Executive UNTRACKING " << (untracked ? 
"success" : "failed") << "::" << s); // Every time a chunk completes, consider sending an update to QMeta. @@ -582,7 +693,7 @@ void Executive::updateProxyMessages() { { // Add all messages to the message store. These will // be used to populate qservMeta.QMessages for this query. - lock_guard lockJobMap(_jobMapMtx); + lock_guard lockJobMap(_jobMapMtx); for (auto const& entry : _jobMap) { JobQuery::Ptr const& job = entry.second; auto const& info = job->getStatus()->getInfo(); @@ -602,6 +713,7 @@ void Executive::updateProxyMessages() { // the _messageStore. This will be passed to the proxy for the user, if // there's an error. if (not _multiError.empty()) { + // "MULTIERROR" indicates these should be sent to the proxy as error messages. _messageStore->addErrorMessage("MULTIERROR", _multiError.toString()); LOGS(_log, LOG_LVL_INFO, "MULTIERROR:" << _multiError.toString()); } @@ -618,6 +730,7 @@ void Executive::_waitAllUntilEmpty() { int moreDetailThreshold = 10; int complainCount = 0; const chrono::seconds statePrintDelay(5); + // Loop until all jobs have completed and all jobs have been created. while (!_incompleteJobs.empty()) { count = _incompleteJobs.size(); if (count != lastCount) { @@ -640,6 +753,16 @@ void Executive::_waitAllUntilEmpty() { } } +void Executive::_addToChunkJobMap(JobQuery::Ptr const& job) { + int chunkId = job->getDescription()->resource().chunk(); + auto entry = pair(chunkId, job); + lock_guard lck(_chunkToJobMapMtx); + bool inserted = _chunkToJobMap.insert(entry).second; + if (!inserted) { + throw util::Bug(ERR_LOC, "map insert FAILED ChunkId=" + to_string(chunkId) + " already existed"); + } +} + void Executive::_setupLimit() { // Figure out the limit situation. 
auto qSession = _querySession.lock(); @@ -657,6 +780,13 @@ void Executive::_setupLimit() { _limitSquashApplies = hasLimit && !(groupBy || orderBy || allChunksRequired); } +int Executive::getUjRowLimit() const { + if (_limitSquashApplies) { + return _limit; + } + return 0; +} + void Executive::addResultRows(int64_t rowCount) { _totalResultRows += rowCount; } void Executive::checkLimitRowComplete() { @@ -674,13 +804,104 @@ void Executive::checkLimitRowComplete() { _squashSuperfluous(); } +void Executive::checkResultFileSize(uint64_t fileSize) { + if (_cancelled || isRowLimitComplete()) return; + _totalResultFileSize += fileSize; + + size_t const MB_SIZE_BYTES = 1024 * 1024; + uint64_t maxResultTableSizeBytes = cconfig::CzarConfig::instance()->getMaxTableSizeMB() * MB_SIZE_BYTES; + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " sz=" << fileSize << " total=" << _totalResultFileSize + << " max=" << maxResultTableSizeBytes); + + if ((fileSize > maxResultTableSizeBytes) || + (!_limitSquashApplies && _totalResultFileSize > maxResultTableSizeBytes)) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " total=" << _totalResultFileSize << " max=" << maxResultTableSizeBytes); + // _totalResultFileSize may include non zero values from dead UberJobs, + // so recalculate it to verify. 
+ uint64_t total = 0; + { + lock_guard lck(_uberJobsMapMtx); + for (auto const& [ujId, ujPtr] : _uberJobsMap) { + total += ujPtr->getResultFileSize(); + } + _totalResultFileSize = total; + } + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << "recheck total=" << total << " max=" << maxResultTableSizeBytes); + if (total > maxResultTableSizeBytes) { + LOGS(_log, LOG_LVL_ERROR, "Executive: requesting squash, result file size too large " << total); + util::Error err(util::Error::CZAR_RESULT_TOO_LARGE, util::Error::NONE, + "Incomplete result already too large " + to_string(total)); + _multiError.insert(err); + _resultFileSizeExceeded = true; + squash("czar, file too large"); + } + } +} + +shared_ptr> Executive::getLimitSquashLock() { + shared_ptr> ptr(new lock_guard(_mtxLimitSquash)); + return ptr; +} + +void Executive::collectFile(std::shared_ptr ujPtr, protojson::FileUrlInfo const& fileUrlInfo, + std::string const& idStr) { + // Limit collecting LIMIT queries to one at a time, but only for LIMIT. + // This is to avoid having to wait for multiple large files to merge when only a + // few are needed to satisfy the LIMIT. This can make a huge difference. + shared_ptr> limitSquashL; + if (_limitSquashApplies) { + limitSquashL.reset(new lock_guard(_mtxLimitSquash)); + } + MergeEndStatus flushStatus = + ujPtr->getRespHandler()->flushHttp(fileUrlInfo.fileUrl, fileUrlInfo.fileSize); + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << "ujId=" << ujPtr->getUjId() << " success=" << flushStatus.success + << " contaminated=" << flushStatus.contaminated); + if (!flushStatus.success) { + if (flushStatus.contaminated) { + // This would probably indicate malformed file+rowCount or writing the result table failed. + // If any merging happened, the result table (and entire user query) is ruined. 
+ LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << "ujId=" << ujPtr->getUjId() + << " flushHttp failed after merging, results ruined."); + } else { + // Perhaps something went wrong with file collection, so it is worth trying the jobs again + // by abandoning this UberJob. + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << "ujId=" << ujPtr->getUjId() << " flushHttp failed, retrying Jobs."); + } + ujPtr->importResultError(flushStatus.contaminated, "mergeError", "merging failed"); + return; + } + + // Success + CzarStats::get()->addTotalRowsRecv(fileUrlInfo.rowCount); + CzarStats::get()->addTotalBytesRecv(fileUrlInfo.fileSize); + + // At this point all data for this job have been read and merged + bool const statusSet = ujPtr->importResultFinish(); + if (!statusSet) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << "ujId=" << ujPtr->getUjId() << " failed to set status, squashing " + << getIdStr()); + // Something has gone very wrong, possibly merged same results twice. + squash(cName(__func__) + " couldn't set UberJob status"); + return; + } + addResultRows(fileUrlInfo.rowCount); + checkLimitRowComplete(); +} + ostream& operator<<(ostream& os, Executive::JobMap::value_type const& v) { - JobStatus::Ptr status = v.second->getStatus(); + auto const& status = v.second->getStatus(); os << v.first << ": " << *status; return os; } -/// precondition: _requestersMutex is held by current thread. +/// precondition: _incompleteJobsMutex is held by current thread. 
void Executive::_printState(ostream& os) { for (auto const& entry : _incompleteJobs) { JobQuery::Ptr job = entry.second; diff --git a/src/qdisp/Executive.h b/src/qdisp/Executive.h index ce4dc3e521..8141c05db8 100644 --- a/src/qdisp/Executive.h +++ b/src/qdisp/Executive.h @@ -30,7 +30,6 @@ #include #include #include -#include #include // Third-party headers @@ -40,21 +39,27 @@ #include "global/intTypes.h" #include "global/ResourceUnit.h" #include "global/stringTypes.h" +#include "protojson/ScanTableInfo.h" #include "qdisp/JobDescription.h" -#include "qdisp/JobStatus.h" #include "qdisp/ResponseHandler.h" -#include "qdisp/SharedResources.h" -#include "qdisp/QdispPool.h" +#include "qdisp/UberJob.h" +#include "qmeta/JobStatus.h" #include "util/EventThread.h" #include "util/InstanceCount.h" #include "util/MultiError.h" #include "util/threadSafe.h" #include "util/ThreadPool.h" -// Forward declarations -class XrdSsiService; +namespace lsst::qserv::ccontrol { +class UserQuerySelect; +} + +namespace lsst::qserv::protojson { +class FileUrlInfo; +} namespace lsst::qserv::qmeta { +class MessageStore; class QProgress; class QProgressHistory; } // namespace lsst::qserv::qmeta @@ -63,53 +68,60 @@ namespace lsst::qserv::qproc { class QuerySession; } // namespace lsst::qserv::qproc -namespace lsst::qserv::qdisp { -class JobQuery; -class MessageStore; -} // namespace lsst::qserv::qdisp - namespace lsst::qserv::util { class AsyncTimer; +class PriorityCommand; +class QdispPool; } // namespace lsst::qserv::util // This header declarations namespace lsst::qserv::qdisp { -struct ExecutiveConfig { - typedef std::shared_ptr Ptr; - ExecutiveConfig(std::string const& serviceUrl_, int secsBetweenChunkUpdates_) - : serviceUrl(serviceUrl_), secondsBetweenChunkUpdates(secsBetweenChunkUpdates_) {} - ExecutiveConfig(int, int) : serviceUrl(getMockStr()) {} - - std::string serviceUrl; ///< XrdSsi service URL, e.g. 
localhost:1094 - int secondsBetweenChunkUpdates; ///< Seconds between QMeta chunk updates. - static std::string getMockStr() { return "Mock"; } -}; +class JobQuery; +class UberJob; -/// class Executive manages the execution of jobs for a UserQuery, while -/// maintaining minimal information about the jobs themselves. +/// class Executive manages the execution of jobs for a UserQuery. class Executive : public std::enable_shared_from_this { public: typedef std::shared_ptr Ptr; typedef std::unordered_map> JobMap; + typedef int ChunkIdType; + typedef std::map> ChunkIdJobMapType; /// Construct an Executive. - /// If c->serviceUrl == ExecutiveConfig::getMockStr(), then use XrdSsiServiceMock - /// instead of a real XrdSsiService - static Executive::Ptr create(ExecutiveConfig const& c, std::shared_ptr const& ms, - SharedResources::Ptr const& sharedResources, + static Executive::Ptr create(int secsBetweenUpdates, std::shared_ptr const& ms, + std::shared_ptr const& qdispPool, std::shared_ptr const& queryProgress, std::shared_ptr const& queryProgressHistory, std::shared_ptr const& querySession, boost::asio::io_service& asioIoService); - ~Executive(); + virtual ~Executive(); + + std::string cName(const char* funcName = "") { + return std::string("Executive::") + funcName + " " + getIdStr(); + } + + /// Set the UserQuerySelect object for this query so this Executive can ask it to make new + /// UberJobs in the future, if needed. + void setUserQuerySelect(std::shared_ptr const& uqs) { _userQuerySelect = uqs; } + + /// Return a map that only contains Jobs not assigned to an UberJob. + ChunkIdJobMapType unassignedChunksInQuery(); + + /// Find the UberJob with `ujId`. + std::shared_ptr findUberJob(UberJobId ujId) const; + + std::shared_ptr findJob(int jobId) const; /// Add an item with a reference number std::shared_ptr add(JobDescription::Ptr const& s); - /// Queue a job to be sent to a worker so it can be started. 
- void queueJobStart(PriorityCommand::Ptr const& cmd); + /// Add the UberJob `uj` to the list and queue it to be sent to a worker. + void addAndQueueUberJob(std::shared_ptr const& uj); + + /// Queue `cmd`, using the QDispPool, so it can be used to collect the result file. + void queueFileCollect(std::shared_ptr const& cmd); /// Waits for all jobs on _jobStartCmdList to start. This should not be called /// before ALL jobs have been added to the pool. @@ -120,18 +132,23 @@ class Executive : public std::enable_shared_from_this { bool join(); /// Notify the executive that an item has completed - void markCompleted(int refNum, bool success); + void markCompleted(JobId refNum, bool success); /// Squash all the jobs. - void squash(); + void squash(std::string const& note); bool getEmpty() { return _empty; } + /// These values cannot be set until information has been collected from + /// QMeta, which isn't called until some basic checks on the user query + /// have passed. void setQueryId(QueryId id); + QueryId getId() const { return _id; } std::string const& getIdStr() const { return _idStr; } void setScanInteractive(bool interactive) { _scanInteractive = interactive; } + bool getScanInteractive() const { return _scanInteractive; } /// @return number of jobs in flight. int getNumInflight() const; @@ -142,11 +159,10 @@ class Executive : public std::enable_shared_from_this { /// @return true if cancelled bool getCancelled() { return _cancelled; } - XrdSsiService* getXrdSsiService() { return _xrdSsiService; } - - std::shared_ptr getQdispPool() { return _qdispPool; } + /// Return true if LIMIT conditions met. + bool getSuperfluous() { return _superfluous; } - bool startQuery(std::shared_ptr const& jobQuery); + std::shared_ptr getQdispPool() { return _qdispPool; } /// Add 'rowCount' to the total number of rows in the result table. void addResultRows(int64_t rowCount); @@ -157,9 +173,13 @@ class Executive : public std::enable_shared_from_this { /// rows already read in. 
void checkLimitRowComplete(); - /// @return _limitRowComplete, which can only be meaningful if the + /// Returns the maximum number of rows the worker needs for the LIMIT clause, or + /// a value <= 0 there's no limit that can be applied at the worker. + int getUjRowLimit() const; + + /// @return _rowLimitComplete, which can only be meaningful if the /// user query has not been cancelled. - bool isLimitRowComplete() { return _limitRowComplete && !_cancelled; } + bool isRowLimitComplete() { return _rowLimitComplete && !_cancelled; } /// @return the value of _dataIgnoredCount int incrDataIgnoredCount() { return ++_dataIgnoredCount; } @@ -168,14 +188,66 @@ class Executive : public std::enable_shared_from_this { /// @see python module lsst.qserv.czar.proxy.unlock() void updateProxyMessages(); -private: - Executive(ExecutiveConfig const& c, std::shared_ptr const& ms, - SharedResources::Ptr const& sharedResources, + /// Call UserQuerySelect::buildAndSendUberJobs make new UberJobs for + /// unassigned jobs. + virtual void assignJobsToUberJobs(); + + int getTotalJobs() { return _totalJobs; } + + /// Add an error code and message that may be displayed to the user. + /// @see util::Error for information about parameters. + void addMultiError(int errorCode, int subError, std::string const& errorMsg, bool logLvlErr = true); + void addMultiError(util::MultiError const& multiErr); + + std::string dumpUberJobCounts() const; + + /// Maximum number of times to try running a job before squashing the query. + int getMaxAttempts() const { return _jobMaxAttempts; } + + /// Calling this indicates all Jobs for this user query have been created. + void setAllJobsCreated() { _allJobsCreated = true; } + + /// Returns true if all jobs have been created. + bool isAllJobsCreated() { return _allJobsCreated; } + + /// Send a message to all workers to cancel this query. + /// @param deleteResults - If true, delete all result files for this query on the workers. 
+ void sendWorkersEndMsg(bool deleteResults); + + /// Complete UberJobs have their results on the czar, the + /// incomplete UberJobs need to be stopped and possibly reassigned. + void killIncompleteUberJobsOnWorker(std::string const& workerId); + + // Try to remove this and put in constructor + void setScanInfo(protojson::ScanInfo::Ptr const& scanInfo) { _scanInfo = scanInfo; } + + /// Return a pointer to _scanInfo. + protojson::ScanInfo::Ptr getScanInfo() { return _scanInfo; } + + /// Add fileSize to `_totalResultFileSize` and check if it exceeds limits. + /// If it is too large, check the value against existing UberJob result + /// sizes as `_totalResultFileSize` may include failed UberJobs. + /// If the sum of all UberJob result files size is too large, + /// cancel this user query. + void checkResultFileSize(uint64_t fileSize = 0); + + /// Returns a pointer to a lock on _mtxLimitSquash. + std::shared_ptr> getLimitSquashLock(); + + void collectFile(std::shared_ptr ujPtr, protojson::FileUrlInfo const& fileUrlInfo, + std::string const& idStr); + + /// Return true if the result size limit has been exceeded. + bool resultSizeLimitExceeded() const { return _resultFileSizeExceeded; } + +protected: + Executive(int secondsBetweenUpdates, std::shared_ptr const& ms, + std::shared_ptr const& sharedResources, std::shared_ptr const& queryProgress, std::shared_ptr const& queryProgressHistory, - std::shared_ptr const& querySession); + std::shared_ptr const& querySession, unsigned int maxJobAttempts); - void _setup(); +private: void _setupLimit(); bool _track(int refNum, std::shared_ptr const& r); @@ -187,10 +259,10 @@ class Executive : public std::enable_shared_from_this { void _squashSuperfluous(); - /// @return previous value of _limitRowComplete while setting it to true. + /// @return previous value of _rowLimitComplete while setting it to true. 
/// This indicates that enough rows have been read to complete the user query /// with a LIMIT clause, and no group by or order by clause. - bool _setLimitRowComplete() { return _limitRowComplete.exchange(true); } + bool _setLimitRowComplete() { return _rowLimitComplete.exchange(true); } // for debugging void _printState(std::ostream& os); @@ -199,28 +271,30 @@ class Executive : public std::enable_shared_from_this { /// The stats are pushed to qdisp::CzarStats. void _updateStats() const; - ExecutiveConfig _config; ///< Personal copy of config + // util::InstanceCount const _icEx{"Executive"}; std::atomic _empty{true}; - std::shared_ptr _messageStore; ///< MessageStore for logging + std::shared_ptr _messageStore; ///< MessageStore for logging - /// RPC interface, static to avoid getting every time a user query starts and separate - /// from _xrdSsiService to avoid conflicts with XrdSsiServiceMock. - XrdSsiService* _xrdSsiService; ///< RPC interface - JobMap _jobMap; ///< Contains information about all jobs. - JobMap _incompleteJobs; ///< Map of incomplete jobs. + JobMap _jobMap; ///< Contains information about all jobs. + JobMap _incompleteJobs; ///< Map of incomplete jobs. /// How many jobs are used in this query. 1 avoids possible 0 of 0 jobs completed race condition. /// The correct value is set when it is available. std::atomic _totalJobs{1}; - QdispPool::Ptr _qdispPool; ///< Shared thread pool for handling commands to and from workers. - std::deque _jobStartCmdList; ///< list of jobs to start. + /// Shared thread pool for handling commands to and from workers. + std::shared_ptr _qdispPool; + + std::deque> _jobStartCmdList; ///< list of jobs to start. /** Execution errors */ util::MultiError _multiError; - std::atomic _requestCount; ///< Count of submitted jobs + std::atomic _requestCount{0}; ///< Count of submitted jobs util::Flag _cancelled{false}; ///< Has execution been cancelled. + /// Set to true when LIMIT conditions have been satisfied. 
+ std::atomic _superfluous{false}; + // Mutexes mutable std::mutex _incompleteJobsMutex; ///< protect incompleteJobs map. @@ -228,11 +302,11 @@ class Executive : public std::enable_shared_from_this { mutable std::mutex _errorsMutex; std::condition_variable _allJobsComplete; - mutable std::recursive_mutex _jobMapMtx; - QueryId _id{0}; ///< Unique identifier for this query. + mutable std::mutex _jobMapMtx; ///< Protects _jobMap. + + QueryId _id = 0; ///< Unique identifier for this query. std::string _idStr{QueryIdHelper::makeIdStr(0, true)}; - // util::InstanceCount _instC{"Executive"}; std::shared_ptr _queryProgress; ///< Query progress, used to update QMeta. std::shared_ptr @@ -240,14 +314,25 @@ class Executive : public std::enable_shared_from_this { /// Last time Executive updated QMeta, defaults to epoch for clock. std::chrono::system_clock::time_point _lastQMetaUpdate; /// Minimum number of seconds between QMeta chunk updates (set by config) - std::chrono::seconds _secondsBetweenQMetaUpdates{60}; + std::chrono::seconds _secondsBetweenQMetaUpdates; std::mutex _lastQMetaMtx; ///< protects _lastQMetaUpdate. - bool _scanInteractive = false; ///< true for interactive scans. + /// true for interactive scans, once set it doesn't change. + bool _scanInteractive = false; + + // Add a job to the _chunkToJobMap + void _addToChunkJobMap(std::shared_ptr const& job); + std::mutex _chunkToJobMapMtx; ///< protects _chunkToJobMap + ChunkIdJobMapType _chunkToJobMap; ///< Map of jobs ordered by chunkId + + /// Map of all UberJobs. Failed UberJobs remain in the map as new ones are created + /// to handle failed UberJobs. + std::map> _uberJobsMap; + mutable std::mutex _uberJobsMapMtx; ///< protects _uberJobs. /// True if enough rows were read to satisfy a LIMIT query with /// no ORDER BY or GROUP BY clauses. 
- std::atomic _limitRowComplete{false}; + std::atomic _rowLimitComplete{false}; std::atomic _totalResultRows{0}; std::weak_ptr _querySession; @@ -257,27 +342,32 @@ class Executive : public std::enable_shared_from_this { /// true if query can be returned as soon as _limit rows have been read. bool _limitSquashApplies = false; + /// Maximum number of time to try running a job before squashing the query. + unsigned int const _jobMaxAttempts; + /// Number of time data has been ignored for for this user query. std::atomic _dataIgnoredCount{0}; -}; -class MarkCompleteFunc { -public: - typedef std::shared_ptr Ptr; + std::atomic _queryIdSet{false}; ///< Set to true when _id is set. - MarkCompleteFunc(Executive::Ptr const& e, int jobId) : _executive(e), _jobId(jobId) {} - virtual ~MarkCompleteFunc() {} + /// Weak pointer to the UserQuerySelect object for this query. + std::weak_ptr _userQuerySelect; - virtual void operator()(bool success) { - auto exec = _executive.lock(); - if (exec != nullptr) { - exec->markCompleted(_jobId, success); - } - } + /// Flag that is set to true when all jobs have been created. + std::atomic _allJobsCreated{false}; -private: - std::weak_ptr _executive; - int _jobId; + protojson::ScanInfo::Ptr _scanInfo; ///< Scan rating and tables. + + std::atomic _totalResultFileSize{0}; ///< Total size of all UberJob result files. + + /// This mutex is used to limit collecting result files to one at a time + /// but only when the executive will squash the query when the limit is reached. + /// This keeps data transfers (and temporary storage requirements) from + /// getting out of hand. + std::mutex _mtxLimitSquash; + + /// Set to true if the result file is too large. 
+ std::atomic _resultFileSizeExceeded{false}; }; } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/JobDescription.cc b/src/qdisp/JobDescription.cc index dfa87e22ac..6329a013d8 100644 --- a/src/qdisp/JobDescription.cc +++ b/src/qdisp/JobDescription.cc @@ -33,11 +33,10 @@ #include "lsst/log/Log.h" // Qserv headers -#include "proto/ProtoImporter.h" -#include "proto/worker.pb.h" +#include "util/Bug.h" +#include "qdisp/Executive.h" #include "qdisp/ResponseHandler.h" #include "qproc/ChunkQuerySpec.h" -#include "qproc/TaskMsgFactory.h" using namespace std; @@ -47,55 +46,47 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobDescription"); namespace lsst::qserv::qdisp { -JobDescription::JobDescription(qmeta::CzarId czarId, QueryId qId, int jobId, ResourceUnit const& resource, - shared_ptr const& respHandler, - shared_ptr const& taskMsgFactory, - shared_ptr const& chunkQuerySpec, - string const& chunkResultName, bool mock) +JobDescription::JobDescription(CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, + shared_ptr const& chunkQuerySpec, bool mock) : _czarId(czarId), _queryId(qId), _jobId(jobId), _qIdStr(QueryIdHelper::makeIdStr(_queryId, _jobId)), _resource(resource), - _respHandler(respHandler), - _taskMsgFactory(taskMsgFactory), _chunkQuerySpec(chunkQuerySpec), - _chunkResultName(chunkResultName), _mock(mock) {} -bool JobDescription::incrAttemptCount() { - ++_attemptCount; - if (_attemptCount > MAX_JOB_ATTEMPTS) { - LOGS(_log, LOG_LVL_ERROR, "attemptCount greater than maximum number of retries " << _attemptCount); - return false; +bool JobDescription::incrAttemptCount(std::shared_ptr const& exec, bool increase) { + if (increase) { + ++_attemptCount; } - buildPayload(); - return true; -} - -void JobDescription::buildPayload() { - ostringstream os; - _taskMsgFactory->serializeMsg(*_chunkQuerySpec, _chunkResultName, _queryId, _jobId, _attemptCount, - _czarId, os); - _payloads[_attemptCount] = os.str(); -} -bool JobDescription::verifyPayload() 
const { - proto::ProtoImporter pi; - if (!_mock && !pi.messageAcceptable(_payloads.at(_attemptCount))) { - LOGS(_log, LOG_LVL_DEBUG, _qIdStr << " Error serializing TaskMsg."); - return false; + if (exec != nullptr) { + int maxAttempts = exec->getMaxAttempts(); + if (_attemptCount > 0) { + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " attempts=" << _attemptCount); + } + if (_attemptCount > maxAttempts) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " attempts(" << _attemptCount << ") > maxAttempts(" << maxAttempts + << ") cancelling"); + exec->addMultiError(util::Error::RETRY_FAILS, util::Error::NONE, + "max attempts for chunk reached " + to_string(_attemptCount) + " " + _qIdStr, + true); + exec->squash(string("incrAttemptCount ") + to_string(_attemptCount)); + return false; + } } return true; } bool JobDescription::getScanInteractive() const { return _chunkQuerySpec->scanInteractive; } -int JobDescription::getScanRating() const { return _chunkQuerySpec->scanInfo.scanRating; } +int JobDescription::getScanRating() const { return _chunkQuerySpec->scanInfo->scanRating; } ostream& operator<<(ostream& os, JobDescription const& jd) { - os << "job(id=" << jd._jobId << " payloads.size=" << jd._payloads.size() << " ru=" << jd._resource.path() - << " attemptCount=" << jd._attemptCount << ")"; + os << "job(id=" << jd._jobId << " ru=" << jd._resource.path() << " attemptCount=" << jd._attemptCount + << ")"; return os; } diff --git a/src/qdisp/JobDescription.h b/src/qdisp/JobDescription.h index 6ecfc2e9a8..42e78cc4db 100644 --- a/src/qdisp/JobDescription.h +++ b/src/qdisp/JobDescription.h @@ -29,89 +29,82 @@ // System headers #include +#include #include +// Third party headers +#include "nlohmann/json.hpp" + // Qserv headers #include "global/constants.h" #include "global/intTypes.h" #include "global/ResourceUnit.h" -#include "qmeta/types.h" // Forward declarations namespace lsst::qserv { namespace qproc { - class ChunkQuerySpec; -class TaskMsgFactory; - } // namespace qproc 
namespace qdisp { +class Executive; class ResponseHandler; -/** Description of a job managed by the executive - */ +/// Description of a job managed by the executive class JobDescription { public: using Ptr = std::shared_ptr; - static JobDescription::Ptr create(qmeta::CzarId czarId, QueryId qId, int jobId, - ResourceUnit const& resource, - std::shared_ptr const& respHandler, - std::shared_ptr const& taskMsgFactory, + static JobDescription::Ptr create(CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, std::shared_ptr const& chunkQuerySpec, - std::string const& chunkResultName, bool mock = false) { - JobDescription::Ptr jd(new JobDescription(czarId, qId, jobId, resource, respHandler, taskMsgFactory, - chunkQuerySpec, chunkResultName, mock)); + bool mock = false) { + JobDescription::Ptr jd(new JobDescription(czarId, qId, jobId, resource, chunkQuerySpec, mock)); return jd; } JobDescription(JobDescription const&) = delete; JobDescription& operator=(JobDescription const&) = delete; - void buildPayload(); ///< Must be run after construction to avoid problems with unit tests. - int id() const { return _jobId; } + virtual ~JobDescription() = default; + + std::string cName(const char* fnc) { return std::string("JobDescription::") + fnc + " " + _qIdStr; } + + JobId id() const { return _jobId; } ResourceUnit const& resource() const { return _resource; } - std::string const& payload() { return _payloads[_attemptCount]; } - std::shared_ptr respHandler() { return _respHandler; } int getAttemptCount() const { return _attemptCount; } + std::shared_ptr getChunkQuerySpec() { return _chunkQuerySpec; } bool getScanInteractive() const; int getScanRating() const; - /// @returns true when _attemptCount is incremented correctly and the payload is built. - bool incrAttemptCount(); - bool verifyPayload() const; ///< @return true if the payload is acceptable to protobufs. + /// Increase the attempt count by 1 and return false if that puts it over the limit. 
+ bool incrAttemptCount(std::shared_ptr const& exec, bool increase); + + std::shared_ptr getJsForWorker() { return _jsForWorker; } + + void resetJsForWorker() { _jsForWorker.reset(); } friend std::ostream& operator<<(std::ostream& os, JobDescription const& jd); private: - JobDescription(qmeta::CzarId czarId, QueryId qId, int jobId, ResourceUnit const& resource, - std::shared_ptr const& respHandler, - std::shared_ptr const& taskMsgFactory, - std::shared_ptr const& chunkQuerySpec, - std::string const& chunkResultName, bool mock = false); - qmeta::CzarId _czarId; + JobDescription(CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, + std::shared_ptr const& chunkQuerySpec, bool mock = false); + + CzarId _czarId; QueryId _queryId; - int _jobId; ///< Job's Id number. + JobId _jobId; ///< Job's Id number. std::string const _qIdStr; int _attemptCount{-1}; ///< Start at -1 so that first attempt will be 0, see incrAttemptCount(). ResourceUnit _resource; ///< path, e.g. /q/LSST/23125 - /// _payloads - encoded requests, one per attempt. No guarantee that xrootd is done - /// with the payload buffer, so hang onto all of them until the query is finished. - /// Also, using a map so the strings wont be moved. - /// The xrootd callback function QueryRequest::GetRequest should - /// return something other than a char*. - std::map _payloads; - std::shared_ptr _respHandler; // probably MergingHandler - std::shared_ptr _taskMsgFactory; std::shared_ptr _chunkQuerySpec; - std::string _chunkResultName; bool _mock{false}; ///< True if this is a mock in a unit test. + + /// The information the worker needs to run this job. Reset once sent. 
+ std::shared_ptr _jsForWorker; }; std::ostream& operator<<(std::ostream& os, JobDescription const& jd); diff --git a/src/qdisp/JobQuery.cc b/src/qdisp/JobQuery.cc index 03894965f0..b625b9e2bf 100644 --- a/src/qdisp/JobQuery.cc +++ b/src/qdisp/JobQuery.cc @@ -34,124 +34,49 @@ // Qserv headers #include "global/LogContext.h" #include "qdisp/Executive.h" -#include "qdisp/QueryRequest.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobQuery"); } // anonymous namespace +using namespace std; + namespace lsst::qserv::qdisp { JobQuery::JobQuery(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, std::shared_ptr const& markCompleteFunc, - QueryId qid) + qmeta::JobStatus::Ptr const& jobStatus, QueryId qid) : _executive(executive), _jobDescription(jobDescription), - _markCompleteFunc(markCompleteFunc), _jobStatus(jobStatus), _qid(qid), - _idStr(QueryIdHelper::makeIdStr(qid, getIdInt())) { - _qdispPool = executive->getQdispPool(); + _idStr(QueryIdHelper::makeIdStr(qid, getJobId())) { LOGS(_log, LOG_LVL_TRACE, "JobQuery desc=" << _jobDescription); } -JobQuery::~JobQuery() { LOGS(_log, LOG_LVL_DEBUG, "~JobQuery"); } - -/** Attempt to run the job on a worker. - * @return - false if it can not setup the job or the maximum number of attempts has been reached. 
- */ -bool JobQuery::runJob() { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); - LOGS(_log, LOG_LVL_DEBUG, " runJob " << *this); - auto executive = _executive.lock(); - if (executive == nullptr) { - LOGS(_log, LOG_LVL_ERROR, "runJob failed executive==nullptr"); - return false; - } - bool superfluous = executive->isLimitRowComplete(); - bool cancelled = executive->getCancelled(); - bool handlerReset = _jobDescription->respHandler()->reset(); - if (!(cancelled || superfluous) && handlerReset) { - auto criticalErr = [this, &executive](std::string const& msg) { - LOGS(_log, LOG_LVL_ERROR, msg << " " << _jobDescription << " Canceling user query!"); - executive->squash(); // This should kill all jobs in this user query. - }; - - LOGS(_log, LOG_LVL_DEBUG, "runJob checking attempt=" << _jobDescription->getAttemptCount()); - std::lock_guard lock(_rmutex); - if (_jobDescription->getAttemptCount() < _getMaxAttempts()) { - bool okCount = _jobDescription->incrAttemptCount(); - if (!okCount) { - criticalErr("hit structural max of retries"); - return false; - } - if (!_jobDescription->verifyPayload()) { - criticalErr("bad payload"); - return false; - } - } else { - LOGS(_log, LOG_LVL_DEBUG, "runJob max retries"); - criticalErr("hit maximum number of retries"); - return false; - } - - // At this point we are all set to actually run the query. We create a - // a shared pointer to this object to prevent it from escaping while we - // are trying to start this whole process. We also make sure we record - // whether or not we are in SSI as cancellation handling differs. - // - LOGS(_log, LOG_LVL_TRACE, "runJob calls StartQuery()"); - std::shared_ptr jq(shared_from_this()); - _inSsi = true; - if (executive->startQuery(jq)) { - _jobStatus->updateInfo(_idStr, JobStatus::REQUEST, "EXEC"); - return true; - } - _inSsi = false; - } - LOGS(_log, (superfluous ? LOG_LVL_DEBUG : LOG_LVL_WARN), - "runJob failed. 
cancelled=" << cancelled << " reset=" << handlerReset); - return false; -} +JobQuery::~JobQuery() { LOGS(_log, LOG_LVL_TRACE, "~JobQuery QID=" << _idStr); } /// Cancel response handling. Return true if this is the first time cancel has been called. -bool JobQuery::cancel(bool superfluous) { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); - LOGS(_log, LOG_LVL_DEBUG, "JobQuery::cancel()"); +bool JobQuery::cancel(bool superfluous, bool logLvlErr) { + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); if (_cancelled.exchange(true) == false) { - std::lock_guard lock(_rmutex); - // If _inSsi is true then this query request has been passed to SSI and - // _queryRequestPtr cannot be a nullptr. Cancellation is complicated. - bool cancelled = false; - if (_inSsi) { - LOGS(_log, LOG_LVL_DEBUG, "cancel QueryRequest in progress"); - if (_queryRequestPtr->cancel()) { - LOGS(_log, LOG_LVL_DEBUG, "cancelled by QueryRequest"); - cancelled = true; - } else { - LOGS(_log, LOG_LVL_DEBUG, "QueryRequest could not cancel"); - } - } - if (!cancelled) { - std::ostringstream os; - os << _idStr << " cancel QueryRequest=" << _queryRequestPtr; - LOGS(_log, LOG_LVL_DEBUG, os.str()); - if (!superfluous) { - getDescription()->respHandler()->errorFlush(os.str(), -1); - } - auto executive = _executive.lock(); - if (executive == nullptr) { - LOGS(_log, LOG_LVL_ERROR, " can't markComplete cancelled, executive == nullptr"); - return false; - } - executive->markCompleted(getIdInt(), false); + LOGS(_log, LOG_LVL_TRACE, "JobQuery::cancel() " << superfluous); + VMUTEX_NOT_HELD(_jqMtx); + lock_guard lock(_jqMtx); + + string const context = _idStr + " job cancel"; + LOGS(_log, LOG_LVL_DEBUG, context); + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_ERROR, " can't markComplete cancelled, executive == nullptr"); + return false; } if (!superfluous) { - _jobDescription->respHandler()->processCancel(); + exec->addMultiError(util::Error::CANCEL, util::Error::JOB_CANCEL, 
context, logLvlErr); } + exec->markCompleted(getJobId(), false); return true; } - LOGS(_log, LOG_LVL_TRACE, "cancel, skipping, already cancelled."); + LOGS(_log, LOG_LVL_TRACE, "JobQuery::cancel, skipping, already cancelled."); return false; } @@ -160,7 +85,7 @@ bool JobQuery::cancel(bool superfluous) { /// cancelling all the jobs that it makes a difference. If either the executive, /// or the job has cancelled, proceeding is probably not a good idea. bool JobQuery::isQueryCancelled() { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); auto exec = _executive.lock(); if (exec == nullptr) { LOGS(_log, LOG_LVL_WARN, "_executive == nullptr"); @@ -169,8 +94,83 @@ bool JobQuery::isQueryCancelled() { return exec->getCancelled(); } -std::ostream& operator<<(std::ostream& os, JobQuery const& jq) { - return os << "{" << jq.getIdStr() << jq._jobDescription << " " << *jq._jobStatus << "}"; +bool JobQuery::_setUberJobId(UberJobId ujId) { + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); + VMUTEX_HELD(_jqMtx); + if (_uberJobId >= 0 && ujId != _uberJobId) { + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " couldn't change UberJobId as ujId=" << ujId << " is owned by " << _uberJobId); + return false; + } + _uberJobId = ujId; + return true; } +bool JobQuery::unassignFromUberJob(UberJobId ujId) { + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); + VMUTEX_NOT_HELD(_jqMtx); + lock_guard lock(_jqMtx); + if (_uberJobId < 0) { + LOGS(_log, LOG_LVL_INFO, __func__ << " UberJobId already unassigned. attempt by ujId=" << ujId); + return true; + } + if (_uberJobId != ujId) { + LOGS(_log, LOG_LVL_ERROR, + __func__ << " couldn't change UberJobId as ujId=" << ujId << " is owned by " << _uberJobId); + return false; + } + _uberJobId = -1; + + auto exec = _executive.lock(); + // Do not increase the attempt count as it should have been increased when the job was started. 
+ return true; +} + +void JobQuery::avoidWorker(protojson::WorkerContactInfo::Ptr const& workerContactInfo, + TIMEPOINT familyMapTime) { + VMUTEX_NOT_HELD(_jqMtx); + lock_guard lock(_jqMtx); + _workerAvoidMap[workerContactInfo->wId] = make_pair(workerContactInfo, familyMapTime); +} + +bool JobQuery::isWorkerInAvoidMap(protojson::WorkerContactInfo::Ptr const& workerContactInfo, + TIMEPOINT familyMapTime) { + if (workerContactInfo == nullptr) return false; + VMUTEX_NOT_HELD(_jqMtx); + lock_guard lock(_jqMtx); + auto iter = _workerAvoidMap.find(workerContactInfo->wId); + if (iter == _workerAvoidMap.end()) return false; + WorkerAvoidType const& wat = iter->second; + if (wat.second < familyMapTime) { + // There's a newer family map making this obsolete. + _workerAvoidMap.erase(iter); + return false; + } + protojson::WorkerContactInfo::Ptr const wci = wat.first.lock(); + if (wci == nullptr || wci->wId != workerContactInfo->wId) { + // Original worker information has changed. + _workerAvoidMap.erase(iter); + return false; + } + return true; +} + +int JobQuery::getAttemptCount() const { + VMUTEX_NOT_HELD(_jqMtx); + lock_guard lock(_jqMtx); + return _jobDescription->getAttemptCount(); +} + +ostream& JobQuery::dumpOS(ostream& os) const { + return os << "{" << getIdStr() << _jobDescription << " " << _jobStatus << "}"; +} + +std::string JobQuery::dump() const { + std::ostringstream os; + dumpOS(os); + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, JobQuery const& jq) { return jq.dumpOS(os); } + } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/JobQuery.h b/src/qdisp/JobQuery.h index f23544c212..2279d39962 100644 --- a/src/qdisp/JobQuery.h +++ b/src/qdisp/JobQuery.h @@ -36,100 +36,130 @@ #include "qdisp/JobDescription.h" #include "qdisp/ResponseHandler.h" #include "util/InstanceCount.h" +#include "util/Mutex.h" namespace lsst::qserv::qdisp { -class QdispPool; class QueryRequest; -/** This class is used to describe, monitor, and control a single query 
to a worker. - * - */ -class JobQuery : public std::enable_shared_from_this { +/// This class is used to describe and monitor the queries for a +/// chunk on the worker. +class JobQuery { public: typedef std::shared_ptr Ptr; /// Factory function to make certain a shared_ptr is used and _setup is called. static JobQuery::Ptr create(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, - std::shared_ptr const& markCompleteFunc, QueryId qid) { - Ptr jq = std::make_shared(executive, jobDescription, jobStatus, markCompleteFunc, qid); - jq->_setup(); + qmeta::JobStatus::Ptr const& jobStatus, QueryId qid) { + Ptr jq = Ptr(new JobQuery(executive, jobDescription, jobStatus, qid)); return jq; } virtual ~JobQuery(); - virtual bool runJob(); QueryId getQueryId() const { return _qid; } - int getIdInt() const { return _jobDescription->id(); } + JobId getJobId() const { return _jobDescription->id(); } std::string const& getIdStr() const { return _idStr; } JobDescription::Ptr getDescription() { return _jobDescription; } - std::shared_ptr getRespHandler() { return _jobDescription->respHandler(); } - JobStatus::Ptr getStatus() { return _jobStatus; } + qmeta::JobStatus::Ptr getStatus() { return _jobStatus; } + + /// Cancel this job. + /// @param superfluous - true means this job is part of a LIMIT query + /// and the result of this job isn't needed to satisfy the LIMIT. + /// @param logLvlErr - put this in the log at log level ERROR when true. + /// There can be 100K+ of these, making the log hard to read. + bool cancel(bool superfluous = false, bool logLvlErr = true); + bool isQueryCancelled(); + + std::shared_ptr getExecutive() { return _executive.lock(); } - void setQueryRequest(std::shared_ptr const& qr) { - std::lock_guard lock(_rmutex); - _queryRequestPtr = qr; + /// If the UberJob is unassigned, change the _uberJobId to ujId. 
+ bool setUberJobId(UberJobId ujId) { + VMUTEX_NOT_HELD(_jqMtx); + std::lock_guard lock(_jqMtx); + return _setUberJobId(ujId); } - std::shared_ptr getQueryRequest() { - std::lock_guard lock(_rmutex); - return _queryRequestPtr; + + UberJobId getUberJobId() const { + VMUTEX_NOT_HELD(_jqMtx); + std::lock_guard lock(_jqMtx); + return _getUberJobId(); } - std::shared_ptr getMarkCompleteFunc() { return _markCompleteFunc; } + bool isInUberJob() const { + VMUTEX_NOT_HELD(_jqMtx); + std::lock_guard lock(_jqMtx); + return _isInUberJob(); + } - bool cancel(bool superfluous = false); - bool isQueryCancelled(); + int getAttemptCount() const; - Executive::Ptr getExecutive() { return _executive.lock(); } + /// If ujId is the current owner, clear ownership. + /// @return true if job is unassigned. + bool unassignFromUberJob(UberJobId ujId); - std::shared_ptr getQdispPool() { return _qdispPool; } + /// The query wasn't found on the `worker` using the FamilyMap from `familyMapTime` + void avoidWorker(std::shared_ptr const& workerContactInfo, + TIMEPOINT familyMapTime); + /// Return true if the worker was previously missing this chunk and using the same map. + /// This function will remove dead worker entries. + bool isWorkerInAvoidMap(std::shared_ptr const& workerContactInfo, + TIMEPOINT familyMapTime); + + std::ostream& dumpOS(std::ostream& os) const; + std::string dump() const; friend std::ostream& operator<<(std::ostream& os, JobQuery const& jq); +protected: /// Make a copy of the job description. JobQuery::_setup() must be called after creation. /// Do not call this directly, use create. JobQuery(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, std::shared_ptr const& markCompleteFunc, - QueryId qid); + qmeta::JobStatus::Ptr const& jobStatus, QueryId qid); - bool isCancelled() { return _cancelled; } + /// @return true if _uberJobId was set, it can only be set if it is unassigned + /// or by the current owner. 
+ /// NOTE: _jqMtx must be held before calling this + bool _setUberJobId(UberJobId ujId); -protected: - void _setup() { _jobDescription->respHandler()->setJobQuery(shared_from_this()); } + /// NOTE: _jqMtx must be held before calling this + UberJobId _getUberJobId() const { + VMUTEX_HELD(_jqMtx); + return _uberJobId; + } - int _getRunAttemptsCount() const { - std::lock_guard lock(_rmutex); - return _jobDescription->getAttemptCount(); + /// NOTE: _jqMtx must be held before calling this + bool _isInUberJob() const { + VMUTEX_HELD(_jqMtx); + return _uberJobId >= 0; } - int _getMaxAttempts() const { return 5; } // Arbitrary value until solid value with reason determined. - int _getAttemptSleepSeconds() const { return 30; } // As above or until added to config file. // Values that don't change once set. std::weak_ptr _executive; /// The job description needs to survive until the task is complete. JobDescription::Ptr _jobDescription; - std::shared_ptr _markCompleteFunc; // JobStatus has its own mutex. - JobStatus::Ptr _jobStatus; ///< Points at status in Executive::_statusMap + qmeta::JobStatus::Ptr _jobStatus; ///< Points at status in Executive::_statusMap QueryId const _qid; // User query id std::string const _idStr; ///< Identifier string for logging. // Values that need mutex protection - mutable std::recursive_mutex _rmutex; ///< protects _jobDescription, - ///< _queryRequestPtr, and _inSsi - - // SSI items - std::shared_ptr _queryRequestPtr; - bool _inSsi{false}; + mutable MUTEX _jqMtx; ///< protects _jobDescription, _queryRequestPtr, _uberJobId // Cancellation std::atomic _cancelled{false}; ///< Lock to make sure cancel() is only called once. - std::shared_ptr _qdispPool; + typedef std::pair, TIMEPOINT> WorkerAvoidType; + std::map _workerAvoidMap; + + /// The UberJobId that this job is assigned to. Values less than zero + /// indicate this job is unassigned. 
To prevent race conditions, + /// an UberJob may only unassign a job if it has the same ID as + /// _uberJobId. + /// All jobs must be unassigned before they can be reassigned. + UberJobId _uberJobId = -1; }; } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/QueryRequest.cc b/src/qdisp/QueryRequest.cc deleted file mode 100644 index bc78a137e0..0000000000 --- a/src/qdisp/QueryRequest.cc +++ /dev/null @@ -1,477 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -/** - * @file - * - * @brief QueryRequest. XrdSsiRequest impl for czar query dispatch - * - * @author Daniel L. 
Wang, SLAC - */ - -// Class header -#include "qdisp/QdispPool.h" -#include "qdisp/QueryRequest.h" - -// System headers -#include -#include -#include - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "cconfig/CzarConfig.h" -#include "czar/Czar.h" -#include "qdisp/CzarStats.h" -#include "global/LogContext.h" -#include "http/Client.h" -#include "http/ClientConnPool.h" -#include "http/Method.h" -#include "proto/worker.pb.h" -#include "qdisp/JobStatus.h" -#include "qdisp/ResponseHandler.h" -#include "util/Bug.h" -#include "util/common.h" -#include "util/InstanceCount.h" -#include "util/Timer.h" - -namespace http = lsst::qserv::http; -namespace qdisp = lsst::qserv::qdisp; - -using namespace std; - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.QueryRequest"); - -/** - * The RAII class for removing the HTTP file (if it still exist). - * @note Errors are logged, but not reported to the caller. - */ -class HttpFileRemoverRAII { -public: - HttpFileRemoverRAII() = delete; - HttpFileRemoverRAII(HttpFileRemoverRAII const&) = delete; - HttpFileRemoverRAII& operator=(HttpFileRemoverRAII const&) = delete; - HttpFileRemoverRAII(string const& httpUrl) : _httpUrl(httpUrl) {} - - ~HttpFileRemoverRAII() { - if (_httpUrl.empty()) return; - string const noClientData; - vector const noClientHeaders; - try { - http::Client remover(http::Method::DELETE, _httpUrl, noClientData, noClientHeaders, - qdisp::QueryRequest::makeHttpClientConfig(), - qdisp::QueryRequest::getHttpConnPool()); - remover.read([](char const* ptr, size_t size) -> size_t { return size; }); - } catch (exception const& ex) { - LOGS(_log, LOG_LVL_WARN, - "HttpFileRemoverRAII failed to remove " << _httpUrl << ", ex: " << ex.what()); - } - } - -private: - string const _httpUrl; -}; -} // namespace - -namespace lsst::qserv::qdisp { - -shared_ptr QueryRequest::_httpConnPool; -mutex QueryRequest::_httpConnPoolMutex; - -http::ClientConfig QueryRequest::makeHttpClientConfig() { - 
http::ClientConfig clientConfig; - clientConfig.httpVersion = CURL_HTTP_VERSION_1_1; // same as in qhttp - clientConfig.bufferSize = CURL_MAX_READ_SIZE; // 10 MB in the current version of libcurl - clientConfig.tcpKeepAlive = true; - clientConfig.tcpKeepIdle = 5; // the default is 60 sec - clientConfig.tcpKeepIntvl = 5; // the default is 60 sec - return clientConfig; -} - -shared_ptr const& QueryRequest::getHttpConnPool() { - lock_guard const lock(_httpConnPoolMutex); - if (nullptr == _httpConnPool) { - _httpConnPool = make_shared( - cconfig::CzarConfig::instance()->getResultMaxHttpConnections()); - } - return _httpConnPool; -} -QueryRequest::QueryRequest(JobQuery::Ptr const& jobQuery) - : _jobQuery(jobQuery), - _qid(jobQuery->getQueryId()), - _jobid(jobQuery->getIdInt()), - _jobIdStr(jobQuery->getIdStr()), - _qdispPool(_jobQuery->getQdispPool()) { - QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); - LOGS(_log, LOG_LVL_TRACE, "New QueryRequest"); -} - -QueryRequest::~QueryRequest() { - QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); - LOGS(_log, LOG_LVL_TRACE, __func__); - if (!_finishedCalled.exchange(true)) { - LOGS(_log, LOG_LVL_WARN, __func__ << " cleaning up calling Finished"); - bool ok = Finished(); - if (!ok) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " Finished NOT ok"); - } - } -} - -// content of request data -char* QueryRequest::GetRequest(int& requestLength) { - QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); - lock_guard lock(_finishStatusMutex); - auto jq = _jobQuery; - if (_finishStatus != ACTIVE || jq == nullptr) { - LOGS(_log, LOG_LVL_DEBUG, __func__ << " called after job finished (cancelled?)"); - requestLength = 0; - return const_cast(""); - } - requestLength = jq->getDescription()->payload().size(); - LOGS(_log, LOG_LVL_DEBUG, "Requesting, payload size: " << requestLength); - // Andy promises that his code won't corrupt it. - return const_cast(jq->getDescription()->payload().data()); -} - -// Must not throw exceptions: calling thread cannot trap them. 
-// Callback function for XrdSsiRequest. -// -bool QueryRequest::ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo const& rInfo) { - QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); - LOGS(_log, LOG_LVL_DEBUG, "workerName=" << GetEndPoint() << " " << __func__); - string errorDesc = _jobIdStr + " "; - if (isQueryCancelled()) { - LOGS(_log, LOG_LVL_WARN, __func__ << " job already cancelled"); - cancel(); // calls _errorFinish() - return true; - } - - // Make a copy of the _jobQuery shared_ptr in case _jobQuery gets reset by a call to cancel() - auto jq = _jobQuery; - { - lock_guard lock(_finishStatusMutex); - if ((_finishStatus != ACTIVE) || (jq == nullptr)) { - LOGS(_log, LOG_LVL_WARN, __func__ << " called after job finished (cancelled?)"); - return true; - } - } - if (eInfo.hasError()) { - ostringstream os; - os << _jobIdStr << __func__ << " request failed " << getSsiErr(eInfo, nullptr) << " " - << GetEndPoint(); - jq->getDescription()->respHandler()->errorFlush(os.str(), -1); - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_ERROR, "SSI"); - _errorFinish(); - return true; - } - - string responseTypeName; // for error reporting - switch (rInfo.rType) { - case XrdSsiRespInfo::isNone: - responseTypeName = "isNone"; - break; - case XrdSsiRespInfo::isData: - if (string(rInfo.buff, rInfo.blen) == "MockResponse") { - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::COMPLETE, "MOCK"); - _finish(); - return true; - } else if (rInfo.blen == 0) { - // Metadata-only responses for the file-based protocol should not have any data - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_READY, "SSI"); - return _importResultFile(jq); - } - responseTypeName = "isData"; - break; - case XrdSsiRespInfo::isError: - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_ERROR, "SSI", rInfo.eNum, - string(rInfo.eMsg)); - return _importError(string(rInfo.eMsg), rInfo.eNum); - case XrdSsiRespInfo::isFile: - responseTypeName = "isFile"; - break; - case 
XrdSsiRespInfo::isStream: - responseTypeName = "isStream"; - break; - default: - responseTypeName = ""; - } - return _importError("Unexpected XrdSsiRespInfo.rType == " + responseTypeName, -1); -} - -/// Retrieve and process a result file using the file-based protocol -/// Uses a copy of JobQuery::Ptr instead of _jobQuery as a call to cancel() would reset _jobQuery. -bool QueryRequest::_importResultFile(JobQuery::Ptr const& jq) { - // The message needs to be parsed to extract the response summary. - int messageSize = 0; - const char* message = GetMetadata(messageSize); - - LOGS(_log, LOG_LVL_DEBUG, __func__ << " _jobIdStr=" << _jobIdStr << ", messageSize=" << messageSize); - - proto::ResponseSummary resp; - if (!(resp.ParseFromArray(message, messageSize) && resp.IsInitialized())) { - string const err = "failed to parse the response summary, messageSize=" + to_string(messageSize); - LOGS(_log, LOG_LVL_ERROR, __func__ << " " << err); - throw util::Bug(ERR_LOC, err); - } - - // The file gets removed regardless of the outcome of the merge operation. - HttpFileRemoverRAII const fileRemover(resp.fileresource_http()); - - // It's possible jq and _jobQuery differ, so need to use jq. 
- if (jq->isQueryCancelled()) { - LOGS(_log, LOG_LVL_WARN, "QueryRequest::_processData job was cancelled."); - _errorFinish(true); - return false; - } - auto executive = jq->getExecutive(); - if (executive == nullptr || executive->getCancelled() || executive->isLimitRowComplete()) { - if (executive == nullptr || executive->getCancelled()) { - LOGS(_log, LOG_LVL_WARN, "QueryRequest::_processData job was cancelled."); - } else { - int dataIgnored = (executive->incrDataIgnoredCount()); - if ((dataIgnored - 1) % 1000 == 0) { - LOGS(_log, LOG_LVL_INFO, - "QueryRequest::_processData ignoring, enough rows already " << "dataIgnored=" - << dataIgnored); - } - } - _errorFinish(true); - return false; - } - if (!jq->getDescription()->respHandler()->flush(resp)) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " not flushOk"); - _flushError(jq); - return false; - } - _totalRows += resp.rowcount(); - - // If the query meets the limit row complete complete criteria, it will start - // squashing superfluous results so the answer can be returned quickly. - // This needs to be done before marking the current job as complete. - executive->addResultRows(_totalRows); - executive->checkLimitRowComplete(); - - // At this point all data for this job have been read, there's no point in - // having XrdSsi wait for anything. - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::COMPLETE, "COMPLETE"); - _finish(); - - return true; -} - -/// Process an incoming error. 
-bool QueryRequest::_importError(string const& msg, int code) { - auto jq = _jobQuery; - { - lock_guard lock(_finishStatusMutex); - if (_finishStatus != ACTIVE || jq == nullptr) { - LOGS(_log, LOG_LVL_WARN, - "QueryRequest::_importError code=" << code << " msg=" << msg << " not passed"); - return false; - } - jq->getDescription()->respHandler()->errorFlush(msg, code); - } - _errorFinish(); - return true; -} - -void QueryRequest::ProcessResponseData(XrdSsiErrInfo const& eInfo, char* buff, int blen, bool last) { - string const err = "the method has no use in this implementation of Qserv"; - LOGS(_log, LOG_LVL_ERROR, __func__ << " " << err); - throw util::Bug(ERR_LOC, err); -} - -void QueryRequest::_flushError(JobQuery::Ptr const& jq) { - ResponseHandler::Error err = jq->getDescription()->respHandler()->getError(); - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::MERGE_ERROR, "MERGE", err.getCode(), err.getMsg(), - MSG_ERROR); - _errorFinish(true); -} - -/// @return true if QueryRequest cancelled successfully. -bool QueryRequest::cancel() { - LOGS(_log, LOG_LVL_DEBUG, "QueryRequest::cancel"); - { - lock_guard lock(_finishStatusMutex); - if (_cancelled) { - LOGS(_log, LOG_LVL_DEBUG, "QueryRequest::cancel already cancelled, ignoring"); - return false; // Don't do anything if already cancelled. - } - _cancelled = true; - _retried = true; // Prevent retries. - // Only call the following if the job is NOT already done. - if (_finishStatus == ACTIVE) { - auto jq = _jobQuery; - if (jq != nullptr) jq->getStatus()->updateInfo(_jobIdStr, JobStatus::CANCEL, "CANCEL"); - } - } - return _errorFinish(true); // return true if errorFinish cancelled -} - -/// @return true if this object's JobQuery, or its Executive has been cancelled. -/// It takes time for the Executive to flag all jobs as being cancelled -bool QueryRequest::isQueryCancelled() { - auto jq = _jobQuery; - if (jq == nullptr) { - // Need to check if _jobQuery is null due to cancellation. 
- return isQueryRequestCancelled(); - } - return jq->isQueryCancelled(); -} - -/// @return true if QueryRequest::cancel() has been called. -/// QueryRequest::isQueryCancelled() is a much better indicator of user query cancellation. -bool QueryRequest::isQueryRequestCancelled() { - lock_guard lock(_finishStatusMutex); - return _cancelled; -} - -/// Cleanup pointers so this class can be deleted. -/// This should only be called by _finish or _errorFinish. -void QueryRequest::cleanup() { - LOGS(_log, LOG_LVL_TRACE, "QueryRequest::cleanup()"); - { - lock_guard lock(_finishStatusMutex); - if (_finishStatus == ACTIVE) { - LOGS(_log, LOG_LVL_ERROR, "QueryRequest::cleanup called before _finish or _errorFinish"); - return; - } - } - - // These need to be outside the mutex lock, or you could delete - // _finishStatusMutex before it is unlocked. - // This should reset _jobquery and _keepAlive without risk of either being deleted - // before being reset. - _jobQuery = nullptr; - _keepAlive = nullptr; -} - -/// Finalize under error conditions and retry or report completion -/// THIS FUNCTION WILL RESULT IN THIS OBJECT BEING DESTROYED, UNLESS there is -/// a local shared pointer for this QueryRequest and/or its owner JobQuery. -/// See QueryRequest::cleanup() -/// @return true if this QueryRequest object had the authority to make changes. -bool QueryRequest::_errorFinish(bool stopTrying) { - LOGS(_log, LOG_LVL_DEBUG, "_errorFinish() shouldCancel=" << stopTrying); - auto jq = _jobQuery; - { - // Running _errorFinish more than once could cause errors. - lock_guard lock(_finishStatusMutex); - if (_finishStatus != ACTIVE || jq == nullptr) { - // Either _finish or _errorFinish has already been called. - LOGS_DEBUG("_errorFinish() job no longer ACTIVE, ignoring " - << " _finishStatus=" << _finishStatus << " ACTIVE=" << ACTIVE << " jq=" << jq); - return false; - } - _finishStatus = ERROR; - } - - // Make the calls outside of the mutex lock. 
- LOGS(_log, LOG_LVL_DEBUG, "calling Finished(stopTrying=" << stopTrying << ")"); - bool ok = Finished(); - if (_finishedCalled.exchange(true)) { - LOGS(_log, LOG_LVL_WARN, "QueryRequest::_errorFinish Finished() already called"); - } - if (!ok) { - LOGS(_log, LOG_LVL_ERROR, "QueryRequest::_errorFinish NOT ok"); - } - - if (!_retried.exchange(true) && !stopTrying) { - // There's a slight race condition here. _jobQuery::runJob() creates a - // new QueryRequest object which will replace this one in _jobQuery. - // The replacement could show up before this one's cleanup() is called, - // so this will keep this alive. - LOGS(_log, LOG_LVL_DEBUG, "QueryRequest::_errorFinish retrying"); - _keepAlive = jq->getQueryRequest(); // shared pointer to this - if (!jq->runJob()) { - // Retry failed, nothing left to try. - LOGS(_log, LOG_LVL_DEBUG, "errorFinish retry failed"); - _callMarkComplete(false); - } - } else { - _callMarkComplete(false); - } - cleanup(); // Reset smart pointers so this object can be deleted. - return true; -} - -/// Finalize under success conditions and report completion. -/// THIS FUNCTION WILL RESULT IN THIS OBJECT BEING DESTROYED, UNLESS there is -/// a local shared pointer for this QueryRequest and/or its owner JobQuery. -/// See QueryRequest::cleanup() -void QueryRequest::_finish() { - LOGS(_log, LOG_LVL_TRACE, "QueryRequest::_finish"); - { - // Running _finish more than once would cause errors. - lock_guard lock(_finishStatusMutex); - if (_finishStatus != ACTIVE) { - // Either _finish or _errorFinish has already been called. 
- LOGS(_log, LOG_LVL_WARN, "QueryRequest::_finish called when not ACTIVE, ignoring"); - return; - } - _finishStatus = FINISHED; - } - - bool ok = Finished(); - if (_finishedCalled.exchange(true)) { - LOGS(_log, LOG_LVL_WARN, "QueryRequest::finish Finished() already called"); - } - if (!ok) { - LOGS(_log, LOG_LVL_ERROR, "QueryRequest::finish Finished() !ok "); - } - _callMarkComplete(true); - cleanup(); -} - -/// Inform the Executive that this query completed, and -// Call MarkCompleteFunc only once, it should only be called from _finish() or _errorFinish. -void QueryRequest::_callMarkComplete(bool success) { - if (!_calledMarkComplete.exchange(true)) { - auto jq = _jobQuery; - if (jq != nullptr) jq->getMarkCompleteFunc()->operator()(success); - } -} - -ostream& operator<<(ostream& os, QueryRequest const& qr) { - os << "QueryRequest " << qr._jobIdStr; - return os; -} - -/// @return The error text and code that SSI set. -/// if eCode != nullptr, it is set to the error code set by SSI. -string QueryRequest::getSsiErr(XrdSsiErrInfo const& eInfo, int* eCode) { - int errNum; - string errText = eInfo.Get(errNum); - if (eCode != nullptr) { - *eCode = errNum; - } - ostringstream os; - os << "SSI_Error(" << errNum << ":" << errText << ")"; - return os.str(); -} - -} // namespace lsst::qserv::qdisp diff --git a/src/qdisp/QueryRequest.h b/src/qdisp/QueryRequest.h deleted file mode 100644 index 4a6283f368..0000000000 --- a/src/qdisp/QueryRequest.h +++ /dev/null @@ -1,180 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_QDISP_QUERYREQUEST_H -#define LSST_QSERV_QDISP_QUERYREQUEST_H - -// System headers -#include -#include -#include -#include -#include -#include - -// Third-party headers -#include "XrdSsi/XrdSsiRequest.hh" - -// Local headers -#include "czar/Czar.h" -#include "qdisp/JobQuery.h" -#include "qdisp/QdispPool.h" - -// Forward declarations - -namespace lsst::qserv::http { -class ClientConfig; -class ClientConnPool; -} // namespace lsst::qserv::http - -namespace lsst::qserv::qdisp { - -/// Bad response received from SSI API -class BadResponseError : public std::exception { -public: - BadResponseError(std::string const& s_) : std::exception(), s("BadResponseError:" + s_) {} - virtual ~BadResponseError() throw() {} - virtual const char* what() const throw() { return s.c_str(); } - std::string s; -}; - -/// Error in QueryRequest -class RequestError : public std::exception { -public: - RequestError(std::string const& s_) : std::exception(), s("QueryRequest error:" + s_) {} - virtual ~RequestError() throw() {} - virtual const char* what() const throw() { return s.c_str(); } - std::string s; -}; - -/// A client implementation of an XrdSsiRequest that adapts qserv's executing -/// queries to the XrdSsi API. -/// -/// Memory allocation notes: -/// In the XrdSsi API, raw pointers are passed around for XrdSsiRequest objects, -/// and care needs to be taken to avoid deleting the request objects before -/// Finished() is called. Typically, an XrdSsiRequest subclass is allocated with -/// operator new, and passed into XrdSsi. 
At certain points in the transaction, -/// XrdSsi will call methods in the request object or hand back the request -/// object pointer. XrdSsi ceases interest in the object once the -/// XrdSsiRequest::Finished() completes. Generally, this would mean the -/// QueryRequest should clean itself up after calling Finished(). This requires -/// special care, because there is a cancellation function in the wild that may -/// call into QueryRequest after Finished() has been called. The cancellation -/// code is -/// designed to allow the client requester (elsewhere in qserv) to request -/// cancellation without knowledge of XrdSsi, so the QueryRequest registers a -/// cancellation function with its client that maintains a pointer to the -/// QueryRequest. After Finished(), the cancellation function must be prevented -/// from accessing the QueryRequest instance. -class QueryRequest : public XrdSsiRequest, public std::enable_shared_from_this { -public: - typedef std::shared_ptr Ptr; - - static http::ClientConfig makeHttpClientConfig(); - static std::shared_ptr const& getHttpConnPool(); - - static Ptr create(std::shared_ptr const& jobQuery) { - Ptr newQueryRequest(new QueryRequest(jobQuery)); - newQueryRequest->_keepAlive = newQueryRequest; - return newQueryRequest; - } - - virtual ~QueryRequest(); - - /// Called by SSI to get the request payload - /// @return content of request data - char* GetRequest(int& requestLength) override; - - /// Called by SSI to release the allocated request payload. As we don't - /// own the buffer, so we can't release it. Therefore, we accept the - /// default implementation that does nothing. - /// void RelRequestBuffer() override; - - /// Called by SSI when a response is ready - /// precondition: rInfo.rType != isNone - bool ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo const& rInfo) override; - - /// Called by SSI when new data is available. 
- void ProcessResponseData(XrdSsiErrInfo const& eInfo, char* buff, int blen, bool last) override; - - bool cancel(); - bool isQueryCancelled(); - bool isQueryRequestCancelled(); - void doNotRetry() { _retried.store(true); } - std::string getSsiErr(XrdSsiErrInfo const& eInfo, int* eCode); - void cleanup(); ///< Must be called when this object is no longer needed. - - friend std::ostream& operator<<(std::ostream& os, QueryRequest const& r); - -private: - // Private constructor to safeguard enable_shared_from_this construction. - QueryRequest(std::shared_ptr const& jobQuery); - - void _callMarkComplete(bool success); - bool _importResultFile(JobQuery::Ptr const& jq); - bool _importError(std::string const& msg, int code); - bool _errorFinish(bool stopTrying = false); - void _finish(); - void _flushError(JobQuery::Ptr const& jq); - - // All instances of the HTTP client class are members of the same pool. This allows - // connection reuse and a significant reduction of the kernel memory pressure. - // Note that the pool gets instantiated at the very first call to method getHttpConnPool() - // because the instantiation depends on the availability of the Czar configuration. - static std::shared_ptr _httpConnPool; - static std::mutex _httpConnPoolMutex; - - /// Job information. Not using a weak_ptr as Executive could drop its JobQuery::Ptr before we're done with - /// it. A call to cancel() could reset _jobQuery early, so copy or protect _jobQuery with - /// _finishStatusMutex as needed. If (_finishStatus == ACTIVE) _jobQuery should be good. - std::shared_ptr _jobQuery; - - std::atomic _retried{false}; ///< Protect against multiple retries of _jobQuery from a - /// single QueryRequest. - std::atomic _calledMarkComplete{false}; ///< Protect against multiple calls to MarkCompleteFunc - /// from a single QueryRequest. - - std::mutex _finishStatusMutex; ///< used to protect _cancelled, _finishStatus, and _jobQuery. 
- enum FinishStatus { ACTIVE, FINISHED, ERROR } _finishStatus{ACTIVE}; // _finishStatusMutex - bool _cancelled{false}; ///< true if cancelled, protected by _finishStatusMutex. - - std::shared_ptr _keepAlive; ///< Used to keep this object alive during race condition. - QueryId _qid = 0; // for logging - int _jobid = -1; // for logging - std::string _jobIdStr{QueryIdHelper::makeIdStr(0, 0, true)}; ///< for debugging only. - - std::atomic _finishedCalled{false}; - - QdispPool::Ptr _qdispPool; - - int64_t _totalRows = 0; ///< number of rows in query added to the result table. - - std::atomic _rowsIgnored{0}; ///< Limit log messages about rows being ignored. - std::atomic _respCount{0}; ///< number of responses created -}; - -std::ostream& operator<<(std::ostream& os, QueryRequest const& r); - -} // namespace lsst::qserv::qdisp - -#endif // LSST_QSERV_QDISP_QUERYREQUEST_H diff --git a/src/qdisp/ResponseHandler.h b/src/qdisp/ResponseHandler.h index 7c1fbc4efd..1980d3c8ac 100644 --- a/src/qdisp/ResponseHandler.h +++ b/src/qdisp/ResponseHandler.h @@ -32,17 +32,24 @@ // Qserv headers #include "util/Error.h" -// Forward declarations +namespace lsst::qserv::qdisp { -namespace lsst::qserv::proto { -class ResponseSummary; -} // namespace lsst::qserv::proto +class JobQuery; +class UberJob; -// This header declaration +/// Status of the merge at the end of merging. +/// contaminated can be true only if success is false. +class MergeEndStatus { +public: + MergeEndStatus() = default; + explicit MergeEndStatus(bool success_) : success(success_) {} -namespace lsst::qserv::qdisp { + /// True indicates the results were successfully merged + bool success = false; -class JobQuery; + /// True indicates merge results are ruined and this query should be abandoned. + bool contaminated = false; +}; /// ResponseHandler is an interface that handles result bytes. 
Tasks are /// submitted to an Executive instance naming a resource unit (what resource is @@ -52,39 +59,35 @@ class JobQuery; /// segment of results. class ResponseHandler { public: - typedef util::Error Error; using BufPtr = std::shared_ptr>; typedef std::shared_ptr Ptr; ResponseHandler() {} - void setJobQuery(std::shared_ptr const& jobQuery) { _jobQuery = jobQuery; } + void setUberJob(std::weak_ptr const& ujPtr) { _uberJob = ujPtr; } virtual ~ResponseHandler() {} - /// Process a request for pulling and merging a job result into the result table - /// @param responseSummary - worker response to be analyzed and processed - /// @return true if successful (no error) - virtual bool flush(proto::ResponseSummary const& responseSummary) = 0; + /// Collect result data from the worker and merge it with the query result table. + /// If MergeEndStatus.success == true, then everything is fine. + /// If not .success, and not .contaminated, the user query can be saved by abandoning + /// this UberJob. If .contaminated is true, the result table is fouled and the user + /// query is ruined. + /// @return - @see MergeEndStatus + virtual MergeEndStatus flushHttp(std::string const& fileUrl, uint64_t fileSize) = 0; /// Signal an unrecoverable error condition. No further calls are expected. virtual void errorFlush(std::string const& msg, int code) = 0; - /// @return true if the receiver has completed its duties. - virtual bool finished() const = 0; - virtual bool reset() = 0; ///< Reset the state that a request can be retried. + /// Stop an ongoing file merge, if possible. + /// @return true if the merge was cancelled. + virtual bool cancelFileMerge() = 0; /// Print a string representation of the receiver to an ostream virtual std::ostream& print(std::ostream& os) const = 0; - /// @return an error code and description - virtual Error getError() const = 0; - - /// Do anything that needs to be done if this job gets cancelled. 
- virtual void processCancel() {}; - - std::weak_ptr getJobQuery() { return _jobQuery; } + std::weak_ptr getUberJob() { return _uberJob; } private: - std::weak_ptr _jobQuery; + std::weak_ptr _uberJob; }; inline std::ostream& operator<<(std::ostream& os, ResponseHandler const& r) { return r.print(os); } diff --git a/src/qdisp/SharedResources.h b/src/qdisp/SharedResources.h deleted file mode 100644 index 37d06f701e..0000000000 --- a/src/qdisp/SharedResources.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_SHAREDRESOURCES_H -#define LSST_QSERV_SHAREDRESOURCES_H - -// System headers -#include - -namespace lsst::qserv::qdisp { - -class QdispPool; - -/// Put resources that all Executives need to share in one class to reduce -/// the number of arguments passed. -/// This class should be kept simple so it can easily be included in headers -/// without undue compiler performances problems. 
-class SharedResources { -public: - using Ptr = std::shared_ptr; - - static Ptr create(std::shared_ptr const& qdispPool) { - return Ptr(new SharedResources(qdispPool)); - } - - SharedResources() = delete; - SharedResources(SharedResources const&) = delete; - SharedResources& operator=(SharedResources const&) = delete; - ~SharedResources() = default; - - std::shared_ptr getQdispPool() { return _qdispPool; } - -private: - SharedResources(std::shared_ptr const& qdispPool) : _qdispPool(qdispPool) {} - - /// Thread pool for handling Responses from XrdSsi. - std::shared_ptr _qdispPool; -}; - -} // namespace lsst::qserv::qdisp - -#endif // LSST_QSERV_SHAREDRESOURCES_H diff --git a/src/qdisp/UberJob.cc b/src/qdisp/UberJob.cc new file mode 100644 index 0000000000..9ae2a1ba14 --- /dev/null +++ b/src/qdisp/UberJob.cc @@ -0,0 +1,533 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "qdisp/UberJob.h" + +// System headers +#include + +// Third-party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "czar/Czar.h" +#include "cconfig/CzarConfig.h" +#include "global/LogContext.h" +#include "http/Client.h" +#include "http/MetaModule.h" +#include "protojson/UberJobReadyMsg.h" +#include "protojson/UberJobMsg.h" +#include "qdisp/CzarStats.h" +#include "qdisp/JobQuery.h" +#include "qmeta/JobStatus.h" +#include "qproc/ChunkQuerySpec.h" +#include "util/Bug.h" +#include "util/common.h" +#include "util/QdispPool.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.UberJob"); +} + +namespace lsst::qserv::qdisp { + +UberJob::Ptr UberJob::create(Executive::Ptr const& executive, + std::shared_ptr const& respHandler, int uberJobId, + CzarId czarId, protojson::WorkerContactInfo::Ptr const& workerContactInfo, + TIMEPOINT familyMapTimestamp) { + auto uJob = UberJob::Ptr( + new UberJob(executive, respHandler, uberJobId, czarId, workerContactInfo, familyMapTimestamp)); + + uJob->_setup(); + return uJob; +} + +UberJob::UberJob(Executive::Ptr const& executive, std::shared_ptr const& respHandler, + UberJobId uberJobId_, CzarId czarId_, + protojson::WorkerContactInfo::Ptr const& workerContactInfo, TIMEPOINT familyMapTimestamp_) + : UberJobBase(executive->getId(), uberJobId_, czarId_), + _executive(executive), + _respHandler(respHandler), + _rowLimit(executive->getUjRowLimit()), + _familyMapTimestamp(familyMapTimestamp_) { + LOGS(_log, LOG_LVL_TRACE, _idStr << " created"); +} + +UberJob::~UberJob() { + // UberJobs are not deleted until the executive has been deleted, which means + // the query is done before this is called. 
+ getRespHandler()->cancelFileMerge(); +} + +void UberJob::_setup() { + UberJob::Ptr ujPtr = static_pointer_cast(shared_from_this()); + _respHandler->setUberJob(ujPtr); +} + +bool UberJob::addJob(JobQuery::Ptr const& job) { + bool success = false; + if (job->setUberJobId(getUjId())) { + lock_guard lck(_jobsMtx); + _jobs.push_back(job); + success = true; + } + if (!success) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " job already in UberJob job=" << job->dump() << " uberJob=" << *this); + } + return success; +} + +void UberJob::runUberJob() { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " start"); + // Build the uberjob payload for each job. + nlohmann::json uj; + unique_lock jobsLock(_jobsMtx); + auto exec = _executive.lock(); + if (exec == nullptr || exec->getCancelled()) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " executive shutdown"); + return; + } + + // Send the uberjob to the worker + auto const method = http::Method::POST; + auto [ciwId, ciwHost, ciwManagment, ciwPort] = _wContactInfo->getAll(); + string const url = "http://" + ciwHost + ":" + to_string(ciwPort) + "/queryjob"; + vector const headers = {"Content-Type: application/json"}; + auto const& czarConfig = cconfig::CzarConfig::instance(); + + uint64_t maxTableSizeMB = czarConfig->getMaxTableSizeMB(); + auto czInfo = protojson::CzarContactInfo::create( + czarConfig->name(), czarConfig->id(), czarConfig->replicationHttpPort(), + czar::Czar::getCzar()->getFqdn(), czar::Czar::czarStartupTime); + auto scanInfoPtr = exec->getScanInfo(); + bool scanInteractive = exec->getScanInteractive(); + + auto uberJobMsg = protojson::UberJobMsg::create( + http::MetaModule::version, czarConfig->replicationInstanceId(), czarConfig->replicationAuthKey(), + czInfo, _wContactInfo, _queryId, _uberJobId, _rowLimit, maxTableSizeMB, scanInfoPtr, + scanInteractive, _jobs); + + json request = uberJobMsg->toJson(); + + jobsLock.unlock(); // unlock so other _jobsMtx threads can advance while this waits for transmit 
+ + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " REQ " << request); + string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " czarPost url=" << url << " request=" << request.dump() + << " headers=" << headers[0]); + + auto commandHttpPool = czar::Czar::getCzar()->getCommandHttpPool(); + http::ClientConfig clientConfig; + clientConfig.httpVersion = CURL_HTTP_VERSION_1_1; // same as in qhttp + clientConfig.bufferSize = CURL_MAX_READ_SIZE; // 10 MB in the current version of libcurl + clientConfig.tcpKeepAlive = true; + clientConfig.tcpKeepIdle = 30; // the default is 60 sec + clientConfig.tcpKeepIntvl = 5; // the default is 60 sec + http::Client client(method, url, request.dump(), headers, clientConfig, commandHttpPool); + bool transmitSuccess = false; + string exceptionWhat; + try { + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " sending"); + json const response = client.readAsJson(); + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " worker recv"); + if (0 != response.at("success").get()) { + transmitSuccess = true; + } else { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " ujresponse success=0"); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, requestContext + " ujresponse failed, ex: " + ex.what()); + exceptionWhat = ex.what(); + } + if (!transmitSuccess) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " transmit failure, try to send jobs elsewhere"); + _unassignJobs(); // locks _jobsMtx + setStatusIfOk(qmeta::JobStatus::RESPONSE_ERROR, + cName(__func__) + " not transmitSuccess " + exceptionWhat); + } else { + setStatusIfOk(qmeta::JobStatus::REQUEST, cName(__func__) + " transmitSuccess"); // locks _jobsMtx + } + return; +} + +void UberJob::_unassignJobs() { + LOGS(_log, LOG_LVL_INFO, cName(__func__)); + lock_guard lck(_jobsMtx); + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " exec is null"); + 
return; + } + for (auto&& job : _jobs) { + string jid = job->getIdStr(); + if (!job->unassignFromUberJob(getUjId())) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " could not unassign job=" << jid << " cancelling"); + exec->addMultiError(util::Error::INTERNAL, util::Error::RETRY_UNASSIGN, + "unable to unassign " + jid, true); + exec->squash("_unassignJobs failure"); + return; + } + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " job=" << jid << " attempts=" << job->getAttemptCount()); + } + _jobs.clear(); +} + +bool UberJob::_setStatusIfOk(qmeta::JobStatus::State newState, string const& msg) { + // must be locked _jobsMtx + auto currentState = _jobStatus->getState(); + // Setting the same state twice indicates that the system is trying to do something it + // has already done, so doing it a second time would be an error. + if (newState <= currentState) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " could not change from state=" << _jobStatus->stateStr(currentState) + << " to " << _jobStatus->stateStr(newState)); + return false; + } + + // Overwriting errors is probably not a good idea. + if (currentState >= qmeta::JobStatus::CANCEL && currentState < qmeta::JobStatus::COMPLETE) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " already error current=" << _jobStatus->stateStr(currentState) + << " new=" << _jobStatus->stateStr(newState)); + return false; + } + + _jobStatus->updateInfo(getIdStr(), newState, msg, 0, "", MSG_INFO); + for (auto&& jq : _jobs) { + jq->getStatus()->updateInfo(jq->getIdStr(), newState, msg, 0, "", MSG_INFO); + } + return true; +} + +void UberJob::callMarkCompleteFunc(bool success) { + LOGS(_log, LOG_LVL_DEBUG, "UberJob::callMarkCompleteFunc success=" << success); + + lock_guard lck(_jobsMtx); + // Need to set this uberJob's status, however exec->markCompleted will set + // the status for each job when it is called. + // "COMPLETE" and "CANCEL" are used by QmetaMysql to reduce the rows used in qmeta. + string const source = success ? 
"COMPLETE" : "CANCEL"; + _jobStatus->updateInfo(getIdStr(), qmeta::JobStatus::COMPLETE, source, 0, "", MSG_INFO); + for (auto&& job : _jobs) { + string idStr = job->getIdStr(); + if (success) { + job->getStatus()->updateInfo(idStr, qmeta::JobStatus::COMPLETE, source, 0, "", MSG_INFO); + } else { + job->getStatus()->updateInfoNoErrorOverwrite(idStr, qmeta::JobStatus::RESULT_ERROR, source, + util::Error::INTERNAL, "UberJob_failure", MSG_ERROR); + } + auto exec = _executive.lock(); + exec->markCompleted(job->getJobId(), success); + } + + // No longer need these here. Executive should still have copies. + _jobs.clear(); +} + +protojson::ExecutiveRespMsg::Ptr UberJob::importResultFile(protojson::FileUrlInfo const& fileUrlInfo_) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << fileUrlInfo_.dump()); + + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) + " no executive"); + return protojson::ExecutiveRespMsg::create(true, true, _queryId, _uberJobId, _czarId, "cancelled", + "Query cancelled no executive"); + } + + if (exec->getCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " import job was cancelled."); + return protojson::ExecutiveRespMsg::create(true, true, _queryId, _uberJobId, _czarId, "cancelled", + "Query cancelled"); + } + + if (exec->isRowLimitComplete()) { + int dataIgnored = exec->incrDataIgnoredCount(); + if ((dataIgnored - 1) % 1000 == 0) { + LOGS(_log, LOG_LVL_INFO, + "UberJob ignoring, enough rows already " << "dataIgnored=" << dataIgnored); + } + return protojson::ExecutiveRespMsg::create(true, false, _queryId, _uberJobId, _czarId, "rowLimited", + "Enough rows already"); + } + + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " fileSize=" << fileUrlInfo_.fileSize); + bool const statusSet = + setStatusIfOk(qmeta::JobStatus::RESPONSE_READY, getIdStr() + " " + fileUrlInfo_.fileUrl); + // During flaky communications, it's possible to get messages out of order, which can make for a real + // mess. 
Going to err on the side of caution and give up if things are not as expected. + if (!statusSet) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " setStatusFail could not set status to RESPONSE_READY"); + // If this UberJob has not started merging, this will kill it. If it has started merging, + // a previous message worked and everything should be okay. + bool killed = killUberJob(); + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " killUberJob " + << (killed ? "stopped before merge" : "already merging or merged")); + // Since things are strange, don't flag the worker result file as obsolete at this point. + return protojson::ExecutiveRespMsg::create(true, false, _queryId, _uberJobId, _czarId, + "setStatusFail", "could not set status to RESPONSE_READY"); + } + + weak_ptr ujThis = static_pointer_cast(shared_from_this()); + + // fileCollectFunc will be put on the queue to run later. + string const idStr = _idStr; + auto fileCollectFunc = [ujThis, fileUrlInfo_, idStr](util::CmdData*) { + auto ujPtr = ujThis.lock(); + if (ujPtr == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, + "UberJob::fileCollectFunction uberjob ptr is null " << idStr << " " << fileUrlInfo_.fileUrl); + return; + } + auto exec = ujPtr->getExecutive(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, + "UberJob::fileCollectFunction exec ptr is null " << idStr << " " << fileUrlInfo_.fileUrl); + return; + } + + exec->collectFile(ujPtr, fileUrlInfo_, idStr); + }; + + auto cmd = util::PriorityCommand::Ptr(new util::PriorityCommand(fileCollectFunc)); + exec->queueFileCollect(cmd); + + // The file collection has been queued for later, let the worker know that it's okay so far. 
+ return protojson::ExecutiveRespMsg::create(true, false, _queryId, _uberJobId, _czarId, "", + "queued for collection"); +} + +void UberJob::workerError(util::MultiError const& multiErr_, protojson::ExecutiveRespMsg& execRespMsg) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " multiErr=" << multiErr_); + + auto exec = _executive.lock(); + if (exec == nullptr || exec->getCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " no executive or cancelled " << multiErr_); + execRespMsg.success = true; + execRespMsg.dataObsolete = true; + execRespMsg.errorType = "queryEnded"; + execRespMsg.note = "nullExecutive"; + return; + } + + if (exec->isRowLimitComplete()) { + int dataIgnored = exec->incrDataIgnoredCount(); + if ((dataIgnored - 1) % 1000 == 0) { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " ignoring, enough rows already " + << "dataIgnored=" << dataIgnored); + } + execRespMsg.success = true; + execRespMsg.dataObsolete = true; + execRespMsg.errorType = "none"; + execRespMsg.note = "rowLimitComplete"; + return; + } + + exec->addMultiError(multiErr_); + string mErrMsg = multiErr_.toOneLineString(); + + // Is this a missing table error? It may be recoverable. 
+ bool missingTable = false; + bool otherErrors = false; + auto errVect = multiErr_.getVector(); + set missingTableJobs; + for (auto const& err : errVect) { + switch (err.getCode()) { + case util::Error::WORKER_SQL: { + int subErr = err.getSubCode(); + if (subErr == util::Error::UNKNOWN_TABLE || subErr == util::Error::NONEXISTANT_TABLE) { + missingTable = true; + auto jobIdVect = err.getJobIdsVect(); + missingTableJobs.insert(jobIdVect.begin(), jobIdVect.end()); + } else { + otherErrors = true; + } + break; + } + default: + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " other err code=" << err.getCode()); + otherErrors = true; + } + } + bool recoverableError = missingTable && !otherErrors; + if (recoverableError) { + // The czar needs to use alternates or new maps in hopes of finding + // replicas with the missing tables. + for (int jobId : missingTableJobs) { + // Find the job(s) and flag it not to use the worker, unless there's a newer map than the one used + // to make this UberJob + auto job = exec->findJob(jobId); + if (job != nullptr) { + job->avoidWorker(_wContactInfo, _familyMapTimestamp); + } + } + _unassignJobs(); + } else { + // Get the error message to the user and kill the user query. + exec->addMultiError(multiErr_); + exec->squash(string("UberJob::workerError ") + mErrMsg); + } + + execRespMsg.success = true; + execRespMsg.dataObsolete = true; + execRespMsg.errorType = mErrMsg; +} + +protojson::ExecutiveRespMsg::Ptr UberJob::importResultError(bool shouldCancel, string const& errorType, + string const& note) { + // If there's been an error, the worker should consider the result file obsolete. + auto respMsg = + protojson::ExecutiveRespMsg::create(true, true, _queryId, _uberJobId, _czarId, errorType, note); + // In all cases, the worker should delete the file as this czar will not ask for it. 
+ + auto exec = _executive.lock(); + if (exec != nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " shouldCancel=" << shouldCancel << " errorType=" << errorType << " " + << note); + if (shouldCancel) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " failing jobs"); + callMarkCompleteFunc(false); // all jobs failed, no retry + exec->squash(string("_importResultError shouldCancel")); + } else { + /// - each JobQuery in _jobs needs to be flagged as needing to be + /// put in an UberJob and it's attempt count increased and checked + /// against the attempt limit. + /// - executive needs to be told to make new UberJobs until all + /// JobQueries are being handled by an UberJob. + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " reassigning jobs"); + _unassignJobs(); + exec->assignJobsToUberJobs(); + } + } else { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " already cancelled shouldCancel=" << shouldCancel + << " errorType=" << errorType << " " << note); + } + return respMsg; +} + +bool UberJob::importResultFinish() { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " start"); + + /// If this is called, the file has been collected and the worker should delete it + /// + /// This function should call markComplete for all jobs in the uberjob + /// and return a "success:1" json message to be sent to the worker. + bool const statusSet = + setStatusIfOk(qmeta::JobStatus::RESPONSE_DONE, getIdStr() + " _importResultFinish"); + if (statusSet) { + bool const success = true; + callMarkCompleteFunc(success); // sets status to COMPLETE + } + return statusSet; +} + +void UberJob::_workerErrorFinish(protojson::ExecutiveRespMsg& execRespMsg, std::string const& errorType, + std::string const& note) { + // If this is called, the error has been received and the worker should delete + // the result file. + // Return error message received "success:1" json message to be sent to the worker. 
+ auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " executive is null"); + execRespMsg.success = true; + execRespMsg.dataObsolete = true; + execRespMsg.errorType = "cancelled_QID"; + execRespMsg.note = " executive is null"; + return; + } + + execRespMsg.success = true; + return; +} + +bool UberJob::killUberJob() { + // Usually called when a worker has effectively died. + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " stopping this UberJob and re-assigning jobs."); + + auto exec = _executive.lock(); + if (exec == nullptr || exec->getCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " no executive or cancelled"); + return true; + } + + if (exec->isRowLimitComplete()) { + int dataIgnored = exec->incrDataIgnoredCount(); + if ((dataIgnored - 1) % 1000 == 0) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " ignoring, enough rows already."); + } + return true; + } + + // At this point the user query appears to be alive and needs more data. + // The reason to call this is that it is likely that a worker died and the + // jobs need to go to different workers to be run. + + // Put this UberJob on the list of UberJobs that the worker should drop. + auto activeWorkerMap = czar::Czar::getCzar()->getActiveWorkerMap(); + auto activeWorker = activeWorkerMap->getActiveWorker(_wContactInfo->wId); + if (activeWorker != nullptr) { + activeWorker->addDeadUberJob(_queryId, _uberJobId); + } + + // If there are any ongoing file merges, either the czar has managed to collect + // the result file from the worker and it will be merged or the czar will not be + // able to retrieve said file and this UberJob will be killed. + + // This will only return false if merging the result file with the table has + // happened or is happening now (all required data for this UberJob is already on + // this czar.) 
+ bool cancelledMerge = getRespHandler()->cancelFileMerge(); + if (cancelledMerge) { + // The merge could be cancelled + _unassignJobs(); + } + // Let Czar::_monitor reassign jobs - other UberJobs are probably being killed + // so waiting probably gets a better distribution. + return cancelledMerge; +} + +std::ostream& UberJob::dump(std::ostream& os) const { + os << "(jobs sz=" << _jobs.size() << "("; + lock_guard lockJobsMtx(_jobsMtx); + for (auto const& job : _jobs) { + JobDescription::Ptr desc = job->getDescription(); + ResourceUnit ru = desc->resource(); + os << ru.db() << ":" << ru.chunk() << ","; + } + os << "))"; + return os; +} + +} // namespace lsst::qserv::qdisp diff --git a/src/qdisp/UberJob.h b/src/qdisp/UberJob.h new file mode 100644 index 0000000000..a1c6c55692 --- /dev/null +++ b/src/qdisp/UberJob.h @@ -0,0 +1,174 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_QDISP_UBERJOB_H +#define LSST_QSERV_QDISP_UBERJOB_H + +// System headers + +// Qserv headers +#include "czar/CzarChunkMap.h" +#include "czar/CzarRegistry.h" +#include "global/UberJobBase.h" +#include "qdisp/Executive.h" +#include "qmeta/JobStatus.h" +#include "util/MultiError.h" + +namespace lsst::qserv::protojson { +class FileUrlInfo; +} + +namespace lsst::qserv::util { +class QdispPool; +} + +namespace lsst::qserv::qdisp { + +class JobQuery; + +/// This class is a contains x number of jobs that need to go to the same worker +/// from a single user query, and contact information for the worker. It also holds +/// some information common to all jobs. +/// The UberJob constructs the message to send to the worker and handles collecting +/// and merging the results. +/// When this UberJobCompletes, all the Jobs it contains are registered as completed. +/// If this UberJob fails, it will be destroyed, un-assigning all of its Jobs. +/// Those Jobs will need to be reassigned to new UberJobs, or the query cancelled. +class UberJob : public UberJobBase { +public: + using Ptr = std::shared_ptr; + + static Ptr create(std::shared_ptr const& executive, + std::shared_ptr const& respHandler, int uberJobId, CzarId czarId, + std::shared_ptr const& workerContactInfo, + TIMEPOINT familyMapTimestamp); + + UberJob() = delete; + UberJob(UberJob const&) = delete; + UberJob& operator=(UberJob const&) = delete; + + virtual ~UberJob(); + + std::string cName(const char* funcN) const override { + return std::string("UberJob::") + funcN + " " + getIdStr(); + } + + bool addJob(std::shared_ptr const& job); + + /// Make a json version of this UberJob and send it to its worker. + virtual void runUberJob(); + + /// Kill this UberJob and unassign all Jobs so they can be used in a new UberJob if needed. + /// @return true if the UberJob results were stopped from merging. 
False means + /// the results for this UberJob were already being merged or were merged before + /// killUberJob was called. + /// Note that returning false means merging either already finished or merging has already + /// started and there's no way to stop it without corrupting the results. + bool killUberJob(); + + std::shared_ptr getRespHandler() { return _respHandler; } + std::shared_ptr getStatus() { return _jobStatus; } + + void callMarkCompleteFunc(bool success); ///< call markComplete for all jobs in this UberJob. + std::shared_ptr getExecutive() { return _executive.lock(); } + + /// Return false if not ok to set the status to newState, otherwise set the state for + /// this UberJob and all jobs it contains to newState. + /// This is used both to set status and prevent the system from repeating operations + /// that have already happened. If it returns false, the thread calling this + /// should stop processing. + bool setStatusIfOk(qmeta::JobStatus::State newState, std::string const& msg) { + std::lock_guard jobLock(_jobsMtx); + return _setStatusIfOk(newState, msg); + } + + int getJobCount() const { return _jobs.size(); } + + /// Set the worker information needed to send messages to the worker believed to + /// be responsible for the chunks handled in this UberJob. + void setWorkerContactInfo(protojson::WorkerContactInfo::Ptr const& wContactInfo) { + _wContactInfo = wContactInfo; + } + + protojson::WorkerContactInfo::Ptr getWorkerContactInfo() { return _wContactInfo; } + + /// Queue the lambda function to collect and merge the results from the worker. + /// @return a json message indicating success unless the query has been + /// cancelled, limit row complete, or similar. + protojson::ExecutiveRespMsg::Ptr importResultFile(protojson::FileUrlInfo const& fileUrlInfo_); + + /// Handle an error from the worker. 
+ void workerError(util::MultiError const& multiErr_, protojson::ExecutiveRespMsg& execRespMsg); + + void setResultFileSize(uint64_t fileSize) { _resultFileSize = fileSize; } + uint64_t getResultFileSize() { return _resultFileSize; } + + /// Update UberJob status, return true if successful. + bool importResultFinish(); + + /// Import and error from trying to collect results. + protojson::ExecutiveRespMsg::Ptr importResultError(bool shouldCancel, std::string const& errorType, + std::string const& note); + + std::ostream& dump(std::ostream& os) const override; + +protected: + UberJob(std::shared_ptr const& executive, std::shared_ptr const& respHandler, + UberJobId uberJobId_, CzarId czarId_, + std::shared_ptr const& workerContactInfo, + TIMEPOINT familyMapTimestamp); + +private: + /// Used to setup elements that can't be done in the constructor. + void _setup(); + + /// @see setStatusIfOk + /// note: _jobsMtx must be locked before calling. + bool _setStatusIfOk(qmeta::JobStatus::State newState, std::string const& msg); + + /// unassign all Jobs in this UberJob and set the Executive flag to indicate that Jobs need + /// reassignment. The list of _jobs is cleared, so multiple calls of this should be harmless. + void _unassignJobs(); + + /// Let the Executive know about errors while handling results. + void _workerErrorFinish(protojson::ExecutiveRespMsg& execRespMsg, + std::string const& errorType = std::string(), + std::string const& note = std::string()); + + std::vector> _jobs; ///< List of Jobs in this UberJob. + mutable std::mutex _jobsMtx; ///< Protects _jobs, _jobStatus + std::atomic _started{false}; + qmeta::JobStatus::Ptr _jobStatus{new qmeta::JobStatus()}; + + std::weak_ptr _executive; + std::shared_ptr _respHandler; + int const _rowLimit; ///< Number of rows in the query LIMIT clause. + uint64_t _resultFileSize = 0; + + /// Contact information for the target worker. 
+ protojson::WorkerContactInfo::Ptr _wContactInfo; + + /// Timestamp of the family map used to create this UberJob. + TIMEPOINT _familyMapTimestamp; +}; + +} // namespace lsst::qserv::qdisp + +#endif // LSST_QSERV_QDISP_UBERJOB_H diff --git a/src/qdisp/XrdSsiMocks.cc b/src/qdisp/XrdSsiMocks.cc deleted file mode 100644 index bbfb243619..0000000000 --- a/src/qdisp/XrdSsiMocks.cc +++ /dev/null @@ -1,312 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- * - * @author John Gates, SLAC - */ - -// System headers -#include -#include -#include -#include -#include -#include -#include -#include - -// Third party headers -#include "XrdSsi/XrdSsiErrInfo.hh" -#include "XrdSsi/XrdSsiResponder.hh" -#include "XrdSsi/XrdSsiStream.hh" - -// LSST headers -#include "lsst/log/Log.h" -#include "proto/worker.pb.h" -#include "util/threadSafe.h" - -// Qserv headers -#include "qdisp/Executive.h" -#include "qdisp/QueryRequest.h" -#include "qdisp/XrdSsiMocks.h" - -using namespace std; - -namespace { - -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.XrdSsiMock"); - -lsst::qserv::util::FlagNotify _go(true); - -std::atomic canCount(0); -std::atomic finCount(0); -std::atomic reqCount(0); -std::atomic totCount(0); - -bool _aOK = true; - -enum RespType { RESP_BADREQ, RESP_DATA, RESP_ERROR, RESP_ERRNR, RESP_STREAM, RESP_STRERR }; - -class Agent : public XrdSsiResponder, public XrdSsiStream { -public: - void Finished(XrdSsiRequest& rqstR, XrdSsiRespInfo const& rInfo, bool cancel) override { - const char* how = (cancel ? 
" cancelled" : ""); - LOGS(_log, LOG_LVL_DEBUG, "Finished: " << _rNum << " rName=" << _rName << how); - _rrMutex.lock(); - UnBindRequest(); - if (cancel) canCount++; - finCount++; - _isFIN = true; - if (_active) { - _rrMutex.unlock(); - } else { - _rrMutex.unlock(); - delete this; - } - } - - void Reply(RespType rType) { - _go.wait(true); - - // We may have been cancelled before being able to reply - // - if (_isCancelled(true)) return; // we are locked now - - // Do requested reply - // - switch (rType) { - case RESP_DATA: - _ReplyData(); - break; - case RESP_ERRNR: - _reqP->doNotRetry(); - // Fallthrough - case RESP_ERROR: - _ReplyError(); - break; - case RESP_STRERR: - _noData = true; - _reqP->doNotRetry(); // Kill retries on stream errors - _ReplyStream(); - break; - default: - _reqP->doNotRetry(); - _ReplyError("Bad mock request!", 13); - break; - } - _isCancelled(false); - } - - bool SetBuff(XrdSsiErrInfo& eRef, char* buff, int blen) override { - // We may have been cancelled while waiting - // - if (_isCancelled(true)) return false; - std::thread(&Agent::_StrmResp, this, &eRef, buff, blen).detach(); - _rrMutex.unlock(); - return true; - } - - Agent(lsst::qserv::qdisp::QueryRequest* rP, std::string const& rname, int rnum) - : XrdSsiStream(XrdSsiStream::isPassive), - _reqP(rP), - _rName(rname), - _rNum(rnum), - _noData(true), - _isFIN(false), - _active(true) { - // Initialize a null message we will return as a response - // - _responseSummary = - google::protobuf::Arena::CreateMessage(_arena.get()); - lsst::qserv::proto::ResponseSummary* responseSummary = _responseSummary; - responseSummary->set_wname("localhost"); - std::string str; - responseSummary->SerializeToString(&str); - _msgBuf = str; - _bOff = 0; - _bLen = _msgBuf.size(); - } - - ~Agent() {} - -private: - bool _isCancelled(bool activate) { - if (activate) _rrMutex.lock(); - if (_isFIN) { - _rrMutex.unlock(); - delete this; - return true; - } - _active = activate; - if (!activate) _rrMutex.unlock(); - 
return false; - } - - void _ReplyData() { - _rspBuf = "MockResponse"; - SetResponse(_rspBuf.data(), _rspBuf.size()); - } - - void _ReplyError(const char* eMsg = "Mock Request Ignored!", int eNum = 17) { - SetErrResponse(eMsg, eNum); - } - - void _ReplyStream() { - auto stat = _setMetaData(_msgBuf.size()); - if (stat != Status::wasPosted) { - LOGS(_log, LOG_LVL_ERROR, "Agent::_ReplyStream _setMetadata failed " << stat); - } - SetResponse(this); - } - - void _StrmResp(XrdSsiErrInfo* eP, char* buff, int blen) { - std::cerr << "Stream: cleint asks for " << blen << " bytes, have " << _bLen << '\n' << std::flush; - bool last; - - // Check for cancellation while we were waiting - // - if (_isCancelled(true)) return; - - // Either reply with an error or actual data - // - if (_noData) { - blen = -17; - last = true; - eP->Set("Mock stream error!", 17); - } else { - if (_bLen <= blen) { - memcpy(buff, _msgBuf.data() + _bOff, _bLen); - blen = _bLen; - _bLen = 0; - last = true; - } else { - memcpy(buff, _msgBuf.data() + _bOff, blen); - _bOff += blen; - _bLen -= blen; - last = false; - } - } - _reqP->ProcessResponseData(*eP, buff, blen, last); - _isCancelled(false); - } - - Status _setMetaData(size_t sz) { - string str; - _responseSummary->SerializeToString(&str); - _metadata = str; - return SetMetadata(_metadata.data(), _metadata.size()); - } - - std::recursive_mutex _rrMutex; - lsst::qserv::qdisp::QueryRequest* _reqP; - std::string _rName; - std::string _rspBuf; - std::string _msgBuf; - int _bOff; - int _bLen; - int _rNum; - bool _noData; - bool _isFIN; - bool _active; - std::string _metadata; - lsst::qserv::proto::ResponseSummary* _responseSummary; - std::unique_ptr _arena{make_unique()}; -}; -} // namespace - -namespace lsst::qserv::qdisp { - -std::string XrdSsiServiceMock::_myRName; - -int XrdSsiServiceMock::getCount() { return totCount; } - -int XrdSsiServiceMock::getCanCount() { return canCount; } - -int XrdSsiServiceMock::getFinCount() { return finCount; } - -int 
XrdSsiServiceMock::getReqCount() { return reqCount; } - -bool XrdSsiServiceMock::isAOK() { return _aOK; } - -void XrdSsiServiceMock::Reset() { - canCount = 0; - finCount = 0; - reqCount = 0; -} - -void XrdSsiServiceMock::setGo(bool go) { _go.exchangeNotify(go); } - -void XrdSsiServiceMock::ProcessRequest(XrdSsiRequest& reqRef, XrdSsiResource& resRef) { - static struct { - const char* cmd; - RespType rType; - } reqTab[] = {{"respdata", RESP_DATA}, {"resperror", RESP_ERROR}, {"resperrnr", RESP_ERRNR}, - {"respstream", RESP_STREAM}, {"respstrerr", RESP_STRERR}, {0, RESP_BADREQ}}; - - int reqNum = totCount++; - - // Check if we should verify the resource name - // - if (_myRName.size() && _myRName != resRef.rName) { - LOGS_DEBUG("Expected rname " << _myRName << " got " << resRef.rName << " from req #" << reqNum); - _aOK = false; - } - - // Get the query request object for this request and process it. - QueryRequest* r = dynamic_cast(&reqRef); - if (r) { - Agent* aP = new Agent(r, resRef.rName, reqNum); - RespType doResp; - aP->BindRequest(reqRef); - - // Get the request data and setup to handle request. Make sure the - // request string is null terminated (it should be). 
- // - std::string reqStr; - int reqLen; - const char* reqData = r->GetRequest(reqLen); - if (reqData != nullptr) reqStr.assign(reqData, reqLen); - reqData = reqStr.c_str(); - - // Convert request to response type - // - int i = 0; - while (reqTab[i].cmd && strcmp(reqTab[i].cmd, reqData)) i++; - if (reqTab[i].cmd) { - doResp = reqTab[i].rType; - } else { - LOGS_DEBUG("Unknown request '" << reqData << "' from req #" << reqNum); - _aOK = false; - doResp = RESP_BADREQ; - } - - // Release the request buffer (typically a no-op) - // - if (reqLen != 0) r->ReleaseRequestBuffer(); - - // Schedule a response - // - reqCount++; - std::thread(&Agent::Reply, aP, doResp).detach(); - } -} - -} // namespace lsst::qserv::qdisp diff --git a/src/qdisp/XrdSsiMocks.h b/src/qdisp/XrdSsiMocks.h deleted file mode 100644 index 61cad5b731..0000000000 --- a/src/qdisp/XrdSsiMocks.h +++ /dev/null @@ -1,72 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015-2016 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- * - * @author: John Gates, SLAC (heavily modified by Andrew Hanushevsky, SLAC) - */ - -#ifndef LSST_QSERV_QDISP_XRDSSIMOCKS_H -#define LSST_QSERV_QDISP_XRDSSIMOCKS_H - -// External headers -#include "XrdSsi/XrdSsiRequest.hh" -#include "XrdSsi/XrdSsiResource.hh" -#include "XrdSsi/XrdSsiService.hh" - -// Local headers - -namespace lsst::qserv::qdisp { - -class Executive; - -/** A simplified version of XrdSsiService for testing qserv. - */ -class XrdSsiServiceMock : public XrdSsiService { -public: - void ProcessRequest(XrdSsiRequest &reqRef, XrdSsiResource &resRef) override; - - XrdSsiServiceMock(Executive *executive) {}; - - virtual ~XrdSsiServiceMock() {} - - static int getCount(); - - static int getCanCount(); - - static int getFinCount(); - - static int getReqCount(); - - static bool isAOK(); - - static void Reset(); - - static void setGo(bool go); - - static void setRName(std::string const &rname) { _myRName = rname; } - -private: - static std::string _myRName; -}; - -} // namespace lsst::qserv::qdisp - -#endif diff --git a/src/qdisp/testQDisp.cc b/src/qdisp/testQDisp.cc index 5f69481158..c3a462f455 100644 --- a/src/qdisp/testQDisp.cc +++ b/src/qdisp/testQDisp.cc @@ -38,54 +38,160 @@ // Qserv headers #include "ccontrol/MergingHandler.h" #include "global/ResourceUnit.h" +#include "qdisp/CzarStats.h" #include "qdisp/Executive.h" #include "qdisp/JobQuery.h" -#include "qdisp/MessageStore.h" -#include "qdisp/QueryRequest.h" -#include "qdisp/SharedResources.h" -#include "qdisp/XrdSsiMocks.h" #include "qmeta/QProgress.h" #include "qmeta/QProgressHistory.h" +#include "qmeta/MessageStore.h" #include "qproc/ChunkQuerySpec.h" -#include "qproc/TaskMsgFactory.h" +#include "util/QdispPool.h" #include "util/threadSafe.h" namespace test = boost::test_tools; using namespace lsst::qserv; +using namespace std; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.testQDisp"); } typedef util::Sequential SequentialInt; -typedef std::vector RequesterVector; +typedef vector 
RequesterVector; -namespace lsst::qserv::qproc { +namespace lsst::qserv::qdisp { -// Normally, there's one TaskMsgFactory that all jobs in a user query share. -// In this case, there's one MockTaskMsgFactory per job with a payload specific -// for that job. -class MockTaskMsgFactory : public TaskMsgFactory { +class ExecutiveUT; + +class TestInfo : public ResponseHandler { +public: + using Ptr = std::shared_ptr; + + TestInfo() {} + virtual ~TestInfo() {} + + bool goWait() { + unique_lock ulock(_infoMtx); + _infoCV.wait(ulock, [this]() { return _go == true; }); + return _ok; + } + + void setGo(bool val) { + lock_guard lg(_infoMtx); + _go = val; + _infoCV.notify_all(); + } + + // virtual function that won't be needed + + MergeEndStatus flushHttp(std::string const& fileUrl, uint64_t fileSize) override { + return MergeEndStatus(true); + } + void errorFlush(std::string const& msg, int code) override {}; + bool cancelFileMerge() override { return cancelFileMergeRet; }; + + /// Print a string representation of the receiver to an ostream + std::ostream& print(std::ostream& os) const override { + os << "TestInfo ujCount=" << ujCount; + return os; + } + + atomic ujCount = 0; + bool cancelFileMergeRet = false; + +private: + bool _ok = true; + bool _go = true; + mutex _infoMtx; + condition_variable _infoCV; +}; + +/// Version of UberJob specifically for this unit test. 
+class UberJobUT : public UberJob { public: - MockTaskMsgFactory(std::string const& mockPayload_) : TaskMsgFactory(), mockPayload(mockPayload_) {} - void serializeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, QueryId queryId, int jobId, - int attemptCount, qmeta::CzarId czarId, std::ostream& os) override { - os << mockPayload; + using PtrUT = std::shared_ptr; + + UberJobUT(std::shared_ptr const& executive, + std::shared_ptr const& respHandler, int uberJobId, CzarId czarId, + protojson::WorkerContactInfo::Ptr const& workerContactInfo, TIMEPOINT familyMapTimestamp, + czar::CzarChunkMap::WorkerChunksData::Ptr const& workerData, TestInfo::Ptr const& testInfo_) + : UberJob(executive, respHandler, uberJobId, czarId, workerContactInfo, familyMapTimestamp), + testInfo(testInfo_) {} + + void runUberJob() override { + LOGS(_log, LOG_LVL_INFO, "runUberJob() chunkId=" << chunkId); + bool ok = testInfo->goWait(); + int c = -1; + if (ok) { + c = ++(testInfo->ujCount); + } + callMarkCompleteFunc(ok); + LOGS(_log, LOG_LVL_INFO, "runUberJob() end chunkId=" << chunkId << " c=" << c); } - std::string mockPayload; + + TestInfo::Ptr testInfo; + int chunkId = -1; }; -} // namespace lsst::qserv::qproc +/// Version of Executive specifically for this unit test. 
+class ExecutiveUT : public Executive { +public: + using PtrUT = shared_ptr; + + ~ExecutiveUT() override = default; + + ExecutiveUT(int qmetaTimeBetweenUpdates, shared_ptr const& ms, + util::QdispPool::Ptr const& qdispPool, shared_ptr const& qProgress, + shared_ptr const& queryProgressHistory, + shared_ptr const& querySession, TestInfo::Ptr const& testInfo_) + : Executive(qmetaTimeBetweenUpdates, ms, qdispPool, qProgress, queryProgressHistory, querySession, + 5 /* jobMaxAttempts */), + testInfo(testInfo_) { + workerContactInfo = + protojson::WorkerContactInfo::create("wrkId", "10.0.0.1", "hosty", 3456, CLOCK::now()); + } + + void assignJobsToUberJobs() override { + vector ujVect; + TIMEPOINT familyMapTimestamp = CLOCK::now(); + + // Make an UberJobUnitTest for each job + qdisp::Executive::ChunkIdJobMapType unassignedChunks = unassignedChunksInQuery(); + for (auto const& [chunkId, jqPtr] : unassignedChunks) { + auto exec = shared_from_this(); + PtrUT execUT = dynamic_pointer_cast(exec); + auto uJob = UberJobUT::PtrUT(new UberJobUT(execUT, testInfo, ujId++, czarId, workerContactInfo, + familyMapTimestamp, targetWorker, testInfo)); + + uJob->chunkId = chunkId; + uJob->addJob(jqPtr); + ujVect.push_back(uJob); + } + + for (auto const& ujPtr : ujVect) { + addAndQueueUberJob(ujPtr); + } + LOGS(_log, LOG_LVL_INFO, "assignJobsToUberJobs() end"); + } + + CzarId czarId = 1; + UberJobId ujId = 1; + int rowLimit = 0; + czar::CzarChunkMap::WorkerChunksData::Ptr targetWorker = nullptr; + + TestInfo::Ptr testInfo; + protojson::WorkerContactInfo::Ptr workerContactInfo; +}; + +} // namespace lsst::qserv::qdisp qdisp::JobDescription::Ptr makeMockJobDescription(qdisp::Executive::Ptr const& ex, int sequence, ResourceUnit const& ru, std::string msg, std::shared_ptr const& mHandler) { - auto mockTaskMsgFactory = std::make_shared(msg); auto cqs = std::make_shared(); // dummy, unused in this case. 
std::string chunkResultName = "dummyResultTableName"; - qmeta::CzarId const czarId = 1; - auto job = qdisp::JobDescription::create(czarId, ex->getId(), sequence, ru, mHandler, mockTaskMsgFactory, - cqs, chunkResultName, true); + CzarId const czarId = 1; + auto job = qdisp::JobDescription::create(czarId, ex->getId(), sequence, ru, cqs, true); return job; } @@ -93,45 +199,47 @@ qdisp::JobDescription::Ptr makeMockJobDescription(qdisp::Executive::Ptr const& e // that we return a shared pointer to the last constructed JobQuery object. // This only makes sense for single query jobs. // + std::shared_ptr addMockRequests(qdisp::Executive::Ptr const& ex, SequentialInt& sequence, - int chunkID, std::string msg, RequesterVector& rv) { - ResourceUnit ru; + int startingChunkId, std::string msg, RequesterVector& rv) { std::shared_ptr jobQuery; int copies = rv.size(); - ru.setAsDbChunk("Mock", chunkID); for (int j = 0; j < copies; ++j) { + ResourceUnit ru; + int chunkId = startingChunkId + j; + ru.setAsDbChunk("Mock", chunkId); // The job copies the JobDescription. qdisp::JobDescription::Ptr job = makeMockJobDescription(ex, sequence.incr(), ru, msg, rv[j]); jobQuery = ex->add(job); } + ex->setAllJobsCreated(); return jobQuery; } -/** Start adds 'copies' number of test requests that each sleep for 'millisecs' time - * before signaling to 'ex' that they are done. - * Returns time to complete in seconds. 
- */ -std::shared_ptr executiveTest(qdisp::Executive::Ptr const& ex, SequentialInt& sequence, +std::shared_ptr executiveTest(qdisp::ExecutiveUT::PtrUT const& ex, SequentialInt& sequence, int chunkId, std::string msg, int copies) { + LOGS(_log, LOG_LVL_INFO, "executiveTest start"); // Test class Executive::add // Modeled after ccontrol::UserQuery::submit() ResourceUnit ru; - std::string chunkResultName = "mock"; std::shared_ptr infileMerger; - ccontrol::MergingHandler::Ptr mh = - std::make_shared(infileMerger, chunkResultName); + ccontrol::MergingHandler::Ptr mh = std::make_shared(infileMerger, ex); + RequesterVector rv; for (int j = 0; j < copies; ++j) { rv.push_back(mh); } - return addMockRequests(ex, sequence, chunkId, msg, rv); + auto ret = addMockRequests(ex, sequence, chunkId, msg, rv); + ex->assignJobsToUberJobs(); + LOGS(_log, LOG_LVL_INFO, "executiveTest end"); + return ret; } /** This function is run in a separate thread to fail the test if it takes too long * for the jobs to complete. 
*/ void timeoutFunc(std::atomic& flagDone, int millisecs) { - LOGS_DEBUG("timeoutFunc"); + LOGS_INFO("timeoutFunc"); int total = 0; bool done = flagDone; int maxTime = millisecs * 1000; @@ -140,7 +248,7 @@ void timeoutFunc(std::atomic& flagDone, int millisecs) { total += sleepTime; usleep(sleepTime); done = flagDone; - LOGS_DEBUG("timeoutFunc done=" << done << " total=" << total); + LOGS_INFO("timeoutFunc done=" << done << " total=" << total); } LOGS_ERROR("timeoutFunc done=" << done << " total=" << total << " timedOut=" << (total >= maxTime)); BOOST_REQUIRE(done == true); @@ -152,29 +260,23 @@ void timeoutFunc(std::atomic& flagDone, int millisecs) { class SetupTest { public: std::string qrMsg; - std::string str; - qdisp::ExecutiveConfig::Ptr conf; - std::shared_ptr ms; - qdisp::QdispPool::Ptr qdispPool; - qdisp::SharedResources::Ptr sharedResources; - qdisp::Executive::Ptr ex; + std::shared_ptr ms; + util::QdispPool::Ptr qdispPool; + qdisp::ExecutiveUT::PtrUT ex; std::shared_ptr jqTest; // used only when needed - boost::asio::io_service asioIoService; + qdisp::TestInfo::Ptr testInfo = qdisp::TestInfo::Ptr(new qdisp::TestInfo()); - SetupTest(const char* request) { + SetupTest(const char* request, util::QdispPool::Ptr const& qPool_) : qdispPool(qPool_) { + LOGS(_log, LOG_LVL_INFO, "SetupTest start"); qrMsg = request; - qdisp::XrdSsiServiceMock::Reset(); - str = qdisp::ExecutiveConfig::getMockStr(); - conf = std::make_shared(str, 0); // No updating of QMeta. 
- ms = std::make_shared(); - qdispPool = std::make_shared(true); - sharedResources = qdisp::SharedResources::create(qdispPool); - - std::shared_ptr queryProgress; // No updating QProgress, nullptr + ms = std::make_shared(); + auto tInfo = qdisp::TestInfo::Ptr(new qdisp::TestInfo()); + std::shared_ptr qProgress; // No updating QProgress, nullptr std::shared_ptr queryProgressHistory; // No updating QProgressHistory, nullptr - ex = qdisp::Executive::create(*conf, ms, sharedResources, queryProgress, queryProgressHistory, - nullptr, asioIoService); + ex = qdisp::ExecutiveUT::PtrUT(new qdisp::ExecutiveUT(60, ms, qdispPool, qProgress, + queryProgressHistory, nullptr, testInfo)); + LOGS(_log, LOG_LVL_INFO, "SetupTest end"); } ~SetupTest() {} }; @@ -188,7 +290,19 @@ BOOST_AUTO_TEST_SUITE(Suite) int chunkId = 1234; int millisInt = 50000; +util::QdispPool::Ptr globalQdispPool; +qdisp::CzarStats::Ptr globalCzarStats; + BOOST_AUTO_TEST_CASE(Executive) { + int qPoolSize = 1000; + int maxPriority = 2; + vector vectRunSizes = {50, 50, 50, 50}; + vector vectMinRunningSizes = {0, 1, 3, 3}; + globalQdispPool = util::QdispPool::Ptr( + new util::QdispPool(qPoolSize, maxPriority, vectRunSizes, vectMinRunningSizes)); + qdisp::CzarStats::setup(globalQdispPool); + globalCzarStats = qdisp::CzarStats::get(); + // Variables for all executive sub-tests. Note that all executive tests // are full roundtrip tests. So, if these succeed then it's likely all // other query tests will succeed. So, much of this is redundant. 
@@ -197,64 +311,56 @@ BOOST_AUTO_TEST_CASE(Executive) { int jobs = 0; _log.setLevel(LOG_LVL_DEBUG); // Ugly but boost test suite forces this std::thread timeoutT(&timeoutFunc, std::ref(done), millisInt); - qdisp::XrdSsiServiceMock::setRName("/chk/Mock/1234"); // Test single instance { - LOGS_DEBUG("Executive single query test"); - SetupTest tEnv("respdata"); + LOGS_INFO("Executive single query test"); + SetupTest tEnv("respdata", globalQdispPool); SequentialInt sequence(0); tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); jobs = 1; - LOGS_DEBUG("jobs=1"); + LOGS_INFO("jobs=1"); tEnv.ex->join(); - LOGS_DEBUG("Executive single query test checking"); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::COMPLETE); + LOGS_INFO("Executive single query test checking"); + BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qmeta::JobStatus::COMPLETE); BOOST_CHECK(tEnv.ex->getEmpty() == true); } // Test 4 jobs { - LOGS_DEBUG("Executive four parallel jobs test"); - SetupTest tEnv("respdata"); + LOGS_INFO("Executive four parallel jobs test"); + SetupTest tEnv("respdata", globalQdispPool); SequentialInt sequence(0); executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 4); jobs += 4; - LOGS_DEBUG("ex->joining()"); + LOGS_INFO("ex->joining()"); tEnv.ex->join(); - LOGS_DEBUG("Executive four parallel jobs test checking"); + LOGS_INFO("Executive four parallel jobs test checking"); BOOST_CHECK(tEnv.ex->getEmpty() == true); } // Test that we can detect ex._empty == false. 
{ - LOGS_DEBUG("Executive detect non-empty job queue test"); - SetupTest tEnv("respdata"); + LOGS_INFO("Executive detect non-empty job queue test"); + SetupTest tEnv("respdata", globalQdispPool); SequentialInt sequence(0); - qdisp::XrdSsiServiceMock::setGo(false); executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 5); jobs += 5; - while (qdisp::XrdSsiServiceMock::getCount() < jobs) { - LOGS_DEBUG("waiting for _count(" << qdisp::XrdSsiServiceMock::getCount() << ") == jobs(" << jobs - << ")"); - usleep(10000); - } BOOST_CHECK(tEnv.ex->getEmpty() == false); - qdisp::XrdSsiServiceMock::setGo(true); - LOGS_DEBUG("ex->joining()"); + LOGS_INFO("ex->joining()"); tEnv.ex->join(); - LOGS_DEBUG("ex->join() joined"); + LOGS_INFO("ex->join() joined"); BOOST_CHECK(tEnv.ex->getEmpty() == true); } done = true; timeoutT.join(); - LOGS_DEBUG("Executive test end"); + LOGS_INFO("Executive test end"); } BOOST_AUTO_TEST_CASE(MessageStore) { - LOGS_DEBUG("MessageStore test start"); - qdisp::MessageStore ms; + LOGS_INFO("MessageStore test start"); + qmeta::MessageStore ms; BOOST_CHECK(ms.messageCount() == 0); ms.addMessage(123, "EXECUTIVE", 456, "test1"); std::string str("test2"); @@ -262,112 +368,39 @@ BOOST_AUTO_TEST_CASE(MessageStore) { ms.addMessage(86, "EXECUTIVE", -12, "test3"); BOOST_CHECK(ms.messageCount() == 3); BOOST_CHECK(ms.messageCount(-12) == 2); - qdisp::QueryMessage qm = ms.getMessage(1); + qmeta::QueryMessage qm = ms.getMessage(1); BOOST_CHECK(qm.chunkId == 124 && qm.code == -12 && str.compare(qm.description) == 0); - LOGS_DEBUG("MessageStore test end"); -} - -BOOST_AUTO_TEST_CASE(QueryRequest) { - { - LOGS_DEBUG("QueryRequest error retry test"); - // Setup Executive and for retry test when receiving an error - // Note executive maps RESPONSE_ERROR to RESULT_ERROR - SetupTest tEnv("resperror"); - SequentialInt sequence(0); - tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); - tEnv.ex->join(); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state 
== qdisp::JobStatus::RESULT_ERROR); - BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() > 1); // Retried, eh? - BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == qdisp::XrdSsiServiceMock::getReqCount()); - } - - { - LOGS_DEBUG("QueryRequest error noretry test 2"); - // Setup Executive and for no retry test when receiving an error - // Note executive maps RESPONSE_ERROR to RESULT_ERROR - SetupTest tEnv("resperrnr"); - SequentialInt sequence(0); - tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); - tEnv.ex->join(); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::RESULT_ERROR); - BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); - } - - { - LOGS_DEBUG("QueryRequest stream with data error test"); - // Setup Executive and for no retry test when receiving an error - // Note executive maps RESPONSE_DATA_NACK to RESULT_ERROR - SetupTest tEnv("respstrerr"); - SequentialInt sequence(0); - tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); - tEnv.ex->join(); - LOGS_DEBUG("tEnv.jqTest->...state = " << tEnv.jqTest->getStatus()->getInfo().state); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::RESULT_ERROR); - BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); // No retries! - } - - // We wish we could do the stream response with no results test but the - // needed information is too complex to figure out (well, one day we will). - // So, we've commented this out but the framework exists modulo the needed - // responses (see XrdSsiMocks::Agent). So, this gets punted into the - // integration test (too bad). 
- /* - { - LOGS_DEBUG("QueryRequest stream with no results test"); - SetupTest tEnv("respstream"); - SequentialInt sequence(0); - tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); - tEnv.ex->join(); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == - qdisp::JobStatus::COMPLETE); - BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); - } - */ - LOGS_DEBUG("QueryRequest test end"); + LOGS_INFO("MessageStore test end"); } BOOST_AUTO_TEST_CASE(ExecutiveCancel) { // Test that aJobQuery can be cancelled and ends in correct state // { - LOGS_DEBUG("ExecutiveCancel: squash it test"); - SetupTest tEnv("respdata"); - qdisp::XrdSsiServiceMock::setGo(false); // Can't let jobs run or they are untracked before squash + LOGS_INFO("ExecutiveCancel: squash it test"); + SetupTest tEnv("respdata", globalQdispPool); + tEnv.testInfo->setGo(false); // Can't let jobs run or they are untracked before + // squash SequentialInt sequence(0); tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); - tEnv.ex->squash(); - qdisp::XrdSsiServiceMock::setGo(true); + tEnv.ex->squash("test"); usleep(250000); // Give mock threads a quarter second to complete. tEnv.ex->join(); BOOST_CHECK(tEnv.jqTest->isQueryCancelled() == true); - // Note that the query might not have actually called ProcessRequest() - // but if it did, then it must have called Finished() with cancel. - // - BOOST_CHECK(qdisp::XrdSsiServiceMock::getCanCount() == qdisp::XrdSsiServiceMock::getReqCount()); } // Test that multiple JobQueries are cancelled. 
{ - LOGS_DEBUG("ExecutiveCancel: squash 20 test"); - SetupTest tEnv("respdata"); - qdisp::XrdSsiServiceMock::setGo(false); // Can't let jobs run or they are untracked before squash + LOGS_INFO("ExecutiveCancel: squash 20 test"); + SetupTest tEnv("respdata", globalQdispPool); + // squash SequentialInt sequence(0); executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 20); - tEnv.ex->squash(); - tEnv.ex->squash(); // check that squashing twice doesn't cause issues. - qdisp::XrdSsiServiceMock::setGo(true); - usleep(250000); // Give mock threads a quarter second to complete. + tEnv.ex->squash("test"); + tEnv.ex->squash("test"); // check that squashing twice doesn't cause issues. + usleep(250000); // Give mock threads a quarter second to complete. tEnv.ex->join(); - // Note that the cancel count might not be 20 as some queries will cancel - // themselves before they get around to issuing ProcessRequest(). - // - BOOST_CHECK(qdisp::XrdSsiServiceMock::getCanCount() == qdisp::XrdSsiServiceMock::getReqCount()); } } -BOOST_AUTO_TEST_CASE(ServiceMock) { - // Verify that our service object did not see anything unusual. 
- BOOST_CHECK(qdisp::XrdSsiServiceMock::isAOK()); -} - BOOST_AUTO_TEST_SUITE_END() diff --git a/src/qhttp/Server.cc b/src/qhttp/Server.cc index 5116255cb6..6e167d1635 100644 --- a/src/qhttp/Server.cc +++ b/src/qhttp/Server.cc @@ -57,6 +57,8 @@ namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qhttp"); } +using namespace std; + namespace lsst::qserv::qhttp { Server::Ptr Server::create(asio::io_service& io_service, unsigned short port, int backlog, @@ -115,11 +117,11 @@ void Server::_accept() { [](auto& weakSocket) { return weakSocket.expired(); }); auto numExpired = _activeSockets.end() - removed; if (numExpired != 0) { - LOGLS_DEBUG(_log, logger(this) << "purging tracking for " << numExpired << " expired socket(s)"); + LOGLS_TRACE(_log, logger(this) << "purging tracking for " << numExpired << " expired socket(s)"); _activeSockets.erase(removed, _activeSockets.end()); } _activeSockets.push_back(socket); - LOGLS_DEBUG(_log, logger(this) << "tracking new socket"); + LOGLS_TRACE(_log, logger(this) << "tracking new socket"); } auto self = shared_from_this(); @@ -128,13 +130,18 @@ void Server::_accept() { LOGLS_DEBUG(_log, logger(self) << "accept chain exiting"); return; } - if (!ec) { - LOGLS_INFO(_log, logger(self) << logger(socket) << "connect from " << socket->remote_endpoint()); - boost::system::error_code ignore; - socket->set_option(ip::tcp::no_delay(true), ignore); - self->_readRequest(socket); - } else { - LOGLS_ERROR(_log, logger(self) << "accept failed: " << ec.message()); + try { + if (!ec) { + LOGLS_INFO(_log, logger(self) + << logger(socket) << "connect from " << socket->remote_endpoint()); + boost::system::error_code ignore; + socket->set_option(ip::tcp::no_delay(true), ignore); + self->_readRequest(socket); + } else { + LOGLS_ERROR(_log, logger(self) << "accept failed: " << ec.message()); + } + } catch (boost::system::system_error const& bEx) { + LOGS(_log, LOG_LVL_ERROR, "qhttp::Server::_accept lambda threw " << bEx.what()); } self->_accept(); // start 
accept again for the next incoming connection }); @@ -187,7 +194,7 @@ std::shared_ptr Server::_startTimer(std::shared_ptrlowest_layer().shutdown(ip::tcp::socket::shutdown_both, ignore); socket->lowest_layer().close(ignore); } else if (ec == asio::error::operation_aborted) { - LOGLS_DEBUG(_log, logger(self) << logger(socket) << "read timeout timer canceled"); + LOGLS_TRACE(_log, logger(self) << logger(socket) << "read timeout timer canceled"); } else { LOGLS_ERROR(_log, logger(self) << logger(socket) << "read timeout timer: " << ec.message()); } @@ -209,13 +216,15 @@ void Server::_readRequest(std::shared_ptr socket) { self, socket, [self, socket, startTime, reuseSocket](boost::system::error_code const& ec, std::size_t sent) { chrono::duration elapsed = chrono::steady_clock::now() - startTime; - LOGLS_INFO(_log, logger(self) - << logger(socket) << "request duration " << elapsed.count() << "ms"); + string logStr; + if (LOG_CHECK_LVL(_log, LOG_LVL_INFO)) { + logStr = string("request duration ") + to_string(elapsed.count()) + "ms"; + } if (!ec && *reuseSocket) { - LOGLS_DEBUG(_log, logger(self) << logger(socket) << "lingering"); + LOGLS_INFO(_log, logger(self) << logger(socket) << logStr << " lingering"); self->_readRequest(socket); } else { - LOGLS_DEBUG(_log, logger(self) << logger(socket) << "closing"); + LOGLS_INFO(_log, logger(self) << logger(socket) << logStr << " closing"); boost::system::error_code ignore; socket->lowest_layer().shutdown(ip::tcp::socket::shutdown_both, ignore); socket->lowest_layer().close(ignore); @@ -233,8 +242,11 @@ void Server::_readRequest(std::shared_ptr socket) { if (ec == asio::error::operation_aborted) { LOGLS_ERROR(_log, logger(self) << logger(socket) << "header read canceled"); } else if (ec) { - LOGLS_ERROR(_log, logger(self) - << logger(socket) << "header read failed: " << ec.message()); + // "End of file" happens very frequently and shouldn't be logged as an error. 
+ auto logLvl = LOG_LVL_ERROR; + if (ec == asio::error::eof) logLvl = LOG_LVL_INFO; + LOGS(_log, logLvl, + logger(self) << logger(socket) << "header read failed: " << ec.message()); } timer->cancel(); if (ec) return; diff --git a/src/qmeta/CMakeLists.txt b/src/qmeta/CMakeLists.txt index f0b307fef0..dda90db9a3 100644 --- a/src/qmeta/CMakeLists.txt +++ b/src/qmeta/CMakeLists.txt @@ -1,6 +1,8 @@ add_library(qmeta SHARED) target_sources(qmeta PRIVATE + JobStatus.cc + MessageStore.cc QMeta.cc QMetaMysql.cc QMetaSelect.cc @@ -14,8 +16,8 @@ target_sources(qmeta PRIVATE target_link_libraries(qmeta PUBLIC cconfig global + http mysql - qdisp log util ) @@ -26,7 +28,6 @@ add_executable(testQMeta testQMeta.cc) target_link_libraries(testQMeta cconfig - proto qmeta sql Boost::unit_test_framework diff --git a/src/qmeta/Exceptions.h b/src/qmeta/Exceptions.h index deeccf73b7..60ce48b353 100644 --- a/src/qmeta/Exceptions.h +++ b/src/qmeta/Exceptions.h @@ -28,7 +28,7 @@ #include "boost/lexical_cast.hpp" // Qserv headers -#include "qmeta/types.h" +#include "global/intTypes.h" #include "sql/SqlErrorObject.h" #include "util/Issue.h" @@ -109,6 +109,15 @@ class MissingTableError : public QMetaError { virtual std::string typeName() const override { return "MissingTableError"; } }; +/// Exception thrown when the specified metadata table is empty. +class EmptyTableError : public QMetaError { +public: + EmptyTableError(util::Issue::Context const& ctx, std::string const& table) + : QMetaError(ctx, "Query metadata table is empty: " + table) {} + + virtual std::string typeName() const override { return "EmptyTableError"; } +}; + /// Exception thrown when database consistency is violated. 
class ConsistencyError : public QMetaError { public: diff --git a/src/qdisp/JobStatus.cc b/src/qmeta/JobStatus.cc similarity index 78% rename from src/qdisp/JobStatus.cc rename to src/qmeta/JobStatus.cc index 20dad135a3..1498cfcd79 100644 --- a/src/qdisp/JobStatus.cc +++ b/src/qmeta/JobStatus.cc @@ -33,7 +33,7 @@ */ // Class header -#include "qdisp/JobStatus.h" +#include "qmeta/JobStatus.h" // System headers #include @@ -44,18 +44,23 @@ #include "lsst/log/Log.h" namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobStatus"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.qmeta.JobStatus"); } -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { JobStatus::Info::Info() : state(UNKNOWN), stateCode(0) { stateTime = getNow(); } void JobStatus::updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, std::string const& desc, MessageSeverity severity) { std::lock_guard lock(_mutex); + _updateInfo(idMsg, s, source, code, desc, severity); +} - LOGS(_log, LOG_LVL_DEBUG, idMsg << " Updating state to: " << s << " code=" << code << " " << desc); +void JobStatus::_updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity) { + LOGS(_log, LOG_LVL_DEBUG, + idMsg << " Updating state to: " << s << " code=" << code << " " << desc << " src=" << source); _info.stateTime = getNow(); _info.state = s; _info.stateCode = code; @@ -64,6 +69,17 @@ void JobStatus::updateInfo(std::string const& idMsg, JobStatus::State s, std::st _info.severity = severity; } +void JobStatus::updateInfoNoErrorOverwrite(std::string const& idMsg, JobStatus::State s, + std::string const& source, int code, std::string const& desc, + MessageSeverity severity) { + std::lock_guard lock(_mutex); + auto jState = _info.state; + if (jState != qmeta::JobStatus::CANCEL && jState != qmeta::JobStatus::RESPONSE_ERROR && + jState != qmeta::JobStatus::RESULT_ERROR && jState != 
qmeta::JobStatus::MERGE_ERROR) { + _updateInfo(idMsg, s, source, code, desc, severity); + } +} + std::string JobStatus::stateStr(JobStatus::State const& state) { std::string msg("?"); switch (state) { @@ -140,4 +156,4 @@ std::ostream& operator<<(std::ostream& os, JobStatus::Info const& info) { return os; } -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta diff --git a/src/qdisp/JobStatus.h b/src/qmeta/JobStatus.h similarity index 77% rename from src/qdisp/JobStatus.h rename to src/qmeta/JobStatus.h index d693921001..7278fba443 100644 --- a/src/qdisp/JobStatus.h +++ b/src/qmeta/JobStatus.h @@ -20,8 +20,8 @@ * the GNU General Public License along with this program. If not, * see . */ -#ifndef LSST_QSERV_QDISP_JOBSTATUS_H -#define LSST_QSERV_QDISP_JOBSTATUS_H +#ifndef LSST_QSERV_QMETA_JOBSTATUS_H +#define LSST_QSERV_QMETA_JOBSTATUS_H // System headers #include @@ -34,9 +34,9 @@ // qserv headers #include "global/constants.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { -/** Monitor execution of a chunk query against an SSI ressource +/** Monitor execution of a chunk query. * * JobStatus instances receive timestamped reports of execution State. 
This * allows a manager object to receive updates on status without exposing its @@ -61,13 +61,14 @@ class JobStatus { UNKNOWN = 0, REQUEST = 1203, RESPONSE_READY, - RESPONSE_ERROR, RESPONSE_DATA, RESPONSE_DATA_NACK, RESPONSE_DONE, + CANCEL, + RESPONSE_ERROR, // Errors must be between CANCEL and COMPLETE RESULT_ERROR, MERGE_ERROR, - CANCEL, + RETRY_ERROR, COMPLETE = 2000 }; @@ -93,8 +94,14 @@ class JobStatus { * - resourceUnit should be extracted from Info (beware of mutex) * - Info should be put in a vector */ - void updateInfo(std::string const& idMsg, State s, std::string const& source, int code = 0, - std::string const& desc = "", MessageSeverity severity = MSG_INFO); + void updateInfo(std::string const& idMsg, State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity); + + /// Same as updateInfo() except existing error states are not overwritten. + /// @see updateInfo() + /// @return Negative values indicate the status was changed, zero and positive values + void updateInfoNoErrorOverwrite(std::string const& idMsg, State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity); struct Info { Info(); @@ -102,7 +109,7 @@ class JobStatus { // with each invocation of report(). State state; ///< Actual state TimeType stateTime; ///< Last modified timestamp - int stateCode; ///< Code associated with state (e.g. xrd or mysql error code) + int stateCode; ///< Code associated with state (e.g. mysql error code) std::string stateDesc; ///< Textual description std::string source = ""; ///< Source of the current state. MessageSeverity severity = MSG_INFO; ///< Severity of the message. 
@@ -116,11 +123,21 @@ class JobStatus { return _info; } + State getState() const { + std::lock_guard lock(_mutex); + return _info.state; + } + static std::string stateStr(JobStatus::State const& state); friend std::ostream& operator<<(std::ostream& os, JobStatus const& es); private: + /// @see updateInfo() + /// note: _mutex must be held before calling. + void _updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity); + Info _info; mutable std::mutex _mutex; ///< Mutex to guard concurrent updates }; @@ -128,6 +145,6 @@ std::ostream& operator<<(std::ostream& os, JobStatus const& es); std::ostream& operator<<(std::ostream& os, JobStatus::Info const& inf); std::ostream& operator<<(std::ostream& os, JobStatus::State const& state); -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta -#endif // LSST_QSERV_QDISP_JOBSTATUS_H +#endif // LSST_QSERV_META_JOBSTATUS_H diff --git a/src/qdisp/MessageStore.cc b/src/qmeta/MessageStore.cc similarity index 66% rename from src/qdisp/MessageStore.cc rename to src/qmeta/MessageStore.cc index 784dd847b3..41704fc68a 100644 --- a/src/qdisp/MessageStore.cc +++ b/src/qmeta/MessageStore.cc @@ -23,7 +23,7 @@ // See MessageStore.h // Class header -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" // System headers #include @@ -36,31 +36,38 @@ // Qserv headers #include "global/constants.h" -#include "qdisp/JobStatus.h" + +using namespace std; namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.MessageStore"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.qmeta.MessageStore"); } -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { //////////////////////////////////////////////////////////////////////// // public //////////////////////////////////////////////////////////////////////// +string QueryMessage::dump() const { + stringstream os; + os << "QueryMessage(chId=" << chunkId << " src=" << msgSource << " code=" << 
code + << " desc=" << description << " severity=" << severity << ")"; + return os.str(); +} + void MessageStore::addMessage(int chunkId, std::string const& msgSource, int code, std::string const& description, MessageSeverity severity, - JobStatus::TimeType timestamp) { - if (timestamp == JobStatus::TimeType()) { - timestamp = JobStatus::getNow(); + qmeta::JobStatus::TimeType timestamp) { + if (timestamp == qmeta::JobStatus::TimeType()) { + timestamp = qmeta::JobStatus::getNow(); } + QueryMessage qMsg(chunkId, msgSource, code, description, timestamp, severity); auto level = code < 0 ? LOG_LVL_ERROR : LOG_LVL_DEBUG; - LOGS(_log, level, "Add msg: " << chunkId << " " << msgSource << " " << code << " " << description); - { - std::lock_guard lock(_storeMutex); - _queryMessages.insert(_queryMessages.end(), - QueryMessage(chunkId, msgSource, code, description, timestamp, severity)); - } + LOGS(_log, level, "Add msg: " << qMsg.dump()); + + std::lock_guard lock(_storeMutex); + _queryMessages.push_back(qMsg); } void MessageStore::addErrorMessage(std::string const& msgSource, std::string const& description) { @@ -80,4 +87,14 @@ int MessageStore::messageCount(int code) const { return count; } -} // namespace lsst::qserv::qdisp +string MessageStore::dump() const { + stringstream os; + os << "MessageStore[count=" << _queryMessages.size(); + for (auto const& msg : _queryMessages) { + os << "{" << msg.dump() << "}\n"; + } + os << "]"; + return os.str(); +} + +} // namespace lsst::qserv::qmeta diff --git a/src/qdisp/MessageStore.h b/src/qmeta/MessageStore.h similarity index 85% rename from src/qdisp/MessageStore.h rename to src/qmeta/MessageStore.h index c42114f01e..7cc389c8cf 100644 --- a/src/qdisp/MessageStore.h +++ b/src/qmeta/MessageStore.h @@ -29,8 +29,8 @@ /// The MessageStore classes are responsible for maintaining status and /// error messages associated with a query. 
-#ifndef LSST_QSERV_QDISP_MESSAGESTORE_H -#define LSST_QSERV_QDISP_MESSAGESTORE_H +#ifndef LSST_QSERV_QMETA_MESSAGESTORE_H +#define LSST_QSERV_QMETA_MESSAGESTORE_H // System headers #include @@ -40,13 +40,13 @@ // Qserv headers #include "global/constants.h" -#include "qdisp/JobStatus.h" +#include "qmeta/JobStatus.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { struct QueryMessage { QueryMessage(int chunkId_, std::string const& msgSource_, int code_, std::string description_, - JobStatus::TimeType timestamp_, MessageSeverity severity_) + qmeta::JobStatus::TimeType timestamp_, MessageSeverity severity_) : chunkId(chunkId_), msgSource(msgSource_), code(code_), @@ -58,8 +58,13 @@ struct QueryMessage { std::string msgSource; int code; std::string description; - JobStatus::TimeType timestamp; + qmeta::JobStatus::TimeType timestamp; MessageSeverity severity; + + /// Return string below the length to fit in source in database, which is varchar(63). + static std::string limitSrc(std::string const& src) { return src.substr(0, 62); } + + std::string dump() const; }; /** Store messages issued by Qserv workers and czar @@ -95,7 +100,7 @@ class MessageStore { */ void addMessage(int chunkId, std::string const& msgSource, int code, std::string const& description, MessageSeverity severity_ = MessageSeverity::MSG_INFO, - JobStatus::TimeType timestamp = JobStatus::TimeType()); + qmeta::JobStatus::TimeType timestamp = qmeta::JobStatus::TimeType()); /** Add an error message to this MessageStore * @@ -112,11 +117,13 @@ class MessageStore { int messageCount() const; int messageCount(int code) const; + std::string dump() const; + private: std::mutex _storeMutex; std::vector _queryMessages; }; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta -#endif // LSST_QSERV_QDISP_MESSAGESTORE_H +#endif // LSST_QSERV_QMETA_MESSAGESTORE_H diff --git a/src/qmeta/QInfo.h b/src/qmeta/QInfo.h index b8b6d5e73b..763f6daa80 100644 --- a/src/qmeta/QInfo.h +++ 
b/src/qmeta/QInfo.h @@ -27,7 +27,6 @@ #include // Qserv headers -#include "qmeta/types.h" namespace lsst::qserv::qmeta { diff --git a/src/qmeta/QMeta.h b/src/qmeta/QMeta.h index 92fc832d7f..3d99bf99f5 100644 --- a/src/qmeta/QMeta.h +++ b/src/qmeta/QMeta.h @@ -23,6 +23,7 @@ #define LSST_QSERV_QMETA_QMETA_H // System headers +#include #include #include #include @@ -30,19 +31,62 @@ #include // Qserv headers +#include "global/clock_defs.h" #include "global/intTypes.h" #include "qmeta/QInfo.h" -#include "qmeta/types.h" namespace lsst::qserv::qdisp { -class MessageStore; class QueryMessage; } // namespace lsst::qserv::qdisp namespace lsst::qserv::qmeta { +class MessageStore; + /// @addtogroup qmeta +/** + * The structure ChunkMap encapsulates a disposition of chunks at Qserv workers + * along with a time when the map was updated. + * + * Here is an example on how to using the map for getting info on all chunks in + * the given context: + * @code + * std::string const worker = "worker-001"; + * std::string const database = "LSST-DR01"; + * std::string const table = "Object"; + * + * ChunkMap const& chunkMap = ...; + * for (auto const& [chunk, size] : chunkMap[worker][database][table]) { + * ... + * } + * @endcode + */ +struct QMetaChunkMap { + /// @return 'true' if the map is empty (or constructed using the default constructor) + bool empty() const { + return workers.empty() || (std::chrono::time_point() == updateTime); + } + + // NOTE: Separate types were added here for the sake of clarity to avoid + // a definition of the unreadable nested map. + + struct ChunkInfo { + unsigned int chunk = 0; ///< The chunk number + size_t size = 0; ///< The file size (in bytes) of the chunk table + }; + typedef std::vector Chunks; ///< Collection of chunks + typedef std::map Tables; ///< tables-to-chunks + typedef std::map Databases; ///< Databases-to-tables + typedef std::map Workers; ///< Workers-to-databases + + /// The chunk disposition map for all workers. 
+ Workers workers; + + /// The last time the map was updated (since UNIX Epoch). + TIMEPOINT updateTime; +}; + /** * @ingroup qmeta * @brief Interface for query metadata. @@ -265,7 +309,23 @@ class QMeta { virtual void saveResultQuery(QueryId queryId, std::string const& query) = 0; /// Write messages/errors generated during the query to the QMessages table. - virtual void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) = 0; + virtual void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) = 0; + + /** + * Fetch the chunk map which was updated after the specified time point. + * @param prevUpdateTime The cut off time for the chunk map age. Note that the default + * value of the parameter represents the start time of the UNIX Epoch. Leaving the default + * value forces an attempt to read the map from the database if the one would exist + * in there. + * @return Return the most current chunk disposition or the empty object if the persistent + * map is older than it was requested.The result could be evaluated by calling + * method empty() on the result object. 
+ * @throws EmptyTableError if the corresponding metadata table doesn't have any record + * @throws SqlError for any other error related to MySQL + */ + virtual QMetaChunkMap getChunkMap( + std::chrono::time_point const& prevUpdateTime = + std::chrono::time_point()) = 0; protected: // Default constructor diff --git a/src/qmeta/QMetaMysql.cc b/src/qmeta/QMetaMysql.cc index c522d1ab3c..9c1866fca8 100644 --- a/src/qmeta/QMetaMysql.cc +++ b/src/qmeta/QMetaMysql.cc @@ -25,6 +25,7 @@ // System headers #include +#include // Third-party headers #include "boost/lexical_cast.hpp" @@ -34,13 +35,15 @@ #include "lsst/log/Log.h" // Qserv headers -#include "qdisp/JobStatus.h" -#include "qdisp/MessageStore.h" +#include "global/stringUtil.h" #include "qmeta/Exceptions.h" +#include "qmeta/JobStatus.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMetaTransaction.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" #include "sql/SqlResults.h" +#include "util/TimeUtils.h" using namespace std; @@ -733,14 +736,14 @@ void QMetaMysql::saveResultQuery(QueryId queryId, string const& query) { trans->commit(); } -void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr const& msgStore) { +void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr const& msgStore) { int msgCount = msgStore->messageCount(); int cancelCount = 0; int completeCount = 0; int execFailCount = 0; map msgCountMap; for (int i = 0; i != msgCount; ++i) { - qdisp::QueryMessage const& qMsg = msgStore->getMessage(i); + qmeta::QueryMessage const& qMsg = msgStore->getMessage(i); try { _addQueryMessage(queryId, qMsg, cancelCount, completeCount, execFailCount, msgCountMap); } catch (qmeta::SqlError const& ex) { @@ -749,26 +752,116 @@ void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr 0 || execFailCount > 0) { - qdisp::QueryMessage qm(-1, "CANCELTOTAL", 0, + qmeta::QueryMessage qm(-1, "CANCELTOTAL", 0, string("{\"CANCEL_count\":") + to_string(cancelCount) + ", \"EXECFAIL_count\":" + 
to_string(execFailCount) + ", \"COMPLETE_count\":" + to_string(completeCount) + "}", - qdisp::JobStatus::getNow(), MessageSeverity::MSG_INFO); + qmeta::JobStatus::getNow(), MessageSeverity::MSG_INFO); _addQueryMessage(queryId, qm, cancelCount, completeCount, execFailCount, msgCountMap); } for (auto const& elem : msgCountMap) { if (elem.second.count > _maxMsgSourceStore) { + // QMessages source column is VARCHAR(63) string source = string("MANY_") + elem.first; + source = QueryMessage::limitSrc(source); string desc = string("{\"msgSource\":") + elem.first + ", \"count\":" + to_string(elem.second.count) + "}"; - qdisp::QueryMessage qm(-1, source, 0, desc, qdisp::JobStatus::getNow(), elem.second.severity); + qmeta::QueryMessage qm(-1, source, 0, desc, qmeta::JobStatus::getNow(), elem.second.severity); _addQueryMessage(queryId, qm, cancelCount, completeCount, execFailCount, msgCountMap); } } } -void QMetaMysql::_addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qMsg, int& cancelCount, +QMetaChunkMap QMetaMysql::getChunkMap(chrono::time_point const& prevUpdateTime) { + lock_guard lock(_dbMutex); + + QMetaChunkMap chunkMap; + + auto trans = QMetaTransaction::create(*_conn); + + // Check if the table needs to be read. Note that the default value of + // the previous update timestamp always forces an attempt to read the map. 
+ auto const updateTime = _getChunkMapUpdateTime(lock); + LOGS(_log, LOG_LVL_INFO, + "QMetaMysql::getChunkMap updateTime=" << util::TimeUtils::timePointToDateTimeString(updateTime)); + bool const force = + (prevUpdateTime == chrono::time_point()) || (prevUpdateTime < updateTime); + if (!force) { + trans->commit(); + chunkMap.updateTime = prevUpdateTime; + return chunkMap; + } + + // Read the map itself + + sql::SqlErrorObject errObj; + sql::SqlResults results; + + string const tableName = "chunkMap"; + string const query = "SELECT `worker`,`database`,`table`,`chunk`,`size` FROM `" + tableName + "`"; + LOGS(_log, LOG_LVL_DEBUG, "Executing query: " << query); + if (!_conn->runQuery(query, results, errObj)) { + LOGS(_log, LOG_LVL_ERROR, "query failed: " << query); + throw SqlError(ERR_LOC, errObj); + } + vector> const rows = results.extractFirstNColumns(5); + trans->commit(); + + try { + for (auto const& row : rows) { + string const& worker = row[0]; + string const& database = row[1]; + string const& table = row[2]; + unsigned int chunk = lsst::qserv::stoui(row[3]); + size_t const size = stoull(row[4]); + chunkMap.workers[worker][database][table].push_back(QMetaChunkMap::ChunkInfo{chunk, size}); + LOGS(_log, LOG_LVL_TRACE, + "QMetaInsrt{worker=" << worker << " dbN=" << database << " tblN=" << table + << " chunk=" << chunk << " sz=" << size); + } + chunkMap.updateTime = updateTime; + } catch (exception const& ex) { + string const msg = "Failed to parse result set of query " + query + ", ex: " + string(ex.what()); + throw ConsistencyError(ERR_LOC, msg); + } + return chunkMap; +} + +chrono::time_point QMetaMysql::_getChunkMapUpdateTime(lock_guard const& lock) { + sql::SqlErrorObject errObj; + sql::SqlResults results; + string const tableName = "chunkMapStatus"; + string const query = "SELECT UNIX_TIMESTAMP(`update_time`) FROM `" + tableName + + "` ORDER BY `update_time` DESC LIMIT 1"; + + LOGS(_log, LOG_LVL_DEBUG, "Executing query: " << query); + if 
(!_conn->runQuery(query, results, errObj)) { + LOGS(_log, LOG_LVL_ERROR, "query failed: " << query); + throw SqlError(ERR_LOC, errObj); + } + vector updateTime; + if (!results.extractFirstColumn(updateTime, errObj)) { + LOGS(_log, LOG_LVL_ERROR, "Failed to extract result set of query " + query); + throw SqlError(ERR_LOC, errObj); + } + if (updateTime.empty()) { + LOGS(_log, LOG_LVL_TRACE, + "QMetaMysql::_getChunkMapUpdateTime empty chunkMapStatus; returning default"); + return chrono::time_point(); + } else if (updateTime.size() > 1) { + throw ConsistencyError(ERR_LOC, "Too many rows in result set of query " + query); + } + try { + LOGS(_log, LOG_LVL_TRACE, "QMetaMysql::_getChunkMapUpdateTime " << updateTime[0]); + return chrono::time_point() + chrono::seconds(stol(updateTime[0])); + } catch (exception const& ex) { + string const msg = "Failed to parse result set of query " + query + ", ex: " + string(ex.what()); + throw ConsistencyError(ERR_LOC, msg); + } +} + +void QMetaMysql::_addQueryMessage(QueryId queryId, qmeta::QueryMessage const& qMsg, int& cancelCount, int& completeCount, int& execFailCount, map& msgCountMap) { // Don't add duplicate messages. if (qMsg.msgSource == "DUPLICATE") return; @@ -820,16 +913,17 @@ void QMetaMysql::_addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qM // build query std::string severity = (qMsg.severity == MSG_INFO ? 
"INFO" : "ERROR"); + string source = QueryMessage::limitSrc(qMsg.msgSource); string query = "INSERT INTO QMessages (queryId, msgSource, chunkId, code, severity, message, timestamp) VALUES " "("; query += to_string(queryId); - query += ", \"" + _conn->escapeString(qMsg.msgSource) + "\""; + query += ", \"" + _conn->escapeString(source) + "\""; query += ", " + to_string(qMsg.chunkId); query += ", " + to_string(qMsg.code); query += ", \"" + _conn->escapeString(severity) + "\""; query += ", \"" + _conn->escapeString(qMsg.description) + "\""; - query += ", " + to_string(qdisp::JobStatus::timeToInt(qMsg.timestamp)); + query += ", " + to_string(qmeta::JobStatus::timeToInt(qMsg.timestamp)); query += ")"; // run query sql::SqlErrorObject errObj; diff --git a/src/qmeta/QMetaMysql.h b/src/qmeta/QMetaMysql.h index c34d577dc0..c48b3c3bcd 100644 --- a/src/qmeta/QMetaMysql.h +++ b/src/qmeta/QMetaMysql.h @@ -23,6 +23,7 @@ #define LSST_QSERV_QMETA_QMETAMYSQL_H // System headers +#include #include #include #include @@ -41,11 +42,12 @@ class SqlConnection; namespace lsst::qserv::qmeta { +class QueryMessage; + /// @addtogroup qmeta /** * @ingroup qmeta - * * @brief Mysql-based implementation of qserv metadata. */ @@ -244,7 +246,11 @@ class QMetaMysql : public QMeta { void saveResultQuery(QueryId queryId, std::string const& query) override; /// @see QMeta::addQueryMessages() - void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) override; + void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) override; + + /// @see QMeta::getChunkMap + QMetaChunkMap getChunkMap(std::chrono::time_point const& prevUpdateTime = + std::chrono::time_point()) override; protected: /// Check that all necessary tables exist @@ -260,8 +266,19 @@ class QMetaMysql : public QMeta { }; private: + /** + * Read the last update time of the chunk map. + * @param A lock acquired on the mutex _dbMutex. 
+ * @return The update time + * @throw SqlError For any SQL-specific error + * @throw ConsistencyError For any problem met when parsing or interpreting results read + * from the table. + */ + std::chrono::time_point _getChunkMapUpdateTime( + std::lock_guard const& lock); + /// Add qMsg to the permanent message table. - void _addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qMsg, int& cancelCount, + void _addQueryMessage(QueryId queryId, qmeta::QueryMessage const& qMsg, int& cancelCount, int& completeCount, int& execFailCount, std::map& msgCountMap); diff --git a/src/qmeta/testQMeta.cc b/src/qmeta/testQMeta.cc index bc4e19521c..00d0d5d801 100644 --- a/src/qmeta/testQMeta.cc +++ b/src/qmeta/testQMeta.cc @@ -37,6 +37,7 @@ #include "qmeta/QMetaMysql.h" #include "qmeta/QProgress.h" #include "qmeta/QProgressData.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" #include "sql/SqlErrorObject.h" @@ -50,6 +51,7 @@ using lsst::qserv::mysql::MySqlConfig; using namespace lsst::qserv::qmeta; +using lsst::qserv::CzarId; using lsst::qserv::sql::SqlConnection; using lsst::qserv::sql::SqlConnectionFactory; using lsst::qserv::sql::SqlErrorObject; @@ -377,4 +379,10 @@ BOOST_AUTO_TEST_CASE(messWithQueryStats) { BOOST_CHECK(caught); } +BOOST_AUTO_TEST_CASE(getChunkMap) { + // The test assumes that the underlying tables exists and it's empty. + QMetaChunkMap chunkMap; + BOOST_CHECK_THROW(qMeta->getChunkMap(), EmptyTableError); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/qmeta/types.h b/src/qmeta/types.h deleted file mode 100644 index 28e8338fa4..0000000000 --- a/src/qmeta/types.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * LSST Data Management System - * Copyright 2015 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_QMETA_TYPES_H -#define LSST_QSERV_QMETA_TYPES_H - -// System headers -#include -#include - -// Third-party headers - -// Qserv headers -#include "global/intTypes.h" - -namespace lsst::qserv::qmeta { - -/* - * typedefs for commonly used types. - */ - -/// Typedef for Czar ID in query metadata. -typedef std::uint32_t CzarId; - -} // namespace lsst::qserv::qmeta - -#endif // LSST_QSERV_QMETA_TYPES_H diff --git a/src/qproc/CMakeLists.txt b/src/qproc/CMakeLists.txt index e95b72e0b2..59fee1ca79 100644 --- a/src/qproc/CMakeLists.txt +++ b/src/qproc/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(qproc SHARED) -add_dependencies(qproc proto) target_sources(qproc PRIVATE ChunkQuerySpec.cc @@ -8,7 +7,6 @@ target_sources(qproc PRIVATE IndexMap.cc QuerySession.cc SecondaryIndex.cc - TaskMsgFactory.cc ) target_link_libraries(qproc PRIVATE @@ -17,9 +15,7 @@ target_link_libraries(qproc PRIVATE sphgeom ) -install( - TARGETS qproc -) +install(TARGETS qproc) FUNCTION(qproc_tests) FOREACH(TEST IN ITEMS ${ARGV}) @@ -28,6 +24,7 @@ FUNCTION(qproc_tests) cconfig ccontrol czar + global parser qana qdisp @@ -36,7 +33,6 @@ FUNCTION(qproc_tests) css qmeta rproc - xrdreq Boost::unit_test_framework Threads::Threads ) diff --git a/src/qproc/ChunkQuerySpec.h b/src/qproc/ChunkQuerySpec.h index 6f777e2feb..b2e1e50806 100644 
--- a/src/qproc/ChunkQuerySpec.h +++ b/src/qproc/ChunkQuerySpec.h @@ -39,7 +39,7 @@ // Qserv headers #include "global/DbTable.h" #include "global/stringTypes.h" -#include "proto/ScanTableInfo.h" +#include "protojson/ScanTableInfo.h" namespace lsst::qserv::qproc { @@ -52,7 +52,7 @@ class ChunkQuerySpec { using Ptr = std::shared_ptr; ChunkQuerySpec() {} - ChunkQuerySpec(std::string const& db_, int chunkId_, proto::ScanInfo const& scanInfo_, + ChunkQuerySpec(std::string const& db_, int chunkId_, protojson::ScanInfo::Ptr const& scanInfo_, bool scanInteractive_) : db(db_), chunkId(chunkId_), scanInfo(scanInfo_), scanInteractive(scanInteractive_) {} @@ -60,13 +60,14 @@ class ChunkQuerySpec { std::string db{ ""}; ///< dominant db (any database if there are multiple databases referenced in the query) int chunkId{0}; - proto::ScanInfo scanInfo; ///< shared-scan candidates + protojson::ScanInfo::Ptr scanInfo; ///< shared-scan candidates // Consider saving subChunkTable templates, and substituting the chunkIds // and subChunkIds into them on-the-fly. bool scanInteractive{false}; DbTableSet subChunkTables; std::vector subChunkIds; std::vector queries; + std::vector queryTemplates; // Consider promoting the concept of container of ChunkQuerySpec // in the hopes of increased code cleanliness. std::shared_ptr nextFragment; ///< ad-hoc linked list (consider removal) diff --git a/src/qproc/ChunkSpec.cc b/src/qproc/ChunkSpec.cc index 1bd36261fb..fa9a8132ff 100644 --- a/src/qproc/ChunkSpec.cc +++ b/src/qproc/ChunkSpec.cc @@ -44,7 +44,15 @@ namespace { // File-scope helpers /// A "good" number of subchunks to include in a chunk query. This is /// a guess. The best value is an open question -int const GOOD_SUBCHUNK_COUNT = 20; +// TODO:UJ `ChunkSpecFragmenter` has the purpose of limiting the +// number of subchunks per ChunkSpec (which works out to +// subchunkids per Job). +// Each subchunk gets its own task on the worker, so this +// is probably no longer helpful. 
Making the limit absurdly +// high should have the effect of disabling the code +// while checking if there are unexpected side effects. +// int const GOOD_SUBCHUNK_COUNT = 20; +int const GOOD_SUBCHUNK_COUNT = 2'000'000; } // namespace namespace lsst::qserv::qproc { diff --git a/src/qproc/ChunkSpec.h b/src/qproc/ChunkSpec.h index 9bf31053ee..777cd9d87f 100644 --- a/src/qproc/ChunkSpec.h +++ b/src/qproc/ChunkSpec.h @@ -93,6 +93,8 @@ ChunkSpecVector intersect(ChunkSpecVector const& a, ChunkSpecVector const& b); void normalize(ChunkSpecVector& specs); /// An iterating fragmenter to reduce the number of subChunkIds per ChunkSpec +/// TODO:UJ Fragmenting the the Jobs probably no longer makes sense, see +/// `GOOD_SUBCHUNK_COUNT` definition. class ChunkSpecFragmenter { public: ChunkSpecFragmenter(ChunkSpec const& s); diff --git a/src/qproc/QuerySession.cc b/src/qproc/QuerySession.cc index d6f06009ef..da02e650c9 100644 --- a/src/qproc/QuerySession.cc +++ b/src/qproc/QuerySession.cc @@ -399,8 +399,8 @@ void QuerySession::print(std::ostream& os) const { os << " needs merge: " << this->needsMerge(); os << " 1st parallel statement: \"" << par << "\""; os << " merge statement: \"" << mer << "\""; - os << " scanRating:" << _context->scanInfo.scanRating; - for (auto const& tbl : _context->scanInfo.infoTables) { + os << " scanRating:" << _context->scanInfo->scanRating; + for (auto const& tbl : _context->scanInfo->infoTables) { os << " ScanTable: " << tbl.db << "." 
<< tbl.table << " lock=" << tbl.lockInMemory << " rating=" << tbl.scanRating; } @@ -438,6 +438,8 @@ std::ostream& operator<<(std::ostream& out, QuerySession const& querySession) { return out; } +protojson::ScanInfo::Ptr QuerySession::getScanInfo() const { return _context->scanInfo; } + ChunkQuerySpec::Ptr QuerySession::buildChunkQuerySpec(query::QueryTemplate::Vect const& queryTemplates, ChunkSpec const& chunkSpec, bool fillInChunkIdTag) const { diff --git a/src/qproc/QuerySession.h b/src/qproc/QuerySession.h index 32951002b2..773c48072f 100644 --- a/src/qproc/QuerySession.h +++ b/src/qproc/QuerySession.h @@ -187,6 +187,8 @@ class QuerySession { void setScanInteractive(); bool getScanInteractive() const { return _scanInteractive; } + protojson::ScanInfo::Ptr getScanInfo() const; + /** * Print query session to stream. * diff --git a/src/qproc/TaskMsgFactory.cc b/src/qproc/TaskMsgFactory.cc deleted file mode 100644 index ac7a5afe09..0000000000 --- a/src/qproc/TaskMsgFactory.cc +++ /dev/null @@ -1,150 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013-2017 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -/** - * @file - * - * @brief TaskMsgFactory is a factory for TaskMsg (protobuf) objects. - * - * @author Daniel L. 
Wang, SLAC - */ - -// Class header -#include "qproc/TaskMsgFactory.h" - -// System headers -#include - -// Third-party headers - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "cconfig/CzarConfig.h" -#include "global/intTypes.h" -#include "qmeta/types.h" -#include "qproc/ChunkQuerySpec.h" -#include "util/common.h" - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qproc.TaskMsgFactory"); -} - -namespace lsst::qserv::qproc { - -std::shared_ptr TaskMsgFactory::_makeMsg(ChunkQuerySpec const& chunkQuerySpec, - std::string const& chunkResultName, QueryId queryId, - int jobId, int attemptCount, qmeta::CzarId czarId) { - std::string resultTable("Asdfasfd"); - if (!chunkResultName.empty()) { - resultTable = chunkResultName; - } - auto taskMsg = std::make_shared(); - // shared - taskMsg->set_db(chunkQuerySpec.db); - taskMsg->set_queryid(queryId); - taskMsg->set_jobid(jobId); - taskMsg->set_attemptcount(attemptCount); - taskMsg->set_czarid(czarId); - // scanTables (for shared scans) - // check if more than 1 db in scanInfo - std::string db; - for (auto const& sTbl : chunkQuerySpec.scanInfo.infoTables) { - if (db.empty()) { - db = sTbl.db; - } - } - - for (auto const& sTbl : chunkQuerySpec.scanInfo.infoTables) { - lsst::qserv::proto::TaskMsg_ScanTable* msgScanTbl = taskMsg->add_scantable(); - sTbl.copyToScanTable(msgScanTbl); - } - - taskMsg->set_scanpriority(chunkQuerySpec.scanInfo.scanRating); - taskMsg->set_scaninteractive(chunkQuerySpec.scanInteractive); - taskMsg->set_maxtablesize_mb(cconfig::CzarConfig::instance()->getMaxTableSizeMB()); - - // per-chunk - taskMsg->set_chunkid(chunkQuerySpec.chunkId); - // per-fragment - // TODO refactor to simplify - if (chunkQuerySpec.nextFragment.get()) { - ChunkQuerySpec const* sPtr = &chunkQuerySpec; - while (sPtr) { - LOGS(_log, LOG_LVL_TRACE, "nextFragment"); - for (unsigned int t = 0; t < (sPtr->queries).size(); t++) { - LOGS(_log, LOG_LVL_TRACE, (sPtr->queries).at(t)); - } - // Linked fragments will 
not have valid subChunkTables vectors, - // So, we reuse the root fragment's vector. - _addFragment(*taskMsg, resultTable, chunkQuerySpec.subChunkTables, sPtr->subChunkIds, - sPtr->queries); - sPtr = sPtr->nextFragment.get(); - } - } else { - LOGS(_log, LOG_LVL_TRACE, "no nextFragment"); - for (unsigned int t = 0; t < (chunkQuerySpec.queries).size(); t++) { - LOGS(_log, LOG_LVL_TRACE, (chunkQuerySpec.queries).at(t)); - } - _addFragment(*taskMsg, resultTable, chunkQuerySpec.subChunkTables, chunkQuerySpec.subChunkIds, - chunkQuerySpec.queries); - } - return taskMsg; -} - -void TaskMsgFactory::_addFragment(proto::TaskMsg& taskMsg, std::string const& resultName, - DbTableSet const& subChunkTables, std::vector const& subChunkIds, - std::vector const& queries) { - proto::TaskMsg::Fragment* frag = taskMsg.add_fragment(); - frag->set_resulttable(resultName); - - for (auto& qry : queries) { - frag->add_query(qry); - } - - proto::TaskMsg_Subchunk sc; - - // Add the db+table pairs to the subchunk. - for (auto& tbl : subChunkTables) { - proto::TaskMsg_Subchunk_DbTbl* dbTbl = sc.add_dbtbl(); - dbTbl->set_db(tbl.db); - dbTbl->set_tbl(tbl.table); - LOGS(_log, LOG_LVL_TRACE, "added dbtbl=" << tbl.db << "." << tbl.table); - } - - for (auto& subChunkId : subChunkIds) { - sc.add_id(subChunkId); - } - - frag->mutable_subchunks()->CopyFrom(sc); -} - -void TaskMsgFactory::serializeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, - QueryId queryId, int jobId, int attemptCount, qmeta::CzarId czarId, - std::ostream& os) { - std::shared_ptr m = _makeMsg(s, chunkResultName, queryId, jobId, attemptCount, czarId); - m->SerializeToOstream(&os); -} - -} // namespace lsst::qserv::qproc diff --git a/src/qproc/TaskMsgFactory.h b/src/qproc/TaskMsgFactory.h deleted file mode 100644 index dc2d0ed130..0000000000 --- a/src/qproc/TaskMsgFactory.h +++ /dev/null @@ -1,73 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013-2017 LSST Corporation. 
- * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_QPROC_TASKMSGFACTORY_H -#define LSST_QSERV_QPROC_TASKMSGFACTORY_H -/** - * @file - * - * @brief TaskMsgFactory is a factory for TaskMsg (protobuf) objects. - * - * @author Daniel L. Wang, SLAC - */ - -// System headers -#include -#include - -// Qserv headers -#include "global/DbTable.h" -#include "global/intTypes.h" -#include "proto/worker.pb.h" -#include "qmeta/types.h" - -namespace lsst::qserv::qproc { - -class ChunkQuerySpec; - -/// TaskMsgFactory is a factory for TaskMsg (protobuf) objects. -/// All member variables must be thread safe. 
-class TaskMsgFactory { -public: - using Ptr = std::shared_ptr; - - TaskMsgFactory() = default; - virtual ~TaskMsgFactory() {} - - /// Construct a TaskMsg and serialize it to a stream - virtual void serializeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, QueryId queryId, - int jobId, int attemptCount, qmeta::CzarId czarId, std::ostream& os); - -private: - std::shared_ptr _makeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, - QueryId queryId, int jobId, int attemptCount, - qmeta::CzarId czarId); - - void _addFragment(proto::TaskMsg& taskMsg, std::string const& resultName, - DbTableSet const& subChunkTables, std::vector const& subChunkIds, - std::vector const& queries); -}; - -} // namespace lsst::qserv::qproc - -#endif // LSST_QSERV_QPROC_TASKMSGFACTORY_H diff --git a/src/qproc/testQueryAnaDuplSelectExpr.cc b/src/qproc/testQueryAnaDuplSelectExpr.cc index e090b3f55d..27fe4f0d2b 100644 --- a/src/qproc/testQueryAnaDuplSelectExpr.cc +++ b/src/qproc/testQueryAnaDuplSelectExpr.cc @@ -65,7 +65,6 @@ using lsst::qserv::query::QueryContext; using lsst::qserv::sql::SqlConfig; using lsst::qserv::tests::QueryAnaFixture; using lsst::qserv::util::Error; -using lsst::qserv::util::ErrorCode; using lsst::qserv::util::MultiError; /** @@ -79,8 +78,8 @@ std::string build_exception_msg(std::string n, std::string name, std::string pos MultiError multiError; boost::format dupl_err_msg = boost::format(DuplSelectExprPlugin::ERR_MSG) % name % pos; - Error error(ErrorCode::DUPLICATE_SELECT_EXPR, dupl_err_msg.str()); - multiError.push_back(error); + Error error(Error::DUPLICATE_SELECT_EXPR, Error::NONE, dupl_err_msg.str()); + multiError.insert(error); std::string err_msg = "AnalysisError:" + DuplSelectExprPlugin::EXCEPTION_MSG + multiError.toOneLineString(); return err_msg; diff --git a/src/qproc/testQueryAnaGeneral.cc b/src/qproc/testQueryAnaGeneral.cc index 27d1bc0676..5cd6711350 100644 --- a/src/qproc/testQueryAnaGeneral.cc +++ 
b/src/qproc/testQueryAnaGeneral.cc @@ -796,9 +796,9 @@ BOOST_AUTO_TEST_CASE(SimpleScan) { } BOOST_CHECK(nullptr == context->secIdxRestrictors); BOOST_CHECK(nullptr == context->areaRestrictors); - BOOST_CHECK_EQUAL(context->scanInfo.infoTables.size(), 1U); - if (context->scanInfo.infoTables.size() >= 1) { - auto p = context->scanInfo.infoTables.front(); + BOOST_CHECK_EQUAL(context->scanInfo->infoTables.size(), 1U); + if (context->scanInfo->infoTables.size() >= 1) { + auto p = context->scanInfo->infoTables.front(); BOOST_CHECK_EQUAL(p.db, "LSST"); BOOST_CHECK_EQUAL(p.table, "Object"); } diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 5df3c784c4..f585561306 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(query SHARED) -add_dependencies(query proto) target_sources(query PRIVATE AggOp.cc @@ -45,9 +44,7 @@ target_link_libraries(query PUBLIC sphgeom ) -install( - TARGETS query -) +install(TARGETS query) FUNCTION(query_tests) FOREACH(TEST IN ITEMS ${ARGV}) @@ -64,7 +61,6 @@ FUNCTION(query_tests) qmeta query rproc - xrdreq Boost::unit_test_framework Threads::Threads ) diff --git a/src/query/QueryContext.h b/src/query/QueryContext.h index 77ad177234..b171995376 100644 --- a/src/query/QueryContext.h +++ b/src/query/QueryContext.h @@ -39,7 +39,7 @@ // Local headers #include "css/CssAccess.h" #include "global/stringTypes.h" -#include "proto/ScanTableInfo.h" +#include "protojson/ScanTableInfo.h" #include "qana/QueryMapping.h" #include "query/FromList.h" #include "query/typedefs.h" @@ -84,7 +84,7 @@ class QueryContext { std::shared_ptr databaseModels; ///< contains database schema information. - proto::ScanInfo scanInfo; // Tables scanned (for shared scans) + protojson::ScanInfo::Ptr scanInfo{protojson::ScanInfo::create()}; // Tables scanned (for shared scans) /** * @brief Add a TableRef to the list of tables used by this query. 
diff --git a/src/query/QueryTemplate.cc b/src/query/QueryTemplate.cc index 699a6faab2..32e628e90d 100644 --- a/src/query/QueryTemplate.cc +++ b/src/query/QueryTemplate.cc @@ -43,6 +43,8 @@ #include "query/ColumnRef.h" #include "query/TableRef.h" +using namespace std; + namespace lsst::qserv::query { //////////////////////////////////////////////////////////////////////// @@ -204,4 +206,18 @@ QueryTemplate::GetAliasMode QueryTemplate::getTableAliasMode() const { return DONT_USE; // should never get here but to satisfy the compiler. } +string QueryTemplate::dump() const { + ostringstream os; + os << "QueryTemplate quoteIdents=" << _quoteIdentifiers; + os << " useColOnly=" << _useColumnOnly; + os << " aliasMode=" << _aliasMode; + os << " entries={"; + for (auto const& entry : _entries) { + os << "(dynamic=" << entry->isDynamic(); + os << ":val=" << entry->getValue() << ")"; + } + os << "}"; + return os.str(); +} + } // namespace lsst::qserv::query diff --git a/src/query/QueryTemplate.h b/src/query/QueryTemplate.h index 5be5e3ac03..b0ffad8ba2 100644 --- a/src/query/QueryTemplate.h +++ b/src/query/QueryTemplate.h @@ -208,6 +208,8 @@ class QueryTemplate { return os << qt.sqlFragment(); } + std::string dump() const; + private: EntryPtrVector _entries; SetAliasMode _aliasMode{USE_ALIAS}; diff --git a/src/query/testRepr.cc b/src/query/testRepr.cc index 29e64823ce..073a922291 100644 --- a/src/query/testRepr.cc +++ b/src/query/testRepr.cc @@ -75,11 +75,11 @@ BOOST_AUTO_TEST_CASE(Factory) { // and the tree is constructed via a push down list. The aim here // was to keep the specification parser as simple as possible... 
-const std::string RenderedBoolTermFromRPN(const char **rpn) { +const std::string RenderedBoolTermFromRPN(const char** rpn) { BoolTerm::PtrVector pdl; int opcount; - for (const char **t = rpn; *t; ++t) { + for (const char** t = rpn; *t; ++t) { if (sscanf(*t, "%d", &opcount) == 1) { ; } else if (!strcmp(*t, "AND")) { @@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | +-- A // | +-- B // +-- C - const char *test0[] = {"C", "B", "A", "2", "AND", "2", "AND", nullptr}; + const char* test0[] = {"C", "B", "A", "2", "AND", "2", "AND", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test0), "A AND B AND C"); // AND @@ -125,7 +125,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | +-- A // | +-- B // +-- C - const char *test1[] = {"C", "B", "A", "2", "OR", "2", "AND", nullptr}; + const char* test1[] = {"C", "B", "A", "2", "OR", "2", "AND", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test1), "(A OR B) AND C"); // OR @@ -133,7 +133,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | +-- A // | +-- B // +-- C - const char *test2[] = {"C", "B", "A", "2", "AND", "2", "OR", nullptr}; + const char* test2[] = {"C", "B", "A", "2", "AND", "2", "OR", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test2), "A AND B OR C"); // OR @@ -141,7 +141,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | +-- A // | +-- B // +-- C - const char *test3[] = {"C", "B", "A", "2", "OR", "2", "OR", nullptr}; + const char* test3[] = {"C", "B", "A", "2", "OR", "2", "OR", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test3), "A OR B OR C"); // AND @@ -151,7 +151,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | +-- C // | +-- D // +-- E - const char *test4[] = {"E", "D", "C", "B", "3", "OR", "A", "3", "AND", nullptr}; + const char* test4[] = {"E", "D", "C", "B", "3", "OR", "A", "3", "AND", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test4), "A AND (B OR C OR D) AND E"); // OR @@ -161,7 +161,7 @@ BOOST_AUTO_TEST_CASE(BoolTermRenderParens) { // | 
+-- C // | +-- D // +-- E - const char *test5[] = {"E", "D", "C", "B", "3", "AND", "A", "3", "OR", nullptr}; + const char* test5[] = {"E", "D", "C", "B", "3", "AND", "A", "3", "OR", nullptr}; BOOST_CHECK_EQUAL(RenderedBoolTermFromRPN(test5), "A OR B AND C AND D OR E"); } diff --git a/src/replica/CMakeLists.txt b/src/replica/CMakeLists.txt index dace6d4df8..f57814cf1e 100644 --- a/src/replica/CMakeLists.txt +++ b/src/replica/CMakeLists.txt @@ -23,10 +23,7 @@ target_link_libraries(replica PUBLIC replica_util replica_worker css - xrdreq - xrdsvc - XrdCl - XrdSsiLib + global http qhttp qmeta @@ -69,9 +66,7 @@ replica_utils( qserv-replica-worker ) -install( - TARGETS replica -) +install(TARGETS replica) function(REPLICA_TESTS) foreach(TEST IN ITEMS ${ARGV}) diff --git a/src/replica/apps/CMakeLists.txt b/src/replica/apps/CMakeLists.txt index 5cf597b00a..e88e0ad052 100644 --- a/src/replica/apps/CMakeLists.txt +++ b/src/replica/apps/CMakeLists.txt @@ -1,8 +1,11 @@ add_library(replica_apps OBJECT) + add_dependencies(replica_apps replica_proto) + target_include_directories(replica_apps PRIVATE ${XROOTD_INCLUDE_DIRS} ) + target_sources(replica_apps PRIVATE AbortTransactionApp.cc AdminApp.cc @@ -35,7 +38,6 @@ target_sources(replica_apps PRIVATE QhttpTestApp.cc QservWorkerApp.cc QservWorkerPingApp.cc - QservXrootdSsiApp.cc RebalanceApp.cc RegistryHttpApp.cc ReplicateApp.cc @@ -45,7 +47,7 @@ target_sources(replica_apps PRIVATE VerifyApp.cc WorkerApp.cc ) + target_link_libraries(replica_apps PUBLIC log - XrdSsiLib ) diff --git a/src/replica/apps/QservXrootdSsiApp.cc b/src/replica/apps/QservXrootdSsiApp.cc deleted file mode 100644 index 13658dc2b1..0000000000 --- a/src/replica/apps/QservXrootdSsiApp.cc +++ /dev/null @@ -1,206 +0,0 @@ -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "replica/apps/QservXrootdSsiApp.h" - -// System headers -#include -#include -#include -#include -#include -#include -#include -#include - -// Third-party headers -#include "XrdSsi/XrdSsiErrInfo.hh" -#include "XrdSsi/XrdSsiProvider.hh" -#include "XrdSsi/XrdSsiRequest.hh" -#include "XrdSsi/XrdSsiResource.hh" -#include "XrdSsi/XrdSsiService.hh" - -// Qserv headers -#include "global/ResourceUnit.h" -#include "util/BlockPost.h" -#include "util/TimeUtils.h" - -using namespace std; -using namespace lsst::qserv; - -extern XrdSsiProvider* XrdSsiProviderClient; - -namespace { -string const description = - "This application sends requests to Qserv workers over XROOTD/SSI for a purpose of testing" - " the performance, scalability and stability of the message delivery services."; - -bool const injectDatabaseOptions = false; -bool const boostProtobufVersionCheck = false; -bool const enableServiceProvider = false; - -double const millisecondsInSecond = 1000.; - -/// @return 'YYYY-MM-DD HH:MM:SS.mmm ' -string timestamp() { - return util::TimeUtils::toDateTimeString(chrono::milliseconds(util::TimeUtils::now())) + " "; -} - -string getErrorText(XrdSsiErrInfo const& e) { - ostringstream os; - int errCode; - os << "XrdSsiError error: " << e.Get(errCode); - os << ", code=" << errCode; - return 
os.str(); -} - -class SsiRequest : public XrdSsiRequest { -public: - explicit SsiRequest(string const& id, atomic& numFinishedRequests) - : _id(id), _numFinishedRequests(numFinishedRequests) {} - virtual ~SsiRequest() {} - char* GetRequest(int& requestLength) override { - // cout << "SsiRequest::" << __func__ << " id: " << _id << endl; - requestLength = 16; - return _requestData; - } - bool ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo const& rInfo) override { - int errCode; - eInfo.Get(errCode); - if (errCode != 0) { - // cout << "SsiRequest::" << __func__ << " id: " << _id << ": " << ::getErrorText(eInfo) << endl; - } - // Finished(); - //_numFinishedRequests.fetch_add(1); - return true; - } - void ProcessResponseData(XrdSsiErrInfo const& eInfo, char* buff, int blen, bool last) override { - int errCode; - eInfo.Get(errCode); - if (errCode != 0) { - cout << "SsiRequest::" << __func__ << " id: " << _id << ": " << ::getErrorText(eInfo) << endl; - } - } - -private: - string _id; - atomic& _numFinishedRequests; - char _requestData[1024]; -}; - -} // namespace - -namespace lsst::qserv::replica { - -QservXrootdSsiApp::Ptr QservXrootdSsiApp::create(int argc, char* argv[]) { - return Ptr(new QservXrootdSsiApp(argc, argv)); -} - -QservXrootdSsiApp::QservXrootdSsiApp(int argc, char* argv[]) - : Application(argc, argv, ::description, ::injectDatabaseOptions, ::boostProtobufVersionCheck, - ::enableServiceProvider) { - parser().required("url", "The connection URL for the XROOTD/SSI services.", _url) - .option("num-threads", "The number of threads for running the test.", _numThreads) - .option("report-interval-ms", - "An interval (milliseonds) for reporting the performance counters. 
Must be greater than " - "0.", - _reportIntervalMs) - .flag("progress", "The flag which would turn on periodic progress report on the requests.", - _progress) - .flag("verbose", "The flag which would turn on detailed report on the requests.", _verbose); -} - -int QservXrootdSsiApp::runImpl() { - // Connect to the service - XrdSsiErrInfo eInfo; - XrdSsiService* xrdSsiService = XrdSsiProviderClient->GetService(eInfo, _url); - if (!xrdSsiService) { - cerr << "Error obtaining XrdSsiService: serviceUrl=" << _url << ", " << ::getErrorText(eInfo) << endl; - return 1; - } - - // Counters updated by the requests - atomic numRequests(0); - atomic numFinishedRequests(0); - - // The requests - vector> requests; - mutex requestsMutex; - - // Launch all threads in the pool - atomic numThreadsActive{0}; - vector threads; - for (size_t i = 0; i < _numThreads; ++i) { - numThreadsActive.fetch_add(1); - threads.push_back(thread([&]() { - for (int chunk = 0; chunk < 150000; ++chunk) { - string const id = to_string(i) + ":" + to_string(chunk); - XrdSsiResource::Affinity const affinity = XrdSsiResource::Strong; - XrdSsiResource resource("/chk/wise_01/" + to_string(chunk), "", id, "", 0, affinity); - shared_ptr<::SsiRequest> request(new ::SsiRequest(id, numFinishedRequests)); - xrdSsiService->ProcessRequest(*(request.get()), resource); - ++numRequests; - lock_guard lock(requestsMutex); - requests.push_back(request); - } - // util::BlockPost bp(10*1000, 20*1000); - // bp.wait(); - numThreadsActive.fetch_sub(1); - cout << ::timestamp() << "Thread " << i << " finished" << endl; - })); - } - - // Begin the monitoring & reporting cycle - util::BlockPost bp(_reportIntervalMs, _reportIntervalMs + 1); - while (numThreadsActive.load() > 0) { - uint64_t beginNumRequests = numRequests; - bp.wait(_reportIntervalMs); - uint64_t const endNumRequests = numRequests; - double const requestsPerSecond = - (endNumRequests - beginNumRequests) / (_reportIntervalMs / millisecondsInSecond); - if (_progress) { - 
cout << ::timestamp() << "Sent: " << setprecision(7) << requestsPerSecond << " Req/s" << endl; - } - beginNumRequests = endNumRequests; - } - for (auto&& t : threads) { - t.join(); - } - // while (numFinishedRequests.load() < numRequests) { - // cout << ::timestamp() << "Waiting for all requests to finish: " << numFinishedRequests.load() << " - // / " - // << numRequests.load() << endl; - // bp.wait(1000); - // } - // cout << ::timestamp() << "All requests reported as fiished" << endl; - cout << ::timestamp() << "All threads finished, calling Finished() on " << requests.size() << " requests" - << endl; - for (auto&& request : requests) { - request->Finished(true); - } - cout << ::timestamp() << "Done calling Finished() on the requests" << endl; - util::BlockPost bp1(10 * 1000, 20 * 1000); - bp1.wait(); - return 0; -} - -} // namespace lsst::qserv::replica diff --git a/src/replica/apps/QservXrootdSsiApp.h b/src/replica/apps/QservXrootdSsiApp.h deleted file mode 100644 index 8a990bf2d5..0000000000 --- a/src/replica/apps/QservXrootdSsiApp.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -#ifndef LSST_QSERV_REPLICA_QSERVXROOTDSSIAPP_H -#define LSST_QSERV_REPLICA_QSERVXROOTDSSIAPP_H - -// System headers -#include -#include - -// Qserv headers -#include "replica/apps/Application.h" - -// This header declarations -namespace lsst::qserv::replica { - -/** - * Class QservXrootdSsiApp is performance and scalability test for the XROOTD/SSI. - */ -class QservXrootdSsiApp : public Application { -public: - /// The pointer type for instances of the class - typedef std::shared_ptr Ptr; - - /** - * The factory method is the only way of creating objects of this class - * because of the very base class's inheritance from 'enable_shared_from_this'. - * - * @param argc The number of command-line arguments. - * @param argv The vector of command-line arguments. - */ - static Ptr create(int argc, char* argv[]); - - QservXrootdSsiApp() = delete; - QservXrootdSsiApp(QservXrootdSsiApp const&) = delete; - QservXrootdSsiApp& operator=(QservXrootdSsiApp const&) = delete; - - virtual ~QservXrootdSsiApp() final = default; - -protected: - /// @see Application::runImpl() - virtual int runImpl() final; - -private: - /// @see QservXrootdSsiApp::create() - QservXrootdSsiApp(int argc, char* argv[]); - - std::string _url; ///< The connection URL for the XROOTD/SSI services. - size_t _numThreads = 1; ///< The number of threads for running the test. - int _reportIntervalMs = 1000; ///< An interval for reporting the performance counters. - bool _progress = false; ///< For periodic progress reports on the requests. - bool _verbose = false; ///< For detailed report on the requests. 
-}; - -} // namespace lsst::qserv::replica - -#endif /* LSST_QSERV_REPLICA_QSERVXROOTDSSIAPP_H */ diff --git a/src/replica/config/CMakeLists.txt b/src/replica/config/CMakeLists.txt index 7c621d4a1f..402f219941 100644 --- a/src/replica/config/CMakeLists.txt +++ b/src/replica/config/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(replica_config OBJECT) + target_sources(replica_config PRIVATE ConfigCzar.cc ConfigDatabase.cc @@ -12,7 +13,8 @@ target_sources(replica_config PRIVATE Configuration.cc ConfigurationSchema.cc ) + target_link_libraries(replica_config PUBLIC log sphgeom -) \ No newline at end of file +) diff --git a/src/replica/config/ConfigCzar.cc b/src/replica/config/ConfigCzar.cc index a90b87af7c..3226c92943 100644 --- a/src/replica/config/ConfigCzar.cc +++ b/src/replica/config/ConfigCzar.cc @@ -42,7 +42,7 @@ ConfigCzar::ConfigCzar(json const& obj) { } try { parseRequired(name, obj, "name"); - parseRequired(id, obj, "id"); + parseRequired(id, obj, "id"); parseRequired(host.addr, obj.at("host"), "addr"); parseRequired(host.name, obj.at("host"), "name"); parseOptional(port, obj, "port"); diff --git a/src/replica/config/ConfigCzar.h b/src/replica/config/ConfigCzar.h index f552a6dd3c..4ca9ce544c 100644 --- a/src/replica/config/ConfigCzar.h +++ b/src/replica/config/ConfigCzar.h @@ -30,7 +30,7 @@ #include "nlohmann/json.hpp" // Qserv headers -#include "qmeta/types.h" +#include "global/intTypes.h" #include "replica/config/ConfigHost.h" // This header declarations @@ -41,10 +41,10 @@ namespace lsst::qserv::replica { */ class ConfigCzar { public: - std::string name; ///< The logical name of a Czar. - qmeta::CzarId id = 0; ///< The unique identifier of a Czar. - ConfigHost host; ///< The host name (and IP address) of the Czar management service. - uint16_t port = 0; ///< The port number of the Czar management service. + std::string name; ///< The logical name of a Czar. + CzarId id = 0; ///< The unique name of a Czar. 
+ ConfigHost host; ///< The host name (and IP address) of the Czar management service. + uint16_t port = 0; ///< The port number of the Czar management service. /** * Construct from a JSON object. diff --git a/src/replica/config/Configuration.cc b/src/replica/config/Configuration.cc index 603225d728..be54b00d38 100644 --- a/src/replica/config/Configuration.cc +++ b/src/replica/config/Configuration.cc @@ -796,8 +796,8 @@ ConfigCzar Configuration::updateCzar(ConfigCzar const& czar) { throw ConfigUnknownCzar(_context(__func__) + " unknown Czar '" + czar.name + "'.", czar.name); } -map Configuration::czarIds() const { - map ids; +map Configuration::czarIds() const { + map ids; replica::Lock const lock(_mtx, _context(__func__)); for (auto&& [name, czar] : _czars) { ids[czar.id] = name; diff --git a/src/replica/config/Configuration.h b/src/replica/config/Configuration.h index c2f2282230..7eb1929795 100644 --- a/src/replica/config/Configuration.h +++ b/src/replica/config/Configuration.h @@ -43,7 +43,6 @@ // Qserv headers #include "global/constants.h" -#include "qmeta/types.h" #include "replica/config/ConfigCzar.h" #include "replica/config/ConfigDatabase.h" #include "replica/config/ConfigDatabaseFamily.h" @@ -728,7 +727,7 @@ class Configuration { * @return Mapping between the unique identifiers to the corresponding names * for all known Czars. */ - std::map czarIds() const; + std::map czarIds() const; /// @param showPassword If a value of the flag is 'false' then hash a password in the result. /// @return The JSON representation of the object. 
diff --git a/src/replica/contr/CMakeLists.txt b/src/replica/contr/CMakeLists.txt index 6b6d2aa34a..b0d9ad6a9c 100644 --- a/src/replica/contr/CMakeLists.txt +++ b/src/replica/contr/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(replica_contr OBJECT) + target_sources(replica_contr PRIVATE Controller.cc DeleteWorkerTask.cc @@ -26,6 +27,7 @@ target_sources(replica_contr PRIVATE ReplicationTask.cc Task.cc ) + target_link_libraries(replica_contr PUBLIC log sphgeom diff --git a/src/replica/contr/HttpQservMonitorModule.cc b/src/replica/contr/HttpQservMonitorModule.cc index 9e0fd4ba28..42ff6ed88f 100644 --- a/src/replica/contr/HttpQservMonitorModule.cc +++ b/src/replica/contr/HttpQservMonitorModule.cc @@ -40,7 +40,6 @@ #include "mysql/MySqlConfig.h" #include "qmeta/UserTables.h" #include "qmeta/UserTableIngestRequest.h" -#include "qmeta/types.h" #include "replica/config/Configuration.h" #include "replica/config/ConfigDatabase.h" #include "replica/jobs/QservStatusJob.h" @@ -130,7 +129,7 @@ void extractQInfo(Connection::Ptr const& conn, json& result) { * Such explicit conversion is required because the JSON library doesn't support * numeric keys in the JSON objects. The keys have to be turned into strings. 
*/ -json czarIdsToJson(map const& ids) { +json czarIdsToJson(map const& ids) { json result = json::object(); for (auto&& [id, name] : ids) { result[to_string(id)] = name; @@ -413,10 +412,10 @@ wbase::TaskSelector HttpQservMonitorModule::_translateTaskSelector(string const& } } selector.maxTasks = query().optionalUInt("max_tasks", 0); - debug(func, "include_tasks=" + replica::bool2str(selector.includeTasks)); - debug(func, "query_ids=" + util::String::toString(selector.queryIds)); - debug(func, "task_states=" + util::String::toString(selector.taskStates)); - debug(func, "max_tasks=" + to_string(selector.maxTasks)); + trace(func, "include_tasks=" + replica::bool2str(selector.includeTasks) + + " query_ids=" + util::String::toString(selector.queryIds) + + " task_states=" + util::String::toString(selector.taskStates) + + " max_tasks=" + to_string(selector.maxTasks)); return selector; } @@ -669,7 +668,7 @@ json HttpQservMonitorModule::_currentUserQueries(Connection::Ptr& conn, ::parseFieldIntoJson(__func__, row, "samplingTime", resultRow); ::parseFieldIntoJson(__func__, row, "samplingTime_sec", resultRow); ::parseFieldIntoJson(__func__, row, "query", resultRow); - ::parseFieldIntoJson(__func__, row, "czarId", resultRow); + ::parseFieldIntoJson(__func__, row, "czarId", resultRow); ::parseFieldIntoJson(__func__, row, "qType", resultRow); // Optionally, add the name of corresponding worker scheduler @@ -702,7 +701,7 @@ json HttpQservMonitorModule::_pastUserQueries(Connection::Ptr& conn, string cons while (conn->next(row)) { json resultRow; ::parseFieldIntoJson(__func__, row, "queryId", resultRow); - ::parseFieldIntoJson(__func__, row, "czarId", resultRow); + ::parseFieldIntoJson(__func__, row, "czarId", resultRow); ::parseFieldIntoJson(__func__, row, "qType", resultRow); ::parseFieldIntoJson(__func__, row, "czarId", resultRow); ::parseFieldIntoJson(__func__, row, "user", resultRow); diff --git a/src/replica/contr/Task.h b/src/replica/contr/Task.h index 
fb5803fdbb..5fc9446296 100644 --- a/src/replica/contr/Task.h +++ b/src/replica/contr/Task.h @@ -198,6 +198,12 @@ class Task : public EventLogger, public std::enable_shared_from_this { */ void debug(std::string const& msg) { LOGS(_log, LOG_LVL_DEBUG, context() << msg); } + /** + * Log a message into the Logger's LOG_LVL_WARN stream. + * @param msg A message to be logged. + */ + void warn(std::string const& msg) { LOGS(_log, LOG_LVL_WARN, context() << msg); } + /** * Log a message into the Logger's LOG_LVL_ERROR stream. * @param msg A message to be logged. diff --git a/src/replica/ingest/CMakeLists.txt b/src/replica/ingest/CMakeLists.txt index 053ebb82b2..018637b0f4 100644 --- a/src/replica/ingest/CMakeLists.txt +++ b/src/replica/ingest/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(replica_ingest OBJECT) + add_dependencies(replica_ingest replica_proto) + target_sources(replica_ingest PRIVATE IngestClient.cc IngestDataHttpSvcMod.cc @@ -17,6 +19,7 @@ target_sources(replica_ingest PRIVATE IngestUtils.cc TransactionContrib.cc ) + target_link_libraries(replica_ingest PUBLIC log ) diff --git a/src/replica/jobs/CMakeLists.txt b/src/replica/jobs/CMakeLists.txt index c87abcdfaa..9ef8d1994d 100644 --- a/src/replica/jobs/CMakeLists.txt +++ b/src/replica/jobs/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(replica_jobs OBJECT) + target_sources(replica_jobs PRIVATE AbortTransactionJob.cc ClusterHealthJob.cc @@ -37,6 +38,7 @@ target_sources(replica_jobs PRIVATE SqlRowStatsJob.cc VerifyJob.cc ) + target_link_libraries(replica_jobs PUBLIC log ) diff --git a/src/replica/mysql/CMakeLists.txt b/src/replica/mysql/CMakeLists.txt index 6e7fd3c616..a78fe42253 100644 --- a/src/replica/mysql/CMakeLists.txt +++ b/src/replica/mysql/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(replica_mysql OBJECT) + add_dependencies(replica_mysql replica_proto) + target_sources(replica_mysql PRIVATE DatabaseMySQL.cc DatabaseMySQLGenerator.cc @@ -7,6 +9,7 @@ target_sources(replica_mysql PRIVATE DatabaseMySQLTypes.cc 
DatabaseMySQLUtils.cc ) + target_link_libraries(replica_mysql PUBLIC log ) diff --git a/src/replica/proto/CMakeLists.txt b/src/replica/proto/CMakeLists.txt index b61599d8c5..d365618f87 100644 --- a/src/replica/proto/CMakeLists.txt +++ b/src/replica/proto/CMakeLists.txt @@ -1,6 +1,7 @@ protobuf_generate_cpp(REPLICA_PB_SRCS REPLICA_PB_HDRS protocol.proto) add_library(replica_proto OBJECT) + target_sources(replica_proto PRIVATE ${REPLICA_PB_SRCS} ${REPLICA_PB_HDRS} diff --git a/src/replica/qserv/CMakeLists.txt b/src/replica/qserv/CMakeLists.txt index 3485c46a0c..90011302c6 100644 --- a/src/replica/qserv/CMakeLists.txt +++ b/src/replica/qserv/CMakeLists.txt @@ -1,7 +1,9 @@ add_library(replica_qserv OBJECT) + target_include_directories(replica_qserv PRIVATE ${XROOTD_INCLUDE_DIRS} ) + target_sources(replica_qserv PRIVATE AddReplicaQservMgtRequest.cc GetReplicasQservMgtRequest.cc @@ -19,8 +21,8 @@ target_sources(replica_qserv PRIVATE RemoveReplicaQservMgtRequest.cc SetReplicasQservMgtRequest.cc TestEchoQservMgtRequest.cc - XrdCmsgetVnId.cc ) + target_link_libraries(replica_qserv PUBLIC log ) diff --git a/src/replica/qserv/QservMgtServices.h b/src/replica/qserv/QservMgtServices.h index 7256c86238..09dc06c1e1 100644 --- a/src/replica/qserv/QservMgtServices.h +++ b/src/replica/qserv/QservMgtServices.h @@ -44,8 +44,6 @@ #include "wbase/TaskState.h" // Forward declarations -class XrdSsiService; - namespace lsst::qserv::wbase { class TaskSelector; } // namespace lsst::qserv::wbase diff --git a/src/replica/qserv/XrdCmsgetVnId.cc b/src/replica/qserv/XrdCmsgetVnId.cc deleted file mode 100644 index 4173cbe8dc..0000000000 --- a/src/replica/qserv/XrdCmsgetVnId.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// System headers -#include -#include -#include - -// Qserv headers -#include "global/stringUtil.h" -#include "replica/config/Configuration.h" -#include "replica/mysql/DatabaseMySQL.h" -#include "replica/mysql/DatabaseMySQLUtils.h" -#include "util/String.h" - -// XrootD headers -#include "XrdCms/XrdCmsVnId.hh" -#include "XrdSys/XrdSysError.hh" -#include "XrdVersion.hh" - -using namespace std; -using namespace lsst::qserv::replica; -using namespace lsst::qserv::replica::database::mysql; -namespace util = lsst::qserv::util; - -/** - * @brief Read a value of the VNID from the Qserv worker database that's - * configured via a MySQL connection string passed among the input - * parameters of the function. 
- * - * The list of input parameters has the following syntax: - * @code - * - * @code - * - * Where: - * work-db-conn-url: the database connector string for the worker's MySQL service - * max-reconnects: the maximum number of reconnects to he service - * conn-timeout-sec: the timeout for connecting to the service and executing the query - */ -extern "C" string XrdCmsgetVnId(XrdCmsgetVnIdArgs) { - string const context = string(__func__) + ": "; - string vnId; - try { - bool const greedy = true; - vector args = util::String::split(parms, " ", greedy); - if (args.size() != 3) { - eDest.Say(context.data(), "illegal number of parameters for the plugin. ", - "Exactly 3 parameters are required: " - "."); - } else { - string const qservWorkerDbUrl = args[0]; - Configuration::setQservWorkerDbUrl(qservWorkerDbUrl); - // Parameter 'maxReconnects' limits the total number of retries to execute the query in case - // if the query fails during execution. If the parameter's value is set to 0 then the default - // value of the parameter will be pulled by the query processor from the Replication - // system's Configuration. - unsigned int maxReconnects = lsst::qserv::stoui(args[1]); - // Parameter 'timeoutSec' is used both while connecting to the database server and for executing - // the query. If the MySQl service won't respond to the connection attempts beyond a period of - // time specified by the parameter then the operation will fail. Similarly, if the query execution - // will take longer than it's specified in the parameter then the query will fail. If the - // parameter's value is set to 0 then the default value of the parameter will be pulled by the - // query processor from the Replication system's Configuration. - unsigned int timeoutSec = lsst::qserv::stoui(args[2]); - // This parameter allows the database connector to make reconnects if the MySQL service - // won't be responding (or not be up) at the initial connection attempt. 
- bool const allowReconnects = true; - // Using the RAII-style connection handler to automatically close the connection and - // release resources in case of exceptions. - ConnectionHandler const handler(Connection::open2( - Configuration::qservWorkerDbParams("qservw_worker"), allowReconnects, timeoutSec)); - QueryGenerator const g(handler.conn); - handler.conn->executeInOwnTransaction( - [&context, &vnId, &eDest, &g](decltype(handler.conn) conn) { - string const query = g.select("id") + g.from("Id"); - if (!selectSingleValue(conn, query, vnId)) { - eDest.Say(context.data(), - "worker identity is not set in the Qserv worker database."); - } - }, - maxReconnects, timeoutSec); - } - } catch (exception const& ex) { - eDest.Say(context.data(), - "failed to pull worker identity from the Qserv worker database, ex:", ex.what()); - } - eDest.Say(context.data(), "vnid: ", vnId.data()); - return vnId; -} - -XrdVERSIONINFO(XrdCmsgetVnId, vnid_mysql_0); diff --git a/src/replica/registry/CMakeLists.txt b/src/replica/registry/CMakeLists.txt index 2b7e6cc6fc..21d3ed3261 100644 --- a/src/replica/registry/CMakeLists.txt +++ b/src/replica/registry/CMakeLists.txt @@ -1,10 +1,12 @@ add_library(replica_registry OBJECT) + target_sources(replica_registry PRIVATE Registry.cc RegistryHttpSvc.cc RegistryHttpSvcMod.cc RegistryServices.cc ) + target_link_libraries(replica_registry PUBLIC log ) diff --git a/src/replica/registry/Registry.cc b/src/replica/registry/Registry.cc index 725c927352..98d60f3c1f 100644 --- a/src/replica/registry/Registry.cc +++ b/src/replica/registry/Registry.cc @@ -26,7 +26,6 @@ #include "http/Auth.h" #include "http/Client.h" #include "http/MetaModule.h" -#include "qmeta/types.h" #include "replica/config/Configuration.h" #include "replica/config/ConfigWorker.h" #include "util/common.h" @@ -149,7 +148,7 @@ vector Registry::czars() const { } else { czar.name = czarName; } - czar.id = czarJson.at("id").get(); + czar.id = czarJson.at("id").get(); czar.host.addr = 
czarJson.at("host-addr").get(); czar.host.name = czarJson.at("host-name").get(); czar.port = czarJson.at("management-port").get(); diff --git a/src/replica/requests/CMakeLists.txt b/src/replica/requests/CMakeLists.txt index 58ec36d006..81b3ba4705 100644 --- a/src/replica/requests/CMakeLists.txt +++ b/src/replica/requests/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(replica_requests OBJECT) + add_dependencies(replica_requests replica_proto) + target_sources(replica_requests PRIVATE DeleteRequest.cc DirectorIndexRequest.cc @@ -36,6 +38,7 @@ target_sources(replica_requests PRIVATE StatusRequest.cc StopRequest.cc ) + target_link_libraries(replica_requests PUBLIC log ) diff --git a/src/replica/services/CMakeLists.txt b/src/replica/services/CMakeLists.txt index 7d849bec4a..bd42597f50 100644 --- a/src/replica/services/CMakeLists.txt +++ b/src/replica/services/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(replica_services OBJECT) + target_sources(replica_services PRIVATE ChunkLocker.cc DatabaseServices.cc @@ -6,6 +7,7 @@ target_sources(replica_services PRIVATE DatabaseServicesPool.cc ServiceProvider.cc ) + target_link_libraries(replica_services PUBLIC log ) diff --git a/src/replica/tools/qserv-replica-test.cc b/src/replica/tools/qserv-replica-test.cc index 1dec5cb5d9..6d7136cc8c 100644 --- a/src/replica/tools/qserv-replica-test.cc +++ b/src/replica/tools/qserv-replica-test.cc @@ -34,7 +34,6 @@ #include "replica/apps/QhttpTestApp.h" #include "replica/apps/TransactionsApp.h" #include "replica/apps/QservWorkerPingApp.h" -#include "replica/apps/QservXrootdSsiApp.h" using namespace std; using namespace lsst::qserv::replica; @@ -52,7 +51,6 @@ ApplicationColl getAppColl() { coll.add("QHTTP"); coll.add("TRANSACTIONS"); coll.add("WORKER-PING"); - coll.add("XROOTD-SSI"); return coll; } } // namespace diff --git a/src/replica/util/CMakeLists.txt b/src/replica/util/CMakeLists.txt index 4dab0e6159..bc14aa154f 100644 --- a/src/replica/util/CMakeLists.txt +++ b/src/replica/util/CMakeLists.txt 
@@ -1,5 +1,7 @@ add_library(replica_util OBJECT) + add_dependencies(replica_util replica_proto) + target_sources(replica_util PRIVATE ChttpSvc.cc ChunkNumber.cc @@ -16,6 +18,7 @@ target_sources(replica_util PRIVATE SqlSchemaUtils.cc SuccessRateGenerator.cc ) + target_link_libraries(replica_util PUBLIC log sphgeom diff --git a/src/replica/worker/CMakeLists.txt b/src/replica/worker/CMakeLists.txt index fbe77ebcff..371aea9483 100644 --- a/src/replica/worker/CMakeLists.txt +++ b/src/replica/worker/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(replica_worker OBJECT) + add_dependencies(replica_worker replica_proto) + target_sources(replica_worker PRIVATE FileClient.cc FileServer.cc @@ -20,6 +22,7 @@ target_sources(replica_worker PRIVATE WorkerSqlRequest.cc WorkerUtils.cc ) + target_link_libraries(replica_worker PUBLIC log sphgeom diff --git a/src/rproc/CMakeLists.txt b/src/rproc/CMakeLists.txt index 2cd4f0765a..01a124c65b 100644 --- a/src/rproc/CMakeLists.txt +++ b/src/rproc/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(rproc SHARED) -add_dependencies(rproc proto) target_sources(rproc PRIVATE InfileMerger.cc @@ -11,6 +10,4 @@ target_link_libraries(rproc PUBLIC log ) -install(TARGETS rproc - EXPORT qserv -) +install(TARGETS rproc) diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index 2e83c022f6..29c30db1d9 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -56,10 +56,9 @@ // Qserv headers #include "cconfig/CzarConfig.h" #include "global/intTypes.h" -#include "mysql/CsvBuffer.h" -#include "proto/ProtoImporter.h" -#include "proto/worker.pb.h" +#include "mysql/CsvMemDisk.h" #include "qdisp/CzarStats.h" +#include "qdisp/UberJob.h" #include "qproc/DatabaseModels.h" #include "query/ColumnRef.h" #include "query/SelectStmt.h" @@ -86,7 +85,7 @@ std::string getTimeStampId() { struct timeval now; int rc = gettimeofday(&now, nullptr); if (rc != 0) { - throw util::Error(util::ErrorCode::INTERNAL, "Failed to get timestamp."); + throw 
util::Error(util::Error::INTERNAL, util::Error::NONE, "Failed to get timestamp."); } std::ostringstream s; s << (now.tv_sec % 10000) << now.tv_usec; @@ -137,7 +136,8 @@ InfileMerger::InfileMerger(rproc::InfileMergerConfig const& c, _maxResultTableSizeBytes(cconfig::CzarConfig::instance()->getMaxTableSizeMB() * MB_SIZE_BYTES) { _fixupTargetName(); if (!_setupConnectionMyIsam()) { - throw util::Error(util::ErrorCode::MYSQLCONNECT, "InfileMerger mysql connect failure."); + throw util::Error(util::Error::MYSQLCONNECT, util::Error::NONE, + "InfileMerger mysql connect failure."); } // The DEBUG level is good here since this report will be made onces per query, @@ -159,24 +159,18 @@ void InfileMerger::_setQueryIdStr(std::string const& qIdStr) { _queryIdStrSet = true; } -void InfileMerger::mergeCompleteFor(int jobId) { - std::lock_guard resultSzLock(_mtxResultSizeMtx); - _totalResultSize += _perJobResultSize[jobId]; -} - -bool InfileMerger::merge(proto::ResponseSummary const& resp, - std::shared_ptr const& csvStream) { - int const jobId = resp.jobid(); - std::string queryIdJobStr = QueryIdHelper::makeIdStr(resp.queryid(), jobId); +bool InfileMerger::mergeHttp(qdisp::UberJob::Ptr const& uberJob, uint64_t fileSize, + std::shared_ptr const& csvMemDisk) { + std::string queryIdJobStr = uberJob->getIdStr(); if (!_queryIdStrSet) { - _setQueryIdStr(QueryIdHelper::makeIdStr(resp.queryid())); + _setQueryIdStr(QueryIdHelper::makeIdStr(uberJob->getQueryId())); } TimeCountTracker::CALLBACKFUNC cbf = [](TIMEPOINT start, TIMEPOINT end, double bytes, bool success) { if (!success) return; if (std::chrono::duration const seconds = end - start; seconds.count() > 0) { - qdisp::CzarStats::get()->addXRootDSSIRecvRate(bytes / seconds.count()); + qdisp::CzarStats::get()->addDataRecvRate(bytes / seconds.count()); } }; auto tct = make_shared>(cbf); @@ -185,46 +179,59 @@ bool InfileMerger::merge(proto::ResponseSummary const& resp, // Add columns to rows in virtFile. 
util::Timer virtFileT; virtFileT.start(); - auto const csvBuffer = mysql::newCsvStreamBuffer(csvStream); + auto const csvBuffer = mysql::newCsvMemDiskBuffer(csvMemDisk); std::string const virtFile = _infileMgr.prepareSrc(csvBuffer); std::string const infileStatement = sql::formLoadInfile(_mergeTable, virtFile); virtFileT.stop(); - size_t tResultSize; - { - std::lock_guard resultSzLock(_mtxResultSizeMtx); - _perJobResultSize[jobId] += resp.transmitsize(); - tResultSize = _totalResultSize + _perJobResultSize[jobId]; - } - if (tResultSize > _maxResultTableSizeBytes) { - std::ostringstream os; - os << queryIdJobStr << " cancelling the query, queryResult table " << _mergeTable - << " is too large at " << tResultSize << " bytes, max allowed size is " << _maxResultTableSizeBytes - << " bytes"; - LOGS(_log, LOG_LVL_ERROR, os.str()); - _error = util::Error(-1, os.str(), -1); - _resultSizeLimitExceeded.store(true); - return false; - } - - tct->addToValue(resp.transmitsize()); + tct->addToValue(fileSize); tct->setSuccess(); - tct.reset(); // stop transmit recieve timer before merging happens. + tct.reset(); // stop transmit receive timer before merging happens. // Stop here (if requested) after collecting stats on the amount of data collected // from workers. - if (_config.debugNoMerge) return true; + if (_config.debugNoMerge) { + return true; + } + + // Need to block here to make sure the result able needs these rows or not. + lock_guard lgFinal(_finalMergeMtx); + // Don't merge if the query got cancelled. + auto executive = uberJob->getExecutive(); + if (executive == nullptr || executive->getCancelled() || executive->isRowLimitComplete()) { + return true; + } + + if (csvMemDisk->isFileError()) { + // The file couldn't be opened for writing, so giving up + // now should keep the result table from getting contaminated. 
+ return false; + } auto start = std::chrono::system_clock::now(); - ret = _applyMysqlMyIsam(infileStatement, resp.transmitsize()); + // The following will call some version of CsvStream::pop() at least once. + ret = _applyMysqlMyIsam(infileStatement, fileSize); auto end = std::chrono::system_clock::now(); auto mergeDur = std::chrono::duration_cast(end - start); LOGS(_log, LOG_LVL_DEBUG, "mergeDur=" << mergeDur.count()); - if (not ret) { + if (ret) { + lock_guard resultSzLock(_mtxResultSizeMtx); + _totalResultSize += fileSize; + size_t tResultSize = _totalResultSize; + /// Check file size here to see if it has gotten too large, this will probably only trip in LIMIT + /// queries. + if (tResultSize > _maxResultTableSizeBytes) { + string str = queryIdJobStr + " cancelling the query, queryResult table " + _mergeTable + + " is too large at " + to_string(tResultSize) + " bytes, max allowed size is " + + to_string(_maxResultTableSizeBytes) + " bytes"; + LOGS(_log, LOG_LVL_ERROR, str); + _error = util::Error(util::Error::CZAR_RESULT_TOO_LARGE, util::Error::NONE, str); + return false; + } + } else { LOGS(_log, LOG_LVL_ERROR, "InfileMerger::merge mysql applyMysql failure"); } - LOGS(_log, LOG_LVL_DEBUG, "virtFileT=" << virtFileT.getElapsed() << " mergeDur=" << mergeDur.count()); - + LOGS(_log, LOG_LVL_TRACE, "virtFileT=" << virtFileT.getElapsed() << " mergeDur=" << mergeDur.count()); return ret; } @@ -269,6 +276,7 @@ size_t InfileMerger::getTotalResultSize() const { return _totalResultSize; } bool InfileMerger::finalize(size_t& collectedBytes, int64_t& rowCount) { bool finalizeOk = true; collectedBytes = _totalResultSize; + lock_guard lgFinal(_finalMergeMtx); // block on other merges // TODO: Should check for error condition before continuing. 
if (_isFinished) { LOGS(_log, LOG_LVL_ERROR, "InfileMerger::finalize(), but _isFinished == true"); @@ -338,13 +346,14 @@ bool InfileMerger::getSchemaForQueryResults(query::SelectStmt const& stmt, sql:: bool ok = _databaseModels->applySql(query, results, getSchemaErrObj); if (not ok) { LOGS(_log, LOG_LVL_ERROR, "Failed to get schema:" << getSchemaErrObj.errMsg()); - _error = util::Error(util::ErrorCode::INTERNAL, "Failed to get schema: " + getSchemaErrObj.errMsg()); + _error = util::Error(util::Error::RESULT_SCHEMA, util::Error::NONE, + "Failed to get schema: " + getSchemaErrObj.errMsg()); return false; } sql::SqlErrorObject errObj; if (errObj.isSet()) { LOGS(_log, LOG_LVL_ERROR, "Failed to extract schema from result: " << errObj.errMsg()); - _error = util::Error(util::ErrorCode::INTERNAL, + _error = util::Error(util::Error::RESULT_SCHEMA, util::Error::NONE, "Failed to extract schema from result: " + errObj.errMsg()); return false; } @@ -361,7 +370,7 @@ bool InfileMerger::makeResultsTableForQuery(query::SelectStmt const& stmt) { std::string const createStmt = sql::formCreateTable(_mergeTable, schema) + " ENGINE=MyISAM"; LOGS(_log, LOG_LVL_TRACE, "InfileMerger make results table query: " << createStmt); if (not _applySqlLocal(createStmt, "makeResultsTableForQuery")) { - _error = util::Error(util::ErrorCode::CREATE_TABLE, + _error = util::Error(util::Error::RESULT_CREATETABLE, util::Error::NONE, "Error creating table:" + _mergeTable + ": " + _error.getMsg()); _isFinished = true; // Cannot continue. 
LOGS(_log, LOG_LVL_ERROR, "InfileMerger sql error: " << _error.getMsg()); @@ -400,8 +409,8 @@ bool InfileMerger::_applySqlLocal(std::string const& sql, sql::SqlResults& resul return false; } if (not _sqlConn->runQuery(sql, results, errObj)) { - _error = util::Error(errObj.errNo(), "Error applying sql: " + errObj.printErrMsg(), - util::ErrorCode::MYSQLEXEC); + _error = util::Error(util::Error::RESULT_SQL, errObj.errNo(), + "Error applying sql: " + errObj.printErrMsg()); LOGS(_log, LOG_LVL_ERROR, "InfileMerger error: " << _error.getMsg()); return false; } @@ -413,8 +422,8 @@ bool InfileMerger::_sqlConnect(sql::SqlErrorObject& errObj) { if (_sqlConn == nullptr) { _sqlConn = sql::SqlConnectionFactory::make(_config.mySqlConfig); if (not _sqlConn->connectToDb(errObj)) { - _error = util::Error(errObj.errNo(), "Error connecting to db: " + errObj.printErrMsg(), - util::ErrorCode::MYSQLCONNECT); + _error = util::Error(util::Error::RESULT_CONNECT, errObj.errNo(), + "Error connecting to db: " + errObj.printErrMsg()); _sqlConn.reset(); LOGS(_log, LOG_LVL_ERROR, "InfileMerger error: " << _error.getMsg()); return false; diff --git a/src/rproc/InfileMerger.h b/src/rproc/InfileMerger.h index fa2c65d88f..2a9179024e 100644 --- a/src/rproc/InfileMerger.h +++ b/src/rproc/InfileMerger.h @@ -46,15 +46,16 @@ // Forward declarations namespace lsst::qserv { namespace mysql { -class CsvStream; +class CsvMemDisk; class MysqlConfig; } // namespace mysql -namespace proto { -class ResponseSummary; -} // namespace proto namespace qdisp { class MessageStore; +class UberJob; } // namespace qdisp +namespace QMeta { +class MessageStore; +} namespace qproc { class DatabaseModels; } @@ -101,12 +102,9 @@ class InfileMerger { InfileMerger& operator=(InfileMerger const&) = delete; ~InfileMerger() = default; - /// Merge a worker response, which contains a single message - /// @return true if merge was successfully imported. 
- bool merge(proto::ResponseSummary const& resp, std::shared_ptr const& csvStream); - - /// Indicate the merge for the job is complete. - void mergeCompleteFor(int jobId); + /// Merge the result data collected over Http. + bool mergeHttp(std::shared_ptr const& uberJob, uint64_t fileSize, + std::shared_ptr const& csvMemDisk); /// @return error details if finalize() returns false util::Error const& getError() const { return _error; } @@ -125,12 +123,6 @@ class InfileMerger { /// Check if the object has completed all processing. bool isFinished() const; - /// Check if the result size limit has been exceeded. - bool resultSizeLimitExceeded() const { return _resultSizeLimitExceeded.load(); } - - /// Check if the result size limit has been exceeded. - void setResultSizeLimitExceeded() { _resultSizeLimitExceeded.store(true); } - void setMergeStmtFromList(std::shared_ptr const& mergeStmt) const; /** @@ -169,18 +161,19 @@ class InfileMerger { bool _applySqlLocal(std::string const& sql, sql::SqlResults& results); bool _applySqlLocal(std::string const& sql, sql::SqlResults& results, sql::SqlErrorObject& errObj); bool _sqlConnect(sql::SqlErrorObject& errObj); + std::string _getQueryIdStr(); void _setQueryIdStr(std::string const& qIdStr); void _fixupTargetName(); bool _setupConnectionMyIsam(); - InfileMergerConfig _config; ///< Configuration - std::shared_ptr _sqlConn; ///< SQL connection - std::string _mergeTable; ///< Table for result loading - util::Error _error; ///< Error state - bool _isFinished = false; ///< Completed? - std::atomic _resultSizeLimitExceeded{false}; ///< Large result query? - std::mutex _sqlMutex; ///< Protection for SQL connection + InfileMergerConfig _config; ///< Configuration + std::shared_ptr _sqlConn; ///< SQL connection + std::string _mergeTable; ///< Table for result loading + util::Error _error; ///< Error state + bool _isFinished = false; ///< Completed? 
+ std::mutex _sqlMutex; ///< Protection for SQL connection + mysql::MySqlConnection _mysqlConn; std::mutex _mysqlMutex; mysql::LocalInfile::Mgr _infileMgr; @@ -192,10 +185,10 @@ class InfileMerger { 10; ///< maximum number of times to retry connecting to the SQL database. /// Variable to track result size. Each - size_t const _maxResultTableSizeBytes; ///< Max result table size in bytes. - size_t _totalResultSize = 0; ///< Size of result so far in bytes. - std::map _perJobResultSize; ///< Result size for each job - std::mutex _mtxResultSizeMtx; ///< Protects _perJobResultSize and _totalResultSize. + size_t const _maxResultTableSizeBytes; ///< Max result table size in bytes. + size_t _totalResultSize = 0; ///< Size of result so far in bytes. + std::mutex _mtxResultSizeMtx; ///< Protects _totalResultSize. + std::mutex _finalMergeMtx; ///< Protects mysql result tables }; } // namespace lsst::qserv::rproc diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 9475c63403..e15a4fa25a 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -17,9 +17,7 @@ target_link_libraries(sql PUBLIC log ) -install( - TARGETS sql -) +install(TARGETS sql) FUNCTION(sql_tests) FOREACH(TEST IN ITEMS ${ARGV}) diff --git a/src/sql/SqlResults.cc b/src/sql/SqlResults.cc index eee4a1de6d..1ccd3a56fb 100644 --- a/src/sql/SqlResults.cc +++ b/src/sql/SqlResults.cc @@ -30,6 +30,12 @@ #include #include +// LSST headers +#include "lsst/log/Log.h" + +// LSST headers +#include "lsst/log/Log.h" + // Qserv headers #include "mysql/SchemaFactory.h" @@ -38,6 +44,10 @@ // when the null pointer is passed into the constructor. #define EMPTY_STR_IF_NULL(x) ((x) == nullptr ? 
"" : (x)) +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.sql.SqlResults"); +} + namespace lsst::qserv::sql { namespace detail { @@ -117,12 +127,22 @@ void SqlResults::addResult(MYSQL_RES* r) { } } -bool SqlResults::extractFirstColumn(std::vector& ret, SqlErrorObject& errObj) { - int i, s = _results.size(); - for (i = 0; i < s; ++i) { +bool SqlResults::extractFirstXColumns(std::vector*> const& vectorRef, + SqlErrorObject& sqlErr) { + size_t rsz = _results.size(); + size_t expectedCols = vectorRef.size(); + if (rsz > 0 && mysql_num_fields(_results[0]) < expectedCols) { + LOGS(_log, LOG_LVL_ERROR, + "extractFirstXColumns had too few columns expected=" << rsz << " found=" + << mysql_num_fields(_results[0])); + return false; + } + for (size_t i = 0; i < rsz; ++i) { MYSQL_ROW row; while ((row = mysql_fetch_row(_results[i])) != nullptr) { - ret.push_back(EMPTY_STR_IF_NULL(row[0])); + for (size_t j = 0; j < expectedCols; ++j) { + vectorRef[j]->push_back(EMPTY_STR_IF_NULL(row[j])); + } } mysql_free_result(_results[i]); } @@ -130,53 +150,45 @@ bool SqlResults::extractFirstColumn(std::vector& ret, SqlErrorObjec return true; } +bool SqlResults::extractFirstColumn(std::vector& col1, SqlErrorObject& errObj) { + return extractFirstXColumns({&col1}, errObj); +} bool SqlResults::extractFirst2Columns(std::vector& col1, std::vector& col2, SqlErrorObject& errObj) { - int i, s = _results.size(); - for (i = 0; i < s; ++i) { - MYSQL_ROW row; - while ((row = mysql_fetch_row(_results[i])) != nullptr) { - col1.push_back(EMPTY_STR_IF_NULL(row[0])); - col2.push_back(EMPTY_STR_IF_NULL(row[1])); - } - mysql_free_result(_results[i]); - } - _results.clear(); - return true; + return extractFirstXColumns({&col1, &col2}, errObj); } - bool SqlResults::extractFirst3Columns(std::vector& col1, std::vector& col2, std::vector& col3, SqlErrorObject& errObj) { - int i, s = _results.size(); - for (i = 0; i < s; ++i) { - MYSQL_ROW row; - while ((row = mysql_fetch_row(_results[i])) != nullptr) { - 
col1.push_back(EMPTY_STR_IF_NULL(row[0])); - col2.push_back(EMPTY_STR_IF_NULL(row[1])); - col3.push_back(EMPTY_STR_IF_NULL(row[2])); - } - mysql_free_result(_results[i]); - } - _results.clear(); - return true; + return extractFirstXColumns({&col1, &col2, &col3}, errObj); } - bool SqlResults::extractFirst4Columns(std::vector& col1, std::vector& col2, std::vector& col3, std::vector& col4, SqlErrorObject& errObj) { - int i, s = _results.size(); - for (i = 0; i < s; ++i) { + return extractFirstXColumns({&col1, &col2, &col3, &col4}, errObj); +} +bool SqlResults::extractFirst6Columns(std::vector& col1, std::vector& col2, + std::vector& col3, std::vector& col4, + std::vector& col5, std::vector& col6, + SqlErrorObject& errObj) { + return extractFirstXColumns({&col1, &col2, &col3, &col4, &col5, &col6}, errObj); +} + +std::vector> SqlResults::extractFirstNColumns(size_t numColumns) { + std::vector> rows; + for (int resultIdx = 0, numResults = _results.size(); resultIdx < numResults; ++resultIdx) { MYSQL_ROW row; - while ((row = mysql_fetch_row(_results[i])) != nullptr) { - col1.push_back(EMPTY_STR_IF_NULL(row[0])); - col2.push_back(EMPTY_STR_IF_NULL(row[1])); - col3.push_back(EMPTY_STR_IF_NULL(row[2])); - col4.push_back(EMPTY_STR_IF_NULL(row[3])); + while ((row = mysql_fetch_row(_results[resultIdx])) != nullptr) { + std::vector columns; + columns.reserve(numColumns); + for (size_t colIdx = 0; colIdx < numColumns; ++colIdx) { + columns.push_back(row[colIdx]); + } + rows.push_back(std::move(columns)); } - mysql_free_result(_results[i]); + mysql_free_result(_results[resultIdx]); } _results.clear(); - return true; + return rows; } bool SqlResults::extractFirstValue(std::string& ret, SqlErrorObject& errObj) { diff --git a/src/sql/SqlResults.h b/src/sql/SqlResults.h index a48f5a78a1..608514daca 100644 --- a/src/sql/SqlResults.h +++ b/src/sql/SqlResults.h @@ -32,6 +32,7 @@ // System headers #include #include +#include #include #include @@ -90,19 +91,44 @@ class SqlResults : 
boost::noncopyable { // do not use it for SELECT unsigned long long getAffectedRows() const { return _affectedRows; } bool extractFirstValue(std::string&, SqlErrorObject&); - bool extractFirstColumn(std::vector&, SqlErrorObject&); - bool extractFirst2Columns(std::vector&, // FIXME: generalize - std::vector&, SqlErrorObject&); - bool extractFirst3Columns(std::vector&, // FIXME: generalize - std::vector&, std::vector&, SqlErrorObject&); - bool extractFirst4Columns(std::vector&, std::vector&, std::vector&, - std::vector&, SqlErrorObject&); + + /// Return the values of the first X columns of `_results`, where X is the size() of vectorRef. + /// It would be nice to use references instead of pointers, but curly bracket initialization + /// of the references was problematic. + /// @param vectorRef - A vector of pointers to vectors of strings. Each vector of strings + /// contains a column of the table (index 0 holds column1, + /// index 1 holds column2, etc.). The number of columns returned is + /// vectorRef.size(). NULL values are set to empty strings. + /// @param sqlErr - is never set and should be removed. (Only likely error is database disconnect, + /// which would be catastrophic) + /// @return - Returns false when fewer than expected columns are found. 
+ bool extractFirstXColumns(std::vector*> const& vectorRef, + SqlErrorObject& sqlErr); + bool extractFirstColumn(std::vector& col1, SqlErrorObject& errObj); + bool extractFirst2Columns(std::vector& col1, std::vector& col2, + SqlErrorObject& errObj); + bool extractFirst3Columns(std::vector& col1, std::vector& col2, + std::vector& col3, SqlErrorObject& errObj); + bool extractFirst4Columns(std::vector& col1, std::vector& col2, + std::vector& col3, std::vector& col4, + SqlErrorObject& errObj); + bool extractFirst6Columns(std::vector& col1, std::vector& col2, + std::vector& col3, std::vector& col4, + std::vector& col5, std::vector& col6, + SqlErrorObject& errObj); + template bool extractFirstColumns(SqlErrorObject& err, Columns&... cols) { std::vector>> columns = {std::ref(cols)...}; return _extractFirstColumnsImpl(err, columns); } + /// Extract a result set into the 2D array. + /// @param numColumns The number of columns in the array. + /// @return a 2D array, where the first index of the array represents rows + /// and the second index represents columns. 
+ std::vector> extractFirstNColumns(size_t numColumns); + void freeResults(); /// Return row iterator diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 9e7cd6c725..24bf9ffa52 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(util PRIVATE Issue.cc MultiError.cc Mutex.cc + QdispPool.cc ResultFileName.cc StringHash.cc String.cc @@ -29,7 +30,6 @@ target_sources(util PRIVATE Timer.cc TimeUtils.cc WorkQueue.cc - xrootd.cc ) target_link_libraries(util PUBLIC @@ -39,9 +39,7 @@ target_link_libraries(util PUBLIC log ) -install( - TARGETS util -) +install(TARGETS util) FUNCTION(util_tests) FOREACH(TEST IN ITEMS ${ARGV}) diff --git a/src/util/Command.cc b/src/util/Command.cc index 39cb7d4d84..7ce436361b 100644 --- a/src/util/Command.cc +++ b/src/util/Command.cc @@ -26,6 +26,9 @@ // Class header #include "util/Command.h" +// System headers +#include + // LSST headers #include "lsst/log/Log.h" @@ -33,6 +36,8 @@ namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.util.Command"); } +using namespace std; + namespace lsst::qserv::util { /// Set status to COMPLETE and notify everyone waiting for a status change. @@ -70,4 +75,20 @@ void Command::setFunc(std::function func) { /// this function must be called or the lambda will keep this object alive. 
void Command::resetFunc() { setFunc(nullptr); } +std::string Command::dump() const { + ostringstream os; + dump(os); + return os.str(); +} + +std::ostream& Command::dump(std::ostream& os) const { + os << "util::Command"; + return os; +} + +ostream& operator<<(ostream& os, Command const& cmd) { + cmd.dump(os); + return os; +} + } // namespace lsst::qserv::util diff --git a/src/util/Command.h b/src/util/Command.h index b1620f5b65..9456d1981e 100644 --- a/src/util/Command.h +++ b/src/util/Command.h @@ -39,7 +39,7 @@ namespace lsst::qserv::util { class Tracker { public: Tracker() {} - virtual ~Tracker() {} + virtual ~Tracker() = default; enum class Status { INPROGRESS, COMPLETE }; using Ptr = std::shared_ptr; void setComplete(); @@ -79,6 +79,11 @@ class Command { void setFunc(std::function func); void resetFunc(); + /// Returns a string for logging. + virtual std::ostream& dump(std::ostream& os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream& os, Command const& cmd); + protected: std::function _func = [](CmdData*) { ; }; }; @@ -88,7 +93,7 @@ class Command { class CommandTracked : public Command, public Tracker { public: using Ptr = std::shared_ptr; - CommandTracked() = default; + CommandTracked() : Command(), Tracker() {} explicit CommandTracked(std::function func) : Command(func) {} ~CommandTracked() override = default; diff --git a/src/util/ConfigStore.h b/src/util/ConfigStore.h index 7397b2589e..63bf5d1b96 100644 --- a/src/util/ConfigStore.h +++ b/src/util/ConfigStore.h @@ -27,7 +27,7 @@ * * @brief Provide common configuration management framework * - * Manage czar and worker (xrdssi plugin) configuration files + * Manage czar and worker configuration files * * @author Fabrice Jammes, IN2P3/SLAC */ diff --git a/src/util/ConfigValMap.h b/src/util/ConfigValMap.h index 89b227ca79..a562a8e2a0 100644 --- a/src/util/ConfigValMap.h +++ b/src/util/ConfigValMap.h @@ -50,6 +50,8 @@ class ConfigValMap; /// Base class for storing values, 
usually from configuration files, that have /// identifiers consisting of a `section` and a `name`. /// This class is meant to be used with ConfigValMap. +/// TODO:Maybe a command line argument can be added to this and if the command +/// line argument is found, it will override the value in the file. class ConfigVal { public: using Ptr = std::shared_ptr; diff --git a/src/util/DynamicWorkQueue.cc b/src/util/DynamicWorkQueue.cc index 74765095ad..b368fbe8fd 100644 --- a/src/util/DynamicWorkQueue.cc +++ b/src/util/DynamicWorkQueue.cc @@ -42,22 +42,22 @@ struct DynamicWorkQueue::Queue { // Queue creation time in seconds since the Epoch. double createTime; // Opaque handle used to look up the Queue for a session by DynamicWorkQueue. - void const *session; + void const* session; // Singly linked list of callables. - DynamicWorkQueue::Callable *head; - DynamicWorkQueue::Callable *tail; + DynamicWorkQueue::Callable* head; + DynamicWorkQueue::Callable* tail; - Queue(void const *handle) : numThreads(0), session(handle), head(nullptr), tail(nullptr) { + Queue(void const* handle) : numThreads(0), session(handle), head(nullptr), tail(nullptr) { struct ::timeval t; ::gettimeofday(&t, nullptr); createTime = t.tv_sec + 0.000001 * t.tv_usec; } ~Queue() { - Callable *c = head; + Callable* c = head; head = tail = nullptr; while (c) { - Callable *next = c->_next; + Callable* next = c->_next; delete c; c = next; } @@ -66,7 +66,7 @@ struct DynamicWorkQueue::Queue { bool empty() const { return head == nullptr; } // Take ownership of a Callable and add it to the end of the queue. - void put(Callable *c) { + void put(Callable* c) { if (c) { if (tail) { tail->_next = c; @@ -79,10 +79,10 @@ struct DynamicWorkQueue::Queue { // Remove a Callable from the beginning of the queue and relinquish // ownership of it. If the queue is empty, nullptr is returned. 
- Callable *take() { - Callable *c = head; + Callable* take() { + Callable* c = head; if (c) { - Callable *next = c->_next; + Callable* next = c->_next; head = next; if (next == nullptr) { tail = nullptr; @@ -92,8 +92,8 @@ struct DynamicWorkQueue::Queue { } // Remove and relinquish ownership for all Callable objects in the queue. - Callable *takeAll() { - Callable *c = head; + Callable* takeAll() { + Callable* c = head; head = tail = nullptr; return c; } @@ -101,8 +101,8 @@ struct DynamicWorkQueue::Queue { // Order queue pointers lexicographically by // (active thread count, queue creation time, queue memory address). -bool DynamicWorkQueue::QueuePtrCmp::operator()(DynamicWorkQueue::Queue const *x, - DynamicWorkQueue::Queue const *y) const { +bool DynamicWorkQueue::QueuePtrCmp::operator()(DynamicWorkQueue::Queue const* x, + DynamicWorkQueue::Queue const* y) const { if (x->numThreads < y->numThreads) { return true; } else if (x->numThreads == y->numThreads) { @@ -117,9 +117,9 @@ bool DynamicWorkQueue::QueuePtrCmp::operator()(DynamicWorkQueue::Queue const *x, // Wraps a DynamicWorkQueue reference and implements the work scheduling loop. struct DynamicWorkQueue::Runner { - Runner(DynamicWorkQueue &queue) : wq(queue) {} + Runner(DynamicWorkQueue& queue) : wq(queue) {} void operator()(); - DynamicWorkQueue &wq; + DynamicWorkQueue& wq; }; void DynamicWorkQueue::Runner::operator()() { @@ -134,7 +134,7 @@ void DynamicWorkQueue::Runner::operator()() { } // The first set element is the oldest of the queues with the smallest // active thread count. - Queue *q = *wq._nonEmptyQueues.begin(); + Queue* q = *wq._nonEmptyQueues.begin(); // Remove q from _nonEmptyQueues prior to updating it - this is // necessary because the queues may be reordered by the update. 
// @@ -184,7 +184,7 @@ void DynamicWorkQueue::Runner::operator()() { } } -void DynamicWorkQueue::_startRunner(DynamicWorkQueue &dwq) { +void DynamicWorkQueue::_startRunner(DynamicWorkQueue& dwq) { Runner r(dwq); r(); } @@ -223,7 +223,7 @@ DynamicWorkQueue::~DynamicWorkQueue() { _sessions.clear(); } -void DynamicWorkQueue::add(void const *session, DynamicWorkQueue::Callable *callable) { +void DynamicWorkQueue::add(void const* session, DynamicWorkQueue::Callable* callable) { std::lock_guard lock(_mutex); if (_shouldIncreaseThreadCount()) { std::thread t(_startRunner, std::ref(*this)); @@ -247,7 +247,7 @@ void DynamicWorkQueue::add(void const *session, DynamicWorkQueue::Callable *call // thread is created, and Runner decrements it before exiting. _numThreads += 1; } - Queue *q = nullptr; + Queue* q = nullptr; SessionQueueMap::iterator i = _sessions.find(session); if (i != _sessions.end()) { // There is an existing queue for session. @@ -272,13 +272,13 @@ void DynamicWorkQueue::add(void const *session, DynamicWorkQueue::Callable *call _workAvailable.notify_one(); } -void DynamicWorkQueue::cancelQueued(void const *session) { - Callable *c = nullptr; +void DynamicWorkQueue::cancelQueued(void const* session) { + Callable* c = nullptr; { std::lock_guard lock(_mutex); SessionQueueMap::iterator i = _sessions.find(session); if (i != _sessions.end()) { - Queue *q = i->second; + Queue* q = i->second; c = q->takeAll(); _nonEmptyQueues.erase(q); if (q->numThreads == 0) { @@ -289,7 +289,7 @@ void DynamicWorkQueue::cancelQueued(void const *session) { } } while (c) { - Callable *next = c->_next; + Callable* next = c->_next; c->cancel(); // TODO: what if cancel() throws? 
delete c; c = next; diff --git a/src/util/DynamicWorkQueue.h b/src/util/DynamicWorkQueue.h index 6399575202..74e27113fa 100644 --- a/src/util/DynamicWorkQueue.h +++ b/src/util/DynamicWorkQueue.h @@ -44,7 +44,7 @@ namespace lsst::qserv::util { class DynamicWorkQueue { struct Queue; struct QueuePtrCmp { - bool operator()(Queue const *, Queue const *) const; + bool operator()(Queue const*, Queue const*) const; }; struct Runner; @@ -67,7 +67,7 @@ class DynamicWorkQueue { virtual void cancel() {} private: - Callable *_next; // Embedded singly linked-list pointer; not owned. + Callable* _next; // Embedded singly linked-list pointer; not owned. friend class DynamicWorkQueue; friend struct DynamicWorkQueue::Queue; }; @@ -81,21 +81,21 @@ class DynamicWorkQueue { /// Add `callable` to the queue, associating it with `session`. /// Ownership of `callable` is transfered from the caller to the queue. - void add(void const *session, Callable *callable); + void add(void const* session, Callable* callable); /// Remove and `cancel()` any `Callable` objects associated with `session` /// from this queue. - void cancelQueued(void const *session); + void cancelQueued(void const* session); private: // Disable copy-construction and assignment. - DynamicWorkQueue(DynamicWorkQueue const &); - DynamicWorkQueue &operator=(DynamicWorkQueue const &); + DynamicWorkQueue(DynamicWorkQueue const&); + DynamicWorkQueue& operator=(DynamicWorkQueue const&); - static void _startRunner(DynamicWorkQueue &dwq); + static void _startRunner(DynamicWorkQueue& dwq); - typedef std::map SessionQueueMap; - typedef std::set QueueSet; + typedef std::map SessionQueueMap; + typedef std::set QueueSet; // Call only while holding a lock on _mutex. 
bool _shouldIncreaseThreadCount() const; diff --git a/src/util/Error.cc b/src/util/Error.cc index 9f6e39e3f5..02e1ea1cca 100644 --- a/src/util/Error.cc +++ b/src/util/Error.cc @@ -30,6 +30,8 @@ // LSST headers #include "lsst/log/Log.h" +using namespace std; + namespace { // File-scope helpers LOG_LOGGER _log = LOG_GET("lsst.qserv.util.Error"); @@ -38,21 +40,81 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.util.Error"); namespace lsst::qserv::util { -Error::Error(int code, std::string const& msg, int status) : _code(code), _msg(msg), _status(status) { - if (_code != ErrorCode::NONE || _msg != "" || _status != ErrorCode::NONE) { +Error::Error(int code, int subCode, string const& msg, bool logLvlErr) + : _code(code), _subCode(subCode), _msg(msg) { + if (_code != NONE || _msg != "") { // Flushing output as it is likely that this exception will not be caught. - LOGS(_log, LOG_LVL_ERROR, "Error " << *this << std::endl); + int logLvl = (logLvlErr) ? LOG_LVL_ERROR : LOG_LVL_TRACE; + LOGS(_log, logLvl, "Error " << *this << std::endl); } } -/** Overload output operator for this class - * - * @param out - * @param multiError - * @return an output stream - */ -std::ostream& operator<<(std::ostream& out, Error const& error) { - out << "[" << error._code << "] " << error._msg; +Error::Error(int code, int subCode, set const& chunkIds, set const& jobIds, string const& msg, + bool logLvlErr) + : _code(code), _subCode(subCode), _msg(msg) { + _chunkIds.insert(chunkIds.begin(), chunkIds.end()); + _jobIds.insert(jobIds.begin(), jobIds.end()); + if (_code != NONE || _msg != "") { + // Flushing output as it is likely that this exception will not be caught. + int logLvl = (logLvlErr) ? 
LOG_LVL_ERROR : LOG_LVL_TRACE; + LOGS(_log, logLvl, "Error " << *this << std::endl); + } +} + +vector Error::getChunkIdsVect() const { + vector res(_chunkIds.begin(), _chunkIds.end()); + return res; +} + +vector Error::getJobIdsVect() const { + vector res(_jobIds.begin(), _jobIds.end()); + return res; +} + +string Error::dump() const { + stringstream os; + dump(os); + return os.str(); +} + +ostream& Error::dump(ostream& os, bool showJobs) const { + os << "[count=" << _count << "][code=" << _code << "] " << _msg; + if (_subCode != 0) { + os << "[subCode=" << _subCode << "]"; + } + if (_chunkIds.size() > 0) { + unsigned int const maxPrint = 10; // There could be tens of thousands of these + auto iter = _chunkIds.begin(); + + if (_chunkIds.size() > maxPrint) { + os << "[chunkIds(first 10 of " << _chunkIds.size() << ")=" << *iter++; + } else { + os << "[chunkIds=" << *iter++; + } + for (unsigned int j = 1; j < _chunkIds.size() && j < maxPrint; ++j) { + os << ", " << *iter++; + } + os << "]"; + } + if (showJobs && _jobIds.size() > 0) { + unsigned int const maxPrint = 10; // There could be tens of thousands of these + auto iter = _jobIds.begin(); + + if (_jobIds.size() > maxPrint) { + os << "[jobIds(first 10 of " << _jobIds.size() << ")=" << *iter++; + } else { + os << "[jobIds=" << *iter++; + } + for (unsigned int j = 1; j < _jobIds.size() && j < maxPrint; ++j) { + os << ", " << *iter++; + } + os << "]"; + } + return os; +} + +ostream& operator<<(ostream& out, Error const& error) { + error.dump(out); return out; } diff --git a/src/util/Error.h b/src/util/Error.h index c95ec76b06..7d056833b7 100644 --- a/src/util/Error.h +++ b/src/util/Error.h @@ -34,19 +34,41 @@ #define LSST_QSERV_UTIL_ERROR_H_ // System headers +#include #include +#include namespace lsst::qserv::util { -/** - * List of known Qserv errors - * - * TODO: fix confusion between status and code see: DM-2996 - * TODO: centralize all error code (here?) 
see: DM-2416 - */ -struct ErrorCode { - enum errCode { +/// Store a Qserv error to be used with util::MultiError +/// This class stores an error `_code`, `_subCode`, `_msg`, and possibly some other +/// information about the error like chunkId. +/// The Error objects are usually stored in a `util::MultiError` object, which uses +/// `_code` + `_subCode` as the key. `MultiError` only stores the first +/// `Error` for each key and increments the `_count` if more errors of the +/// same type are found. +/// `_code` is either from MariaDB or `Error::ErrCode`. +/// `_subCode` is usually 0, but is useful in some cases. A primary use is +/// for reporting the SQL error code behind a WORKER_SQL error. +class Error { +public: + /// Final Errors sent to the user are set by qmeta::MessageStore which + /// writes the errors to a database table, which makes them impossible + /// to sort within qserv. + /// All of the errors in the MultiError object go into a single error + /// row in the table, so sorting them in MultiError does have an effect. + /// + /// List of Qserv errors + /// Error codes should be in order of likely usefulness to the end user. + /// MariaDB errors should all be below 5000. + enum ErrCode { + // Default for blank error. NONE = 0, + // Significant MySQL error codes + UNKNOWN_TABLE = 1051, // usually associated with ALTER and DROP + NONEXISTANT_TABLE = 1146, // usually associated with SELECT, INSERT + // Qserv errors begin + QSERV_ERR = 5000, // Should avoid conflicts with MariaDB errors. 
// Query plugin errors: DUPLICATE_SELECT_EXPR, // InfileMerger errors: @@ -60,48 +82,69 @@ struct ErrorCode { CREATE_TABLE, MYSQLCONNECT, MYSQLEXEC, - INTERNAL, + CZAR_RESULT_TOO_LARGE, + JOB_CANCEL, // Worker errors: - WORKER_RESULT_TOO_LARGE + WORKER_RESULT_TOO_LARGE, + WORKER_ERROR, + WORKER_QUERY, + WORKER_SQL_CONNECT, + WORKER_SQL, + // Czar internal errors + INTERNAL, + RETRY_FAILS, + RETRY_UNASSIGN, + RESULT_CONNECT, + RESULT_CREATETABLE, + RESULT_SCHEMA, + RESULT_SQL, + // Communication errors + CZAR_WORKER_COM, + WORKER_CZAR_COM, + // This a common error code indicating the czar + // cancelled this because another error had been found. + CANCEL }; -}; -/** @brief Store a Qserv error - * - * To be used with util::MultiError - * - */ -class Error { -public: - Error(int code = ErrorCode::NONE, std::string const& msg = "", int status = ErrorCode::NONE); + Error(int code, int subCode, std::string const& msg = "", bool logLvlErr = true); + Error(int code, int subCode, std::set const& chunkIds, std::set const& jobIds, + std::string const& msg, bool logLvlErr = true); - /** Overload output operator for current class - * - * @param out - * @param error - * @return an output stream - */ - friend std::ostream& operator<<(std::ostream& out, Error const& error); + Error() = default; + Error(Error const&) = default; + Error& operator=(Error const&) = default; + bool operator==(Error const& other) const = default; + + ~Error() = default; int getCode() const { return _code; } + int getSubCode() const { return _subCode; } + std::vector getChunkIdsVect() const; + std::vector getJobIdsVect() const; const std::string& getMsg() const { return _msg; } - int getStatus() const { return _status; } + /// Check if current Object contains an error + /// @return true if current object doesn't contain an error + bool isNone() { return (_code == NONE); } - /** Check if current Object contains an actual error - * - * By convention, code==util::ErrorCode::NONE - * means that no error has 
been detected - * - * @return true if current object doesn't contain an actual error - */ - bool isNone() { return (_code == util::ErrorCode::NONE); } + void incrCount(int val = 1) { _count += val; } + int getCount() const { return _count; } + + std::string dump() const; + std::ostream& dump(std::ostream& os, bool showJobs = false) const; + friend std::ostream& operator<<(std::ostream& out, Error const& error); private: - int _code; + /// This is either from MariaDB or from Error::ErrCode + int _code = NONE; + /// Only used for certain cases, such as SQL error numbers, may have any value. + /// A primary use is for reporting the SQL error code behind a WORKER_SQL error. + int _subCode = 0; + std::set _jobIds; /// Job ID number, when useful. + std::set _chunkIds; /// Chunk ID number, when useful. std::string _msg; - int _status; + int _count = 1; }; } // namespace lsst::qserv::util diff --git a/src/util/FileMonitor.cc b/src/util/FileMonitor.cc index b7d5e5acc6..c44a22a011 100644 --- a/src/util/FileMonitor.cc +++ b/src/util/FileMonitor.cc @@ -69,9 +69,8 @@ void FileMonitor::_checkLoop() { char buffer[EVENT_BUF_LEN]; /// There's a lock situation here. If the file is never modified, it's never getting past - /// this line. xrootd doesn't exit gracefully anyway, so this is unlikely to cause a problem. - /// This thread could be cancelled or the file could be touched, but that's unlikely to make - /// program termination much prettier. + /// this line. qserv doesn't exit gracefully anyway, so this is unlikely to cause a problem. + /// This thread could be cancelled or the file could be touched. 
int length = read(_fD, buffer, EVENT_BUF_LEN); LOGS(_log, LOG_LVL_WARN, "FileMonitor::checkLoop() " << _fileName << " read length=" << length); if (length < 0) { @@ -82,7 +81,7 @@ void FileMonitor::_checkLoop() { int i = 0; while (i < length) { - struct inotify_event *event = (struct inotify_event *)&buffer[i]; + struct inotify_event* event = (struct inotify_event*)&buffer[i]; LOGS(_log, LOG_LVL_DEBUG, "FileMonitor inotify event i=" << i << " event len=" << event->len); bool reread = false; string msg = "FileMonitor::checkLoop got event " + to_string(event->mask); diff --git a/src/util/InstanceCount.cc b/src/util/InstanceCount.cc index af9f0f8dda..80b920b1e4 100644 --- a/src/util/InstanceCount.cc +++ b/src/util/InstanceCount.cc @@ -4,10 +4,13 @@ #include "util/InstanceCount.h" // System Headers +#include // LSST headers #include "lsst/log/Log.h" +using namespace std; + namespace { // File-scope helpers LOG_LOGGER _log = LOG_GET("lsst.qserv.util.InstanceCount"); @@ -16,8 +19,17 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.util.InstanceCount"); namespace lsst::qserv::util { -std::map InstanceCount::_instances; -std::recursive_mutex InstanceCount::_mx; +InstanceCount::InstanceCountData InstanceCount::_icData; + +InstanceCount::InstanceCountData::InstanceCountData() { + std::cout << "InstanceCountData " << " mx=" << (void*)(&_mx) << " _inst=" << (void*)(&_instances) + << " t=" << (void*)(this) << endl; +} + +InstanceCount::InstanceCountData::~InstanceCountData() { + cout << "~InstanceCountData " << " mx=" << (void*)(&_mx) << " _inst=" << (void*)(&_instances) + << " t=" << (void*)(this) << endl; +} InstanceCount::InstanceCount(std::string const& className) : _className{className} { _increment("con"); } @@ -26,42 +38,73 @@ InstanceCount::InstanceCount(InstanceCount const& other) : _className{other._cla InstanceCount::InstanceCount(InstanceCount&& origin) : _className{origin._className} { _increment("mov"); } void InstanceCount::_increment(std::string const& source) { - 
std::lock_guard lg(_mx); + std::lock_guard lg(_icData._mx); + static std::atomic first = true; + static InstanceCountData* icD = nullptr; + if (first.exchange(false) == true) { + icD = &_icData; + LOGS(_log, LOG_LVL_DEBUG, "InstanceCount::_increment first icd changed to " << (void*)icD); + } else { + if (icD != &_icData) { + LOGS(_log, LOG_LVL_ERROR, + "InstanceCount::_increment icd changed to " << (void*)&_icData << " from " << (void*)icD); + } + } std::pair entry(_className, 0); - auto ret = _instances.insert(entry); + auto ret = _icData._instances.insert(entry); auto iter = ret.first; iter->second += 1; - LOGS(_log, LOG_LVL_WARN, - "InstanceCount " << source << " " << iter->first << "=" << iter->second); // LockupDB INFO + LOGS(_log, LOG_LVL_TRACE, "InstanceCount " << source << " " << iter->first << "=" << iter->second); + if ((++(_icData._instanceLogLimiter)) % 10000 == 0) { + LOGS(_log, LOG_LVL_DEBUG, "InstanceCount brief " << *this << " icD=" << (void*)(&_icData)); + } } InstanceCount::~InstanceCount() { - std::lock_guard lg(_mx); - auto iter = _instances.find(_className); - if (iter != _instances.end()) { + std::lock_guard lg(_icData._mx); + static std::atomic first = true; + static InstanceCountData* icD = nullptr; + if (first.exchange(false) == true) { + icD = &_icData; + LOGS(_log, LOG_LVL_DEBUG, "~InstanceCount first icd changed to " << (void*)icD); + } else { + if (icD != &_icData) { + LOGS(_log, LOG_LVL_ERROR, + "~InstanceCount icd changed to " << (void*)&_icData << " from " << (void*)icD); + } + } + auto iter = _icData._instances.find(_className); + if (iter != _icData._instances.end()) { iter->second -= 1; - LOGS(_log, LOG_LVL_WARN, - "~InstanceCount " << iter->first << "=" << iter->second << " : " << *this); // LockupDB INFO - if (iter->second == 0) { - _instances.erase(_className); + LOGS(_log, LOG_LVL_TRACE, "~InstanceCount " << iter->first << "=" << iter->second << " : " << *this); + int sec = iter->second; + if (sec == 0 || (sec <= 100000 && sec 
% 1000 == 0) || (sec > 100000 && sec % 100000 == 0)) { + LOGS(_log, LOG_LVL_DEBUG, + "~InstanceCount " << iter->first << "=" << iter->second << " : " << *this + << " icD=" << (void*)(&_icData)); + } + if (sec == 0) { + _icData._instances.erase(iter); } } else { - LOGS(_log, LOG_LVL_ERROR, "~InstanceCount " << _className << " was not found! : " << *this); + LOGS(_log, LOG_LVL_ERROR, + "~InstanceCount " << _className << " was not found! : " << *this + << " icD=" << (void*)(&_icData)); } } int InstanceCount::getCount() { - std::lock_guard lg(_mx); - auto iter = _instances.find(_className); - if (iter == _instances.end()) { + std::lock_guard lg(_icData._mx); + auto iter = _icData._instances.find(_className); + if (iter == _icData._instances.end()) { return 0; } return iter->second; } std::ostream& operator<<(std::ostream& os, InstanceCount const& instanceCount) { - std::lock_guard lg(instanceCount._mx); - for (auto const& entry : instanceCount._instances) { + std::lock_guard lg(instanceCount._icData._mx); + for (auto const& entry : instanceCount._icData._instances) { if (entry.second != 0) { os << entry.first << "=" << entry.second << " "; } diff --git a/src/util/InstanceCount.h b/src/util/InstanceCount.h index 9923b84b5b..7e59d0dfea 100644 --- a/src/util/InstanceCount.h +++ b/src/util/InstanceCount.h @@ -4,6 +4,7 @@ #define LSST_QSERV_UTIL_INSTANCECOUNT_H // System headers +#include #include #include #include @@ -23,14 +24,26 @@ class InstanceCount { int getCount(); //< Return the number of instances of _className. + class InstanceCountData { + InstanceCountData(); + ~InstanceCountData(); + + friend InstanceCount; + friend std::ostream& operator<<(std::ostream& out, InstanceCount const& instanceCount); + + private: + std::map _instances; ///< Map of instances per class name. + std::recursive_mutex _mx; ///< Protects _instances. 
+ std::atomic _instanceLogLimiter{0}; + }; + friend std::ostream& operator<<(std::ostream& out, InstanceCount const& instanceCount); private: - std::string _className; //< Names of the of which this is a member. - static std::map _instances; //< Map of instances per class name. - static std::recursive_mutex _mx; //< Protects _instances. - void _increment(std::string const& source); + + std::string _className; ///< Name of instance being counted. + static InstanceCountData _icData; ///< Map of counts and other data. }; } // namespace lsst::qserv::util diff --git a/src/util/MultiError.cc b/src/util/MultiError.cc index bf1ac13616..fc5531b9b5 100644 --- a/src/util/MultiError.cc +++ b/src/util/MultiError.cc @@ -30,35 +30,81 @@ #include #include +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.util.MultiError"); +} // namespace + using namespace std; namespace lsst::qserv::util { -std::string MultiError::toString() const { - std::ostringstream oss; +string MultiError::toString() const { + ostringstream oss; oss << *this; return oss.str(); } -std::string MultiError::toOneLineString() const { - std::ostringstream oss; - if (!this->empty()) { - if (this->size() > 1) { - std::ostream_iterator string_it(oss, ", "); - std::copy(_errorVector.begin(), _errorVector.end() - 1, string_it); +string MultiError::toOneLineString() const { + ostringstream oss; + bool first = true; + for (auto const& [key, elem] : _errorMap) { + if (first) { + oss << elem; + first = false; + } else { + oss << ", " << elem; } - oss << _errorVector.back(); } return oss.str(); } -int MultiError::firstErrorCode() const { return empty() ? ErrorCode::NONE : _errorVector.front().getCode(); } +util::Error MultiError::firstError() const { + auto const iter = _errorMap.begin(); + return iter == _errorMap.end() ? 
Error() : iter->second; +} + +std::vector::size_type MultiError::size() const { return _errorMap.size(); } -bool MultiError::empty() const { return _errorVector.empty(); } +void MultiError::insert(Error const& err) { + // Error with code == NONE being added to the map indicates a coding + // error. It will be added to the map, but the + // problem should be fixed as soon as it is discovered. Throwing an + // exception is likely overkill. Not adding it could hide useful information + // as the message could still valuable. + if (err.getCode() == Error::NONE) { + LOGS(_log, LOG_LVL_WARN, "MultiError::insert adding error with code=NONE " << err); + } + auto const key = make_pair(err.getCode(), err.getSubCode()); + auto iter = _errorMap.find(key); + if (iter == _errorMap.end()) { + _errorMap[key] = err; + } else { + iter->second.incrCount(); + } +} -std::vector::size_type MultiError::size() const { return _errorVector.size(); } +void MultiError::merge(MultiError const& other) { + for (auto const& [key, err] : other._errorMap) { + auto iter = _errorMap.find(key); + if (iter != _errorMap.end()) { + // Entry already exists, increase the count + iter->second.incrCount(err.getCount()); + } else { + _errorMap[key] = err; + } + } +} -void MultiError::push_back(const std::vector::value_type& val) { _errorVector.push_back(val); } +std::vector MultiError::getVector() const { + std::vector errVect; + for (auto const& [key, elem] : _errorMap) { + errVect.push_back(elem); + } + return errVect; +} std::ostream& operator<<(std::ostream& out, MultiError const& multiError) { // This string is meant to be provided to end users on a failure, so @@ -66,37 +112,20 @@ std::ostream& operator<<(std::ostream& out, MultiError const& multiError) { // To get numerous '[0]' entries in the output under control... // Put all errors in a map, and count how many times each occurs. 
- std::map errMap; - for (auto const& err : multiError._errorVector) { - stringstream sstrm; - sstrm << err; - string errStr = sstrm.str(); - auto iter = errMap.find(errStr); - if (iter == errMap.end()) { - errMap[errStr] = 1; - } else { - iter->second += 1; - } - } - - // Write the map to `out` - bool firstLoop = true; - for (auto const& elem : errMap) { - int count = elem.second; - if (firstLoop) { - firstLoop = false; + bool first = true; + for (auto const& [key, err] : multiError._errorMap) { + if (first) { + first = false; } else { out << "\n"; } - out << elem.first; - if (count > 1) { - out << " (Occurences = " << count << ")"; - } + out << err; // Limit this to about 10,000 characters, as that's more than will // likely be useful to end users. if (out.tellp() > 10'000) break; } + return out; } diff --git a/src/util/MultiError.h b/src/util/MultiError.h index ea0d620488..bbdbf7a15b 100644 --- a/src/util/MultiError.h +++ b/src/util/MultiError.h @@ -35,6 +35,7 @@ // System headers #include +#include #include #include @@ -43,59 +44,47 @@ namespace lsst::qserv::util { -/** @class - * @brief Implement a generic error container for Qserv - * - * Store Qserv errors in a throwable vector. - * util::Error operator << is used for output. - * - */ -class MultiError : public std::exception { +/// Implement a generic error container for Qserv +/// Errors are stored in a map using a code/subCode pair for the key. +/// The first error with a given code/subCode sets the message and +/// duplicate values increase the count of the error. +/// The hope is that numerous duplicate errors will just have a high +/// count and not obscure other important messages. 
+class MultiError { public: - /** Return a string representation of the object - * - * Can be used in the log - * - * @return a string representation of the object - */ - std::string toString() const; + MultiError() = default; + MultiError(MultiError const& multiErr) = default; + + virtual ~MultiError() = default; + + bool operator==(MultiError const& other) const = default; - /** Return a minimalistic string representation of the object - * - * Can be used to print error messages to the - * command-line interface - * - * @return a string representation of the object - */ + /// Return a minimalistic string representation of the object + /// @return a string representation of the object std::string toOneLineString() const; - /** Return the first error code (if any) - * - * The idea is to return the first code that might trigger the "chain" reaction. - * An interpretation of the code depns on a context. - * - * @return the code or ErrorCode::NONE if the collection of errors is empty - */ - int firstErrorCode() const; - - virtual ~MultiError() throw() {} - - /** Overload output operator for this class - * - * @param out - * @param multiError - * @return an output stream, with no newline at the end - */ - friend std::ostream& operator<<(std::ostream& out, MultiError const& multiError); + /// Return the error with the lowest error code. + util::Error firstError() const; - bool empty() const; + bool empty() const { return _errorMap.empty(); } std::vector::size_type size() const; - void push_back(const std::vector::value_type& val); + std::vector getVector() const; + + /// Errors should set the error code to anything but NONE (0). + /// The Error subCode may be any value, including NONE. 
+ void insert(Error const& val); + void merge(MultiError const& other); + + //// Return a string representation of the object + std::string toString() const; + + friend std::ostream& operator<<(std::ostream& out, MultiError const& multiError); private: - std::vector _errorVector; + /// Map of Errors using Error::_code + Error::_subCode as the key. + std::map, Error> _errorMap; }; } // namespace lsst::qserv::util diff --git a/src/util/Mutex.cc b/src/util/Mutex.cc index cd60e2b0e4..d7e46c0c13 100644 --- a/src/util/Mutex.cc +++ b/src/util/Mutex.cc @@ -47,7 +47,7 @@ void Lock::_lock() { _context << " LOCK[" << _mutex.id() << "]:1 " << " LOCKED: " << util::printable(Mutex::lockedId(), "", "", " ")); } - assert(!_mutex.lockedByCaller()); + assert(!_mutex.lockedByThread()); _mutex.lock(); if (!_context.empty()) { LOGS(_log, LOG_LVL_TRACE, diff --git a/src/util/Mutex.h b/src/util/Mutex.h index 0353f733a6..c1229c87a7 100644 --- a/src/util/Mutex.h +++ b/src/util/Mutex.h @@ -32,13 +32,34 @@ #include "util/Bug.h" +#ifdef MUTEX_UNITTEST +#undef USING_VMUTEX +#define USING_VMUTEX 1 +#else +#define USING_VMUTEX 0 +#endif + +#if USING_VMUTEX + +#define MUTEX util::Mutex + /// Used to verify a mutex is locked before accessing a protected variable. #define VMUTEX_HELD(vmtx) \ - if (!vmtx.lockedByCaller()) throw lsst::qserv::util::Bug(ERR_LOC, "mutex not locked!"); + if (!vmtx.lockedByThread()) throw lsst::qserv::util::Bug(ERR_LOC, "mutex not locked!"); /// Used to verify a mutex is not locked by this thread before locking a related mutex. 
#define VMUTEX_NOT_HELD(vmtx) \ - if (vmtx.lockedByCaller()) throw lsst::qserv::util::Bug(ERR_LOC, "mutex not free!"); + if (vmtx.lockedByThread()) throw lsst::qserv::util::Bug(ERR_LOC, "mutex not unlocked!"); + +#else // not USING_VMUTEX + +#define MUTEX std::mutex + +#define VMUTEX_HELD(vmtx) ; + +#define VMUTEX_NOT_HELD(vmtx) ; + +#endif // USING_VMUTEX // This header declarations namespace lsst::qserv::util { @@ -50,6 +71,8 @@ namespace lsst::qserv::util { /// Making VMutex a wrapper around std::mutex instead of a child causes lines /// like `std::lock_guard lck(_vmutex);` to be flagged as errors, /// which is desirable. +/// Unfortunately, VMutex won't work with condition_variable as those explicitly +/// expect std::mutex. class VMutex { public: explicit VMutex() {} @@ -75,8 +98,7 @@ class VMutex { } /// @return true if the mutex is locked by this thread. - /// TODO: Rename lockedByThread() - bool lockedByCaller() const { return _holder == std::this_thread::get_id(); } + bool lockedByThread() const { return _holder == std::this_thread::get_id(); } protected: std::atomic _holder; @@ -101,13 +123,13 @@ class Mutex : public VMutex { Mutex() : _id(nextId()) {} - /// Lock the mutext (replaces the corresponding method of the base class) + /// Lock the mutex (replaces the corresponding method of the base class) void lock() { VMutex::lock(); addCurrentId(); } - /// Release the mutext (replaces the corresponding method of the base class) + /// Release the mutex (replaces the corresponding method of the base class) void unlock() { removeCurrentId(); VMutex::unlock(); diff --git a/src/qdisp/QdispPool.cc b/src/util/QdispPool.cc similarity index 95% rename from src/qdisp/QdispPool.cc rename to src/util/QdispPool.cc index 137e59a34b..131200dc98 100644 --- a/src/qdisp/QdispPool.cc +++ b/src/util/QdispPool.cc @@ -22,7 +22,7 @@ */ // Class header -#include "qdisp/QdispPool.h" +#include "util/QdispPool.h" // LSST headers #include "lsst/log/Log.h" @@ -32,10 +32,10 @@ #include 
"util/common.h" namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.QdispPool"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.util.QdispPool"); } -namespace lsst::qserv::qdisp { +namespace lsst::qserv::util { ///< @Return true if the queue could be added. bool PriorityQueue::addPriQueue(int priority, int minRunning, int maxRunning) { @@ -67,6 +67,10 @@ void PriorityQueue::queCmd(util::Command::Ptr const& cmd) { void PriorityQueue::queCmd(PriorityCommand::Ptr const& cmd, int priority) { { std::lock_guard lock(_mtx); + if (cmd->_queued.exchange(true) == true) { + throw util::Bug(ERR_LOC, + "PriorityQueue::queCmd cmd has already been queued and cannot be queued twice."); + } auto iter = _queues.find(priority); if (iter == _queues.end()) { // give it the default priority @@ -95,7 +99,8 @@ util::Command::Ptr PriorityQueue::getCmd(bool wait) { ++localLogLimiter; // Log this every once in while to INFO so there's some idea of system // load without generating crushing amounts of log messages. - if (localLogLimiter % 500 == 0) { + unsigned int const limitMod = 100; + if (localLogLimiter % limitMod == 0) { LOGS(_log, LOG_LVL_INFO, "priQueGet " << _statsStr()); } else { LOGS(_log, LOG_LVL_DEBUG, "priQueGet " << _statsStr()); @@ -162,6 +167,7 @@ void PriorityQueue::_incrDecrRunningCount(util::Command::Ptr const& cmd, int inc iter->second->running += incrDecr; } } + _cv.notify_one(); } void PriorityQueue::commandStart(util::Command::Ptr const& cmd) { @@ -260,4 +266,4 @@ QdispPool::QdispPool(bool unitTest) { } } -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::util diff --git a/src/qdisp/QdispPool.h b/src/util/QdispPool.h similarity index 89% rename from src/qdisp/QdispPool.h rename to src/util/QdispPool.h index d3e1af7743..6dc170599a 100644 --- a/src/qdisp/QdispPool.h +++ b/src/util/QdispPool.h @@ -20,8 +20,8 @@ * the GNU General Public License along with this program. If not, * see . 
*/ -#ifndef LSST_QSERV_QDISP_QDISPPOOL_H -#define LSST_QSERV_QDISP_QDISPPOOL_H +#ifndef LSST_QSERV_UTIL_QDISPPOOL_H +#define LSST_QSERV_UTIL_QDISPPOOL_H // System headers #include @@ -33,7 +33,7 @@ // Qserv headers #include "util/ThreadPool.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::util { class PriorityQueue; @@ -46,7 +46,10 @@ class PriorityCommand : public util::CommandTracked { friend PriorityQueue; private: - int _priority{0}; // Need to know what queue this was placed on. + int _priority{0}; ///< Need to know what queue this was placed on. + /// Priority commands can only be queued once, or PriorityQueue acounting + /// can be contaminated: this flag causes But to be thrown if queued twice. + std::atomic _queued{false}; }; /// FIFO priority queue. Elements with the same priority are handled in @@ -137,12 +140,10 @@ class PriorityQueue : public util::CommandQueue { }; /// This class is used to provide a pool of threads for handling out going -/// and incoming messages from xrootd as well as a system for prioritizing +/// and incoming messages as well as a system for prioritizing /// the messages. -/// This has not worked entirely as intended. Reducing the number of threads -/// had negative impacts on xrootd, but other changes have been made such that -/// reducing the size of the thread pools can be tried again. -/// What it does do is prioritize out going messages (typically jobs going to +/// This has not worked entirely as intended. +/// What it does do is prioritize outgoing messages (typically jobs going to /// workers), allow interactive queries to be handled quickly, even under /// substantial loads, and it gives a good idea of how busy the czar really /// is. Large numbers of queued items in any of the scan queries, or large @@ -162,7 +163,8 @@ class QdispPool { /// largestPriority - highest priority is 0, lowest possible priority is /// 100 and is reserved for default priority. 
largestPriority=4 would /// result in PriorityQueues's being created for - /// priorities 0, 1, 2, 3, 4, and 100 + /// priorities 0, 1, 2, 3, 4, and 100. Priority 100 is + /// meant for changing aspects of the pool and shutdown. /// runSizes - Each entry represents the maximum number of concurrent running /// commands for a priority given by the position in the array. /// If a position is undefined, the default value is 1. @@ -172,7 +174,7 @@ class QdispPool { /// priorities 3 and 4 can have up to 3 /// minRunningSizes - Each entry represents the minimum number of threads /// to be running (defaults to 0). Non-zero values can keep - /// lower priorities from being completely stared and/or + /// lower priorities from being completely starved and/or /// reduce deadlocks from high priorities depending on lower /// priorities. QdispPool(int poolSize, int largestPriority, std::vector const& maxRunSizes, @@ -200,6 +202,6 @@ class QdispPool { util::ThreadPool::Ptr _pool; }; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::util -#endif /* LSST_QSERV_QDISP_QDISPPOOL_H_ */ +#endif /* LSST_QSERV_UTIL_QDISPPOOL_H_ */ diff --git a/src/util/ResultFileName.cc b/src/util/ResultFileName.cc index 300e682198..36f25edc95 100644 --- a/src/util/ResultFileName.cc +++ b/src/util/ResultFileName.cc @@ -25,6 +25,9 @@ // Third party headers #include "boost/filesystem.hpp" +// LSST headers +#include "lsst/log/Log.h" + // Qserv headers #include "util/String.h" @@ -32,19 +35,28 @@ using namespace std; using namespace nlohmann; namespace fs = boost::filesystem; +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.util.ResultFileName"); +} + namespace lsst::qserv::util { string const ResultFileName::fileExt = ".csv"; -ResultFileName::ResultFileName(qmeta::CzarId czarId, QueryId queryId, uint32_t jobId, uint32_t chunkId, - uint32_t attemptCount) - : _fileName(to_string(czarId) + "-" + to_string(queryId) + "-" + to_string(jobId) + "-" + - to_string(chunkId) + "-" + 
to_string(attemptCount) + fileExt), +ResultFileName::ResultFileName(CzarId czarId, QueryId queryId, UberJobId ujId) + : _fileName(to_string(czarId) + splitToken() + to_string(queryId) + splitToken() + to_string(ujId) + + fileExt), _czarId(czarId), _queryId(queryId), - _jobId(jobId), - _chunkId(chunkId), - _attemptCount(attemptCount) {} + _ujId(ujId) { + if (ujId < 0) { + string const eMsg = + "ResultFileName cannot be called with negative, which would be invalid for a filename " + + _fileName; + LOGS(_log, LOG_LVL_ERROR, eMsg); + throw std::invalid_argument(eMsg); + } +} ResultFileName::ResultFileName(fs::path const& filePath) : _fileName(filePath.filename().string()) { _parse(); @@ -55,11 +67,7 @@ ResultFileName::ResultFileName(string const& filePath) : _fileName(fs::path(file } json ResultFileName::toJson() const { - return json::object({{"czar_id", _czarId}, - {"query_id", _queryId}, - {"job_id", _jobId}, - {"chunk_id", _chunkId}, - {"attemptcount", _attemptCount}}); + return json::object({{"czar_id", _czarId}, {"query_id", _queryId}, {"uj_id", _ujId}}); } bool ResultFileName::operator==(ResultFileName const& rhs) const { return _fileName == rhs._fileName; } @@ -78,16 +86,14 @@ void ResultFileName::_parse() { throw invalid_argument(_context(__func__) + " not a valid result file name: " + _fileName + ", file ext: " + fileNameExt + ", expected: " + fileExt); } - _taskAttributes = String::parseToVectUInt64(fileName.stem().string(), "-"); - if (_taskAttributes.size() != 5) { + _taskAttributes = String::parseToVectUInt64(fileName.stem().string(), splitToken()); + if (_taskAttributes.size() != 3) { throw invalid_argument(_context(__func__) + " not a valid result file name: " + _fileName); } size_t attrIndex = 0; _validateAndStoreAttr(attrIndex++, "czarId", _czarId); _validateAndStoreAttr(attrIndex++, "queryId", _queryId); - _validateAndStoreAttr(attrIndex++, "jobId", _jobId); - _validateAndStoreAttr(attrIndex++, "chunkId", _chunkId); - 
_validateAndStoreAttr(attrIndex++, "attemptCount", _attemptCount); + _validateAndStoreAttr(attrIndex++, "ujId", _ujId); } } // namespace lsst::qserv::util diff --git a/src/util/ResultFileName.h b/src/util/ResultFileName.h index 424f2fa2cb..4c7521b600 100644 --- a/src/util/ResultFileName.h +++ b/src/util/ResultFileName.h @@ -34,7 +34,6 @@ // Qserv headers #include "global/intTypes.h" -#include "qmeta/types.h" // Forward declarations @@ -58,9 +57,12 @@ namespace lsst::qserv::util { * All operations are done in the class's constructors. A few forms of the construction are * provided for convenience of the client applications. * + * QueryId + UberJobId results is a unique identifier. + * CzarId can be useful for some operations. + * * The file path has the following general format: * @code - * [/]----[.] + * [/]-[.] * @code */ class ResultFileName { @@ -68,14 +70,15 @@ class ResultFileName { /// The file extention including the '.' prefix. static std::string const fileExt; + static std::string splitToken() { return std::string("-"); } + ResultFileName() = default; ResultFileName(ResultFileName const&) = default; ResultFileName& operator=(ResultFileName const&) = default; - /// This form of constructionstores attributes of a file and generates + /// This form of construction stores attributes of a file and generates /// the name of a file in a format specified in the class description section. - ResultFileName(qmeta::CzarId czarId, QueryId queryId, std::uint32_t jobId, std::uint32_t chunkId, - std::uint32_t attemptCount); + ResultFileName(CzarId czarId, QueryId queryId, UberJobId ujId); /// @param filePath The file to be evaluated. /// @throw std::invalid_argument If the file path did not match expectations. @@ -88,11 +91,9 @@ class ResultFileName { /// @return The name of a file including its extension and excluding the optional base folder. 
std::string const& fileName() const { return _fileName; } - qmeta::CzarId czarId() const { return _czarId; } + CzarId czarId() const { return _czarId; } QueryId queryId() const { return _queryId; } - std::uint32_t jobId() const { return _jobId; } - std::uint32_t chunkId() const { return _chunkId; } - std::uint32_t attemptCount() const { return _attemptCount; } + UberJobId ujId() const { return _ujId; } /// @return The JSON object (dictionary) encapsulating values of the attributes. nlohmann::json toJson() const; @@ -106,26 +107,26 @@ class ResultFileName { static std::string _context(std::string const& func); void _parse(); + // This only works with unsigned, which wouldn't work with UberJobId + // except that negative UberJobId's never make it off of the czar. template void _validateAndStoreAttr(std::size_t attrIndex, std::string const& attrName, T& attr) { - std::uint64_t const& attrValue = _taskAttributes[attrIndex]; - T const minVal = std::numeric_limits::min(); - T const maxVal = std::numeric_limits::max(); - if ((attrValue >= minVal) && (attrValue <= maxVal)) { + size_t const& attrValue = _taskAttributes[attrIndex]; + size_t const maxVal = std::numeric_limits::max(); + /// min value for size_t is 0, so only max matters + if (attrValue <= maxVal) { attr = static_cast(attrValue); return; } - throw std::invalid_argument(_context(__func__) + " failed for attribute=" + attrName + ", value=" + - std::to_string(attrValue) + ", allowed range=[" + std::to_string(minVal) + - "," + std::to_string(maxVal) + "], file=" + _fileName); + throw std::invalid_argument(_context(__func__) + " failed for attribute=" + attrName + + ", value=" + std::to_string(attrValue) + ", allowed range=[0," + + std::to_string(maxVal) + "], file=" + _fileName); } std::string _fileName; - qmeta::CzarId _czarId = 0; + CzarId _czarId = 0; QueryId _queryId = 0; - std::uint32_t _jobId = 0; - std::uint32_t _chunkId = 0; - std::uint32_t _attemptCount = 0; + UberJobId _ujId = 0; std::vector 
_taskAttributes; }; diff --git a/src/util/ResultFileNameParser.cc b/src/util/ResultFileNameParser.cc new file mode 100644 index 0000000000..eaf4c1ce02 --- /dev/null +++ b/src/util/ResultFileNameParser.cc @@ -0,0 +1,79 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "util/ResultFileNameParser.h" + +// Third party headers +#include "boost/filesystem.hpp" + +// Qserv headers +#include "util/String.h" + +using namespace std; +using namespace nlohmann; +namespace fs = boost::filesystem; + +namespace lsst::qserv::util { + +string const ResultFileNameParser::fileExt = ".proto"; + +ResultFileNameParser::ResultFileNameParser(fs::path const& filePath) : _fileName(filePath.stem().string()) { + _parse(); +} + +ResultFileNameParser::ResultFileNameParser(string const& filePath) + : _fileName(fs::path(filePath).stem().string()) { + _parse(); +} + +json ResultFileNameParser::toJson() const { + return json::object( + {{"czar_id", czarId}, {"query_id", queryId}, {"job_id", jobId}, {"chunk_id", chunkId}}); +} + +bool ResultFileNameParser::operator==(ResultFileNameParser const& rhs) const { + return (czarId == rhs.czarId) && (queryId == rhs.queryId) && (jobId == rhs.jobId) && + (chunkId == rhs.chunkId); +} + +ostream& operator<<(ostream& os, ResultFileNameParser const& parser) { + os << parser.toJson(); + return os; +} + +string ResultFileNameParser::_context(string const& func) { + return "FileChannelShared::ResultFileNameParser::" + func; +} + +void ResultFileNameParser::_parse() { + _taskAttributes = String::parseToVectUInt64(_fileName, "-"); + if (_taskAttributes.size() != 4) { + throw invalid_argument(_context(__func__) + " not a valid result file name: " + _fileName); + } + size_t attrIndex = 0; + _validateAndStoreAttr(attrIndex++, "czarId", czarId); + _validateAndStoreAttr(attrIndex++, "queryId", queryId); + _validateAndStoreAttr(attrIndex++, "jobId", jobId); + _validateAndStoreAttr(attrIndex++, "chunkId", chunkId); +} + +} // namespace lsst::qserv::util diff --git a/src/util/ThreadPool.h b/src/util/ThreadPool.h index 80bc275d17..8dd7a193a4 100644 --- a/src/util/ThreadPool.h +++ b/src/util/ThreadPool.h @@ -83,9 +83,9 @@ class CommandForThreadPool : public CommandTracked { public: using Ptr = 
std::shared_ptr; - CommandForThreadPool() = default; + CommandForThreadPool() : CommandTracked() {} explicit CommandForThreadPool(std::function func) : CommandTracked{func} {} - virtual ~CommandForThreadPool(); + ~CommandForThreadPool() override; /// Return true if the number of threads created and still existing is /// greater than the max. @@ -111,8 +111,11 @@ class CommandForThreadPool : public CommandTracked { /// Note: It is possible for threads to leave the pool and be replaced using leavePool() /// This is usually done when a thread no longer requires significant CPU but has /// to wait for something to happen, like transferring data. -/// _poolThreadCount is a total of all threads in the pool and all threads that have -/// left the pool and this total should not exceed _maxThreadCount. +/// _poolThreadCount is a total of all threads that are still running both in the pool +/// and those that have left the pool but have not finished. +/// _poolThreadCount total should not exceed _maxThreadCount. +/// _maxThreadCount can be significantly larger than the _targetThrdCount. 
+/// class ThreadPool : public std::enable_shared_from_this { public: using Ptr = std::shared_ptr; diff --git a/src/util/TimeUtils.cc b/src/util/TimeUtils.cc index 17c68c9849..17edb71fa5 100644 --- a/src/util/TimeUtils.cc +++ b/src/util/TimeUtils.cc @@ -55,4 +55,13 @@ uint64_t TimeUtils::tp2ms(chrono::system_clock::time_point const& tp) { return chrono::duration_cast(tp.time_since_epoch()).count(); } +string TimeUtils::timePointToDateTimeString(TIMEPOINT const& point) { + auto const timer = chrono::system_clock::to_time_t(point); + auto broken_time = *localtime(&timer); + + ostringstream ss; + ss << put_time(&broken_time, "%Y-%m-%d %H:%M:%S"); + return ss.str(); +} + } // namespace lsst::qserv::util diff --git a/src/util/TimeUtils.h b/src/util/TimeUtils.h index ac5a31480a..bd3f19ca5b 100644 --- a/src/util/TimeUtils.h +++ b/src/util/TimeUtils.h @@ -31,6 +31,9 @@ #include #include +// Qserv headers +#include "global/clock_defs.h" + // This header declarations namespace lsst::qserv::util { @@ -47,6 +50,9 @@ struct TimeUtils { /// @return a human-readable timestamp in a format 'YYYY-MM-DD HH:MM:SS.mmm' static std::string toDateTimeString(std::chrono::milliseconds const& millisecondsSinceEpoch); + /// @return a human-readable time in a format 'YYYY-MM-DD HH:MM:SS' + static std::string timePointToDateTimeString(TIMEPOINT const& point); + /** * @param tp The timepoint to be converted. 
* @return The number of milliseconds since UNIX Epoch diff --git a/src/util/common.cc b/src/util/common.cc index be8e05af87..f280705356 100644 --- a/src/util/common.cc +++ b/src/util/common.cc @@ -32,12 +32,20 @@ #include #include #include +#include // Third-party headers #include "boost/asio.hpp" +// LSST headers +#include "lsst/log/Log.h" + using namespace std; +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.util.common"); +} + namespace lsst::qserv::util { string get_current_host_fqdn(bool all) { diff --git a/src/util/testFileNameParser.cc b/src/util/testFileNameParser.cc new file mode 100644 index 0000000000..b1a7bcdbd4 --- /dev/null +++ b/src/util/testFileNameParser.cc @@ -0,0 +1,109 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// System headers +#include +#include +#include +#include + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "util/ResultFileNameParser.h" + +// Boost unit test header +#define BOOST_TEST_MODULE ResultFileNameParser +#include + +// Third party headers +#include "boost/filesystem.hpp" + +namespace fs = boost::filesystem; +namespace qmeta = lsst::qserv::qmeta; +namespace test = boost::test_tools; +namespace util = lsst::qserv::util; + +BOOST_AUTO_TEST_SUITE(Suite) + +BOOST_AUTO_TEST_CASE(ResultFileNameParserTest) { + LOGS_INFO("ResultFileNameParserTest"); + + util::ResultFileNameParser fileExpected; + fileExpected.czarId = 1; + fileExpected.queryId = 2; + fileExpected.jobId = 3; + fileExpected.chunkId = 4; + std::string const fileNameNoExt = + std::to_string(fileExpected.czarId) + "-" + std::to_string(fileExpected.queryId) + "-" + + std::to_string(fileExpected.jobId) + "-" + std::to_string(fileExpected.chunkId); + + std::string const fileName = fileNameNoExt + util::ResultFileNameParser::fileExt; + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file(fileNameNoExt); + BOOST_CHECK_EQUAL(file, fileExpected); + BOOST_CHECK_EQUAL(file.czarId, fileExpected.czarId); + BOOST_CHECK_EQUAL(file.queryId, fileExpected.queryId); + BOOST_CHECK_EQUAL(file.jobId, fileExpected.jobId); + BOOST_CHECK_EQUAL(file.chunkId, fileExpected.chunkId); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file(fileName); + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file{fs::path(fileName)}; + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file("/" + fileName); + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file("/base/" + fileName); + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file("base/" + 
fileName); + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_NO_THROW({ + util::ResultFileNameParser const file(fs::path("/base/") / fileName); + BOOST_CHECK_EQUAL(file, fileExpected); + }); + + BOOST_CHECK_THROW( + { util::ResultFileNameParser const file("1-2-3-4" + fileName); }, std::invalid_argument); + + BOOST_CHECK_THROW( + { util::ResultFileNameParser const file("a-2-3-4-5" + fileName); }, std::invalid_argument); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/testMultiError.cc b/src/util/testMultiError.cc index c0f4a44699..63d71070f8 100644 --- a/src/util/testMultiError.cc +++ b/src/util/testMultiError.cc @@ -62,12 +62,12 @@ BOOST_AUTO_TEST_CASE(MonoError) { test::output_test_stream output; util::MultiError multiError; - std::string expected_err_msg = "[1] Stupid error message"; + std::string expected_err_msg = "[count=1][code=1] Stupid error message"; int errCode = 1; std::string errMsg = "Stupid error message"; - util::Error error(errCode, errMsg); - multiError.push_back(error); + util::Error error(errCode, util::Error::NONE, errMsg); + multiError.insert(error); output << multiError; std::cout << multiError; @@ -78,40 +78,55 @@ BOOST_AUTO_TEST_CASE(MonoError) { * Print a MultiError object containing several errors */ BOOST_AUTO_TEST_CASE(MultiError) { - test::output_test_stream output; util::MultiError multiError; std::string expected_err_msg = - "[10] Error code is: 10\n" - "[11] Error code is: 11\n" - "[12] Error code is: 12"; + "[count=1][code=10] Error code is: 10\n" + "[count=1][code=11] Error code is: 11\n" + "[count=1][code=12] Error code is: 12"; for (int errCode = 10; errCode < 13; errCode = errCode + 1) { std::stringstream ss; ss << "Error code is: " << errCode; std::string errMsg = ss.str(); - util::Error error(errCode, errMsg); - multiError.push_back(error); + util::Error error(errCode, util::Error::NONE, errMsg); + multiError.insert(error); } + test::output_test_stream output; output << multiError; - std::cout << 
multiError; + std::cout << "a=" << multiError << std::endl; BOOST_CHECK(output.is_equal(expected_err_msg)); -} - -/** @test - * Throw a MultiError object containing one error - */ -BOOST_AUTO_TEST_CASE(ThrowMultiError) { - util::MultiError multiError; - int errCode = 5; - std::string errMsg = "Error stack thrown"; - util::Error error(errCode, errMsg); - multiError.push_back(error); - BOOST_REQUIRE_THROW(_throw_it(multiError), std::exception); + util::MultiError multiErrB(multiError); + test::output_test_stream outputB; + outputB << multiErrB; + std::cout << "b=" << multiErrB << std::endl; + BOOST_CHECK(outputB.is_equal(expected_err_msg)); } -BOOST_AUTO_TEST_CASE(Exception) { std::string out; } +BOOST_AUTO_TEST_CASE(MultiErrorEqual) { + test::output_test_stream output; + util::MultiError multiErrorA; + util::MultiError multiErrorB; + util::MultiError multiErrorC; + + BOOST_REQUIRE(multiErrorA == multiErrorB); + BOOST_REQUIRE(multiErrorA == multiErrorC); + + util::Error err1(34, util::Error::NONE, "test rando"); + util::Error err2(-1, 25, "cancel blah"); + multiErrorA.insert(err1); + BOOST_REQUIRE(multiErrorA != multiErrorB); + multiErrorB.insert(err2); + BOOST_REQUIRE(multiErrorA != multiErrorB); + multiErrorB.insert(err1); + BOOST_REQUIRE(multiErrorA != multiErrorB); + multiErrorC.insert(err1); + BOOST_REQUIRE(multiErrorA == multiErrorC); + multiErrorA.insert(err2); + BOOST_REQUIRE(multiErrorA == multiErrorB); + BOOST_REQUIRE(multiErrorA != multiErrorC); +} BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/testMutex.cc b/src/util/testMutex.cc index 42220436e6..e1da95c9d1 100644 --- a/src/util/testMutex.cc +++ b/src/util/testMutex.cc @@ -33,6 +33,8 @@ // LSST headers #include "lsst/log/Log.h" +#define MUTEX_UNITTEST + // Qserv headers #include "util/BlockPost.h" #include "util/Mutex.h" @@ -58,16 +60,16 @@ BOOST_AUTO_TEST_SUITE(Suite) BOOST_AUTO_TEST_CASE(MutexTest) { // Test the interface of class Mutex to comply with expectations // of the standard std::lock_guard. 
- LOGS_DEBUG("MutexTest begins"); + LOGS_INFO("MutexTest begins"); // The mutex won't be locked by anyone Mutex mtx1; - BOOST_CHECK(!mtx1.lockedByCaller()); + BOOST_CHECK(!mtx1.lockedByThread()); // The mutex will be locked by the current thread Mutex mtx2; lock_guard const lockGuard2(mtx2); - BOOST_CHECK(mtx2.lockedByCaller()); + BOOST_CHECK(mtx2.lockedByThread()); // Lock this mutex in each of two separate threads. Let each thread // to wait for a random period of time within some interval before @@ -85,18 +87,18 @@ BOOST_AUTO_TEST_CASE(MutexTest) { thread thr1([&mtx, &wasLockedBeforeBy1, &wasLockedAfterBy1]() { BlockPost blockPost(10, 20); blockPost.wait(); - wasLockedBeforeBy1 = mtx.lockedByCaller(); + wasLockedBeforeBy1 = mtx.lockedByThread(); lock_guard const lock(mtx); - wasLockedAfterBy1 = mtx.lockedByCaller(); + wasLockedAfterBy1 = mtx.lockedByThread(); }); bool wasLockedBeforeBy2 = false; bool wasLockedAfterBy2 = false; thread thr2([&mtx, &wasLockedBeforeBy2, &wasLockedAfterBy2]() { BlockPost blockPost(10, 20); blockPost.wait(); - wasLockedBeforeBy2 = mtx.lockedByCaller(); + wasLockedBeforeBy2 = mtx.lockedByThread(); lock_guard const lock(mtx); - wasLockedAfterBy2 = mtx.lockedByCaller(); + wasLockedAfterBy2 = mtx.lockedByThread(); }); thr1.join(); BOOST_CHECK(!wasLockedBeforeBy1); @@ -126,24 +128,24 @@ BOOST_AUTO_TEST_CASE(MutexTest) { } BOOST_CHECK_EQUAL(counter, steps * numThreads); } - LOGS_DEBUG("MutexTest ends"); + LOGS_INFO("MutexTest ends"); } BOOST_AUTO_TEST_CASE(VMutexTest) { // Test the interface of class Mutex to comply with expectations // of the standard std::lock_guard. 
- LOGS_DEBUG("VMutexTest begins"); + LOGS_INFO("VMutexTest begins"); // The mutex won't be locked by anyone VMutex mtx1; - BOOST_CHECK(!mtx1.lockedByCaller()); + BOOST_CHECK(!mtx1.lockedByThread()); BOOST_CHECK_THROW(VMUTEX_HELD(mtx1), lsst::qserv::util::Bug); BOOST_REQUIRE_NO_THROW(VMUTEX_NOT_HELD(mtx1)); // The mutex will be locked by the current thread VMutex mtx2; lock_guard const lockGuard2(mtx2); - BOOST_CHECK(mtx2.lockedByCaller()); + BOOST_CHECK(mtx2.lockedByThread()); BOOST_REQUIRE_NO_THROW(VMUTEX_HELD(mtx2)); BOOST_CHECK_THROW(VMUTEX_NOT_HELD(mtx2), lsst::qserv::util::Bug); @@ -163,18 +165,18 @@ BOOST_AUTO_TEST_CASE(VMutexTest) { thread thr1([&mtx, &wasLockedBeforeBy1, &wasLockedAfterBy1]() { BlockPost blockPost(10, 20); blockPost.wait(); - wasLockedBeforeBy1 = mtx.lockedByCaller(); + wasLockedBeforeBy1 = mtx.lockedByThread(); lock_guard const lock(mtx); - wasLockedAfterBy1 = mtx.lockedByCaller(); + wasLockedAfterBy1 = mtx.lockedByThread(); }); bool wasLockedBeforeBy2 = false; bool wasLockedAfterBy2 = false; thread thr2([&mtx, &wasLockedBeforeBy2, &wasLockedAfterBy2]() { BlockPost blockPost(10, 20); blockPost.wait(); - wasLockedBeforeBy2 = mtx.lockedByCaller(); + wasLockedBeforeBy2 = mtx.lockedByThread(); lock_guard const lock(mtx); - wasLockedAfterBy2 = mtx.lockedByCaller(); + wasLockedAfterBy2 = mtx.lockedByThread(); }); thr1.join(); BOOST_CHECK(!wasLockedBeforeBy1); @@ -205,16 +207,16 @@ BOOST_AUTO_TEST_CASE(VMutexTest) { BOOST_CHECK_EQUAL(counter, steps * numThreads); } - LOGS_DEBUG("VMutexTest ends"); + LOGS_INFO("VMutexTest ends"); } BOOST_AUTO_TEST_CASE(LockTest1) { // Test locking a mutex created on stack using a special class util::Lock. 
- LOGS_DEBUG("LockTest1 begins"); + LOGS_INFO("LockTest1 begins"); // The mutex won't be locked by anyone Mutex mtx1; - BOOST_CHECK(not mtx1.lockedByCaller()); + BOOST_CHECK(not mtx1.lockedByThread()); // The mutex will be locked by the current thread Mutex mtx2; @@ -222,9 +224,9 @@ BOOST_AUTO_TEST_CASE(LockTest1) { // Do this in a nested block to ensure that lock object // gets destructed before the mutex. Lock const lock(mtx2, "LockTes1t: main thread"); - BOOST_CHECK(mtx2.lockedByCaller()); + BOOST_CHECK(mtx2.lockedByThread()); } - LOGS_DEBUG(!mtx2.lockedByCaller()); + LOGS_INFO(!mtx2.lockedByThread()); // Lock this mutex in each of two separate threads. Let each thread // to wait for a random period of time within some interval before @@ -247,7 +249,7 @@ BOOST_AUTO_TEST_CASE(LockTest1) { blockPost.wait(); Lock const lock(mtx, "LockTest1: thread 2"); }); - BOOST_CHECK(!mtx.lockedByCaller()); + BOOST_CHECK(!mtx.lockedByThread()); thr1.join(); thr2.join(); } @@ -272,7 +274,7 @@ BOOST_AUTO_TEST_CASE(LockTest1) { } BOOST_CHECK_EQUAL(counter, steps * numThreads); } - LOGS_DEBUG("LockTest1 ends"); + LOGS_INFO("LockTest1 ends"); } BOOST_AUTO_TEST_CASE(LockTest2) { @@ -280,11 +282,11 @@ BOOST_AUTO_TEST_CASE(LockTest2) { // a shared pointer using a special class util::Lock. The test implements // the same testing algorithm as the previous test, except it will be testing // a different way of constructing the lock. - LOGS_DEBUG("LockTest2 begins"); + LOGS_INFO("LockTest2 begins"); // The mutex won't be locked by anyone shared_ptr const mtx1 = make_shared(); - BOOST_CHECK(!mtx1->lockedByCaller()); + BOOST_CHECK(!mtx1->lockedByThread()); // The mutex will be locked by the current thread shared_ptr const mtx2 = make_shared(); @@ -292,9 +294,9 @@ BOOST_AUTO_TEST_CASE(LockTest2) { // Do this in a nested block to ensure that lock object // gets destructed before the mutex. 
Lock const lock(mtx2, "LockTes1t: main thread"); - BOOST_CHECK(mtx2->lockedByCaller()); + BOOST_CHECK(mtx2->lockedByThread()); } - BOOST_CHECK(!mtx2->lockedByCaller()); + BOOST_CHECK(!mtx2->lockedByThread()); // Lock this mutex in each of two separate threads. Let each thread // to wait for a random period of time within some interval before @@ -317,7 +319,7 @@ BOOST_AUTO_TEST_CASE(LockTest2) { blockPost.wait(); Lock const lock(mtx, "LockTest1: thread 2"); }); - BOOST_CHECK(!mtx->lockedByCaller()); + BOOST_CHECK(!mtx->lockedByThread()); thr1.join(); thr2.join(); } @@ -342,7 +344,7 @@ BOOST_AUTO_TEST_CASE(LockTest2) { } BOOST_CHECK_EQUAL(counter, steps * numThreads); } - LOGS_DEBUG("LockTest2 ends"); + LOGS_INFO("LockTest2 ends"); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/testResultFileName.cc b/src/util/testResultFileName.cc index cdb978f05f..b54b932e31 100644 --- a/src/util/testResultFileName.cc +++ b/src/util/testResultFileName.cc @@ -31,7 +31,6 @@ // Qserv headers #include "global/intTypes.h" -#include "qmeta/types.h" #include "util/ResultFileName.h" // Boost unit test header @@ -49,24 +48,19 @@ BOOST_AUTO_TEST_SUITE(Suite) BOOST_AUTO_TEST_CASE(ResultFileNameTest) { LOGS_INFO("ResultFileNameTest"); - lsst::qserv::qmeta::CzarId const czarId = 1; + lsst::qserv::CzarId const czarId = 1; lsst::qserv::QueryId const queryId = 2; - uint32_t const jobId = 3; - uint32_t const chunkId = 4; - uint32_t const attemptCount = 5; + lsst::qserv::UberJobId const ujId = 3; std::string const name2parse = std::to_string(czarId) + "-" + std::to_string(queryId) + "-" + - std::to_string(jobId) + "-" + std::to_string(chunkId) + "-" + - std::to_string(attemptCount) + lsst::qserv::util::ResultFileName::fileExt; + std::to_string(ujId) + lsst::qserv::util::ResultFileName::fileExt; BOOST_CHECK_NO_THROW({ lsst::qserv::util::ResultFileName const file(name2parse); BOOST_CHECK_EQUAL(file.fileName(), name2parse); BOOST_CHECK_EQUAL(file.czarId(), czarId); 
BOOST_CHECK_EQUAL(file.queryId(), queryId); - BOOST_CHECK_EQUAL(file.jobId(), jobId); - BOOST_CHECK_EQUAL(file.chunkId(), chunkId); - BOOST_CHECK_EQUAL(file.attemptCount(), attemptCount); + BOOST_CHECK_EQUAL(file.ujId(), ujId); }); BOOST_CHECK_NO_THROW({ @@ -74,9 +68,7 @@ BOOST_AUTO_TEST_CASE(ResultFileNameTest) { BOOST_CHECK_EQUAL(file.fileName(), name2parse); BOOST_CHECK_EQUAL(file.czarId(), czarId); BOOST_CHECK_EQUAL(file.queryId(), queryId); - BOOST_CHECK_EQUAL(file.jobId(), jobId); - BOOST_CHECK_EQUAL(file.chunkId(), chunkId); - BOOST_CHECK_EQUAL(file.attemptCount(), attemptCount); + BOOST_CHECK_EQUAL(file.ujId(), ujId); }); BOOST_CHECK_NO_THROW({ @@ -84,27 +76,22 @@ BOOST_AUTO_TEST_CASE(ResultFileNameTest) { BOOST_CHECK_EQUAL(file.fileName(), name2parse); BOOST_CHECK_EQUAL(file.czarId(), czarId); BOOST_CHECK_EQUAL(file.queryId(), queryId); - BOOST_CHECK_EQUAL(file.jobId(), jobId); - BOOST_CHECK_EQUAL(file.chunkId(), chunkId); - BOOST_CHECK_EQUAL(file.attemptCount(), attemptCount); + BOOST_CHECK_EQUAL(file.ujId(), ujId); }); BOOST_CHECK_NO_THROW({ - lsst::qserv::util::ResultFileName const file(czarId, queryId, jobId, chunkId, attemptCount); + lsst::qserv::util::ResultFileName const file(czarId, queryId, ujId); BOOST_CHECK_EQUAL(file.fileName(), name2parse); BOOST_CHECK_EQUAL(file.czarId(), czarId); BOOST_CHECK_EQUAL(file.queryId(), queryId); - BOOST_CHECK_EQUAL(file.jobId(), jobId); - BOOST_CHECK_EQUAL(file.chunkId(), chunkId); - BOOST_CHECK_EQUAL(file.attemptCount(), attemptCount); + BOOST_CHECK_EQUAL(file.ujId(), ujId); }); BOOST_CHECK_THROW( - { lsst::qserv::util::ResultFileName const file(std::string("1-2-3-4")); }, std::invalid_argument); + { lsst::qserv::util::ResultFileName const file(std::string("1-2")); }, std::invalid_argument); BOOST_CHECK_THROW( - { lsst::qserv::util::ResultFileName const file(std::string("a-2-3-4-5")); }, - std::invalid_argument); + { lsst::qserv::util::ResultFileName const file(std::string("a-2-3-4")); }, std::invalid_argument); 
} BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/xrootd.cc b/src/util/xrootd.cc deleted file mode 100644 index a4f967faa4..0000000000 --- a/src/util/xrootd.cc +++ /dev/null @@ -1,89 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2009-2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -// xrootd.h -- Helper funcitons for xrootd-based dispatch - -#include "util/xrootd.h" - -// System headers -#include -#include - -// Third-party headers -#include "boost/format.hpp" - -namespace lsst::qserv::util { - -std::string makeUrl(char const* hostport, char const* typeStr, int chunk) { - std::stringstream s; - s << chunk; - // boost::format version is 5x slower. 
- // std::string s = (boost::format("%d") % chunk).str(); - return makeUrl(hostport, typeStr, s.str()); -} - -std::string makeUrl(char const* hostport, std::string const& path) { - return makeUrl(hostport, nullptr, path); -} - -std::string makeUrl(char const* hostport, char const* typeStr, std::string const& s, char mode) { - // typeStr is either "query" or "result" - if (!hostport) { - hostport = ::getenv("QSERV_XRD"); - if (!hostport) { - // use local host name if nothing is specified - hostport = "localhost:1094"; - } - } -#if 0 - char* user = "qsmaster"; - boost::format f("xroot://%s@%s//%s/%s"); - return (f % user % hostport % typeStr % s).str(); -#else - // This is ~8.5x faster than the boost::format version. - std::string pfx = "xroot://"; - std::string user("qsmaster"); - std::string tstr; - std::string ret; - if (typeStr) tstr = typeStr; - - if (mode != '\0') { - user += "."; - user += mode; - } - ret.reserve(pfx.size() + user.size() + 1 + 2 + 1 + tstr.size() + s.size()); - ret += pfx; - ret += user; - ret += "@"; - ret += hostport; - ret += "/"; - if (typeStr) { - ret += "/"; - ret += typeStr; - ret += "/"; - } // else: assume s contains leading "/" - ret += s; - return ret; -#endif -} - -} // namespace lsst::qserv::util diff --git a/src/util/xrootd.h b/src/util/xrootd.h deleted file mode 100644 index bf3c00f8bb..0000000000 --- a/src/util/xrootd.h +++ /dev/null @@ -1,42 +0,0 @@ -// -*- LSST-C++ -*- - -/* - * LSST Data Management System - * Copyright 2008, 2009, 2010 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_UTIL_XROOTD_H -#define LSST_QSERV_UTIL_XROOTD_H - -// xrootd.h : consolidates xrootd/lower-level helper functions (i.e., -// dealing with xrootd URLs) - -// Third-party headers -#include - -namespace lsst::qserv::util { - -std::string makeUrl(char const* hostport, char const* typeStr, int chunk); -std::string makeUrl(char const* hostport, char const* typeStr, std::string const& s, char mode = 0); -std::string makeUrl(char const* hostport, std::string const& path); - -} // namespace lsst::qserv::util - -#endif // LSST_QSERV_UTIL_XROOTD_H diff --git a/src/wbase/Base.h b/src/wbase/Base.h index bc80273804..74cdada0cc 100644 --- a/src/wbase/Base.h +++ b/src/wbase/Base.h @@ -34,18 +34,10 @@ #include "boost/format.hpp" // Forward declarations -class XrdSysError; -class XrdSysLogger; -class XrdSfsAio; -namespace lsst::qserv { -namespace proto { -class TaskMsg; -} -namespace wbase { +namespace lsst::qserv::wbase { class StringBuffer; class StringBuffer2; -} // namespace wbase -} // namespace lsst::qserv +} // namespace lsst::qserv::wbase namespace lsst::qserv::wbase { diff --git a/src/wbase/CMakeLists.txt b/src/wbase/CMakeLists.txt index add5682ee8..0338c0e950 100644 --- a/src/wbase/CMakeLists.txt +++ b/src/wbase/CMakeLists.txt @@ -1,25 +1,17 @@ add_library(wbase SHARED) -add_dependencies(wbase proto) target_sources(wbase PRIVATE Base.cc FileChannelShared.cc SendChannel.cc Task.cc + UberJobData.cc UserQueryInfo.cc - WorkerCommand.cc -) - -install( - TARGETS wbase -) - -target_include_directories(wbase PRIVATE - ${XROOTD_INCLUDE_DIRS} ) 
target_link_libraries(wbase PUBLIC boost_regex log - XrdSsiLib ) + +install(TARGETS wbase) diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc index d2ad2db475..d787a76783 100644 --- a/src/wbase/FileChannelShared.cc +++ b/src/wbase/FileChannelShared.cc @@ -35,9 +35,9 @@ // Qserv headers #include "global/LogContext.h" #include "mysql/MySqlUtils.h" -#include "proto/ProtoHeaderWrap.h" -#include "proto/worker.pb.h" +#include "protojson/UberJobReadyMsg.h" #include "wbase/Task.h" +#include "wbase/UberJobData.h" #include "wconfig/WorkerConfig.h" #include "wpublish/QueriesAndChunks.h" #include "util/Bug.h" @@ -46,7 +46,6 @@ #include "util/ResultFileName.h" #include "util/Timer.h" #include "util/TimeUtils.h" -#include "xrdsvc/StreamBuffer.h" // LSST headers #include "lsst/log/Log.h" @@ -169,6 +168,29 @@ void FileChannelShared::cleanUpResults(uint32_t czarId, QueryId queryId) { context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); } +void FileChannelShared::cleanUpResults(uint32_t czarId, QueryId queryId, UberJobId ujId) { + string const context = "FileChannelShared::" + string(__func__) + " "; + fs::path const dirPath = wconfig::WorkerConfig::instance()->resultsDirname(); + LOGS(_log, LOG_LVL_INFO, + context << "removing result files from " << dirPath << " for czarId=" << czarId + << ", queryId=" << queryId << ", and ujId=" << ujId << "."); + lock_guard const lock(_resultsDirCleanupMtx); + size_t const numFilesRemoved = ::cleanUpResultsImpl( + context, dirPath, [&context, czarId, queryId, ujId](string const& fileName) -> bool { + try { + auto const fileAttributes = util::ResultFileName(fileName); + return (fileAttributes.czarId() == czarId) && (fileAttributes.queryId() == queryId) && + (fileAttributes.ujId() == ujId); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to parse the file name " << fileName << ", ex: " << ex.what()); + } + return false; + }); + LOGS(_log, LOG_LVL_INFO, + 
context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); +} + json FileChannelShared::statusToJson() { string const context = "FileChannelShared::" + string(__func__) + " "; auto const config = wconfig::WorkerConfig::instance(); @@ -255,43 +277,49 @@ json FileChannelShared::filesToJson(vector const& queryIds, unsigned in return json::object({{"files", files}, {"num_selected", numSelected}, {"num_total", numTotal}}); } -shared_ptr FileChannelShared::create(shared_ptr const& sendChannel, - qmeta::CzarId czarId, string const& workerId) { +FileChannelShared::Ptr FileChannelShared::create(std::shared_ptr const& uberJobData) { lock_guard const lock(_resultsDirCleanupMtx); - return shared_ptr(new FileChannelShared(sendChannel, czarId, workerId)); + return Ptr(new FileChannelShared(uberJobData)); } -FileChannelShared::FileChannelShared(shared_ptr const& sendChannel, qmeta::CzarId czarId, - string const& workerId) - : _sendChannel(sendChannel), _czarId(czarId), _workerId(workerId) { - LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared created"); - if (_sendChannel == nullptr) { - throw util::Bug(ERR_LOC, "FileChannelShared constructor given nullptr"); - } +FileChannelShared::FileChannelShared(std::shared_ptr const& uberJobData) + : _sendChannel(nullptr), _uberJobData(uberJobData), _uberJobId(uberJobData->getUberJobId()) { + LOGS(_log, LOG_LVL_TRACE, "FileChannelShared created ujId=" << _uberJobId); } FileChannelShared::~FileChannelShared() { + LOGS(_log, LOG_LVL_TRACE, "~FileChannelShared ujId=" << _uberJobId); // Normally, the channel should not be dead at this time. If it's already // dead it means there was a problem to process a query or send back a response // to Czar. In either case, the file would be useless and it has to be deleted // in order to avoid leaving unclaimed result files within the results folder. 
- if (isDead()) { + // + // _rowLimitComplete confuses things as it can cause other Tasks using this + // file to be cancelled, but the file should not be deleted until collected. + // In any case, the WorkerQueryStatusData message from the czar will delete + // the file when the user query completes. + if (isDead() && !_rowLimitComplete) { _removeFile(lock_guard(_tMtx)); } - if (_sendChannel != nullptr) { - _sendChannel->setDestroying(); - if (!_sendChannel->isDead()) { - _sendChannel->kill("~FileChannelShared()"); - } - } - LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared deleted"); } void FileChannelShared::setTaskCount(int taskCount) { _taskCount = taskCount; } -bool FileChannelShared::transmitTaskLast() { +bool FileChannelShared::transmitTaskLast(bool rowLimitComplete) { lock_guard const streamMutexLock(_streamMutex); ++_lastCount; + if (rowLimitComplete) { + // There are enough rows in the file so other tasks can be ignored. + if (_rowLimitComplete.exchange(true) == false) { + // This is TaskLast. + return true; + } else { + // A different task set _rowLimitComplete before + // this one. Since there can be only one TaskLast, + // it is not this one. + return false; + } + } bool lastTaskDone = _lastCount >= _taskCount; return lastTaskDone; } @@ -301,24 +329,33 @@ bool FileChannelShared::kill(string const& note) { return _kill(streamMutexLock, note); } -bool FileChannelShared::isDead() { - if (_sendChannel == nullptr) return true; - return _sendChannel->isDead(); -} +bool FileChannelShared::isDead() const { return _dead; } string FileChannelShared::makeIdStr(int qId, int jId) { string str("QID" + (qId == 0 ? 
"" : to_string(qId) + "#" + to_string(jId))); return str; } -bool FileChannelShared::buildAndTransmitError(util::MultiError& multiErr, shared_ptr const& task, +bool FileChannelShared::isRowLimitComplete() const { + lock_guard const tMtxLock(_tMtx); + return _rowLimitComplete; +} + +void FileChannelShared::buildAndTransmitError(util::MultiError& multiErr, shared_ptr const& task, bool cancelled) { lock_guard const tMtxLock(_tMtx); - if (!_sendResponse(tMtxLock, task, cancelled, multiErr)) { - LOGS(_log, LOG_LVL_ERROR, "Could not transmit the error message to Czar."); - return false; + if (_rowLimitComplete) { + LOGS(_log, LOG_LVL_WARN, + __func__ << " already enough rows, this call likely a side effect" << task->getIdStr()); + return; } - return true; + + auto ujd = _uberJobData.lock(); + if (ujd != nullptr) { + ujd->responseError(multiErr, task->getChunkId(), cancelled, task->getLvlET()); + } + // Flag the result as dead after sending the error to avoid races on queries with missing tables. + _kill(tMtxLock, " buildAndTransmitError"); } bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const& task, @@ -330,7 +367,7 @@ bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const tMtxLockA(_tMtx); + if (_rowLimitComplete) { + LOGS(_log, LOG_LVL_DEBUG, __func__ << " already enough rows, returning " << task->getIdStr()); + // Deleting the file now could be risky. + return erred; + } // Extract the result set and write it into the file. util::Timer bufferFillT; @@ -357,36 +399,48 @@ bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptrgetMaxTableSize(); - if (maxTableSize > 0 && _transmitsize > maxTableSize) { - string const err = "The result set size " + to_string(_transmitsize) + + // Fail the operation if the amount of data in the result set exceeds the requested + // "large result" limit (in case one was specified). 
+ LOGS(_log, LOG_LVL_TRACE, "bytesWritten=" << _bytesWritten << " max=" << maxTableSize); + if (maxTableSize > 0 && _bytesWritten > maxTableSize) { + string const err = "The result set size " + to_string(_bytesWritten) + " of a job exceeds the requested limit of " + to_string(maxTableSize) + " bytes, task: " + task->getIdStr(); - multiErr.push_back(util::Error(util::ErrorCode::WORKER_RESULT_TOO_LARGE, err)); + multiErr.insert(util::Error(util::Error::WORKER_RESULT_TOO_LARGE, util::Error::NONE, err)); LOGS(_log, LOG_LVL_ERROR, err); erred = true; + return erred; } - // If this is last task in a logical group of ones created for processing + int const ujRowLimit = task->getRowLimit(); + bool rowLimitComplete = false; + if (ujRowLimit > 0 && _rowcount >= ujRowLimit) { + // There are enough rows to satisfy the query, so stop reading + rowLimitComplete = true; + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " enough rows for query rows=" << _rowcount << " " << task->getIdStr()); + } + + // If no more rows are left in the task's result set then we need to check + // if this is last task in a logical group of ones created for processing // the current request (note that certain classes of requests may require // more than one task for processing). - if (!erred && transmitTaskLast()) { + if (transmitTaskLast(rowLimitComplete)) { // Make sure the file is sync to disk before notifying Czar. _file.flush(); _file.close(); // Only the last ("summary") message, w/o any rows, is sent to the Czar to notify // it about the completion of the request. 
- if (!_sendResponse(tMtxLockA, task, cancelled, multiErr)) { + LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared " << task->cName(__func__) << " sending start"); + if (!_sendResponse(tMtxLockA, task, cancelled, multiErr, rowLimitComplete)) { LOGS(_log, LOG_LVL_ERROR, "Could not transmit the request completion message to Czar."); erred = true; } else { LOGS(_log, LOG_LVL_TRACE, __func__ << " " << task->getIdStr() << " sending done!!!"); } + LOGS(_log, LOG_LVL_TRACE, "FileChannelShared " << task->cName(__func__) << " sending done!!!"); } } transmitT.stop(); @@ -405,7 +459,7 @@ bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const tMtxLockA(_tMtx); _removeFile(tMtxLockA); } @@ -413,15 +467,19 @@ bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const& streamMutexLock, string const& note) { - LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared::" << __func__ << " " << note); - return _sendChannel->kill(note); + LOGS(_log, LOG_LVL_TRACE, "FileChannelShared::" << __func__ << " " << note); + bool oldVal = _dead.exchange(true); + if (!oldVal) { + LOGS(_log, LOG_LVL_WARN, "FileChannelShared::" << __func__ << " first kill call " << note); + } + return oldVal; } void FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_ptr const& task, - MYSQL_RES* mResult, uint64_t& bytes, uint32_t& rows, + MYSQL_RES* mResult, uint64_t& bytes, uint64_t& rows, util::MultiError& multiErr) { if (!_file.is_open()) { - _fileName = task->resultFileAbsPath(); + _fileName = task->getUberJobData()->resultFilePath(); _file.open(_fileName, ios::out | ios::trunc | ios::binary); if (!(_file.is_open() && _file.good())) { throw runtime_error("FileChannelShared::" + string(__func__) + @@ -434,6 +492,7 @@ void FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_p string const fieldEndsWith = "\t"; string const rowEndsWith = "\n"; string const mysqlNull("\\N"); + int const numFields = mysql_num_fields(mResult); bytes = 0; rows = 0; @@ 
-462,81 +521,47 @@ void FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_p } void FileChannelShared::_removeFile(lock_guard const& tMtxLock) { - if (!_fileName.empty() && _file.is_open()) { - _file.close(); + LOGS(_log, LOG_LVL_TRACE, "FileChannelShared::_removeFile " << _fileName); + if (!_fileName.empty()) { + if (_file.is_open()) { + _file.close(); + } boost::system::error_code ec; + LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared::" << __func__ << " removing " << _fileName); fs::remove_all(fs::path(_fileName), ec); if (ec.value() != 0) { LOGS(_log, LOG_LVL_WARN, "FileChannelShared::" << __func__ << " failed to remove the result file '" << _fileName << "', ec: " << ec << "."); + return; } } + _fileName.clear(); } bool FileChannelShared::_sendResponse(lock_guard const& tMtxLock, shared_ptr const& task, - bool cancelled, util::MultiError const& multiErr) { + bool cancelled, util::MultiError const& multiErr, bool mustSend) { auto const queryId = task->getQueryId(); - auto const jobId = task->getJobId(); - auto const idStr(makeIdStr(queryId, jobId)); + auto const jId = task->getJobId(); + auto const idStr(makeIdStr(queryId, jId)); // This lock is required for making consistent modifications and usage of the metadata // and response buffers. lock_guard const streamMutexLock(_streamMutex); - QSERV_LOGCONTEXT_QUERY_JOB(queryId, jobId); - LOGS(_log, LOG_LVL_DEBUG, __func__); - if (isDead()) { - LOGS(_log, LOG_LVL_INFO, __func__ << ": aborting transmit since sendChannel is dead."); - return false; - } - - // Prepare the response object and serialize in into a message that will - // be sent to Czar. 
- - proto::ResponseSummary response; - response.set_wname(_workerId); - response.set_queryid(queryId); - response.set_jobid(jobId); - response.set_fileresource_http(task->resultFileHttpUrl()); - response.set_attemptcount(task->getAttemptCount()); - response.set_rowcount(_rowcount); - response.set_transmitsize(_transmitsize); - string errorMsg; - int errorCode = 0; - if (!multiErr.empty()) { - errorMsg = multiErr.toOneLineString(); - errorCode = multiErr.firstErrorCode(); - } else if (cancelled) { - errorMsg = "cancelled"; - errorCode = -1; - } - if (!errorMsg.empty() or (errorCode != 0)) { - errorMsg = "FileChannelShared::" + string(__func__) + " error(s) in result for chunk #" + - to_string(task->getChunkId()) + ": " + errorMsg; - response.set_errormsg(errorMsg); - response.set_errorcode(errorCode); - LOGS(_log, LOG_LVL_ERROR, errorMsg); - } - response.SerializeToString(&_responseBuf); - - LOGS(_log, LOG_LVL_DEBUG, - __func__ << " idStr=" << idStr << ", _responseBuf.size()=" << _responseBuf.size()); + QSERV_LOGCONTEXT_QUERY_JOB(queryId, jId); - // Send the message sent out-of-band within the SSI metadata. - if (!_sendChannel->setMetadata(_responseBuf.data(), _responseBuf.size())) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in setMetadata " << idStr); - _kill(streamMutexLock, "setMetadata"); + if (isDead() && !mustSend) { + LOGS(_log, LOG_LVL_INFO, __func__ << ": aborting transmit since sendChannel is dead."); return false; } - // Send back the empty object since no info is expected by a caller - // for this type of requests beyond the usual error notifications (if any). - // Note that this call is needed to initiate the transaction. - if (!_sendChannel->sendData((char const*)0, 0)) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in sendData " << idStr); - _kill(streamMutexLock, "sendData"); - return false; + // Prepare the response object and put into a message that will + // be sent to the Czar. 
+ string httpFileUrl = task->getUberJobData()->resultFileHttpUrl(); + auto ujd = _uberJobData.lock(); + if (ujd != nullptr) { + ujd->responseFileReady(protojson::FileUrlInfo(httpFileUrl, _rowcount, _transmitsize)); } return true; } diff --git a/src/wbase/FileChannelShared.h b/src/wbase/FileChannelShared.h index 0ad6fa75d1..197c8ddbb9 100644 --- a/src/wbase/FileChannelShared.h +++ b/src/wbase/FileChannelShared.h @@ -35,11 +35,9 @@ // Qserv headers #include "global/intTypes.h" -#include "qmeta/types.h" #include "wbase/SendChannel.h" // Forward declarations - namespace lsst::qserv::wbase { class Task; } // namespace lsst::qserv::wbase @@ -49,8 +47,9 @@ class MultiError; } // namespace lsst::qserv::util namespace lsst::qserv::wbase { +class UberJobData; -/// The class is responsible for writing mysql result rows as Protobuf +/// The class is responsible for writing mysql result rows as Csv /// serialized messages into an output file. Once a task (or all sub-chunk /// tasks) finished writing data a short reply message is sent back to Czar using /// SSI request's SendChannel that was provided to the factory method @@ -88,10 +87,18 @@ class FileChannelShared { /** * Clean up result files of the specified query. * @param czarId The unique identifier of Czar that initiated the query. - * @param queryId The most recent user query registered before restart. + * @param queryId The id of the query results to be removed. */ static void cleanUpResults(uint32_t czarId, QueryId queryId); + /** + * Clean up result files of the specified UberJob. + * @param czarId The unique identifier of Czar that initiated the query. + * @param queryId The id of the UberJob results to be removed. + * @param ujId The id of the UberJob results to be removed. + */ + static void cleanUpResults(uint32_t czarId, QueryId queryId, UberJobId ujId); + /// @return Status and statistics on the results folder (capacity, usage, etc.) 
static nlohmann::json statusToJson(); @@ -105,9 +112,8 @@ class FileChannelShared { */ static nlohmann::json filesToJson(std::vector const& queryIds, unsigned int maxFiles); - /// The factory method for the channel class. - static Ptr create(std::shared_ptr const& sendChannel, qmeta::CzarId czarId, - std::string const& workerId = std::string()); + /// The factory method for handling UberJob over http. + static Ptr create(std::shared_ptr const& uberJobData); FileChannelShared() = delete; FileChannelShared(FileChannelShared const&) = delete; @@ -123,7 +129,9 @@ class FileChannelShared { int getTaskCount() const { return _taskCount; } /// @return true if this is the last task to call this - bool transmitTaskLast(); + /// @param rowLimitComplete - true means enough rows for the result are + /// already in the file, so other tasks can be ignored. + bool transmitTaskLast(bool rowLimitComplete); /// Return a normalized id string. static std::string makeIdStr(int qId, int jId); @@ -137,8 +145,8 @@ class FileChannelShared { /// @return true if this is the first time this function has been called. bool getFirstChannelSqlConn() { return _firstChannelSqlConn.exchange(false); } - /// @return a transmit data object indicating the errors in 'multiErr'. - bool buildAndTransmitError(util::MultiError& multiErr, std::shared_ptr const& task, bool cancelled); + /// Build and transmit a transmit data object indicating the errors in 'multiErr'. + void buildAndTransmitError(util::MultiError& multiErr, std::shared_ptr const& task, bool cancelled); /// Extract the SQL results and write them into the file and notify Czar after the last /// row of the result result set depending on theis channel has been processed. @@ -150,12 +158,16 @@ class FileChannelShared { bool kill(std::string const& note); /// @see wbase::SendChannel::isDead - bool isDead(); + bool isDead() const; + + /// Return true if there are enough rows in this result file to satisfy the + /// LIMIT portion of the query. 
+ /// @See _rowLimitComplete + bool isRowLimitComplete() const; private: /// Private constructor to protect shared pointer integrity. - FileChannelShared(std::shared_ptr const& sendChannel, qmeta::CzarId czarId, - std::string const& workerId); + FileChannelShared(std::shared_ptr const& uberJobData); /// @see wbase::SendChannel::kill /// @param streamMutexLock - Lock on mutex _streamMutex to be acquired before calling the method. @@ -165,21 +177,21 @@ class FileChannelShared { * Transfer rows of the result set into into the output file. * @note The file will be created at the first call to the method. * @note The method may not extract all rows if the amount of data found - * in the result set exceeded the maximum size allowed by the Google Protobuf - * implementation. Also, the iterative approach to the data extraction allows - * the driving code to be interrupted should the correponding query be cancelled + * in the result set exceeded the maximum size allowed. Also, the iterative + * approach to the data extraction allows the driving code to be + * interrupted should the corresponding query be cancelled * during the lengthy data processing phase. * @param tMtxLock - a lock on the mutex tMtx * @param task - a task that produced the result set * @param mResult - MySQL result to be used as a source * @param bytes - the number of bytes in the result message recorded into the file - * @param rows - the number of rows extracted from th eresult set + * @param rows - the number of rows extracted from the result set * @param multiErr - a collector of any errors that were captured during result set processing * @throws std::runtime_error for problems encountered when attemting to create the file * or write into the file. 
*/ void _writeToFile(std::lock_guard const& tMtxLock, std::shared_ptr const& task, - MYSQL_RES* mResult, std::uint64_t& bytes, std::uint32_t& rows, + MYSQL_RES* mResult, std::uint64_t& bytes, std::uint64_t& rows, util::MultiError& multiErr); /// Write a string into the currently open file. @@ -208,16 +220,20 @@ class FileChannelShared { * @param task - a task that produced the result set * @param cancelled - request cancellaton flag (if any) * @param multiErr - a collector of any errors that were captured during result set processing + * @param mustSend - set to true if this message should be sent even if the query was cancelled. * @return 'true' if the operation was successfull */ bool _sendResponse(std::lock_guard const& tMtxLock, std::shared_ptr const& task, - bool cancelled, util::MultiError const& multiErr); + bool cancelled, util::MultiError const& multiErr, bool mustSend = false); mutable std::mutex _tMtx; ///< Protects data recording and Czar notification - std::shared_ptr const _sendChannel; ///< Used to pass encoded information to XrdSsi. - qmeta::CzarId const _czarId; ///< id of the czar that requested this task(s). - std::string const _workerId; ///< The unique identifier of the worker. + bool _isUberJob; ///< true if this is using UberJob http. To be removed when _sendChannel goes away. + + std::shared_ptr const _sendChannel; ///< Used to send info to czar. + std::weak_ptr _uberJobData; ///< Contains czar contact info. + + UberJobId const _uberJobId; ///< The UberJobId /// streamMutex is used to protect _lastCount and messages that are sent /// using FileChannelShared. @@ -249,8 +265,17 @@ class FileChannelShared { // Counters reported to Czar in the only ("summary") message sent upon the completion // of all tasks of a query. - uint32_t _rowcount = 0; ///< The total numnber of rows in all result sets of a query. + int64_t _rowcount = 0; ///< The total numnber of rows in all result sets of a query. 
uint64_t _transmitsize = 0; ///< The total amount of data (bytes) in all result sets of a query. + + /// _rowLimitComplete indicates that there is a LIMIT clause in the user query that + /// can be applied to the queries given to workers. It's important to apply it + /// when possible as an UberJob could have 1000 chunks and a LIMIT of 1, and it's + /// much faster to answer the query without scanning all 1000 chunks. + std::atomic _rowLimitComplete; + std::atomic _dead{false}; ///< Set to true when the contents of the file are no longer useful. + + std::atomic _bytesWritten{0}; ///< Total bytes written. }; } // namespace lsst::qserv::wbase diff --git a/src/wbase/MsgProcessor.h b/src/wbase/MsgProcessor.h deleted file mode 100644 index 8458dc3f45..0000000000 --- a/src/wbase/MsgProcessor.h +++ /dev/null @@ -1,64 +0,0 @@ - -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2011-2016 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -/// MsgProcessor.h -#ifndef LSST_QSERV_WBASE_MSG_PROCESSOR_H -#define LSST_QSERV_WBASE_MSG_PROCESSOR_H - -// System headers -#include -#include - -// Third party headers -#include "nlohmann/json.hpp" - -// Forward declarations -namespace lsst::qserv::wbase { -class Task; -struct TaskSelector; -class WorkerCommand; -} // namespace lsst::qserv::wbase - -namespace lsst::qserv::wbase { - -/// MsgProcessor implementations handle incoming Task objects. -struct MsgProcessor { - virtual ~MsgProcessor() {} - - /// Process a group of query processing tasks. - virtual void processTasks(std::vector> const& tasks) = 0; - - /// Process a managememt command - virtual void processCommand(std::shared_ptr const& command) = 0; - - /** - * Retreive the status of queries being processed by the worker. - * @param taskSelector Task selection criterias. - * @return a JSON representation of the object's status for the monitoring - */ - virtual nlohmann::json statusToJson(wbase::TaskSelector const& taskSelector) = 0; -}; - -} // namespace lsst::qserv::wbase - -#endif // LSST_QSERV_WBASE_MSG_PROCESSOR_H diff --git a/src/wbase/SendChannel.cc b/src/wbase/SendChannel.cc index 21e459ee87..a3206d78d4 100644 --- a/src/wbase/SendChannel.cc +++ b/src/wbase/SendChannel.cc @@ -37,11 +37,9 @@ #include "lsst/log/Log.h" // Qserv headers -#include "proto/ProtoHeaderWrap.h" #include "global/LogContext.h" #include "util/common.h" #include "util/Timer.h" -#include "xrdsvc/SsiRequest.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.SendChannel"); @@ -52,28 +50,13 @@ using namespace std; namespace lsst::qserv::wbase { /// NopChannel is a NOP implementation of SendChannel for development and -/// debugging code without an XrdSsi channel. +/// debugging code without an actual channel. 
class NopChannel : public SendChannel { public: NopChannel() {} - - bool send(char const* buf, int bufLen) override { - cout << "NopChannel send(" << (void*)buf << ", " << bufLen << ");\n"; - return !isDead(); - } - - bool sendError(string const& msg, int code) override { - if (kill("NopChannel")) return false; - cout << "NopChannel sendError(\"" << msg << "\", " << code << ");\n"; - return true; - } - bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last) override { - cout << "NopChannel sendStream(" << (void*)sBuf.get() << ", " << (last ? "true" : "false") << ");\n"; - return !isDead(); - } }; -SendChannel::Ptr SendChannel::newNopChannel() { return make_shared(); } +SendChannel::Ptr SendChannel::newNopChannel() { return std::shared_ptr(new NopChannel()); } /// StringChannel is an almost-trivial implementation of a SendChannel that /// remembers what it has received. @@ -81,52 +64,12 @@ class StringChannel : public SendChannel { public: StringChannel(string& dest) : _dest(dest) {} - bool send(char const* buf, int bufLen) override { - if (isDead()) return false; - _dest.append(buf, bufLen); - return true; - } - - bool sendError(string const& msg, int code) override { - if (kill("StringChannel")) return false; - ostringstream os; - os << "(" << code << "," << msg << ")"; - _dest.append(os.str()); - return true; - } - - bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last) override { - if (isDead()) return false; - char const* buf = sBuf->data; - size_t bufLen = sBuf->getSize(); - _dest.append(buf, bufLen); - cout << "StringChannel sendStream(" << (void*)buf << ", " << bufLen << ", " - << (last ? "true" : "false") << ");\n"; - return true; - } - private: string& _dest; }; -SendChannel::Ptr SendChannel::newStringChannel(string& d) { return make_shared(d); } - -/// This is the standard definition of SendChannel which actually does something! 
-/// We vector responses posted to SendChannel via the tightly bound SsiRequest -/// object as this object knows how to effect Ssi responses. -/// -bool SendChannel::send(char const* buf, int bufLen) { - if (isDead()) return false; - if (_ssiRequest->reply(buf, bufLen)) return true; - kill("SendChannel::send"); - return false; -} - -bool SendChannel::sendError(string const& msg, int code) { - // Kill this send channel. If it wasn't already dead, send the error. - if (kill("SendChannel::sendError")) return false; - if (_ssiRequest->replyError(msg.c_str(), code)) return true; - return false; +SendChannel::Ptr SendChannel::newStringChannel(string& d) { + return std::shared_ptr(new StringChannel(d)); } bool SendChannel::kill(std::string const& note) { @@ -139,36 +82,7 @@ bool SendChannel::kill(std::string const& note) { bool SendChannel::isDead() { if (_dead) return true; - if (_ssiRequest == nullptr) return true; - if (_ssiRequest->isFinished()) kill("SendChannel::isDead"); return _dead; } -bool SendChannel::sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last) { - if (isDead()) return false; - if (_ssiRequest->replyStream(sBuf, last)) return true; - LOGS(_log, LOG_LVL_ERROR, "_ssiRequest->replyStream failed, killing."); - kill("SendChannel::sendStream"); - return false; -} - -bool SendChannel::sendData(char const* buf, int bufLen) { - if (isDead()) return false; - if (_ssiRequest->reply(buf, bufLen)) return true; - LOGS(_log, LOG_LVL_ERROR, "_ssiRequest->reply failed, killing."); - kill("SendChannel::sendData"); - return false; -} - -bool SendChannel::setMetadata(const char* buf, int blen) { - if (isDead()) return false; - if (_ssiRequest->sendMetadata(buf, blen)) return true; - return false; -} - -uint64_t SendChannel::getSeq() const { - if (_ssiRequest == nullptr) return 0; - return _ssiRequest->getSeq(); -} - } // namespace lsst::qserv::wbase diff --git a/src/wbase/SendChannel.h b/src/wbase/SendChannel.h index 0753e0aeff..dfad76875c 100644 --- 
a/src/wbase/SendChannel.h +++ b/src/wbase/SendChannel.h @@ -23,57 +23,25 @@ #define LSST_QSERV_WBASE_SENDCHANNEL_H // System headers +#include #include #include #include -// Qserv headers -#include "xrdsvc/StreamBuffer.h" +namespace lsst::qserv { namespace wbase { -namespace lsst::qserv { -namespace xrdsvc { -class SsiRequest; // Forward declaration -} -namespace wbase { - -/// SendChannel objects abstract an byte-output mechanism. Provides a layer of -/// abstraction to reduce coupling to the XrdSsi API. SendChannel generally -/// accepts only one call to send bytes, unless the sendStream call is used. +/// SendChannel is used to send information about results +/// and errors back to the czar so that the czar can collect +/// the results or cancel the related data. class SendChannel { public: using Ptr = std::shared_ptr; using Size = long long; - SendChannel(std::shared_ptr const& s) : _ssiRequest(s) {} SendChannel() {} // Strictly for non-Request versions of this object. virtual ~SendChannel() {} - /// ****************************************************************** - /// The following methods are used to send responses back to a request. - /// The "send" calls may vector the response via the tightly bound - /// SsiRequest object (the constructor default) or use some other - /// mechanism (see newNopChannel and newStringChannel). - /// - virtual bool send(char const* buf, int bufLen); - virtual bool sendError(std::string const& msg, int code); - - /// Send a bucket of bytes. - /// @param last true if no more sendStream calls will be invoked. - virtual bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last); - - /// Send the data. - virtual bool sendData(char const* buf, int bufLen); - - /// - /// ****************************************************************** - - /// Set a function to be called when a resources from a deferred send* - /// operation may be released. 
This allows a caller to be - /// notified when the file descriptor may be closed and perhaps reclaimed. - void setReleaseFunc(std::function const& r) { _release = r; } - void release() { _release(); } - /// Construct a new NopChannel that ignores everything it is asked to send static SendChannel::Ptr newNopChannel(); @@ -81,10 +49,6 @@ class SendChannel { /// provided by reference at construction. static SendChannel::Ptr newStringChannel(std::string& dest); - /// @return true if metadata was set. - /// buff must remain valid until the transmit is complete. - bool setMetadata(const char* buf, int blen); - /// Kill this SendChannel /// @ return the previous value of _dead bool kill(std::string const& note); @@ -95,17 +59,10 @@ class SendChannel { /// Set just before destorying this object to prevent pointless error messages. void setDestroying() { _destroying = true; } - uint64_t getSeq() const; - -protected: - std::function _release = []() { ; }; ///< Function to release resources. - private: - std::shared_ptr _ssiRequest; std::atomic _dead{false}; ///< True if there were any failures using this SendChanel. 
std::atomic _destroying{false}; }; -} // namespace wbase -} // namespace lsst::qserv +}} // namespace lsst::qserv::wbase #endif // LSST_QSERV_WBASE_SENDCHANNEL_H diff --git a/src/wbase/Task.cc b/src/wbase/Task.cc index b6f86633a1..f56165782a 100644 --- a/src/wbase/Task.cc +++ b/src/wbase/Task.cc @@ -37,7 +37,6 @@ // Third-party headers #include -#include "boost/filesystem.hpp" // LSST headers #include "lsst/log/Log.h" @@ -46,8 +45,9 @@ #include "global/constants.h" #include "global/LogContext.h" #include "global/UnsupportedError.h" +#include "http/RequestBodyJSON.h" #include "mysql/MySqlConfig.h" -#include "proto/worker.pb.h" +#include "protojson/UberJobMsg.h" #include "util/Bug.h" #include "util/common.h" #include "util/HoldTrack.h" @@ -56,6 +56,7 @@ #include "util/TimeUtils.h" #include "wbase/Base.h" #include "wbase/FileChannelShared.h" +#include "wbase/UberJobData.h" #include "wbase/UserQueryInfo.h" #include "wconfig/WorkerConfig.h" #include "wdb/QueryRunner.h" @@ -63,31 +64,31 @@ using namespace std; using namespace std::chrono_literals; -namespace fs = boost::filesystem; +using namespace nlohmann; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.Task"); -string buildResultFileName(shared_ptr const& taskMsg) { - auto const resultFileName = - lsst::qserv::util::ResultFileName(taskMsg->czarid(), taskMsg->queryid(), taskMsg->jobid(), - taskMsg->chunkid(), taskMsg->attemptcount()); - return resultFileName.fileName(); -} - -string buildResultFilePath(string const& resultFileName, string const& resultsDirname) { - if (resultsDirname.empty()) return resultsDirname; - return fs::weakly_canonical(fs::path(resultsDirname) / resultFileName).string(); -} - size_t const MB_SIZE_BYTES = 1024 * 1024; } // namespace namespace lsst::qserv::wbase { -string const Task::_fqdn = util::get_current_host_fqdn(); +TaskUseRAII::Ptr TaskUseRAII::create(wpublish::ChunkStatistics::Ptr const& chunkStats, + std::string const& dbName) { + return Ptr(new TaskUseRAII(chunkStats, 
dbName)); +} + +TaskUseRAII::TaskUseRAII(wpublish::ChunkStatistics::Ptr const& chunkStats, std::string const& dbName) + : _dbChunkStats(chunkStats), _dbName(dbName) { + if (_dbChunkStats) _dbChunkStats->incrDbUseCount(_dbName); +} + +TaskUseRAII::~TaskUseRAII() { + if (_dbChunkStats) _dbChunkStats->decrDbUseCount(_dbName); +} // Task::ChunkEqual functor bool Task::ChunkEqual::operator()(Task::Ptr const& x, Task::Ptr const& y) { @@ -106,7 +107,6 @@ bool Task::ChunkIdGreater::operator()(Task::Ptr const& x, Task::Ptr const& y) { } string const Task::defaultUser = "qsmaster"; -IdSet Task::allIds{}; TaskScheduler::TaskScheduler() { auto hour = chrono::milliseconds(1h); @@ -122,151 +122,265 @@ atomic taskSequence{0}; ///< Unique identifier source for Task. /// available to define the action to take when this task is run, so /// Command::setFunc() is used set the action later. This is why /// the util::CommandThreadPool is not called here. -Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, shared_ptr const& sc, - uint16_t resultsHttpPort) - : _userQueryInfo(userQueryInfo), - _sendChannel(sc), +Task::Task(UberJobData::Ptr const& ujData, int jobId, int attemptCount, int chunkId, int fragmentNumber, + size_t templateId, bool hasSubchunks, int subchunkId, string const& db, + vector const& fragSubTables, vector const& fragSubchunkIds, + wpublish::QueryStatistics::Ptr const& queryStats_) + : _logLvlWT(LOG_LVL_WARN), + _logLvlET(LOG_LVL_ERROR), _tSeq(++taskSequence), - _qId(t->queryid()), + _qId(ujData->getQueryId()), _templateId(templateId), - _hasChunkId(t->has_chunkid()), - _chunkId(t->has_chunkid() ? 
t->chunkid() : -1), + _hasChunkId((chunkId >= 0)), + _chunkId(chunkId), _subchunkId(subchunkId), - _jId(t->jobid()), - _attemptCount(t->attemptcount()), + _jId(jobId), + _attemptCount(attemptCount), _queryFragmentNum(fragmentNumber), - _fragmentHasSubchunks(t->fragment(fragmentNumber).has_subchunks()), - _hasDb(t->has_db()), - _db(t->has_db() ? t->db() : ""), - _czarId(t->has_czarid() ? t->czarid() : -1) { - // These attributes will be passed back to Czar in the Protobuf response - // to advice which result delivery channel to use. - auto const workerConfig = wconfig::WorkerConfig::instance(); - _resultFileName = ::buildResultFileName(t); - _resultFileAbsPath = ::buildResultFilePath(_resultFileName, workerConfig->resultsDirname()); - _resultFileHttpUrl = "http://" + _fqdn + ":" + to_string(resultsHttpPort) + "/" + _resultFileName; - if (t->has_user()) { - user = t->user(); - } else { - user = defaultUser; - } - - allIds.add(to_string(_qId) + "_" + to_string(_jId)); - LOGS(_log, LOG_LVL_DEBUG, "Task(...) " << "this=" << this << " : " << allIds); - - // Determine which major tables this task will use. - int const size = t->scantable_size(); - for (int j = 0; j < size; ++j) { - _scanInfo.infoTables.push_back(proto::ScanTableInfo(t->scantable(j))); - } - _scanInfo.scanRating = t->scanpriority(); - _scanInfo.sortTablesSlowestFirst(); - _scanInteractive = t->scaninteractive(); - _maxTableSize = t->maxtablesize_mb() * ::MB_SIZE_BYTES; + _fragmentHasSubchunks(hasSubchunks), + _db(db), + _czarId(ujData->getCzarId()), + _queryStats(queryStats_), + _rowLimit(ujData->getRowLimit()), + _ujData(ujData), + _idStr(ujData->getIdStr() + " jId=" + to_string(_jId) + " sc=" + to_string(_subchunkId)) { + user = defaultUser; // Create sets and vectors for 'aquiring' subchunk temporary tables. 
- proto::TaskMsg_Fragment const& fragment(t->fragment(_queryFragmentNum)); + // Fill in _dbTblsAndSubchunks DbTableSet dbTbls_; IntVector subchunksVect_; if (!_fragmentHasSubchunks) { /// FUTURE: Why acquire anything if there are no subchunks in the fragment? /// This branch never seems to happen, but this needs to be proven beyond any doubt. - LOGS(_log, LOG_LVL_WARN, "Task::Task not _fragmentHasSubchunks"); - for (auto const& scanTbl : t->scantable()) { - dbTbls_.emplace(scanTbl.db(), scanTbl.table()); - LOGS(_log, LOG_LVL_INFO, - "Task::Task scanTbl.db()=" << scanTbl.db() << " scanTbl.table()=" << scanTbl.table()); + auto scanInfo = _ujData->getScanInfo(); + for (auto const& scanTbl : scanInfo->infoTables) { + dbTbls_.emplace(scanTbl.db, scanTbl.table); + LOGS(_log, LOG_LVL_TRACE, + "Task::Task scanTbl.db=" << scanTbl.db << " scanTbl.table=" << scanTbl.table); } - LOGS(_log, LOG_LVL_INFO, + LOGS(_log, LOG_LVL_TRACE, "fragment a db=" << _db << ":" << _chunkId << " dbTbls=" << util::printable(dbTbls_)); } else { - proto::TaskMsg_Subchunk const& sc = fragment.subchunks(); - for (int j = 0; j < sc.dbtbl_size(); j++) { + for (TaskDbTbl const& fDbTbl : fragSubTables) { /// Different subchunk fragments can require different tables. /// FUTURE: It may save space to store these in UserQueryInfo as it seems /// database and table names are consistent across chunks. 
- dbTbls_.emplace(sc.dbtbl(j).db(), sc.dbtbl(j).tbl()); + dbTbls_.emplace(fDbTbl.db, fDbTbl.tbl); LOGS(_log, LOG_LVL_TRACE, - "Task::Task subchunk j=" << j << " sc.dbtbl(j).db()=" << sc.dbtbl(j).db() - << " sc.dbtbl(j).tbl()=" << sc.dbtbl(j).tbl()); + "Task::Task subchunk fDbTbl.db=" << fDbTbl.db << " fDbTbl.tbl=" << fDbTbl.tbl); } - IntVector sVect(sc.id().begin(), sc.id().end()); - subchunksVect_ = sVect; - if (sc.has_database()) { - _db = sc.database(); - } else { - _db = t->db(); - } - LOGS(_log, LOG_LVL_DEBUG, + subchunksVect_ = fragSubchunkIds; + + LOGS(_log, LOG_LVL_TRACE, "fragment b db=" << _db << ":" << _chunkId << " dbTableSet" << util::printable(dbTbls_) << " subChunks=" << util::printable(subchunksVect_)); } - _dbTblsAndSubchunks = make_unique(dbTbls_, subchunksVect_); - if (_sendChannel == nullptr) { - throw util::Bug(ERR_LOC, "Task::Task _sendChannel==null " + getIdStr()); + + // Find the ChunkStatistics for the chunkId. + auto queriesAndChunks_ = _ujData->getQueriesAndChunks().lock(); + if (queriesAndChunks_ != nullptr) { + set dbsUsed; + lock_guard taskUseVectMtxLock(_taskUseVectMtx); + wpublish::ChunkStatistics::Ptr chunkStats = queriesAndChunks_->getChunkStatistics(_chunkId); + for (auto const& fDbTbl : dbTbls_) { + // Add TaskUseCount using fragSubTables. 
+ dbsUsed.insert(fDbTbl.db); + } + for (auto const& dbu : dbsUsed) { + _taskUseVect.push_back(chunkStats->getTaskUseRAII(dbu)); + } } -} -Task::~Task() { - allIds.remove(to_string(_qId) + "_" + to_string(_jId)); - LOGS(_log, LOG_LVL_TRACE, "~Task() : " << allIds); + _dbTblsAndSubchunks = make_unique(dbTbls_, subchunksVect_); - _userQueryInfo.reset(); - UserQueryInfo::uqMapErase(_qId); - if (UserQueryInfo::uqMapGet(_qId) == nullptr) { - LOGS(_log, LOG_LVL_TRACE, "~Task Cleared uqMap entry for _qId=" << _qId); - } + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " created"); } -vector Task::createTasks(shared_ptr const& taskMsg, - shared_ptr const& sendChannel, - shared_ptr const& chunkResourceMgr, - mysql::MySqlConfig const& mySqlConfig, - shared_ptr const& sqlConnMgr, - shared_ptr const& queriesAndChunks, - uint16_t resultsHttpPort) { - QueryId qId = taskMsg->queryid(); - QSERV_LOGCONTEXT_QUERY_JOB(qId, taskMsg->jobid()); - vector vect; +Task::~Task() {} + +std::vector Task::createTasksFromUberJobMsg( + std::shared_ptr const& ujMsg, std::shared_ptr const& ujData, + std::shared_ptr const& chunkResourceMgr, mysql::MySqlConfig const& mySqlConfig, + std::shared_ptr const& sqlConnMgr) { + QueryId qId = ujData->getQueryId(); + UberJobId ujId = ujData->getUberJobId(); + CzarId czId = ujData->getCzarId(); + auto const queriesAndChunks = ujData->getQueriesAndChunks().lock(); + if (queriesAndChunks == nullptr) { + throw util::Bug(ERR_LOC, "Task::createTasksFromUberJobMsg queriesAndChunks is null"); + } + + vector vect; // List of created tasks to be returned. + wpublish::QueryStatistics::Ptr queryStats = queriesAndChunks->addQueryId(qId, czId); + UserQueryInfo::Ptr userQueryInfo = queryStats->getUserQueryInfo(); - UserQueryInfo::Ptr userQueryInfo = UserQueryInfo::uqMapInsert(qId); + string funcN(__func__); + funcN += " QID=" + to_string(qId) + " "; - /// Make one task for each fragment. 
- int fragmentCount = taskMsg->fragment_size(); - if (fragmentCount < 1) { - throw util::Bug(ERR_LOC, "Task::createTasks No fragments to execute in TaskMsg"); + if (ujMsg->getQueryId() != qId) { + throw util::Bug(ERR_LOC, "Task::createTasksFromUberJobMsg qId(" + to_string(qId) + + ") did not match ujMsg->qId(" + to_string(ujMsg->getQueryId()) + + ")"); + } + if (ujMsg->getUberJobId() != ujId) { + throw util::Bug(ERR_LOC, "Task::createTasksFromUberJobMsg ujId(" + to_string(ujId) + + ") did not match ujMsg->qId(" + to_string(ujMsg->getUberJobId()) + + ")"); } - string const chunkIdStr = to_string(taskMsg->chunkid()); - for (int fragNum = 0; fragNum < fragmentCount; ++fragNum) { - proto::TaskMsg_Fragment const& fragment = taskMsg->fragment(fragNum); - for (string queryStr : fragment.query()) { - size_t templateId = userQueryInfo->addTemplate(queryStr); - if (fragment.has_subchunks() && not fragment.subchunks().id().empty()) { - for (auto subchunkId : fragment.subchunks().id()) { - auto task = make_shared(taskMsg, fragNum, userQueryInfo, templateId, - subchunkId, sendChannel, resultsHttpPort); + std::string workerId = ujMsg->getWorkerId(); + auto jobSubQueryTempMap = ujMsg->getJobSubQueryTempMap(); + auto jobDbTablesMap = ujMsg->getJobDbTableMap(); + auto jobMsgVect = ujMsg->getJobMsgVect(); + + for (auto const& jobMsg : *jobMsgVect) { + JobId jobId = jobMsg->getJobId(); + int attemptCount = jobMsg->getAttemptCount(); + std::string chunkQuerySpecDb = jobMsg->getChunkQuerySpecDb(); + int chunkId = jobMsg->getChunkId(); + + auto jobFragments = jobMsg->getJobFragments(); + int fragmentNumber = 0; + + for (auto const& fMsg : *jobFragments) { + // These need to be constructed for the fragment + vector fragSubQueries; + vector fragSubTables; + vector fragSubchunkIds; + + vector fsqIndexes = fMsg->getJobSubQueryTempIndexes(); + for (int fsqIndex : fsqIndexes) { + string fsqStr = jobSubQueryTempMap->getSubQueryTemp(fsqIndex); + fragSubQueries.push_back(fsqStr); + } + + vector 
dbTblIndexes = fMsg->getJobDbTablesIndexes(); + for (int dbTblIndex : dbTblIndexes) { + auto [scDb, scTable] = jobDbTablesMap->getDbTable(dbTblIndex); + TaskDbTbl scDbTbl(scDb, scTable); + fragSubTables.push_back(scDbTbl); + } + + fragSubchunkIds = fMsg->getSubchunkIds(); + + for (string const& fragSubQ : fragSubQueries) { + size_t templateId = userQueryInfo->addTemplate(fragSubQ); + if (fragSubchunkIds.empty()) { + bool const noSubchunks = false; + int const subchunkId = -1; + auto task = Task::Ptr(new Task(ujData, jobId, attemptCount, chunkId, fragmentNumber, + templateId, noSubchunks, subchunkId, chunkQuerySpecDb, + fragSubTables, fragSubchunkIds, queryStats)); vect.push_back(task); + } else { + for (auto subchunkId : fragSubchunkIds) { + bool const hasSubchunks = true; + auto task = Task::Ptr(new Task(ujData, jobId, attemptCount, chunkId, fragmentNumber, + templateId, hasSubchunks, subchunkId, chunkQuerySpecDb, + fragSubTables, fragSubchunkIds, queryStats)); + vect.push_back(task); + } } - } else { - int subchunkId = -1; // there are no subchunks. - auto task = make_shared(taskMsg, fragNum, userQueryInfo, templateId, subchunkId, - sendChannel, resultsHttpPort); - vect.push_back(task); } + ++fragmentNumber; } } - for (auto task : vect) { + + for (auto taskPtr : vect) { // newQueryRunner sets the `_taskQueryRunner` pointer in `task`. 
- task->setTaskQueryRunner(wdb::QueryRunner::newQueryRunner(task, chunkResourceMgr, mySqlConfig, - sqlConnMgr, queriesAndChunks)); + taskPtr->setTaskQueryRunner(wdb::QueryRunner::newQueryRunner(taskPtr, chunkResourceMgr, mySqlConfig, + sqlConnMgr, queriesAndChunks)); + } + + return vect; +} + +std::vector Task::createTasksForUnitTest( + std::shared_ptr const& ujData, nlohmann::json const& jsJobs, + std::shared_ptr const& sendChannel, int maxTableSizeMb, + std::shared_ptr const& chunkResourceMgr) { + vector vect; + auto const qId = ujData->getQueryId(); + auto const czId = ujData->getCzarId(); + auto const queriesAndChunks = ujData->getQueriesAndChunks().lock(); + if (queriesAndChunks == nullptr) { + throw util::Bug(ERR_LOC, "Task::createTasksForUnitTest queriesAndChunks is null"); + } + protojson::JobSubQueryTempMap::Ptr jobSubQueryTempMap{protojson::JobSubQueryTempMap::create()}; + protojson::JobDbTableMap::Ptr jobDbTablesMap{protojson::JobDbTableMap::create()}; + protojson::JobMsg::VectPtr jobMsgVect{new protojson::JobMsg::Vect()}; + for (auto const& jsUjJob : jsJobs) { + protojson::JobMsg::Ptr jobMsgPtr = + protojson::JobMsg::createFromJson(jsUjJob, jobSubQueryTempMap, jobDbTablesMap); + jobMsgVect->push_back(jobMsgPtr); + } + + wpublish::QueryStatistics::Ptr queryStats = queriesAndChunks->addQueryId(qId, czId); + UserQueryInfo::Ptr userQueryInfo = queryStats->getUserQueryInfo(); + + for (auto const& jobMsg : *jobMsgVect) { + JobId jobId = jobMsg->getJobId(); + int attemptCount = jobMsg->getAttemptCount(); + std::string chunkQuerySpecDb = jobMsg->getChunkQuerySpecDb(); + int chunkId = jobMsg->getChunkId(); + + auto jobFragments = jobMsg->getJobFragments(); + int fragmentNumber = 0; + + for (auto const& fMsg : *jobFragments) { + // These need to be constructed for the fragment + vector fragSubQueries; + vector fragSubTables; + vector fragSubchunkIds; + + vector fsqIndexes = fMsg->getJobSubQueryTempIndexes(); + for (int fsqIndex : fsqIndexes) { + string fsqStr = 
jobSubQueryTempMap->getSubQueryTemp(fsqIndex); + fragSubQueries.push_back(fsqStr); + } + + vector dbTblIndexes = fMsg->getJobDbTablesIndexes(); + for (int dbTblIndex : dbTblIndexes) { + auto [scDb, scTable] = jobDbTablesMap->getDbTable(dbTblIndex); + TaskDbTbl scDbTbl(scDb, scTable); + fragSubTables.push_back(scDbTbl); + } + + fragSubchunkIds = fMsg->getSubchunkIds(); + + for (string const& fragSubQ : fragSubQueries) { + size_t templateId = userQueryInfo->addTemplate(fragSubQ); + if (fragSubchunkIds.empty()) { + bool const noSubchunks = false; + int const subchunkId = -1; + auto task = Task::Ptr(new Task(ujData, jobId, attemptCount, chunkId, fragmentNumber, + templateId, noSubchunks, subchunkId, chunkQuerySpecDb, + fragSubTables, fragSubchunkIds, queryStats)); + vect.push_back(task); + } else { + for (auto subchunkId : fragSubchunkIds) { + bool const hasSubchunks = true; + auto task = Task::Ptr(new Task(ujData, jobId, attemptCount, chunkId, fragmentNumber, + templateId, hasSubchunks, subchunkId, chunkQuerySpecDb, + fragSubTables, fragSubchunkIds, queryStats)); + vect.push_back(task); + } + } + } + ++fragmentNumber; + } } - sendChannel->setTaskCount(vect.size()); return vect; } +protojson::ScanInfo::Ptr Task::getScanInfo() const { return _ujData->getScanInfo(); } + +bool Task::getScanInteractive() const { return _ujData->getScanInteractive(); } + +FileChannelShared::Ptr Task::getSendChannel() const { return _ujData->getFileChannelShared(); } + void Task::action(util::CmdData* data) { string tIdStr = getIdStr(); if (_queryStarted.exchange(true)) { @@ -284,34 +398,44 @@ void Task::action(util::CmdData* data) { // Get a local copy for safety. 
auto qr = _taskQueryRunner; bool success = false; + string errStr = getUberJobData()->getWorkerId() + " "; try { - success = qr->runQuery(); + success = qr->runQuery(errStr); } catch (UnsupportedError const& e) { LOGS(_log, LOG_LVL_ERROR, __func__ << " runQuery threw UnsupportedError " << e.what() << tIdStr); + errStr += string(" exception:") + e.what(); } if (not success) { - LOGS(_log, LOG_LVL_ERROR, "runQuery failed " << tIdStr); - if (not getSendChannel()->kill("Foreman::_setRunFunc")) { - LOGS(_log, LOG_LVL_WARN, "runQuery sendChannel already killed " << tIdStr); + LOGS(_log, _logLvlET, "runQuery failed " << tIdStr); + if (not getSendChannel()->kill("Task::action")) { + LOGS(_log, _logLvlWT, "runQuery sendChannel already killed " << tIdStr); } + // This is what gets sent if a more specific error has not been sent already. + util::MultiError multiErr; + bool logLvl = (_logLvlET != LOG_LVL_TRACE); + string const strMsg = string("worker run error chunk=") + to_string(_chunkId) + ":" + errStr + + " worker=" + getUberJobData()->getWorkerId(); + util::Error err(util::Error::WORKER_QUERY, util::Error::NONE, strMsg, logLvl); + multiErr.insert(err); + _ujData->responseError(multiErr, -1, false, _logLvlET); } - - // The QueryRunner class access to sendChannel for results is over by this point. - // 'task' contains statistics that are still useful. However, the resources used - // by sendChannel need to be freed quickly. - LOGS(_log, LOG_LVL_DEBUG, __func__ << " calling resetSendChannel() for " << tIdStr); - resetSendChannel(); // Frees its xrdsvc::SsiRequest object. 
} string Task::getQueryString() const { - string qs = _userQueryInfo->getTemplate(_templateId); + auto qStats = _queryStats.lock(); + if (qStats == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " _queryStats could not be locked"); + return string(""); + } + + auto uQInfo = qStats->getUserQueryInfo(); + string qs = uQInfo->getTemplate(_templateId); boost::algorithm::replace_all(qs, CHUNK_TAG, to_string(_chunkId)); boost::algorithm::replace_all(qs, SUBCHUNK_TAG, to_string(_subchunkId)); + LOGS(_log, LOG_LVL_TRACE, cName(__func__) << " qs=" << qs); return qs; } -void Task::setQueryStatistics(wpublish::QueryStatistics::Ptr const& qStats) { _queryStats = qStats; } - wpublish::QueryStatistics::Ptr Task::getQueryStats() const { auto qStats = _queryStats.lock(); if (qStats == nullptr) { @@ -321,54 +445,54 @@ wpublish::QueryStatistics::Ptr Task::getQueryStats() const { } /// Flag the Task as cancelled, try to stop the SQL query, and try to remove it from the schedule. -void Task::cancel() { +void Task::cancel(bool logIt) { if (_cancelled.exchange(true)) { // Was already cancelled. return; } - util::HoldTrack::Mark markA(ERR_LOC, "Task::cancel"); - LOGS(_log, LOG_LVL_DEBUG, "Task::cancel " << getIdStr()); + if (logIt) { + if (!_ujData->getCancelled()) { + LOGS(_log, LOG_LVL_DEBUG, "Task::cancel " << getIdStr() << " UberJob still live."); + } else { + LOGS(_log, LOG_LVL_TRACE, "Task::cancel " << getIdStr()); + } + } auto qr = _taskQueryRunner; // Need a copy in case _taskQueryRunner is reset. if (qr != nullptr) { qr->cancel(); } - // At this point, this code doesn't do anything. It may be - // useful to remove this task from the scheduler, but it - // seems doubtful that that would improve performance. - auto sched = _taskScheduler.lock(); - if (sched != nullptr) { - sched->taskCancelled(this); - } + _logLvlWT = LOG_LVL_TRACE; + _logLvlET = LOG_LVL_TRACE; } bool Task::checkCancelled() { - // A czar doesn't directly tell the worker the query is dead. 
- // A czar has XrdSsi kill the SsiRequest, which kills the - // sendChannel used by this task. sendChannel can be killed - // in other ways, however, without the sendChannel, this task - // has no way to return anything to the originating czar and - // may as well give up now. - if (_sendChannel == nullptr || _sendChannel->isDead()) { - // The sendChannel is dead, probably squashed by the czar. + // The czar does tell the worker a query id is cancelled. + // Returning true here indicates there's no point in doing + // any more processing for this Task. + if (_cancelled) return true; + auto const sendChannel = getSendChannel(); + if (sendChannel == nullptr || sendChannel->isDead() || sendChannel->isRowLimitComplete()) { cancel(); } return _cancelled; } -/// @return true if task has already been cancelled. -bool Task::setTaskQueryRunner(TaskQueryRunner::Ptr const& taskQueryRunner) { +bool Task::setTaskQueryRunner(wdb::QueryRunner::Ptr const& taskQueryRunner) { _taskQueryRunner = taskQueryRunner; return checkCancelled(); } -void Task::freeTaskQueryRunner(TaskQueryRunner* tqr) { +void Task::freeTaskQueryRunner(wdb::QueryRunner* tqr) { + // Only free _taskQueryRunner if it's the expected one. if (_taskQueryRunner.get() == tqr) { _taskQueryRunner.reset(); } else { LOGS(_log, LOG_LVL_WARN, "Task::freeTaskQueryRunner pointer didn't match!"); } + lock_guard taskUseVectMtxLock(_taskUseVectMtx); + _taskUseVect.clear(); } /// Set values associated with the Task being put on the queue. 
@@ -391,21 +515,21 @@ bool Task::isRunning() const { } void Task::started(chrono::system_clock::time_point const& now) { - LOGS(_log, LOG_LVL_DEBUG, __func__ << " " << getIdStr() << " started"); + LOGS(_log, LOG_LVL_TRACE, __func__ << " " << getIdStr() << " started"); lock_guard guard(_stateMtx); _state = TaskState::STARTED; _startTime = now; } void Task::queryExecutionStarted() { - LOGS(_log, LOG_LVL_DEBUG, __func__ << " " << getIdStr() << " executing"); + LOGS(_log, LOG_LVL_TRACE, __func__ << " " << getIdStr() << " executing"); lock_guard guard(_stateMtx); _state = TaskState::EXECUTING_QUERY; _queryExecTime = chrono::system_clock::now(); } void Task::queried() { - LOGS(_log, LOG_LVL_DEBUG, __func__ << " " << getIdStr() << " reading"); + LOGS(_log, LOG_LVL_TRACE, __func__ << " " << getIdStr() << " reading"); lock_guard guard(_stateMtx); _state = TaskState::READING_DATA; _queryTime = chrono::system_clock::now(); @@ -417,7 +541,7 @@ void Task::queried() { /// Set values associated with the Task being finished. /// @return milliseconds to complete the Task, system clock time. 
chrono::milliseconds Task::finished(chrono::system_clock::time_point const& now) { - LOGS(_log, LOG_LVL_DEBUG, __func__ << " " << getIdStr() << " finished"); + LOGS(_log, LOG_LVL_TRACE, __func__ << " " << getIdStr() << " finished"); chrono::milliseconds duration; { lock_guard guard(_stateMtx); @@ -429,7 +553,7 @@ chrono::milliseconds Task::finished(chrono::system_clock::time_point const& now) if (duration.count() < 1) { duration = chrono::milliseconds{1}; } - LOGS(_log, LOG_LVL_DEBUG, "processing millisecs=" << duration.count()); + LOGS(_log, LOG_LVL_TRACE, "processing millisecs=" << duration.count()); return duration; } @@ -467,8 +591,7 @@ nlohmann::json Task::getJson() const { js["fragmentId"] = _queryFragmentNum; js["attemptId"] = _attemptCount; js["sequenceId"] = _tSeq; - js["scanInteractive"] = _scanInteractive; - js["maxTableSize"] = _maxTableSize; + js["maxTableSize"] = _ujData->getMaxTableSizeBytes(); js["cancelled"] = to_string(_cancelled); js["state"] = static_cast(_state.load()); js["createTime_msec"] = util::TimeUtils::tp2ms(_createTime); @@ -486,29 +609,21 @@ nlohmann::json Task::getJson() const { return js; } -ostream& operator<<(ostream& os, Task const& t) { - os << "Task: " - << "msg: " << t.getIdStr() << " chunk=" << t._chunkId << " db=" << t._db << " " << t.getQueryString(); - - return os; -} +int64_t Task::getMaxTableSize() const { return _ujData->getMaxTableSizeBytes(); } -ostream& operator<<(ostream& os, IdSet const& idSet) { - // Limiting output as number of entries can be very large. - int maxDisp = idSet.maxDisp; // only affects the amount of data printed. 
- lock_guard lock(idSet.mx); - os << "showing " << maxDisp << " of count=" << idSet._ids.size() << " "; - bool first = true; - int i = 0; - for (auto id : idSet._ids) { - if (!first) { - os << ", "; - } else { - first = false; - } - os << id; - if (++i >= maxDisp) break; +ostream& Task::dump(ostream& os) const { + os << "Task: " + << "msg: " << getIdStr() << " chunk=" << _chunkId << " seq=" << _tSeq << " db=" << _db << " " + << getQueryString(); + if (_ujData == nullptr) { + os << " ujData=null"; + return os; + } + if (_ujData->getScanInfo() == nullptr) { + os << " scanInfo=null"; + return os; } + _ujData->getScanInfo()->dump(os); return os; } diff --git a/src/wbase/Task.h b/src/wbase/Task.h index 22663ba5aa..55415f26a6 100644 --- a/src/wbase/Task.h +++ b/src/wbase/Task.h @@ -41,7 +41,7 @@ // Qserv headers #include "global/DbTable.h" #include "global/intTypes.h" -#include "proto/ScanTableInfo.h" +#include "protojson/ScanTableInfo.h" #include "wbase/TaskState.h" #include "util/Histogram.h" #include "util/ThreadPool.h" @@ -50,10 +50,11 @@ namespace lsst::qserv::mysql { class MySqlConfig; } -namespace lsst::qserv::proto { -class TaskMsg; -class TaskMsg_Fragment; -} // namespace lsst::qserv::proto + +namespace lsst::qserv::protojson { +class UberJobMsg; +} + namespace lsst::qserv::wbase { class FileChannelShared; } @@ -62,23 +63,46 @@ class SqlConnMgr; } namespace lsst::qserv::wdb { class ChunkResourceMgr; -} +class QueryRunner; +} // namespace lsst::qserv::wdb namespace lsst::qserv::wpublish { +class ChunkStatistics; class QueriesAndChunks; class QueryStatistics; } // namespace lsst::qserv::wpublish namespace lsst::qserv::wbase { +class UberJobData; class UserQueryInfo; -/// Base class for tracking a database query for a worker Task. -class TaskQueryRunner { +class TaskException : public util::Issue { +public: + explicit TaskException(util::Issue::Context const& ctx, std::string const& msg) : util::Issue(ctx, msg) {} +}; + +/// Class for storing database + table name. 
+class TaskDbTbl { +public: + TaskDbTbl() = delete; + TaskDbTbl(std::string const& db_, std::string const& tbl_) : db(db_), tbl(tbl_) {} + std::string const db; + std::string const tbl; +}; + +/// Class to manage the use count of a database for a Task. +class TaskUseRAII { public: - using Ptr = std::shared_ptr; - virtual ~TaskQueryRunner() {}; - virtual bool runQuery() = 0; - virtual void cancel() = 0; ///< Repeated calls to cancel() must be harmless. + using Ptr = std::unique_ptr; + TaskUseRAII() = delete; + static Ptr create(std::shared_ptr const& chunkStats, + std::string const& dbName); + ~TaskUseRAII(); + +private: + TaskUseRAII(std::shared_ptr const& chunkStats, std::string const& dbName); + std::shared_ptr const _dbChunkStats; + std::string const _dbName; }; class Task; @@ -98,28 +122,6 @@ class TaskScheduler { util::HistogramRolling::Ptr histTimeOfTransmittingTasks; ///< Store information about transmitting tasks. }; -/// Used to find tasks that are in process for debugging with Task::_idStr. -/// This is largely meant to track down incomplete tasks in a possible intermittent -/// failure and should probably be removed when it is no longer needed. -/// It depends on code in BlendScheduler to work. If the decision is made to keep it -/// forever, dependency on BlendScheduler needs to be re-worked. -struct IdSet { - void add(std::string const& id) { - std::lock_guard lock(mx); - _ids.insert(id); - } - void remove(std::string const& id) { - std::lock_guard lock(mx); - _ids.erase(id); - } - std::atomic maxDisp{5}; //< maximum number of entries to show with operator<< - friend std::ostream& operator<<(std::ostream& os, IdSet const& idSet); - -private: - std::set _ids; - mutable std::mutex mx; -}; - /// class Task defines a query task to be done, containing a TaskMsg /// (over-the-wire) additional concrete info related to physical /// execution conditions. 
@@ -128,7 +130,6 @@ class Task : public util::CommandForThreadPool { public: static std::string const defaultUser; using Ptr = std::shared_ptr; - using TaskMsgPtr = std::shared_ptr; /// Class to store constant sets and vectors. class DbTblsAndSubchunks { @@ -155,27 +156,33 @@ class Task : public util::CommandForThreadPool { bool operator()(Ptr const& x, Ptr const& y); }; - Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, std::shared_ptr const& sc, - uint16_t resultsHttpPort = 8080); + std::string cName(const char* func) const { return std::string("Task::") + func + " " + _idStr; } + + // Hopefully, many are the same for all tasks and can be moved to ujData and userQueryInfo. + // Candidates: maxTableSizeMb, FileChannelShared, resultsHttpPort. + Task(std::shared_ptr const& ujData, int jobId, int attemptCount, int chunkId, + int fragmentNumber, size_t templateId, bool hasSubchunks, int subchunkId, std::string const& db, + std::vector const& fragSubTables, std::vector const& fragSubchunkIds, + std::shared_ptr const& queryStats_); Task& operator=(const Task&) = delete; Task(const Task&) = delete; virtual ~Task(); - /// Read 'taskMsg' to generate a vector of one or more task objects all using the same 'sendChannel' - static std::vector createTasks(std::shared_ptr const& taskMsg, - std::shared_ptr const& sendChannel, - std::shared_ptr const& chunkResourceMgr, - mysql::MySqlConfig const& mySqlConfig, - std::shared_ptr const& sqlConnMgr, - std::shared_ptr const& queriesAndChunks, - uint16_t resultsHttpPort = 8080); - - void setQueryStatistics(std::shared_ptr const& qC); - - std::shared_ptr getSendChannel() const { return _sendChannel; } - void resetSendChannel() { _sendChannel.reset(); } ///< reset the shared pointer for FileChannelShared - std::string user; ///< Incoming username + /// Create the Tasks needed to run an UberJob on this worker. 
+ static std::vector createTasksFromUberJobMsg( + std::shared_ptr const& uberJobMsg, + std::shared_ptr const& ujData, + std::shared_ptr const& chunkResourceMgr, + mysql::MySqlConfig const& mySqlConfig, std::shared_ptr const& sqlConnMgr); + + /// Create Tasks needed to run unit tests. + static std::vector createTasksForUnitTest( + std::shared_ptr const& ujData, nlohmann::json const& jsJobs, + std::shared_ptr const& sendChannel, int maxTableSizeMb, + std::shared_ptr const& chunkResourceMgr); + + std::shared_ptr getSendChannel() const; + std::string user; ///< Incoming username // Note that manpage spec of "26 bytes" is insufficient /// This is the function the scheduler will run, overriden from the util::Command class. @@ -184,20 +191,19 @@ class Task : public util::CommandForThreadPool { void action(util::CmdData* data) override; /// Cancel the query in progress and set _cancelled. - /// Query cancellation on the worker is fairly complicated. This - /// function usually called by `SsiRequest::Finished` when xrootd - /// indicates the job is cancelled. This may come from: - /// - xrootd - in the case of communications issues + /// Query cancellation on the worker is fairly complicated. + /// This may come from: /// - czar - user query was cancelled, an error, or limit reached. /// This function may also be called by `Task::checkCancelled()` - `_sendChannel` - /// has been killed, usually a result of failed communication with xrootd. + /// has been killed, usually a result of failed czar communication. /// If a `QueryRunner` object for this task exists, it must /// be cancelled to free up threads and other resources. /// Otherwise `_cancelled` is set so that an attempt /// to run this `Task` will result in a rapid exit. /// This functional also attempts to inform the scheduler for this - /// `Task` that is has been cancelled (scheduler currently does nothing in this case). - void cancel(); + /// `Task` that is has been cancelled. 
The scheduler currently does + /// nothing in this case. + void cancel(bool logIt = true); /// Check if this task should be cancelled and call cancel() as needed. /// @return true if this task was or needed to be cancelled. @@ -205,15 +211,14 @@ class Task : public util::CommandForThreadPool { TaskState state() const { return _state; } std::string getQueryString() const; - int getQueryFragmentNum() { return _queryFragmentNum; } - std::string const& resultFileAbsPath() const { return _resultFileAbsPath; } - std::string const& resultFileHttpUrl() const { return _resultFileHttpUrl; } - bool setTaskQueryRunner( - TaskQueryRunner::Ptr const& taskQueryRunner); ///< return true if already cancelled. - void freeTaskQueryRunner(TaskQueryRunner* tqr); + /// Return true if already cancelled. + bool setTaskQueryRunner(std::shared_ptr const& taskQueryRunner); + + /// Free object associated with running the SQL query, including the TaskQueryRunner object, + /// but only if the pointer matches `tqr`. + void freeTaskQueryRunner(wdb::QueryRunner* tqr); void setTaskScheduler(TaskScheduler::Ptr const& scheduler) { _taskScheduler = scheduler; } TaskScheduler::Ptr getTaskScheduler() const { return _taskScheduler.lock(); } - friend std::ostream& operator<<(std::ostream& os, Task const& t); // Shared scan information bool getHasChunkId() const { return _hasChunkId; } @@ -225,14 +230,13 @@ class Task : public util::CommandForThreadPool { size_t getTemplateId() const { return _templateId; } int getJobId() const { return _jId; } int getAttemptCount() const { return _attemptCount; } - bool getScanInteractive() { return _scanInteractive; } - int64_t getMaxTableSize() const { return _maxTableSize; } - proto::ScanInfo& getScanInfo() { return _scanInfo; } + bool getScanInteractive() const; + int64_t getMaxTableSize() const; + + protojson::ScanInfo::Ptr getScanInfo() const; void setOnInteractive(bool val) { _onInteractive = val; } bool getOnInteractive() { return _onInteractive; } - static IdSet 
allIds; // set of all task jobId numbers that are not complete. - /// @return true if qId and jId match this task's query and job ids. bool idsMatch(QueryId qId, int jId, uint64_t tseq) const { return (_qId == qId && _jId == jId && tseq == _tSeq); @@ -302,9 +306,21 @@ class Task : public util::CommandForThreadPool { setFunc(func); } + std::shared_ptr getUberJobData() const { return _ujData; } + + /// Returns the LIMIT of rows for the query enforceable at the worker, where values <= 0 indicate + /// that there is no limit to the number of rows sent back by the worker. + /// @see UberJobData::getRowLimit() + int getRowLimit() { return _rowLimit; } + + int getLvlWT() const { return _logLvlWT; } + int getLvlET() const { return _logLvlET; } + + std::ostream& dump(std::ostream& os) const override; + private: - std::shared_ptr _userQueryInfo; ///< Details common to Tasks in this UserQuery. - std::shared_ptr _sendChannel; ///< Send channel. + std::atomic _logLvlWT; ///< Normally LOG_LVL_WARN, set to TRACE in cancelled Tasks. + std::atomic _logLvlET; ///< Normally LOG_LVL_ERROR, set to TRACE in cancelled Tasks. uint64_t const _tSeq = 0; ///< identifier for the specific task QueryId const _qId = 0; ///< queryId from czar @@ -316,30 +332,25 @@ class Task : public util::CommandForThreadPool { int const _attemptCount = 0; ///< attemptCount from czar int const _queryFragmentNum; ///< The fragment number of the query in the task message. bool const _fragmentHasSubchunks; ///< True if the fragment in this query has subchunks. - bool const _hasDb; ///< true if db was in message from czar. std::string _db; ///< Task database int const _czarId; ///< czar Id from the task message. /// Set of tables and vector of subchunk ids used by ChunkResourceRequest. Do not change/reset. std::unique_ptr _dbTblsAndSubchunks; - /// The path to the result file. - std::string _resultFileAbsPath; - - /// The name of the result file. 
- std::string _resultFileName; - - /// The HTTP URL for the result file: "http://:/" + _resultFileName - std::string _resultFileHttpUrl; - std::atomic _queryStarted{false}; ///< Set to true when the query is about to be run. std::atomic _cancelled{false}; - TaskQueryRunner::Ptr _taskQueryRunner; + std::atomic _safeToMoveRunning{false}; ///< false until done with waitForMemMan(). + std::shared_ptr _taskQueryRunner; std::weak_ptr _taskScheduler; - proto::ScanInfo _scanInfo; - bool _scanInteractive; ///< True if the czar thinks this query should be interactive. - bool _onInteractive{ - false}; ///< True if the scheduler put this task on the interactive (group) scheduler. + protojson::ScanInfo::Ptr _scanInfo; + + /// True if the scheduler put this task on the interactive (group) scheduler. + bool _onInteractive{false}; + + /// Stores information on the query's resource usage. + std::weak_ptr const _queryStats; + int64_t _maxTableSize = 0; mutable std::mutex _stateMtx; ///< Mutex to protect state related members _state, _???Time. @@ -350,12 +361,9 @@ class Task : public util::CommandForThreadPool { std::chrono::system_clock::time_point _startTime; ///< task processing started std::chrono::system_clock::time_point _queryExecTime; ///< query execution at MySQL started std::chrono::system_clock::time_point _queryTime; ///< MySQL finished executing queries - std::chrono::system_clock::time_point _finishTime; ///< data transmission to Czar fiished + std::chrono::system_clock::time_point _finishTime; ///< data transmission to Czar finished size_t _totalSize = 0; ///< Total size of the result so far. - /// Stores information on the query's resource usage. - std::weak_ptr _queryStats; - std::atomic _mysqlThreadId{0}; ///< 0 if not connected to MySQL std::atomic _booted{false}; ///< Set to true if this task takes too long and is booted. @@ -363,9 +371,19 @@ class Task : public util::CommandForThreadPool { /// Time stamp for when `_booted` is set to true, otherwise meaningless. 
TIMEPOINT _bootedTime; - bool _unitTest = false; ///< + /// When > 0, indicates maximum number of rows needed for a result. + int const _rowLimit; + + /// Container for UberJob data and shared objects. + std::shared_ptr _ujData; + std::string const _idStr; + + /// Container to hold RAII objects from when this object is created until + /// _taskQueryRunner is finished or this object is destroyed. + std::vector _taskUseVect; + std::mutex _taskUseVectMtx; ///< Protects _taskUseVect. - static std::string const _fqdn; ///< Fully qualified domain name of the host. Acquired once at startup. + bool _unitTest = false; ///< Only true in unit tests. }; } // namespace lsst::qserv::wbase diff --git a/src/wbase/TaskState.h b/src/wbase/TaskState.h index 5a2f92e61c..76063a780c 100644 --- a/src/wbase/TaskState.h +++ b/src/wbase/TaskState.h @@ -38,10 +38,6 @@ namespace lsst::qserv::wbase { * @note This class and the relevant functions are put into this header to * allow the complile-time (only) dependency onto this type from other modules * without needing to link against the current module's library. - * Also note a choice of the underlying type which is meant to allow sending - * values of the type as numeric attribites in the Protobuf messages w/o - * introducing an additional (Protobuf) representation for those, or converting - * the values to strings and vs. */ enum class TaskState : std::uint64_t { CREATED = 0, diff --git a/src/wbase/UberJobData.cc b/src/wbase/UberJobData.cc new file mode 100644 index 0000000000..b18a563eea --- /dev/null +++ b/src/wbase/UberJobData.cc @@ -0,0 +1,348 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "wbase/UberJobData.h" + +// System headers + +// Third party headers +#include "boost/filesystem.hpp" + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "http/Client.h" +#include "http/Exceptions.h" +#include "http/MetaModule.h" +#include "http/Method.h" +#include "http/RequestBodyJSON.h" +#include "http/RequestQuery.h" +#include "protojson/PwHideJson.h" +#include "protojson/UberJobErrorMsg.h" +#include "protojson/UberJobReadyMsg.h" +#include "protojson/WorkerCzarComIssue.h" +#include "util/Bug.h" +#include "util/MultiError.h" +#include "util/ResultFileName.h" +#include "wbase/FileChannelShared.h" +#include "wconfig/WorkerConfig.h" +#include "wcontrol/Foreman.h" +#include "wcontrol/WCzarInfoMap.h" +#include "wpublish/ChunkInventory.h" +#include "wpublish/QueriesAndChunks.h" + +using namespace std; +using namespace nlohmann; + +namespace fs = boost::filesystem; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.UberJobData"); + +} // namespace + +namespace lsst::qserv::wbase { + +UberJobData::UberJobData(UberJobId uberJobId, std::string const& czarName, CzarId czarId, + std::string czarHost, int czarPort, uint64_t queryId, int rowLimit, + uint64_t maxTableSizeBytes, protojson::ScanInfo::Ptr const& scanInfo, + bool scanInteractive, std::string const& workerId, + std::shared_ptr const& foreman, + std::shared_ptr queriesAndChunks_, + std::string const& authKey, uint16_t resultsHttpPort) + : UberJobBase(queryId, uberJobId, czarId), + _czarName(czarName), + _czarHost(czarHost), + _czarPort(czarPort), + 
_rowLimit(rowLimit), + _maxTableSizeBytes(maxTableSizeBytes), + _workerId(workerId), + _authKey(authKey), + _resultsHttpPort(resultsHttpPort), + _foreman(foreman), + _queriesAndChunks(queriesAndChunks_), + _scanInteractive(scanInteractive), + _scanInfo(scanInfo), + _idStr(string("QID=") + to_string(_queryId) + "_ujId=" + to_string(_uberJobId)) {} + +UberJobData::Ptr UberJobData::create(UberJobId uberJobId, std::string const& czarName, CzarId czarId, + std::string const& czarHost, int czarPort, uint64_t queryId, + int rowLimit, uint64_t maxTableSizeBytes, + std::shared_ptr const& scanInfo, + bool scanInteractive, std::string const& workerId, + wcontrol::Foreman::Ptr const& foreman, + wpublish::QueriesAndChunks::Ptr const& queriesAndChunks_, + std::string const& authKey, uint16_t resultsHttpPort) { + Ptr ujd = Ptr(new UberJobData(uberJobId, czarName, czarId, czarHost, czarPort, queryId, rowLimit, + maxTableSizeBytes, scanInfo, scanInteractive, workerId, foreman, + queriesAndChunks_, authKey, resultsHttpPort)); + // _fileChannelShared accesses this object with a weak pointer for cancellation and query info. + ujd->_fileChannelShared = FileChannelShared::create(ujd); + return ujd; +} + +void UberJobData::setTasks(std::vector> const& tasks) { + std::lock_guard tLg(_ujTasksMtx); + if (!_ujTasks.empty()) { + throw TaskException(ERR_LOC, "setTasks() called more than once for " + _idStr); + } + // Needs to be insert instead of '=' for conversion to weak_ptr + _ujTasks.insert(_ujTasks.end(), tasks.begin(), tasks.end()); + getFileChannelShared()->setTaskCount(tasks.size()); +} + +void UberJobData::responseFileReady(protojson::FileUrlInfo const& fileUrlInfo_) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << fileUrlInfo_.dump()); + + // Latch to prevent errors from being transmitted. + // NOTE: Calls to responseError() and responseFileReady() are protected by the + // mutex in FileChannelShared (_tMtx). 
+ if (auto const prevState = _responseState.exchange(SENDING_FILEURL); prevState != NOTHING) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " _responseState was " << prevState << " instead of NOTHING"); + } + + protojson::AuthContext authContext_(wconfig::WorkerConfig::instance()->replicationInstanceId(), + wconfig::WorkerConfig::instance()->replicationAuthKey()); + auto ujMsg = responseFileReadyBuild(fileUrlInfo_, authContext_); + + auto const method = http::Method::POST; + vector const headers = {"Content-Type: application/json"}; + string const url = "http://" + _czarHost + ":" + to_string(_czarPort) + "/queryjob-ready"; + string const requestContext = "Worker: '" + http::method2string(method) + "' request to '" + url + "'"; + _queueUJAnswer(method, headers, url, requestContext, ujMsg); +} + +shared_ptr UberJobData::responseFileReadyBuild( + protojson::FileUrlInfo const& fileUrlInfo_, protojson::AuthContext const& authContext_) { + string workerIdStr; + if (_foreman != nullptr) { + workerIdStr = _foreman->chunkInventory()->id(); + } else { + workerIdStr = "dummyWorkerIdStr"; + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " _foreman was null, which should only happen in unit tests"); + } + + unsigned int const version = http::MetaModule::version; + auto ujMsg = protojson::UberJobReadyMsg::create(authContext_, version, workerIdStr, _czarName, _czarId, + _queryId, _uberJobId, fileUrlInfo_); + + return ujMsg; +} + +void UberJobData::responseError(util::MultiError& multiErr, int chunkId, bool cancelled, int logLvl) { + LOGS(_log, logLvl, cName(__func__)); + // NOTE: Calls to responseError() and responseFileReady() are protected by the + // mutex in FileChannelShared (_tMtx). + if (_responseState == NOTHING) { + _responseState = SENDING_ERROR; + } else { + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " Already sending a different message.
NOT sending [" << multiErr << "]"); + return; + } + + protojson::AuthContext authContext_(wconfig::WorkerConfig::instance()->replicationInstanceId(), + wconfig::WorkerConfig::instance()->replicationAuthKey()); + + auto jrMsg = responseErrorBuild(multiErr, chunkId, cancelled, logLvl, authContext_); + + auto const method = http::Method::POST; + vector const headers = {"Content-Type: application/json"}; + string const url = "http://" + _czarHost + ":" + to_string(_czarPort) + "/queryjob-error"; + string const requestContext = "Worker: '" + http::method2string(method) + "' request to '" + url + "'"; + _queueUJAnswer(method, headers, url, requestContext, jrMsg); +} + +shared_ptr UberJobData::responseErrorBuild( + util::MultiError& multiErr, int chunkId, bool cancelled, int logLvl, + protojson::AuthContext const& authContext_) { + string workerIdStr; + if (_foreman != nullptr) { + workerIdStr = _foreman->chunkInventory()->id(); + } else { + workerIdStr = "dummyWorkerIdStr"; + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " _foreman was null, which should only happen in unit tests"); + } + + if (cancelled) { + util::Error err(util::Error::CANCEL, util::Error::NONE, "cancelled"); + multiErr.insert(err); + } + LOGS(_log, logLvl, + cName(__func__) + " error(s) in result for chunk #" + to_string(chunkId) + ":" + + multiErr.toOneLineString()); + unsigned int const version = http::MetaModule::version; + auto jrMsg = protojson::UberJobErrorMsg::create(authContext_, version, workerIdStr, _czarName, _czarId, + _queryId, _uberJobId, multiErr); + + return jrMsg; +} + +void UberJobData::_queueUJAnswer(http::Method method_, vector const& headers_, string const& url_, + string const& requestContext_, + shared_ptr const& ujMsg_) { + util::QdispPool::Ptr wPool; + if (_foreman != nullptr) { + wPool = _foreman->getWPool(); + } + + auto thisPtr = static_pointer_cast(shared_from_this()); + if (thisPtr == nullptr) { + throw util::Bug(ERR_LOC, "Bad thisPtr in UberJobData::_queueUJResponse"); + 
} + auto cmdTransmit = + UJTransmitCmd::create(_foreman, thisPtr, method_, headers_, url_, requestContext_, ujMsg_); + if (wPool == nullptr) { + // No thread pool. Run the command now. This should only happen in unit tests. + cmdTransmit->action(nullptr); + } else { + if (_scanInteractive) { + wPool->queCmd(cmdTransmit, 0); + } else { + wPool->queCmd(cmdTransmit, 1); + } + } +} + +string UberJobData::_resultFileName() const { + return util::ResultFileName(_czarId, _queryId, _uberJobId).fileName(); +} + +string UberJobData::resultFilePath() const { + string const resultsDirname = wconfig::WorkerConfig::instance()->resultsDirname(); + if (resultsDirname.empty()) return resultsDirname; + return (fs::path(resultsDirname) / _resultFileName()).string(); +} + +std::string UberJobData::resultFileHttpUrl() const { + return "http://" + _foreman->getFqdn() + ":" + to_string(_resultsHttpPort) + "/" + _resultFileName(); +} + +void UberJobData::cancelAllTasks() { + LOGS(_log, LOG_LVL_INFO, cName(__func__)); + int count = 0; + if (_cancelled.exchange(true) == false) { + lock_guard lg(_ujTasksMtx); + for (auto const& task : _ujTasks) { + auto tsk = task.lock(); + if (tsk != nullptr) { + tsk->cancel(false); + ++count; + } + } + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " cancelled " << count << " Tasks"); + } +} + +string UJTransmitCmd::cName(const char* funcN) const { + stringstream os; + os << "UJTransmitCmd::" << funcN << " czId=" << _czarId << " QID=" << _queryId << "_ujId=" << _uberJobId; + return os.str(); +} + +void UJTransmitCmd::action(util::CmdData* data) { + LOGS(_log, LOG_LVL_TRACE, cName(__func__)); + // Make certain _selfPtr is reset before leaving this function. + // If a retry is needed, duplicate() is called. 
+ class ResetSelf { + public: + ResetSelf(UJTransmitCmd* ujtCmd) : _ujtCmd(ujtCmd) {} + ~ResetSelf() { _ujtCmd->_selfPtr.reset(); } + UJTransmitCmd* const _ujtCmd; + }; + ResetSelf resetSelf(this); + + _attemptCount++; + auto ujPtr = _ujData.lock(); + if (ujPtr == nullptr || ujPtr->getCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " UberJob was cancelled " << _attemptCount); + return; + } + auto request = _ujMsg->toJson(); + string const requestStr = request.dump(); + http::Client client(_method, _url, requestStr, _headers); + bool transmitSuccess = false; + try { + json const response = client.readAsJson(); + auto respMsg = protojson::ExecutiveRespMsg::createFromJson(response); + if (respMsg->success) { + transmitSuccess = true; + if (respMsg->dataObsolete) { + // Mark the data as obsolete and end this UberJob + ujPtr->cancelAllTasks(); + // At this point, just deleting obsolete result files. + wbase::FileChannelShared::cleanUpResults(ujPtr->getCzarId(), ujPtr->getQueryId(), + ujPtr->getUberJobId()); + } + string note = response.at("note"); + if (!note.empty()) { + LOGS(_log, LOG_LVL_INFO, protojson::pwHide(response)); + } + } else { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " Transmit success=0 " << protojson::pwHide(response)); + respMsg->failedUpdateUberJobData(ujPtr->getCzarId(), ujPtr->getQueryId(), ujPtr->getUberJobId()); + // There's no point in re-sending as the czar got the message and didn't like + // it. + return; + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " " << _requestContext << " failed, ex: " << ex.what()); + } + if (!transmitSuccess) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " Transmit failed, adding to WorkerCzarComIssue"); + auto sPtr = _selfPtr; + if (_foreman != nullptr && sPtr != nullptr) { + // Do not reset _selfPtr as re-queuing may be needed several times.
+ LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " no response for transmit, putting on failed transmit queue."); + auto wCzInfo = _foreman->getWCzarInfoMap()->getWCzarInfo(_czarId); + // This will check if the czar is believed to be alive and try to queue the query to be tried + // again at a lower priority. If it thinks the czar is dead, it will throw it away. + if (wCzInfo->checkAlive(CLOCK::now())) { + auto wcComIssue = wCzInfo->getWorkerCzarComIssue(); + // nullptr should be impossible + // Add this failed transmit to the list so the czar will try to + // handle it when it gets the WorkerCzarComIssue message. + wcComIssue->addFailedTransmit(_queryId, _uberJobId, _ujMsg); + } + } else { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " _selfPtr was null, assuming job killed."); + } + } +} + +void UJTransmitCmd::kill() { + LOGS(_log, LOG_LVL_WARN, cName(__func__)); + auto sPtr = _selfPtr; + _selfPtr.reset(); + if (sPtr == nullptr) { + return; + } +} + +} // namespace lsst::qserv::wbase diff --git a/src/wbase/UberJobData.h b/src/wbase/UberJobData.h new file mode 100644 index 0000000000..1f22a13ad9 --- /dev/null +++ b/src/wbase/UberJobData.h @@ -0,0 +1,253 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ + +#ifndef LSST_QSERV_WBASE_UBERJOBDATA_H +#define LSST_QSERV_WBASE_UBERJOBDATA_H + +// System headers +#include +#include +#include +#include +#include + +// Third-party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "global/intTypes.h" +#include "global/UberJobBase.h" +#include "http/Method.h" +#include "util/QdispPool.h" +#include "wbase/SendChannel.h" + +namespace lsst::qserv::protojson { +class AuthContext; +class FileUrlInfo; +class ScanInfo; +class UberJobErrorMsg; +class UberJobReadyMsg; +class UberJobStatusMsg; +} // namespace lsst::qserv::protojson + +namespace lsst::qserv::util { +class MultiError; +} // namespace lsst::qserv::util + +namespace lsst::qserv::wcontrol { +class Foreman; +} // namespace lsst::qserv::wcontrol + +namespace lsst::qserv::wpublish { +class QueriesAndChunks; +} // namespace lsst::qserv::wpublish + +namespace lsst::qserv::wbase { + +class FileChannelShared; +class Task; + +/// This class tracks all Tasks associated with the UberJob on the worker +/// and reports status to the czar.
+class UberJobData : public UberJobBase { +public: + using Ptr = std::shared_ptr; + + enum ResponseState { SENDING_ERROR = -1, NOTHING = 0, SENDING_FILEURL = 1 }; + + UberJobData() = delete; + UberJobData(UberJobData const&) = delete; + + ~UberJobData() override = default; + + static Ptr create(UberJobId uberJobId, std::string const& czarName, CzarId czarId, + std::string const& czarHost, int czarPort, uint64_t queryId, int rowLimit, + uint64_t maxTableSizeBytes, std::shared_ptr const& scanInfo, + bool scanInteractive, std::string const& workerId, + std::shared_ptr const& foreman, + std::shared_ptr const& queriesAndChunks_, + std::string const& authKey, uint16_t resultsHttpPort = 8080); + + bool getScanInteractive() const { return _scanInteractive; } + std::shared_ptr getScanInfo() const { return _scanInfo; } + + UberJobId getUberJobId() const { return _uberJobId; } + CzarId getCzarId() const { return _czarId; } + std::string getCzarHost() const { return _czarHost; } + int getCzarPort() const { return _czarPort; } + uint64_t getQueryId() const { return _queryId; } + std::string getWorkerId() const { return _workerId; } + uint64_t getMaxTableSizeBytes() const { return _maxTableSizeBytes; } + std::shared_ptr getFileChannelShared() const { return _fileChannelShared; } + std::weak_ptr getQueriesAndChunks() const { return _queriesAndChunks; } + + /// Set the tasks defined in the UberJob to this UberJobData object. + /// Once the tasks are set for the UberJob, no more can be added as it could + /// cause a race condition in FileChannelShared task completion comparisons. + void setTasks(std::vector> const& tasks); + + /// Let the czar know the result is ready. + void responseFileReady(protojson::FileUrlInfo const& fileUrlInfo_); + + /// Build the UberJob result ready message. + std::shared_ptr responseFileReadyBuild( + protojson::FileUrlInfo const& fileUrlInfo_, protojson::AuthContext const& authContext_); + + /// Let the Czar know there's been a problem. 
+ void responseError(util::MultiError& multiErr, int chunkId, bool cancelled, int logLvl); + std::shared_ptr responseErrorBuild( + util::MultiError& multiErr, int chunkId, bool cancelled, int logLvl, + protojson::AuthContext const& authContext_); + + std::string const& getIdStr() const { return _idStr; } + std::string cName(const char* funcN) const override { + return std::string("UberJobData::") + funcN + " " + getIdStr(); + } + + bool getCancelled() const { return _cancelled; } + + /// Cancel all Tasks in this UberJob. + void cancelAllTasks(); + + /// Returns the LIMIT of rows for the query enforceable at the worker, where values <= 0 indicate + /// that there is no limit to the number of rows sent back by the worker. + /// Workers can only safely limit rows for queries that have the LIMIT clause without other related + /// clauses like ORDER BY. + int getRowLimit() const { return _rowLimit; } + + std::string resultFilePath() const; + std::string resultFileHttpUrl() const; + +private: + UberJobData(UberJobId uberJobId, std::string const& czarName, CzarId czarId, std::string czarHost, + int czarPort, uint64_t queryId, int rowLimit, uint64_t maxTableSizeBytes, + std::shared_ptr const& scanInfo, bool scanInteractive, + std::string const& workerId, std::shared_ptr const& foreman, + std::shared_ptr queriesAndChunks_, std::string const& authKey, + uint16_t resultsHttpPort); + + /// Return the name of the file that will contain the results of the query. + std::string _resultFileName() const; + + /// Queue the response to be sent to the originating czar. + void _queueUJAnswer(http::Method method_, std::vector const& headers_, + std::string const& url_, std::string const& requestContext_, + std::shared_ptr const& ujMsg_); + + std::string const _czarName; + std::string const _czarHost; + int const _czarPort; + int const _rowLimit; ///< If > 0, only read this many rows before return the results. 
+ uint64_t const _maxTableSizeBytes; + std::string const _workerId; + std::string const _authKey; + uint16_t const _resultsHttpPort; ///< Port used in result file URLs; create() defaults it to 8080. + + std::shared_ptr const _foreman; + std::weak_ptr _queriesAndChunks; + + /// Tasks in the UberJob, lifetimes are controlled by the scheduler. + std::vector> _ujTasks; + std::shared_ptr _fileChannelShared; + + std::mutex _ujTasksMtx; ///< Protects _ujTasks. + + /// True if this is an interactive (aka high priority) user query. + std::atomic _scanInteractive; + + /// Pointer to scan rating and table information. + std::shared_ptr _scanInfo; + + std::string const _idStr; + + std::atomic _cancelled{false}; ///< Set to true if this was cancelled. + + /// Either a file URL or error needs to be sent back to the czar. + /// In the case of LIMIT queries, once a file URL has been sent, + /// the system must be prevented from sending errors back to the czar + /// for Tasks that were cancelled due to the LIMIT already being reached. + std::atomic _responseState{NOTHING}; +}; + +/// This class puts the information about a locally finished UberJob into a command +/// so it can be put on a queue and sent to the originating czar. The information +/// being transmitted is usually the url for the result file or an error message. +class UJTransmitCmd : public util::PriorityCommand { +public: + using Ptr = std::shared_ptr; + + UJTransmitCmd() = delete; + ~UJTransmitCmd() override = default; + + std::string cName(const char* funcN) const; + + static Ptr create(std::shared_ptr const& foreman_, UberJobData::Ptr const& ujData_, + http::Method method_, std::vector const& headers_, std::string const& url_, + std::string const& requestContext_, + std::shared_ptr const& ujMsg_) { + auto ptr = + Ptr(new UJTransmitCmd(foreman_, ujData_, method_, headers_, url_, requestContext_, ujMsg_)); + ptr->_selfPtr = ptr; + return ptr; + } + + /// Send the UberJob file to the czar, this is the function that will be run when + /// the queue reaches this command.
If this message is not received by the czar, + /// it will notify WCzarInfo and possibly send WorkerCzarComIssue. + void action(util::CmdData* data) override; + + /// Reset the self pointer so this object can be killed. + void kill(); + +private: + UJTransmitCmd(std::shared_ptr const& foreman_, UberJobData::Ptr const& ujData_, + http::Method method_, std::vector const& headers_, std::string const& url_, + std::string const& requestContext_, + std::shared_ptr const& ujMsg_) + : PriorityCommand(), + _foreman(foreman_), + _ujData(ujData_), + _czarId(ujData_->getCzarId()), + _queryId(ujData_->getQueryId()), + _uberJobId(ujData_->getUberJobId()), + _method(method_), + _headers(headers_), + _url(url_), + _requestContext(requestContext_), + _ujMsg(ujMsg_) {} + + Ptr _selfPtr; ///< So this object can put itself back on the queue and keep itself alive. + std::shared_ptr const _foreman; + std::weak_ptr const _ujData; + CzarId const _czarId; + QueryId const _queryId; + UberJobId const _uberJobId; + http::Method const _method; + std::vector const _headers; + std::string const _url; + std::string const _requestContext; + std::shared_ptr _ujMsg; + int _attemptCount = 0; ///< How many attempts have been made to transmit this. 
+}; + +} // namespace lsst::qserv::wbase + +#endif // LSST_QSERV_WBASE_UBERJOBDATA_H diff --git a/src/wbase/UserQueryInfo.cc b/src/wbase/UserQueryInfo.cc index 846be63fc4..2b347f5a9b 100644 --- a/src/wbase/UserQueryInfo.cc +++ b/src/wbase/UserQueryInfo.cc @@ -24,6 +24,7 @@ // Qserv headers #include "util/Bug.h" +#include "wbase/UberJobData.h" // LSST headers #include "lsst/log/Log.h" @@ -36,46 +37,7 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.UserQueryInfo"); namespace lsst::qserv::wbase { -UserQueryInfo::UserQueryInfo(QueryId qId) : _qId(qId) {} - -UserQueryInfo::Ptr UserQueryInfo::uqMapInsert(QueryId qId) { - Ptr uqi; - lock_guard lg(_uqMapMtx); - auto iter = _uqMap.find(qId); - if (iter != _uqMap.end()) { - uqi = iter->second.lock(); - } - // If uqi is invalid at this point, a new one needs to be made. - if (uqi == nullptr) { - uqi = make_shared(qId); - _uqMap[qId] = uqi; - } - return uqi; -} - -UserQueryInfo::Ptr UserQueryInfo::uqMapGet(QueryId qId) { - lock_guard lg(_uqMapMtx); - auto iter = _uqMap.find(qId); - if (iter != _uqMap.end()) { - return iter->second.lock(); - } - return nullptr; -} - -void UserQueryInfo::uqMapErase(QueryId qId) { - lock_guard lg(_uqMapMtx); - auto iter = _uqMap.find(qId); - if (iter != _uqMap.end()) { - // If the weak pointer has 0 real references - if (iter->second.expired()) { - _uqMap.erase(qId); - } - } -} - -UserQueryInfo::Map UserQueryInfo::_uqMap; - -mutex UserQueryInfo::_uqMapMtx; +UserQueryInfo::UserQueryInfo(QueryId qId, CzarId czarId) : _qId(qId), _czarId(czarId) {} size_t UserQueryInfo::addTemplate(std::string const& templateStr) { size_t j = 0; @@ -101,4 +63,56 @@ std::string UserQueryInfo::getTemplate(size_t id) { return _templates[id]; } +void UserQueryInfo::addUberJob(std::shared_ptr const& ujData) { + lock_guard lockUq(_uberJobMapMtx); + UberJobId ujId = ujData->getUberJobId(); + _uberJobMap[ujId] = ujData; +} + +void UserQueryInfo::cancelFromCzar() { + if (_cancelledByCzar.exchange(true)) { + LOGS(_log, 
LOG_LVL_DEBUG, cName(__func__) << " already cancelledByCzar"); + return; + } + lock_guard lockUq(_uberJobMapMtx); + for (auto const& [ujId, weakUjPtr] : _uberJobMap) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " cancelling ujId=" << ujId); + auto ujPtr = weakUjPtr.lock(); + if (ujPtr != nullptr) { + ujPtr->cancelAllTasks(); + } + } +} + +void UserQueryInfo::cancelUberJob(UberJobId ujId) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " cancelling ujId=" << ujId); + lock_guard lockUq(_uberJobMapMtx); + _deadUberJobSet.insert(ujId); + auto iter = _uberJobMap.find(ujId); + if (iter != _uberJobMap.end()) { + auto weakUjPtr = iter->second; + auto ujPtr = weakUjPtr.lock(); + if (ujPtr != nullptr) { + ujPtr->cancelAllTasks(); + } + } +} + +void UserQueryInfo::cancelAllUberJobs() { + lock_guard lockUq(_uberJobMapMtx); + for (auto const& [ujKey, weakUjPtr] : _uberJobMap) { + _deadUberJobSet.insert(ujKey); + auto ujPtr = weakUjPtr.lock(); + if (ujPtr != nullptr) { + ujPtr->cancelAllTasks(); + } + } +} + +bool UserQueryInfo::isUberJobDead(UberJobId ujId) const { + lock_guard lockUq(_uberJobMapMtx); + auto iter = _deadUberJobSet.find(ujId); + return iter != _deadUberJobSet.end(); +} + } // namespace lsst::qserv::wbase diff --git a/src/wbase/UserQueryInfo.h b/src/wbase/UserQueryInfo.h index 27a7bb490a..63223cfd73 100644 --- a/src/wbase/UserQueryInfo.h +++ b/src/wbase/UserQueryInfo.h @@ -24,6 +24,7 @@ #define LSST_QSERV_WBASE_USERQUERYINFO_H // System headers +#include #include #include #include @@ -31,31 +32,33 @@ // Qserv headers #include "global/intTypes.h" +#include "util/InstanceCount.h" // This header declarations namespace lsst::qserv::wbase { +class UberJobData; + /// This class contains information about a user query that is effectively the same /// for all Task's in the user query. 
class UserQueryInfo { public: using Ptr = std::shared_ptr; - using Map = std::map>; - - static Ptr uqMapInsert(QueryId qId); - static Ptr uqMapGet(QueryId qId); - /// Erase the entry for `qId` in the map, as long as there are only - /// weak references to the UserQueryInfoObject. - /// Clear appropriate local and member references before calling this. - static void uqMapErase(QueryId qId); - UserQueryInfo(QueryId qId); UserQueryInfo() = delete; UserQueryInfo(UserQueryInfo const&) = delete; UserQueryInfo& operator=(UserQueryInfo const&) = delete; + static Ptr create(QueryId qId, CzarId czarId) { + return std::shared_ptr(new UserQueryInfo(qId, czarId)); + } + ~UserQueryInfo() = default; + std::string cName(const char* func) { + return std::string("UserQueryInfo::") + func + " qId=" + std::to_string(_qId); + } + /// Add a query template to the map of templates for this user query. size_t addTemplate(std::string const& templateStr); @@ -63,17 +66,48 @@ class UserQueryInfo { /// @throws Bug if id is out of range. std::string getTemplate(size_t id); + /// Add an UberJobData object to the UserQueryInfo. + void addUberJob(std::shared_ptr const& ujData); + + /// Return true if this user query was cancelled by its czar. + bool getCancelledByCzar() const { return _cancelledByCzar; } + + /// The czar has cancelled this user query, all tasks need to + /// be killed but there's no need to track UberJob id's anymore. + void cancelFromCzar(); + + /// Cancel all associated tasks and track the killed UberJob id's + /// The user query itself may still be alive, so the czar may need + /// information about which UberJobs are dead. + void cancelAllUberJobs(); + + /// Cancel a specific UberJob in this user query. 
+ void cancelUberJob(UberJobId ujId); + + bool isUberJobDead(UberJobId ujId) const; + + QueryId getQueryId() const { return _qId; } + + CzarId getCzarId() const { return _czarId; } + private: - static Map _uqMap; - static std::mutex _uqMapMtx; ///< protects _uqMap + UserQueryInfo(QueryId qId, CzarId czId); QueryId const _qId; ///< The User Query Id number. + CzarId const _czarId; /// List of template strings. This is expected to be short, 1 or 2 entries. /// This must be a vector. New entries are always added to the end so as not /// to alter existing indexes into the vector. std::vector _templates; - std::mutex _uqMtx; ///< protects _templates; + std::mutex _uqMtx; ///< protects _templates + + /// Map of all UberJobData objects on this worker for this User Query. + std::map> _uberJobMap; + std::set _deadUberJobSet; ///< Set of cancelled UberJob Ids. + mutable std::mutex _uberJobMapMtx; ///< protects _uberJobMap, _deadUberJobSet + + std::atomic _cancelledByCzar{false}; }; } // namespace lsst::qserv::wbase diff --git a/src/wbase/WorkerCommand.cc b/src/wbase/WorkerCommand.cc deleted file mode 100644 index cf79089a92..0000000000 --- a/src/wbase/WorkerCommand.cc +++ /dev/null @@ -1,49 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2012-2018 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "wbase/WorkerCommand.h" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "wbase/SendChannel.h" - -namespace { - -LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.WorkerCommand"); - -} // namespace - -namespace lsst::qserv::wbase { - -WorkerCommand::WorkerCommand(SendChannel::Ptr const& sendChannel) - : util::Command([this](util::CmdData* data) { this->run(); }), _sendChannel(sendChannel) {} - -void WorkerCommand::sendSerializedResponse() { - std::string str(_frameBuf.data(), _frameBuf.size()); - _sendChannel->sendStream(xrdsvc::StreamBuffer::createWithMove(str), true); -} - -} // namespace lsst::qserv::wbase diff --git a/src/wbase/WorkerCommand.h b/src/wbase/WorkerCommand.h deleted file mode 100644 index c0934f4797..0000000000 --- a/src/wbase/WorkerCommand.h +++ /dev/null @@ -1,96 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2011-2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -/// WorkerCommand.h -#ifndef LSST_QSERV_WBASE_WORKER_COMMAND_H -#define LSST_QSERV_WBASE_WORKER_COMMAND_H - -// System headers -#include -#include -#include -#include - -// Qserv headers -#include "proto/FrameBuffer.h" -#include "proto/worker.pb.h" -#include "util/Command.h" - -// Forward declarations -namespace lsst::qserv::wbase { -class SendChannel; -} // namespace lsst::qserv::wbase - -namespace lsst::qserv::wbase { - -/** - * Class WorkerCommand is the base class for a family of various worker - * management commmands. - */ -class WorkerCommand : public util::Command { -public: - using Ptr = std::shared_ptr; - - WorkerCommand& operator=(const WorkerCommand&) = delete; - WorkerCommand(const WorkerCommand&) = delete; - WorkerCommand() = delete; - virtual ~WorkerCommand() = default; - - /// @param sendChannel - communication channel for reporting results - explicit WorkerCommand(std::shared_ptr const& sendChannel); - -protected: - /// The actual behavior is provided by subclasses. - virtual void run() = 0; - - /** - * Fill in the status code and the message into the response message - * of the desired type and sent it back to a caller. - * @param error Mandatory error to be reported. - * @param code The optional error code if the one differes from the default one. - * @param extendedModsFunc The optional function to be provided if any additional modifications - * are required to be made to the response object. - */ - template - void reportError(std::string const& error, - proto::WorkerCommandStatus::Code code = proto::WorkerCommandStatus::ERROR, - std::function const& extendedModsFunc = nullptr) { - RESPONSE resp; - resp.mutable_status()->set_code(code); - resp.mutable_status()->set_error(error); - if (extendedModsFunc != nullptr) extendedModsFunc(resp); - _frameBuf.serialize(resp); - sendSerializedResponse(); - } - - /** - * Send the serialized payload stored within the frame buffer to a caller. 
- */ - void sendSerializedResponse(); - - std::shared_ptr _sendChannel; ///< For result reporting - proto::FrameBuffer _frameBuf; ///< Buffer for serializing a response -}; - -} // namespace lsst::qserv::wbase - -#endif // LSST_QSERV_WBASE_WORKER_COMMAND_H diff --git a/src/wcomms/CMakeLists.txt b/src/wcomms/CMakeLists.txt new file mode 100644 index 0000000000..1e879ab029 --- /dev/null +++ b/src/wcomms/CMakeLists.txt @@ -0,0 +1,15 @@ +add_library(wcomms SHARED) + +target_sources(wcomms PRIVATE + HttpModule.cc + HttpMonitorModule.cc + HttpReplicaMgtModule.cc + HttpSvc.cc + HttpWorkerCzarModule.cc +) + +target_link_libraries(wcomms PUBLIC + log +) + +install(TARGETS wcomms) diff --git a/src/xrdsvc/HttpModule.cc b/src/wcomms/HttpModule.cc similarity index 89% rename from src/xrdsvc/HttpModule.cc rename to src/wcomms/HttpModule.cc index 02f46818d9..ccd6588fb7 100644 --- a/src/xrdsvc/HttpModule.cc +++ b/src/wcomms/HttpModule.cc @@ -20,7 +20,7 @@ */ // Class header -#include "xrdsvc/HttpModule.h" +#include "wcomms/HttpModule.h" // System headers #include @@ -38,7 +38,7 @@ using namespace std; -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { HttpModule::HttpModule(string const& context, shared_ptr const& foreman, shared_ptr const& req, shared_ptr const& resp) @@ -88,11 +88,10 @@ wbase::TaskSelector HttpModule::translateTaskSelector(string const& func) const } } selector.maxTasks = query().optionalUInt("max_tasks", 0); - debug(func, "include_tasks=" + string(selector.includeTasks ? "1" : "0")); - debug(func, "queryIds.size()=" + to_string(selector.queryIds.size())); - debug(func, "taskStates.size()=" + to_string(selector.taskStates.size())); - debug(func, "max_tasks=" + to_string(selector.maxTasks)); + trace(func, "include_tasks=" + string(selector.includeTasks ? 
"1" : "0") + + " queryIds.size()=" + to_string(selector.queryIds.size()) + " taskStates.size()=" + + to_string(selector.taskStates.size()) + " max_tasks=" + to_string(selector.maxTasks)); return selector; } -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms diff --git a/src/xrdsvc/HttpModule.h b/src/wcomms/HttpModule.h similarity index 93% rename from src/xrdsvc/HttpModule.h rename to src/wcomms/HttpModule.h index fb122041d6..e8cfd7083b 100644 --- a/src/xrdsvc/HttpModule.h +++ b/src/wcomms/HttpModule.h @@ -18,8 +18,8 @@ * the GNU General Public License along with this program. If not, * see . */ -#ifndef LSST_QSERV_XRDSVC_HTTPMODULE_H -#define LSST_QSERV_XRDSVC_HTTPMODULE_H +#ifndef LSST_QSERV_WCOMMS_HTTPMODULE_H +#define LSST_QSERV_WCOMMS_HTTPMODULE_H // System headers #include @@ -43,7 +43,7 @@ class Foreman; } // namespace lsst::qserv::wcontrol // This header declarations -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { /** * Class HttpModule is an intermediate base class of the Qserv worker modules. 
@@ -85,6 +85,6 @@ class HttpModule : public http::QhttpModule { std::shared_ptr const _foreman; }; -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms -#endif // LSST_QSERV_XRDSVC_HTTPMODULE_H +#endif // LSST_QSERV_WCOMMS_HTTPMODULE_H diff --git a/src/xrdsvc/HttpMonitorModule.cc b/src/wcomms/HttpMonitorModule.cc similarity index 97% rename from src/xrdsvc/HttpMonitorModule.cc rename to src/wcomms/HttpMonitorModule.cc index bbd4c65667..9210defb8c 100644 --- a/src/xrdsvc/HttpMonitorModule.cc +++ b/src/wcomms/HttpMonitorModule.cc @@ -20,7 +20,7 @@ */ // Class header -#include "xrdsvc/HttpMonitorModule.h" +#include "wcomms/HttpMonitorModule.h" // System headers #include @@ -41,7 +41,7 @@ using namespace std; using json = nlohmann::json; -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { void HttpMonitorModule::process(string const& context, shared_ptr const& foreman, shared_ptr const& req, @@ -133,4 +133,4 @@ json HttpMonitorModule::_echo() { return json::object({{"data", body().required("data")}}); } -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms diff --git a/src/xrdsvc/HttpMonitorModule.h b/src/wcomms/HttpMonitorModule.h similarity index 91% rename from src/xrdsvc/HttpMonitorModule.h rename to src/wcomms/HttpMonitorModule.h index 01c5c171c5..fb9d1ab573 100644 --- a/src/xrdsvc/HttpMonitorModule.h +++ b/src/wcomms/HttpMonitorModule.h @@ -18,8 +18,8 @@ * the GNU General Public License along with this program. If not, * see . 
*/ -#ifndef LSST_QSERV_XRDSVC_HTTPMONITORMODULE_H -#define LSST_QSERV_XRDSVC_HTTPMONITORMODULE_H +#ifndef LSST_QSERV_WCOMMS_HTTPMONITORMODULE_H +#define LSST_QSERV_WCOMMS_HTTPMONITORMODULE_H // System headers #include @@ -29,7 +29,7 @@ #include "nlohmann/json.hpp" // Qserv headers -#include "xrdsvc/HttpModule.h" +#include "wcomms/HttpModule.h" namespace lsst::qserv::qhttp { class Request; @@ -41,13 +41,13 @@ class Foreman; } // namespace lsst::qserv::wcontrol // This header declarations -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { /** * Class HttpMonitorModule implements a handler for reporting various run-time * monitoring metrics and statistics collected at the Qserv worker. */ -class HttpMonitorModule : public xrdsvc::HttpModule { +class HttpMonitorModule : public wcomms::HttpModule { public: /** * @note supported values for parameter 'subModuleName' are: @@ -94,6 +94,6 @@ class HttpMonitorModule : public xrdsvc::HttpModule { nlohmann::json _echo(); }; -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms -#endif // LSST_QSERV_XRDSVC_HTTPMONITORMODULE_H +#endif // LSST_QSERV_WCOMMS_HTTPMONITORMODULE_H diff --git a/src/xrdsvc/HttpReplicaMgtModule.cc b/src/wcomms/HttpReplicaMgtModule.cc similarity index 92% rename from src/xrdsvc/HttpReplicaMgtModule.cc rename to src/wcomms/HttpReplicaMgtModule.cc index 095fe2e3e1..06957eb996 100644 --- a/src/xrdsvc/HttpReplicaMgtModule.cc +++ b/src/wcomms/HttpReplicaMgtModule.cc @@ -20,7 +20,7 @@ */ // Class header -#include "xrdsvc/HttpReplicaMgtModule.h" +#include "wcomms/HttpReplicaMgtModule.h" // System headers #include @@ -28,7 +28,7 @@ #include // Third party headers -#include "XrdSsi/XrdSsiCluster.hh" +#include "lsst/log/Log.h" // Qserv headers #include "http/Exceptions.h" @@ -39,15 +39,16 @@ #include "wconfig/WorkerConfig.h" #include "wcontrol/Foreman.h" #include "wcontrol/ResourceMonitor.h" +#include "wmain/WorkerMain.h" #include "wpublish/ChunkInventory.h" -#include 
"xrdsvc/SsiProvider.h" -#include "xrdsvc/XrdName.h" - -extern XrdSsiProvider* XrdSsiProviderLookup; using namespace std; using json = nlohmann::json; +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.wcomms.HttpReplicaMgt"); +} + namespace { // These markers if reported in the extended error response object of the failed // requests could be used by a caller for refining the completion status @@ -59,7 +60,7 @@ string makeResource(string const& database, int chunk) { return "/chk/" + databa } // namespace -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { void HttpReplicaMgtModule::process(string const& context, shared_ptr const& foreman, shared_ptr const& req, @@ -73,14 +74,10 @@ HttpReplicaMgtModule::HttpReplicaMgtModule(string const& context, shared_ptr const& foreman, shared_ptr const& req, shared_ptr const& resp) - : HttpModule(context, foreman, req, resp), - _providerServer(dynamic_cast(XrdSsiProviderLookup)), - _clusterManager(_providerServer->GetClusterManager()), - _dataContext(_clusterManager->DataContext()) {} + : HttpModule(context, foreman, req, resp) {} json HttpReplicaMgtModule::executeImpl(string const& subModuleName) { string const func = string(__func__) + "[sub-module='" + subModuleName + "']"; - debug(func); enforceInstanceId(func, wconfig::WorkerConfig::instance()->replicationInstanceId()); enforceWorkerId(func); if (subModuleName == "GET") @@ -185,8 +182,7 @@ json HttpReplicaMgtModule::_rebuildInventory() { // Load the persistent inventory data into the transient one. 
wpublish::ChunkInventory newChunkInventory; try { - xrdsvc::XrdName x; - newChunkInventory.init(x.getName(), foreman()->mySqlConfig()); + newChunkInventory.init(wmain::WorkerMain::get()->getName(), foreman()->mySqlConfig()); } catch (exception const& ex) { throw http::Error(__func__, "persistent inventory read failed, ex: " + string(ex.what())); } @@ -215,8 +211,7 @@ json HttpReplicaMgtModule::_rebuildInventory() { void HttpReplicaMgtModule::_rebuildPersistentInventory() const { wpublish::ChunkInventory newChunkInventory; try { - xrdsvc::XrdName x; - newChunkInventory.rebuild(x.getName(), foreman()->mySqlConfig()); + newChunkInventory.rebuild(wmain::WorkerMain::get()->getName(), foreman()->mySqlConfig()); } catch (exception const& ex) { throw http::Error(__func__, "inventory rebuild stage failed, ex: " + string(ex.what())); } @@ -255,7 +250,7 @@ void HttpReplicaMgtModule::_updateInventory(string const& func, } } - // Update the current map and notify XRootD accordingly. + // Update the current map. for (auto&& [database, chunks] : toBeRemovedExistMap) { if (databaseFilter.contains(database)) { for (int const chunk : chunks) { @@ -326,9 +321,7 @@ void HttpReplicaMgtModule::_modifyChunk(string const& func, int chunk, string co string const resource = ::makeResource(database, chunk); debug(func, operation + " resource: " + resource + ", DataContext: " + to_string(_dataContext)); try { - // Notify XRootD/cmsd and (depending on a mode) modify the provider's - // copy of the inventory. After that modify both (persistent and - // transient) inventories. + // Modify both (persistent and transient) inventories. if (Direction::ADD == direction) { try { // The first operation to add the chunk to the persistent inventory @@ -336,8 +329,6 @@ void HttpReplicaMgtModule::_modifyChunk(string const& func, int chunk, string co // steps will be skipped and the database will be ignored with a warn message logged // and reported back to a caller. 
foreman()->chunkInventory()->add(database, chunk, foreman()->mySqlConfig()); - if (_dataContext) _providerServer->GetChunkInventory().add(database, chunk); - _clusterManager->Added(resource.data()); } catch (wpublish::InvalidParamError const& ex) { // This optimisation is to avoid flooding logs with repetitive warnings // about the same non-existing database in case if many chunks are being @@ -350,8 +341,6 @@ void HttpReplicaMgtModule::_modifyChunk(string const& func, int chunk, string co } } } else { - _clusterManager->Removed(resource.data()); - if (_dataContext) _providerServer->GetChunkInventory().remove(database, chunk); foreman()->chunkInventory()->remove(database, chunk, foreman()->mySqlConfig()); } } catch (wpublish::QueryError const& ex) { @@ -361,4 +350,4 @@ void HttpReplicaMgtModule::_modifyChunk(string const& func, int chunk, string co } } -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms diff --git a/src/xrdsvc/HttpReplicaMgtModule.h b/src/wcomms/HttpReplicaMgtModule.h similarity index 93% rename from src/xrdsvc/HttpReplicaMgtModule.h rename to src/wcomms/HttpReplicaMgtModule.h index 505f94c284..db567caf43 100644 --- a/src/xrdsvc/HttpReplicaMgtModule.h +++ b/src/wcomms/HttpReplicaMgtModule.h @@ -18,8 +18,8 @@ * the GNU General Public License along with this program. If not, * see . 
*/ -#ifndef LSST_QSERV_XRDSVC_HTTPREPLICAMGTMODULE_H -#define LSST_QSERV_XRDSVC_HTTPREPLICAMGTMODULE_H +#ifndef LSST_QSERV_WCOMMS_HTTPREPLICAMGTMODULE_H +#define LSST_QSERV_WCOMMS_HTTPREPLICAMGTMODULE_H // System headers #include @@ -31,11 +31,9 @@ #include "nlohmann/json.hpp" // Qserv headers -#include "xrdsvc/HttpModule.h" +#include "wcomms/HttpModule.h" // Forward declarations -class XrdSsiCluster; - namespace lsst::qserv::qhttp { class Request; class Response; @@ -49,12 +47,8 @@ namespace lsst::qserv::wpublish { class ChunkInventory; } // namespace lsst::qserv::wpublish -namespace lsst::qserv::xrdsvc { -class SsiProviderServer; -} // namespace lsst::qserv::xrdsvc - // This header declarations -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { /** * Class HttpReplicaMgtModule implements a handler for managing chunk replicas @@ -62,7 +56,7 @@ namespace lsst::qserv::xrdsvc { * databases that were partitioned based on the same partitioning configuration) * or all known databases. */ -class HttpReplicaMgtModule : public xrdsvc::HttpModule { +class HttpReplicaMgtModule : public wcomms::HttpModule { public: /** * @note supported values for parameter 'subModuleName' are: @@ -189,13 +183,9 @@ class HttpReplicaMgtModule : public xrdsvc::HttpModule { /// when attempting to add replicas into non-existing databases. std::set _missingDatabaseNames; - // XROOTD/SSI service context. 
- - xrdsvc::SsiProviderServer* _providerServer = nullptr; - XrdSsiCluster* _clusterManager = nullptr; bool _dataContext = false; }; -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms -#endif // LSST_QSERV_XRDSVC_HTTPREPLICAMGTMODULE_H +#endif // LSST_QSERV_WCOMMS_HTTPREPLICAMGTMODULE_H diff --git a/src/xrdsvc/HttpSvc.cc b/src/wcomms/HttpSvc.cc similarity index 81% rename from src/xrdsvc/HttpSvc.cc rename to src/wcomms/HttpSvc.cc index f30c82a0c1..9e01f5f328 100644 --- a/src/xrdsvc/HttpSvc.cc +++ b/src/wcomms/HttpSvc.cc @@ -20,7 +20,7 @@ */ // Class header -#include "xrdsvc/HttpSvc.h" +#include "wcomms/HttpSvc.h" // System headers #include @@ -31,8 +31,9 @@ #include "wconfig/WorkerConfig.h" #include "wcontrol/Foreman.h" #include "wpublish/ChunkInventory.h" -#include "xrdsvc/HttpMonitorModule.h" -#include "xrdsvc/HttpReplicaMgtModule.h" +#include "wcomms/HttpMonitorModule.h" +#include "wcomms/HttpReplicaMgtModule.h" +#include "wcomms/HttpWorkerCzarModule.h" // LSST headers #include "lsst/log/Log.h" @@ -42,13 +43,13 @@ using namespace std; namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.HttpSvc"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.wcomms.HttpSvc"); string const serviceName = "WORKER-MANAGEMENT "; } // namespace -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { shared_ptr HttpSvc::create(shared_ptr const& foreman, uint16_t port, unsigned int numThreads) { @@ -59,7 +60,7 @@ HttpSvc::HttpSvc(shared_ptr const& foreman, uint16_t port, un : _foreman(foreman), _port(port), _numThreads(numThreads) {} uint16_t HttpSvc::start() { - string const context = "xrdsvc::HttpSvc::" + string(__func__) + " "; + string const context = "wcomms::HttpSvc::" + string(__func__) + " "; lock_guard const lock(_mtx); if (_httpServerPtr != nullptr) { throw logic_error(context + "the service is already running."); @@ -134,6 +135,23 @@ uint16_t HttpSvc::start() { HttpReplicaMgtModule::process(::serviceName, self->_foreman, req, resp, "REBUILD", 
http::AuthType::REQUIRED); }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpWorkerCzarModule::process(::serviceName, self->_foreman, req, resp, "/queryjob", + http::AuthType::REQUIRED); + }}}); + _httpServerPtr->addHandlers( + {{"POST", "/querystatus", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpWorkerCzarModule::process(::serviceName, self->_foreman, req, resp, "/querystatus", + http::AuthType::REQUIRED); + }}}); + _httpServerPtr->addHandlers( + {{"GET", "/chunkusecounts", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpWorkerCzarModule::process(::serviceName, self->_foreman, req, resp, "/chunkusecounts"); + }}}); _httpServerPtr->start(); // Initialize the I/O context and start the service threads. At this point @@ -142,12 +160,13 @@ uint16_t HttpSvc::start() { _threads.push_back(make_unique([self]() { self->_io_service.run(); })); } auto const actualPort = _httpServerPtr->getPort(); - LOGS(_log, LOG_LVL_INFO, context + "started on port " + to_string(actualPort)); + LOGS(_log, LOG_LVL_INFO, + context + "started on port " + to_string(actualPort) + " numThreads=" + to_string(_numThreads)); return actualPort; } void HttpSvc::stop() { - string const context = "xrdsvc::HttpSvc::" + string(__func__) + " "; + string const context = "wcomms::HttpSvc::" + string(__func__) + " "; lock_guard const lock(_mtx); if (_httpServerPtr == nullptr) { throw logic_error(context + "the service is not running."); @@ -165,4 +184,4 @@ void HttpSvc::stop() { LOGS(_log, LOG_LVL_INFO, context + "stopped"); } -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms diff --git a/src/xrdsvc/HttpSvc.h b/src/wcomms/HttpSvc.h similarity index 95% rename from src/xrdsvc/HttpSvc.h rename to src/wcomms/HttpSvc.h index a7796e3637..bf8e14c294 100644 --- a/src/xrdsvc/HttpSvc.h +++ b/src/wcomms/HttpSvc.h @@ -18,8 +18,8 @@ * the GNU General Public License along with this program. 
If not, * see . */ -#ifndef LSST_QSERV_XRDSVC_HTTPSVC_H -#define LSST_QSERV_XRDSVC_HTTPSVC_H +#ifndef LSST_QSERV_WCOMMS_HTTPSVC_H +#define LSST_QSERV_WCOMMS_HTTPSVC_H // System headers #include @@ -40,7 +40,7 @@ class Foreman; } // namespace lsst::qserv::wcontrol // This header declarations -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { /** * Class HttpSvc is the HTTP server for processing worker management requests. @@ -54,7 +54,7 @@ namespace lsst::qserv::xrdsvc { * // Create the server. Note, it won't run yet until explicitly started. * uint16_t const port = 0; // The port will be dynamically allocated at start * unsigned int const numThreads = 2; // The number of BOOST ASIO threads - * auto const svc = xrdsvc::HttpSvc::create(port, numThreads); + * auto const svc = wcomms::HttpSvc::create(port, numThreads); * * // Start the server and get the actual port number. * uint16_t const actualPort = svc->start(); @@ -135,6 +135,6 @@ class HttpSvc : public std::enable_shared_from_this { std::vector> _threads; }; -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms -#endif // LSST_QSERV_XRDSVC_HTTPSVC_H +#endif // LSST_QSERV_WCOMMS_HTTPSVC_H diff --git a/src/wcomms/HttpWorkerCzarModule.cc b/src/wcomms/HttpWorkerCzarModule.cc new file mode 100644 index 0000000000..2c26cfab0d --- /dev/null +++ b/src/wcomms/HttpWorkerCzarModule.cc @@ -0,0 +1,337 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "wcomms/HttpWorkerCzarModule.h" + +// System headers +#include +#include +#include + +// Third party headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "http/Exceptions.h" +#include "http/MetaModule.h" +#include "http/RequestQuery.h" +#include "http/RequestBodyJSON.h" +#include "mysql/MySqlUtils.h" +#include "protojson/ChunkUseCountAnswerMsg.h" +#include "protojson/ResponseMsg.h" +#include "protojson/UberJobMsg.h" +#include "protojson/WorkerCzarComIssue.h" +#include "protojson/WorkerQueryStatusData.h" +#include "util/Command.h" +#include "util/Error.h" +#include "util/MultiError.h" +#include "util/String.h" +#include "util/Timer.h" +#include "wbase/FileChannelShared.h" +#include "wbase/Task.h" +#include "wbase/UberJobData.h" +#include "wbase/UserQueryInfo.h" +#include "wcontrol/Foreman.h" +#include "wcontrol/WCzarInfoMap.h" +#include "wconfig/WorkerConfig.h" +#include "wcontrol/ResourceMonitor.h" +#include "wpublish/ChunkInventory.h" +#include "wpublish/QueriesAndChunks.h" +#include "wpublish/QueryStatistics.h" +#include "wsched/BlendScheduler.h" + +using namespace std; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.wcomms.HttpWorkerCzar"); +} + +namespace { +// These markers if reported in the extended error response object of the failed +// requests could be used by a caller for refining the completion status +// of the corresponding Controller-side operation.
+json const extErrorInvalidParam = json::object({{"invalid_param", 1}}); +json const extErrorReplicaInUse = json::object({{"in_use", 1}}); + +} // namespace + +namespace lsst::qserv::wcomms { + +void HttpWorkerCzarModule::process(string const& context, shared_ptr const& foreman, + shared_ptr const& req, + shared_ptr const& resp, string const& subModuleName, + http::AuthType const authType) { + HttpWorkerCzarModule module(context, foreman, req, resp); + module.execute(subModuleName, authType); +} + +HttpWorkerCzarModule::HttpWorkerCzarModule(string const& context, + shared_ptr const& foreman, + shared_ptr const& req, + shared_ptr const& resp) + : HttpModule(context, foreman, req, resp) {} + +json HttpWorkerCzarModule::executeImpl(string const& subModuleName) { + string const func = string(__func__) + "[sub-module='" + subModuleName + "']"; + enforceInstanceId(func, wconfig::WorkerConfig::instance()->replicationInstanceId()); + enforceWorkerId(func); + if (subModuleName == "/queryjob") return _queryJob(); + if (subModuleName == "/querystatus") return _queryStatus(); + if (subModuleName == "/chunkusecounts") return _chunkUseCounts(); + throw invalid_argument(context() + func + " unsupported sub-module"); +} + +json HttpWorkerCzarModule::_queryJob() { + debug(__func__); + checkApiVersion(__func__, 34); + // At this point, API version, correct worker, and auth have been checked. 
+ json jsRet = _handleQueryJob(__func__); + return jsRet; +} + +json HttpWorkerCzarModule::_handleQueryJob(string const& func) { + json jsRet; + + try { + auto const& jsReq = body().objJson; + auto uberJobMsg = protojson::UberJobMsg::createFromJson(jsReq); + + UberJobId ujId = uberJobMsg->getUberJobId(); + auto ujCzInfo = uberJobMsg->getCzarContactInfo(); + QueryId ujQueryId = uberJobMsg->getQueryId(); + int ujRowLimit = uberJobMsg->getRowLimit(); + auto targetWorkerId = uberJobMsg->getWorkerId(); + uint64_t maxTableSizeMb = uberJobMsg->getMaxTableSizeMb(); + uint64_t const MB_SIZE_BYTES = 1024 * 1024; + uint64_t maxTableSizeBytes = maxTableSizeMb * MB_SIZE_BYTES; + auto scanInfo = uberJobMsg->getScanInfo(); + bool scanInteractive = uberJobMsg->getScanInteractive(); + + // Get or create QueryStatistics and UserQueryInfo instances. + auto queryStats = foreman()->getQueriesAndChunks()->addQueryId(ujQueryId, ujCzInfo->czId); + auto userQueryInfo = queryStats->getUserQueryInfo(); + + if (userQueryInfo->getCancelledByCzar()) { + throw wbase::TaskException( + ERR_LOC, string("Already cancelled by czar. ujQueryId=") + to_string(ujQueryId)); + } + if (userQueryInfo->isUberJobDead(ujId)) { + throw wbase::TaskException(ERR_LOC, string("UberJob already dead. ujQueryId=") + + to_string(ujQueryId) + " ujId=" + to_string(ujId)); + } + + std::shared_ptr foremanPtr = foreman(); + auto const qAndC = foremanPtr->getQueriesAndChunks(); + auto authCtx = getAuthContext(); + + // It is important to create UberJobData at this point as it will be the only way to + // inform the czar of errors after this function returns. 
+ auto ujData = wbase::UberJobData::create(ujId, ujCzInfo->czName, ujCzInfo->czId, ujCzInfo->czHostName, + ujCzInfo->czPort, ujQueryId, ujRowLimit, maxTableSizeBytes, + scanInfo, scanInteractive, targetWorkerId, foremanPtr, qAndC, + authCtx.authKey, foremanPtr->httpPort()); + + auto lFunc = [ujId, ujQueryId, ujCzInfo, ujRowLimit, maxTableSizeBytes, targetWorkerId, userQueryInfo, + uberJobMsg, foremanPtr, authCtx, ujData](util::CmdData*) { + _buildTasks(ujId, ujQueryId, ujCzInfo, ujRowLimit, maxTableSizeBytes, targetWorkerId, + userQueryInfo, uberJobMsg, foremanPtr, authCtx.authKey, ujData); + }; + + util::Command::Ptr taskLoadCmd = std::make_shared(lFunc); + foremanPtr->getScheduler()->queTaskLoad(taskLoadCmd); + + string note = string("qId=") + to_string(ujQueryId) + " ujId=" + to_string(ujId); + protojson::ResponseMsg respMsg(true); + jsRet = respMsg.toJson(); + } catch (wbase::TaskException const& texp) { + LOGS(_log, LOG_LVL_ERROR, + "HttpWorkerCzarModule::_handleQueryJob wbase::TaskException received " << texp.what()); + protojson::ResponseMsg respMsg(false, "parse", texp.what()); + jsRet = respMsg.toJson(); + } + return jsRet; +} + +void HttpWorkerCzarModule::_buildTasks(UberJobId ujId, QueryId ujQueryId, + protojson::CzarContactInfo::Ptr const& ujCzInfo, int ujRowLimit, + uint64_t maxTableSizeBytes, string const& targetWorkerId, + std::shared_ptr const& userQueryInfo, + protojson::UberJobMsg::Ptr const& uberJobMsg, + shared_ptr const& foremanPtr, + string const& authKeyStr, wbase::UberJobData::Ptr const& ujData) { + try { + LOGS(_log, LOG_LVL_TRACE, __func__ << " qid=" << ujQueryId << " ujId=" << ujId); + util::Timer timerParse; + timerParse.start(); + auto czarId = ujCzInfo->czId; + + userQueryInfo->addUberJob(ujData); + auto const ujTasks = + wbase::Task::createTasksFromUberJobMsg(uberJobMsg, ujData, foremanPtr->chunkResourceMgr(), + foremanPtr->mySqlConfig(), foremanPtr->sqlConnMgr()); + ujData->setTasks(ujTasks); + + // At this point, it looks like the
message was sent successfully. + wcontrol::WCzarInfoMap::Ptr wCzarMap = foremanPtr->getWCzarInfoMap(); + wcontrol::WCzarInfo::Ptr wCzarInfo = wCzarMap->getWCzarInfo(czarId); + wCzarInfo->czarMsgReceived(CLOCK::now()); + + timerParse.stop(); + util::Timer timer; + timer.start(); + foremanPtr->processTasks(ujTasks); // Queues tasks to be run later. + timer.stop(); + + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " Enqueued UberJob time=" << timer.getElapsed() + << " parseTime=" << timerParse.getElapsed() << " " << uberJobMsg->getIdStr()); + } catch (wbase::TaskException const& texp) { + LOGS(_log, LOG_LVL_ERROR, + "HttpWorkerCzarModule::_buildTasks wbase::TaskException received " << texp.what()); + // Send a message back saying this UberJobFailed + util::MultiError multiErr; + util::Error err(util::Error::WORKER_CZAR_COM, util::Error::NONE, + string("UberJob parse error ") + texp.what()); + multiErr.insert(err); + ujData->responseError(multiErr, -1, false, LOG_LVL_ERROR); + } +} + +json HttpWorkerCzarModule::_queryStatus() { + debug(__func__); + checkApiVersion(__func__, 34); + // At this point, API version, correct worker, and auth have been checked. 
+ json jsRet = _handleQueryStatus(__func__); + return jsRet; +} + +json HttpWorkerCzarModule::_handleQueryStatus(std::string const& func) { + json jsRet; + auto now = CLOCK::now(); + auto const workerConfig = wconfig::WorkerConfig::instance(); + protojson::AuthContext authContext(workerConfig->replicationInstanceId(), + workerConfig->replicationAuthKey()); + + auto const& jsReq = body().objJson; + auto wqsData = protojson::WorkerQueryStatusData::createFromJson(jsReq, authContext, now); + + auto const czInfo = wqsData->getCzInfo(); + LOGS(_log, LOG_LVL_TRACE, " HttpWorkerCzarModule::_handleQueryStatus req=" << jsReq.dump()); + CzarId const czId = czInfo->czId; + wcontrol::WCzarInfoMap::Ptr wCzarMap = foreman()->getWCzarInfoMap(); + wcontrol::WCzarInfo::Ptr wCzarInfo = wCzarMap->getWCzarInfo(czId); + wCzarInfo->czarMsgReceived(CLOCK::now()); + + // For all queryId and czarId items, if the item can't be found, it is simply ignored. Anything that + // is missed will eventually be picked up by other mechanisms, such as results being rejected + // by the czar. This almost never happen, but the system should respond gracefully. + + // If a czar was restarted, cancel and delete the abandoned items. + if (wqsData->isCzarRestart()) { + auto restartCzarId = wqsData->getCzarRestartCzarId(); + auto restartQId = wqsData->getCzarRestartQueryId(); + if (restartCzarId > 0 && restartQId > 0) { + wbase::FileChannelShared::cleanUpResultsOnCzarRestart(wqsData->getCzarRestartCzarId(), + wqsData->getCzarRestartQueryId()); + } + } + + // Take the values from the lists in the message to cancel the + // appropriate queries and tasks as needed. + auto const queriesAndChunks = foreman()->queriesAndChunks(); + vector cancelledList; + vector deleteFilesList; + std::map> deadUberJobsList; + { + // Make a lists of these while the mutex is held, + // and then use the lists to make changes after the mutex is released. 
+ + lock_guard mapLg(wqsData->mapMtx); + // Cancelled queries where we want to keep the files + bool const keepFiles = true; + queriesAndChunks->buildCancelledAndDeletedLists(czId, wqsData->qIdDoneKeepFiles, keepFiles, + cancelledList, deleteFilesList); + // Cancelled queries where the files can be deleted. + queriesAndChunks->buildCancelledAndDeletedLists(czId, wqsData->qIdDoneDeleteFiles, !keepFiles, + cancelledList, deleteFilesList); + deadUberJobsList = wqsData->qIdDeadUberJobs; + } + + // For dead UberJobs, add them to a list of dead uberjobs within UserQueryInfo. + // UserQueryInfo will cancel the tasks in the uberjobs if they exist. + // New UberJob Id's will be checked against the list, and immediately be + // killed if they are on it. (see HttpWorkerCzarModule::_handleQueryJob) + for (auto const& [ujQid, ujIdMap] : deadUberJobsList) { + auto qStats = queriesAndChunks->addQueryId(ujQid, czId); + if (qStats != nullptr) { + auto uqInfo = qStats->getUserQueryInfo(); + if (uqInfo != nullptr) { + if (!uqInfo->getCancelledByCzar()) { + for (auto const& [ujId, tm] : ujIdMap) { + uqInfo->cancelUberJob(ujId); + wbase::FileChannelShared::cleanUpResults(czId, uqInfo->getQueryId(), ujId); + } + } + } + } + } + + // Cancel everything in the cancelled list. + for (auto const& canUqInfo : cancelledList) { + canUqInfo->cancelFromCzar(); + } + + // Delete files that should be deleted + auto const czIdToDelete = wqsData->getCzInfo()->czId; + for (wbase::UserQueryInfo::Ptr uqiPtr : deleteFilesList) { + if (uqiPtr == nullptr) continue; + wbase::FileChannelShared::cleanUpResults(czIdToDelete, uqiPtr->getQueryId()); + } + // Syntax errors in the message would throw invalid_argument, which is handled elsewhere. + + // Remove associated entries from the associated WorkerCzarComIssue object. 
+ auto wccIssue = wCzarInfo->getWorkerCzarComIssue(); + wccIssue->clearFailedTransmitsForQids(wqsData->qIdDoneDeleteFiles); + + // Return a message containing lists of the queries that were cancelled. + jsRet = wqsData->buildResponseJson(foreman()->getWorkerStartupTime()); + wCzarInfo->sendWorkerCzarComIssueIfNeeded(wqsData->getWInfo(), wqsData->getCzInfo()); + return jsRet; +} + +json HttpWorkerCzarModule::_chunkUseCounts() { + debug(__func__); + checkApiVersion(__func__, 34); + // At this point, API version, correct worker, and auth have been checked. + json jsRet = _handleChunkUseCounts(__func__); + return jsRet; +} + +json HttpWorkerCzarModule::_handleChunkUseCounts(std::string const& func) { + auto const queriesAndChunks = foreman()->queriesAndChunks(); + auto const dbChunkCountMap = queriesAndChunks->getDbChunkCountMap(); + auto answer = protojson::ChunkUseCountAnswerMsg::create(dbChunkCountMap); + return answer->toJson(); +} + +} // namespace lsst::qserv::wcomms diff --git a/src/wcomms/HttpWorkerCzarModule.h b/src/wcomms/HttpWorkerCzarModule.h new file mode 100644 index 0000000000..80a2559322 --- /dev/null +++ b/src/wcomms/HttpWorkerCzarModule.h @@ -0,0 +1,122 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_WCOMMS_HTTPWORKERCZARMODULE_H +#define LSST_QSERV_WCOMMS_HTTPWORKERCZARMODULE_H + +// System headers +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "global/intTypes.h" +#include "wcomms/HttpModule.h" + +namespace lsst::qserv::protojson { +class CzarContactInfo; +class UberJobMsg; +} // namespace lsst::qserv::protojson + +namespace lsst::qserv::protojson { +class CzarContactInfo; +class UberJobMsg; +} // namespace lsst::qserv::protojson + +namespace lsst::qserv::qhttp { +class Request; +class Response; +} // namespace lsst::qserv::qhttp + +namespace lsst::qserv::wbase { +class UberJobData; +class UserQueryInfo; +} // namespace lsst::qserv::wbase + +namespace lsst::qserv::wcontrol { +class Foreman; +} // namespace lsst::qserv::wcontrol + +// This header declarations +namespace lsst::qserv::wcomms { + +/// This class handles HTTP messages from the czar to the worker. +class HttpWorkerCzarModule : public wcomms::HttpModule { +public: + /// @note supported values for parameter 'subModuleName' are '/queryjob' (convert an UberJob + /// message into Tasks and a send channel), '/querystatus' and '/chunkusecounts'.
+ /// @throws std::invalid_argument for unknown values of parameter 'subModuleName' + static void process(std::string const& context, std::shared_ptr const& foreman, + std::shared_ptr const& req, + std::shared_ptr const& resp, std::string const& subModuleName, + http::AuthType const authType = http::AuthType::NONE); + + HttpWorkerCzarModule() = delete; + HttpWorkerCzarModule(HttpWorkerCzarModule const&) = delete; + HttpWorkerCzarModule& operator=(HttpWorkerCzarModule const&) = delete; + + ~HttpWorkerCzarModule() final = default; + +protected: + virtual nlohmann::json executeImpl(std::string const& subModuleName) final; + +private: + HttpWorkerCzarModule(std::string const& context, std::shared_ptr const& foreman, + std::shared_ptr const& req, + std::shared_ptr const& resp); + + /// Handle an UberJob message from the czar to run it on this worker by calling _handleQueryJob. + nlohmann::json _queryJob(); + + /// Handle an UberJob message from the czar to run it on this worker; this does the + /// work of deciphering the message, creating UberJobData objects and Task objects. + nlohmann::json _handleQueryJob(std::string const& func); + + static void _buildTasks(UberJobId ujId, QueryId ujQueryId, + std::shared_ptr const& ujCzInfo, int ujRowLimit, + uint64_t maxTableSizeBytes, std::string const& targetWorkerId, + std::shared_ptr const& userQueryInfo, + std::shared_ptr const& uberJobMsg, + std::shared_ptr const& foremanPtr, + std::string const& authKeyStr, std::shared_ptr const& ujData); + + /// Verify some aspects of the query and call _handleQueryStatus. + nlohmann::json _queryStatus(); + + /// Reconstruct the message, absorb the lists into this worker's state, + /// queue the ComIssue message if needed, and send the lists back to + /// the czar. + nlohmann::json _handleQueryStatus(std::string const& func); + + /// Verify some aspects of the query and call _handleChunkUseCounts.
+ nlohmann::json _chunkUseCounts(); + + /// Return a json object containing how many tasks need to use or are using each database+chunkId + /// combination. Only counts > 0 are included. + nlohmann::json _handleChunkUseCounts(std::string const& func); +}; + +} // namespace lsst::qserv::wcomms + +#endif // LSST_QSERV_WCOMMS_HTTPWORKERCZARMODULE_H diff --git a/src/wconfig/CMakeLists.txt b/src/wconfig/CMakeLists.txt index 4fef7302c0..f0749148c2 100644 --- a/src/wconfig/CMakeLists.txt +++ b/src/wconfig/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(wconfig SHARED) -add_dependencies(wconfig proto) target_sources(wconfig PRIVATE WorkerConfig.cc @@ -7,12 +6,9 @@ target_sources(wconfig PRIVATE target_link_libraries(wconfig PUBLIC log - proto ) -install( - TARGETS wconfig -) +install(TARGETS wconfig) add_executable(testSanityCheck testSanityCheck.cc) @@ -22,7 +18,3 @@ target_link_libraries(testSanityCheck ) add_test(NAME testSanityCheck COMMAND testSanityCheck) - -install( - TARGETS wconfig -) diff --git a/src/wconfig/WorkerConfig.h b/src/wconfig/WorkerConfig.h index c7a563567a..a672b6fc12 100644 --- a/src/wconfig/WorkerConfig.h +++ b/src/wconfig/WorkerConfig.h @@ -136,6 +136,9 @@ class WorkerConfig { /// @return slow shared scan priority unsigned int getPrioritySnail() const { return _prioritySnail->getVal(); } + /// @return Prioritize by number of in-flight tasks per scheduler.
+ bool getPrioritizeByInFlight() const { return _prioritizeByInFlight->getVal(); } + /// @return maximum concurrent chunks for fast shared scan unsigned int getMaxActiveChunksFast() const { return _maxActiveChunksFast->getVal(); } @@ -159,8 +162,28 @@ class WorkerConfig { /// @return the name of a folder where query results will be stored std::string const resultsDirname() const { return _resultsDirname->getVal(); } - /// @return the port number of the worker XROOTD service for serving result files - uint16_t resultsXrootdPort() const { return _resultsXrootdPort->getVal(); } + /// @return the "qpool"/"Size" configuration value (presumably the size of util::QdispPool). + int getQPoolSize() const { return _qPoolSize->getVal(); } + + /// The highest priority number, such as 2, which results + /// in queues for priorities 0, 1, 2, and 100; where 0 is the + /// highest priority. + /// @see util::QdispPool + int getQPoolMaxPriority() const { return _qPoolMaxPriority->getVal(); } + + /// The maximum number of running threads at each priority, + /// "30:20:20:10" with _qPoolMaxPriority=2 allows 30 threads + /// at priority 0, 20 threads at priorities 1+2, and 10 threads + /// at priority 100. + /// @see util::QdispPool + std::string getQPoolRunSizes() const { return _qPoolRunSizes->getVal(); } + + /// The minimum number of running threads per priority, + /// "3:3:3:3" with _qPoolMaxPriority=2 means that a thread at priority + /// 0 would not start if it meant that there would not be enough threads + /// left to have running for each of priorities 1, 2, and 100.
+ /// @see util::QdispPool + std::string getQPoolMinRunningSizes() const { return _qPoolMinRunningSizes->getVal(); } /// @return the number of the BOOST ASIO threads for servicing HTGTP requests size_t resultsNumHttpThreads() const { return _resultsNumHttpThreads->getVal(); } @@ -194,6 +217,13 @@ class WorkerConfig { void setHttpPassword(std::string const& password); http::AuthContext httpAuthContext() const; + /// The number of seconds a czar needs to be incommunicado before being considered + /// dead by a worker. + unsigned int getCzarDeadTimeSec() const { return _czarDeadTimeSec->getVal(); } + + /// Return the number of threads HttpSvc use for communicating with the czar. + unsigned int getCzarComNumHttpThreads() const { return _czarComNumHttpThreads->getVal(); } + /// @return the JSON representation of the configuration parameters. /// @note The object has two collections of the parameters: 'input' - for /// parameters that were proided to the construction of the class, and @@ -264,6 +294,9 @@ class WorkerConfig { util::ConfigValTUInt::create(_configValMap, "scheduler", "priority_med", notReq, 3); CVTUIntPtr _priorityFast = util::ConfigValTUInt::create(_configValMap, "scheduler", "priority_fast", notReq, 4); + CVTBoolPtr _prioritizeByInFlight = + util::ConfigValTBool::create(_configValMap, "results", "prioritize_by_inflight", notReq, false); + CVTUIntPtr _maxReserveSlow = util::ConfigValTUInt::create(_configValMap, "scheduler", "reserve_slow", notReq, 2); CVTUIntPtr _maxReserveSnail = @@ -298,8 +331,6 @@ class WorkerConfig { _configValMap, "sqlconnections", "reservedinteractivesqlconn", notReq, 50); CVTStrPtr _resultsDirname = util::ConfigValTStr::create(_configValMap, "results", "dirname", notReq, "/qserv/data/results"); - CVTUIntPtr _resultsXrootdPort = - util::ConfigValTUInt::create(_configValMap, "results", "xrootd_port", notReq, 1094); CVTUIntPtr _resultsNumHttpThreads = util::ConfigValTUInt::create(_configValMap, "results", "num_http_threads", notReq, 
1); CVTBoolPtr _resultsCleanUpOnStart = @@ -320,7 +351,7 @@ class WorkerConfig { CVTUIntPtr _replicationHttpPort = util::ConfigValTUInt::create(_configValMap, "replication", "http_port", required, 0); CVTUIntPtr _replicationNumHttpThreads = - util::ConfigValTUInt::create(_configValMap, "replication", "num_http_threads", notReq, 2); + util::ConfigValTUInt::create(_configValMap, "replication", "num_http_threads", notReq, 20); CVTUIntPtr _mysqlPort = util::ConfigValTUInt::create(_configValMap, "mysql", "port", notReq, 4048); CVTStrPtr _mysqlSocket = util::ConfigValTStr::create(_configValMap, "mysql", "socket", notReq, ""); @@ -336,6 +367,18 @@ class WorkerConfig { CVTStrPtr _httpUser = util::ConfigValTStr::create(_configValMap, "http", "user", notReq, ""); CVTStrPtr _httpPassword = util::ConfigValTStr::create(_configValMap, "http", "password", notReq, "", hidden); + + CVTIntPtr _qPoolSize = util::ConfigValTInt::create(_configValMap, "qpool", "Size", notReq, 50); + CVTIntPtr _qPoolMaxPriority = + util::ConfigValTInt::create(_configValMap, "qpool", "MaxPriority", notReq, 2); + CVTStrPtr _qPoolRunSizes = + util::ConfigValTStr::create(_configValMap, "qpool", "RunSizes", notReq, "50:20:10"); + CVTStrPtr _qPoolMinRunningSizes = + util::ConfigValTStr::create(_configValMap, "qpool", "MinRunningSizes", notReq, "3:3:3"); + CVTUIntPtr _czarDeadTimeSec = + util::ConfigValTUInt::create(_configValMap, "czar", "DeadTimeSec", notReq, 180); + CVTUIntPtr _czarComNumHttpThreads = + util::ConfigValTUInt::create(_configValMap, "czar", "ComNumHttpThreads", notReq, 40); }; } // namespace lsst::qserv::wconfig diff --git a/src/wcontrol/CMakeLists.txt b/src/wcontrol/CMakeLists.txt index 24600c50e3..9832d5c020 100644 --- a/src/wcontrol/CMakeLists.txt +++ b/src/wcontrol/CMakeLists.txt @@ -1,24 +1,17 @@ add_library(wcontrol SHARED) -add_dependencies(wcontrol proto) target_sources(wcontrol PRIVATE Foreman.cc ResourceMonitor.cc SqlConnMgr.cc WorkerStats.cc + WCzarInfoMap.cc ) -install( - TARGETS 
wcontrol -) - -target_include_directories(wcontrol PRIVATE - ${XROOTD_INCLUDE_DIRS} -) +install(TARGETS wcontrol) target_link_libraries(wcontrol PUBLIC log - XrdSsiLib qhttp wdb ) diff --git a/src/wcontrol/Foreman.cc b/src/wcontrol/Foreman.cc index 84b9a44ef4..df6fc3b798 100644 --- a/src/wcontrol/Foreman.cc +++ b/src/wcontrol/Foreman.cc @@ -26,11 +26,9 @@ // System headers #include +#include #include -// Third party headers -#include "boost/filesystem.hpp" - // LSST headers #include "lsst/log/Log.h" @@ -39,17 +37,21 @@ #include "qhttp/Response.h" #include "qhttp/Server.h" #include "qhttp/Status.h" -#include "wbase/WorkerCommand.h" +#include "util/common.h" +#include "util/QdispPool.h" +#include "util/String.h" #include "wconfig/WorkerConfig.h" #include "wcontrol/ResourceMonitor.h" #include "wcontrol/SqlConnMgr.h" +#include "wcontrol/WCzarInfoMap.h" #include "wcontrol/WorkerStats.h" #include "wdb/ChunkResource.h" #include "wdb/SQLBackend.h" #include "wpublish/QueriesAndChunks.h" +#include "wsched/BlendScheduler.h" using namespace std; -namespace fs = boost::filesystem; +namespace fs = std::filesystem; namespace qhttp = lsst::qserv::qhttp; namespace { @@ -63,7 +65,7 @@ qhttp::Status removeResultFile(std::string const& fileName) { string const context = "Foreman::" + string(__func__) + " "; fs::path const filePath(fileName); if (!fs::exists(filePath)) return qhttp::STATUS_NOT_FOUND; - boost::system::error_code ec; + std::error_code ec; fs::remove_all(filePath, ec); if (ec.value() != 0) { LOGS(_log, LOG_LVL_WARN, @@ -78,10 +80,33 @@ qhttp::Status removeResultFile(std::string const& fileName) { namespace lsst::qserv::wcontrol { -Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, - mysql::MySqlConfig const& mySqlConfig, wpublish::QueriesAndChunks::Ptr const& queries, +Foreman::Ptr Foreman::_globalForeman; + +Foreman::Ptr Foreman::create(wsched::BlendScheduler::Ptr const& scheduler, unsigned int poolSize, + unsigned int 
maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, + wpublish::QueriesAndChunks::Ptr const& queries, + shared_ptr const& chunkInventory, + shared_ptr const& sqlConnMgr, int qPoolSize, + int maxPriority, string const& vectRunSizesStr, + string const& vectMinRunningSizesStr) { + // Latch + static atomic globalForemanSet{false}; + if (globalForemanSet.exchange(true) == true) { + throw util::Bug(ERR_LOC, "Foreman::create already an existing global Foreman."); + } + + Ptr fm = Ptr(new Foreman(scheduler, poolSize, maxPoolThreads, mySqlConfig, queries, chunkInventory, + sqlConnMgr, qPoolSize, maxPriority, vectRunSizesStr, vectMinRunningSizesStr)); + _globalForeman = fm; + return _globalForeman; +} + +Foreman::Foreman(wsched::BlendScheduler::Ptr const& scheduler, unsigned int poolSize, + unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, + wpublish::QueriesAndChunks::Ptr const& queries, std::shared_ptr const& chunkInventory, - std::shared_ptr const& sqlConnMgr) + std::shared_ptr const& sqlConnMgr, int qPoolSize, int maxPriority, + std::string const& vectRunSizesStr, std::string const& vectMinRunningSizesStr) : _scheduler(scheduler), _mySqlConfig(mySqlConfig), _queries(queries), @@ -89,7 +114,9 @@ Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigne _sqlConnMgr(sqlConnMgr), _resourceMonitor(make_shared()), _io_service(), - _httpServer(qhttp::Server::create(_io_service, 0 /* grab the first available port */)) { + _httpServer(qhttp::Server::create(_io_service, 0 /* grab the first available port */)), + _wCzarInfoMap(WCzarInfoMap::create()), + _fqdn(util::get_current_host_fqdn_wait()) { // Make the chunk resource mgr // Creating backend makes a connection to the database for making temporary tables. // It will delete temporary tables that it can identify as being created by a worker. 
@@ -109,8 +136,23 @@ Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigne _mark = make_shared(ERR_LOC, "Forman Test Msg"); + vector vectRunSizes = util::String::parseToVectInt(vectRunSizesStr, ":", 1); + vector vectMinRunningSizes = util::String::parseToVectInt(vectMinRunningSizesStr, ":", 0); + LOGS(_log, LOG_LVL_INFO, + "INFO wPool config qPoolSize=" << qPoolSize << " maxPriority=" << maxPriority << " vectRunSizes=" + << vectRunSizesStr << " -> " << util::prettyCharList(vectRunSizes) + << " vectMinRunningSizes=" << vectMinRunningSizesStr << " -> " + << util::prettyCharList(vectMinRunningSizes)); + _wPool = make_shared(qPoolSize, maxPriority, vectRunSizes, vectMinRunningSizes); + // Read-only access to the result files via the HTTP protocol's method "GET" auto const workerConfig = wconfig::WorkerConfig::instance(); + std::error_code ec; + fs::create_directories(workerConfig->resultsDirname(), ec); + if (ec) + LOGS(_log, LOG_LVL_ERROR, + "Failed to create results directory " << workerConfig->resultsDirname() + << ", error: " << ec.message()); _httpServer->addStaticContent("/*", workerConfig->resultsDirname()); _httpServer->addHandler( "DELETE", "/:file", @@ -133,7 +175,7 @@ Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigne Foreman::~Foreman() { LOGS(_log, LOG_LVL_DEBUG, "Foreman::~Foreman()"); - // It will take significant effort to have xrootd shutdown cleanly and this will never get called + // It will take significant effort to have qserv shutdown cleanly and this will never get called // until that happens. 
_pool->shutdownPool(); _httpServer->stop(); @@ -141,17 +183,10 @@ Foreman::~Foreman() { void Foreman::processTasks(vector const& tasks) { std::vector cmds; - for (auto const& task : tasks) { - _queries->addTask(task); - cmds.push_back(task); - } + _queries->addTasks(tasks, cmds); _scheduler->queCmd(cmds); } -void Foreman::processCommand(shared_ptr const& command) { - _workerCommandQueue->queCmd(command); -} - uint16_t Foreman::httpPort() const { return _httpServer->getPort(); } nlohmann::json Foreman::statusToJson(wbase::TaskSelector const& taskSelector) { diff --git a/src/wcontrol/Foreman.h b/src/wcontrol/Foreman.h index 17fd0f14f6..84258aa323 100644 --- a/src/wcontrol/Foreman.h +++ b/src/wcontrol/Foreman.h @@ -39,8 +39,8 @@ #include "mysql/MySqlConfig.h" #include "util/EventThread.h" #include "util/HoldTrack.h" +#include "util/QdispPool.h" #include "wbase/Base.h" -#include "wbase/MsgProcessor.h" #include "wbase/Task.h" // Forward declarations @@ -50,6 +50,7 @@ struct TaskSelector; } // namespace lsst::qserv::wbase namespace lsst::qserv::wcontrol { +class WCzarInfoMap; class ResourceMonitor; class SqlConnMgr; } // namespace lsst::qserv::wcontrol @@ -66,10 +67,15 @@ class QueryRunner; namespace lsst::qserv::wpublish { class ChunkInventory; class QueriesAndChunks; +class QueryStatistics; } // namespace lsst::qserv::wpublish // This header declarations +namespace lsst::qserv::wsched { +class BlendScheduler; +} + namespace lsst::qserv::wcontrol { /// An abstract scheduler interface. Foreman objects use Scheduler instances @@ -95,8 +101,12 @@ class Scheduler : public wbase::TaskScheduler, public util::CommandQueue { /// Foreman is used to maintain a thread pool and schedule Tasks for the thread pool. /// It also manages sub-chunk tables with the ChunkResourceMgr. /// The schedulers may limit the number of threads they will use from the thread pool. 
-class Foreman : public wbase::MsgProcessor { +class Foreman { public: + using Ptr = std::shared_ptr; + + static Ptr getForeman() { return _globalForeman; } + /** * @param scheduler - pointer to the scheduler * @param poolSize - size of the thread pool @@ -105,12 +115,14 @@ class Foreman : public wbase::MsgProcessor { * @param chunkInventory - a collection of the SSI resources published by the worker * @param sqlConnMgr - for limiting the number of MySQL connections used for tasks */ - Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, - mysql::MySqlConfig const& mySqlConfig, std::shared_ptr const& queries, - std::shared_ptr const& chunkInventory, - std::shared_ptr const& sqlConnMgr); + static Ptr create(std::shared_ptr const& scheduler, unsigned int poolSize, + unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, + std::shared_ptr const& queries, + std::shared_ptr const& chunkInventory, + std::shared_ptr const& sqlConnMgr, int qPoolSize, int maxPriority, + std::string const& vectRunSizesStr, std::string const& vectMinRunningSizesStr); - virtual ~Foreman() override; + ~Foreman(); // This class doesn't have the default construction or copy semantics Foreman() = delete; @@ -127,22 +139,40 @@ class Foreman : public wbase::MsgProcessor { uint16_t httpPort() const; /// Process a group of query processing tasks. 
- /// @see MsgProcessor::processTasks() - void processTasks(std::vector> const& tasks) override; + void processTasks(std::vector> const& tasks); /// Implement the corresponding method of the base class - /// @see MsgProcessor::processCommand() - void processCommand(std::shared_ptr const& command) override; + nlohmann::json statusToJson(wbase::TaskSelector const& taskSelector); - /// Implement the corresponding method of the base class - /// @see MsgProcessor::statusToJson() - virtual nlohmann::json statusToJson(wbase::TaskSelector const& taskSelector) override; + uint64_t getWorkerStartupTime() const { return _workerStartupTime; } + + std::shared_ptr getWPool() const { return _wPool; } + + std::shared_ptr getWCzarInfoMap() const { return _wCzarInfoMap; } + + std::shared_ptr getQueriesAndChunks() const { return _queries; } + + std::shared_ptr getScheduler() const { return _scheduler; } + + /// Return the fqdn for this worker. + std::string getFqdn() const { return _fqdn; } private: + Foreman(std::shared_ptr const& scheduler, unsigned int poolSize, + unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, + std::shared_ptr const& queries, + std::shared_ptr const& chunkInventory, + std::shared_ptr const& sqlConnMgr, int qPoolSize, int maxPriority, + std::string const& vectRunSizesStr, std::string const& vectMinRunningSizesStr); + + /// Startup time of worker, sent to czars so they can detect that the worker was + /// was restarted when this value changes. 
+ uint64_t const _workerStartupTime = millisecSinceEpoch(CLOCK::now()); + std::shared_ptr _chunkResourceMgr; util::ThreadPool::Ptr _pool; - Scheduler::Ptr _scheduler; + std::shared_ptr _scheduler; util::CommandQueue::Ptr _workerCommandQueue; ///< dedicated queue for the worker commands util::ThreadPool::Ptr _workerCommandPool; ///< dedicated pool for executing worker commands @@ -158,6 +188,7 @@ class Foreman : public wbase::MsgProcessor { /// A a counter of the XROOTD/SSI resources which are in use at any given moment /// of time by the worker. + /// TODO:DM-53240 is this still tracking anything? Does removing it cause dashboard issues? std::shared_ptr const _resourceMonitor; /// BOOST ASIO services needed to run the HTTP server @@ -165,6 +196,22 @@ class Foreman : public wbase::MsgProcessor { /// The HTTP server for serving/managing result files std::shared_ptr const _httpServer; + + /// Combined priority queue and thread pool for communicating with czars. + /// TODO:Maybe - It would be better to have a pool for each czar as it + /// may be possible for a czar to have communications + /// problems in a way that would wedge the pool. This can + /// probably be done fairly easily by having pools + /// attached to wcontrol::WCzarInfoMap. + std::shared_ptr _wPool; + + /// Map of czar information for all czars that have contacted this worker. + std::shared_ptr const _wCzarInfoMap; + + /// FQDN for this worker. + std::string const _fqdn; + + static Ptr _globalForeman; ///< Pointer to the global instance. 
}; } // namespace lsst::qserv::wcontrol diff --git a/src/wcontrol/SqlConnMgr.cc b/src/wcontrol/SqlConnMgr.cc index 3c2d654969..f5df3ba4fc 100644 --- a/src/wcontrol/SqlConnMgr.cc +++ b/src/wcontrol/SqlConnMgr.cc @@ -52,7 +52,7 @@ SqlConnMgr::ConnType SqlConnMgr::_take(bool scanQuery, shared_ptr const& channelShared, bool firstChannelSqlConn) { ++_totalCount; - LOGS(_log, LOG_LVL_DEBUG, "SqlConnMgr take " << dump()); + LOGS(_log, LOG_LVL_TRACE, "SqlConnMgr take " << dump()); unique_lock uLock(_mtx); SqlConnMgr::ConnType connType = SCAN; @@ -100,7 +100,7 @@ SqlConnMgr::ConnType SqlConnMgr::_take(bool scanQuery, // requestor got its sql connection, increment counts if (channelShared != nullptr) { int newCount = channelShared->incrSqlConnectionCount(); - LOGS(_log, LOG_LVL_DEBUG, "SqlConnMgr::_take newCount=" << newCount); + LOGS(_log, LOG_LVL_TRACE, "SqlConnMgr::_take newCount=" << newCount); } if (connType == SCAN) { @@ -120,7 +120,7 @@ void SqlConnMgr::_release(SqlConnMgr::ConnType connType) { // causing _take() to block when it really should not. // When the FileChannelShared is finished, it is thrown away, effectively // clearing its count. - LOGS(_log, LOG_LVL_DEBUG, "SqlConnMgr release " << dump()); + LOGS(_log, LOG_LVL_TRACE, "SqlConnMgr release " << dump()); if (connType == SCAN) { --_sqlScanConnCount; } else { diff --git a/src/wcontrol/WCzarInfoMap.cc b/src/wcontrol/WCzarInfoMap.cc new file mode 100644 index 0000000000..3dfe415154 --- /dev/null +++ b/src/wcontrol/WCzarInfoMap.cc @@ -0,0 +1,244 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "wcontrol/WCzarInfoMap.h" + +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// qserv headers +#include "http/Client.h" +#include "protojson/PwHideJson.h" +#include "protojson/ResponseMsg.h" +#include "protojson/WorkerCzarComIssue.h" +#include "protojson/WorkerQueryStatusData.h" +#include "util/Bug.h" +#include "util/Histogram.h" +#include "wbase/FileChannelShared.h" +#include "wbase/UberJobData.h" +#include "wconfig/WorkerConfig.h" +#include "wcontrol/Foreman.h" +#include "wpublish/QueriesAndChunks.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; + +using namespace std::chrono_literals; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.wcontrol.WCzarInfoMap"); +} + +namespace lsst::qserv::wcontrol { + +WCzarInfo::WCzarInfo(CzarId czarId_) + : czarId(czarId_), + _workerCzarComIssue(protojson::WorkerCzarComIssue::create( + protojson::AuthContext(wconfig::WorkerConfig::instance()->replicationInstanceId(), + wconfig::WorkerConfig::instance()->replicationAuthKey()))) {} + +void WCzarInfo::czarMsgReceived(TIMEPOINT tm) { + unique_lock uniLock(_wciMtx); + _lastTouch = tm; + if (_alive.exchange(true) == false) { + uniLock.unlock(); + auto msSinceEpoch = std::chrono::duration_cast(tm.time_since_epoch()); + uint64_t msDeadNowAliveTime = msSinceEpoch.count(); + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " was dead and is now alive ms=" << msDeadNowAliveTime); + _workerCzarComIssue->setThoughtCzarWasDeadTime(msDeadNowAliveTime); + } +} + +void 
WCzarInfo::sendWorkerCzarComIssueIfNeeded(protojson::WorkerContactInfo::Ptr const& wInfo_, + protojson::CzarContactInfo::Ptr const& czInfo_) { + unique_lock uniLock(_wciMtx); + if (_workerCzarComIssue->needToSend()) { + // Having more than one of this message being sent at one time + // could cause race issues and it would be a problem if it was + // stuck in a queue, so it gets its own thread. + if (_msgThreadRunning.exchange(true) == true) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " message thread already running"); + return; + } + _workerCzarComIssue->setContactInfo(wInfo_, czInfo_); + auto selfPtr = weak_from_this(); + auto thrdFunc = [selfPtr]() { + auto sPtr = selfPtr.lock(); + if (sPtr == nullptr) { + LOGS(_log, LOG_LVL_WARN, "WCzarInfo::sendWorkerCzarComIssueIfNeeded thrdFunc sPtr was null"); + return; + } + sPtr->_sendMessage(); + }; + + thread thrd(thrdFunc); + thrd.detach(); + } +} + +void WCzarInfo::_sendMessage() { + // Make certain _msgThreadRunning is set to false when this function ends. + class ClearMsgThreadRunning { + public: + ClearMsgThreadRunning(WCzarInfo* wcInfo) : _wcInfo(wcInfo) {} + ~ClearMsgThreadRunning() { _wcInfo->_msgThreadRunning = false; } + WCzarInfo* const _wcInfo; + }; + ClearMsgThreadRunning clearMsgThreadRunning(this); + + auto const method = http::Method::POST; + + unique_lock uniLock(_wciMtx); + // If thoughtCzarWasDead is set now, it needs to be cleared on successful reception from czar. 
+ auto czInfo = _workerCzarComIssue->getCzarInfo(); + if (czInfo == nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " czar info was null"); + return; + } + vector const headers = {"Content-Type: application/json"}; + string const url = + "http://" + czInfo->czHostName + ":" + to_string(czInfo->czPort) + "/workerczarcomissue"; + auto jsReq = _workerCzarComIssue->toJson(); + uniLock.unlock(); // Must unlock before communication + + // Send the request to the czar to be handled by + auto requestStr = jsReq.dump(); + http::Client client(method, url, requestStr, headers); + bool transmitSuccess = false; + + size_t cleanupCount = 0; + vector ujDataObsoleteList; + vector ujIdNotFoundErrorList; + try { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " read start"); + nlohmann::json const response = client.readAsJson(); + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " read end"); + auto respMsg = protojson::WorkerCzarComRespMsg::createFromJson(response); + + // `response` json was created by WorkerCzarComRespMsg::toJson on the czar. + // The `response` from the czar needs to be used to remove the handled entries + // from the `failedTransmits` map and to determine if any result files are obsolete + // or if there were any parse errors. + uniLock.lock(); // re-lock _wciMtx to protect members. + if (respMsg->success) { + transmitSuccess = true; + /// Read the value sent back by the czar. If it is greater than or equal + /// czDeadTime, then set dead time to zero. 
+ auto localDeadTime = _workerCzarComIssue->getThoughtCzarWasDeadTime(); + if (localDeadTime != 0) { + auto respDeadTime = respMsg->thoughtCzarWasDeadTime; + bool cleared = false; + if (respDeadTime >= _workerCzarComIssue->getThoughtCzarWasDeadTime()) { + _workerCzarComIssue->setThoughtCzarWasDeadTime(0); + cleared = true; + } + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " ThoughtCzarWasDeadTime check local=" << localDeadTime + << " resp=" << respDeadTime << " cleared=" << cleared); + } + tie(cleanupCount, ujDataObsoleteList, ujIdNotFoundErrorList) = + _workerCzarComIssue->clearMapEntries(response); + + } else { + ++_czarSentFailCount; + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " Transmit czarSentFailCount=" << _czarSentFailCount + << " msg=" << *respMsg); + // There's no point in re-sending as the czar got the message and didn't like + // it. + // TODO: What to do here? Ignore this until its a problem? Czar failed to parse original + // message. Start counting and consider the czar dead when a threshold is reached? + } + } catch (exception const& ex) { + ++_parseErrorCount; + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " " << protojson::pwHide(jsReq) + << " failed, parseErrorCount=" << _parseErrorCount << " ex:" << ex.what()); + } + + if (!transmitSuccess) { + // If transmit fails, the czar will send another message eventually. + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " failed to send message"); + return; + } + + auto foreman = Foreman::getForeman(); + if (foreman == nullptr) return; + auto queriesAndChunks = foreman->getQueriesAndChunks(); + if (queriesAndChunks == nullptr) return; + + // Set these files as obsolete (at this point they are just deleted, but that may change). 
+ for (auto const& ujIdent : ujDataObsoleteList) { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " marking qId=" << ujIdent.qId << "_ujId=" << ujIdent.ujId << " as obsolete"); + wbase::FileChannelShared::cleanUpResults(ujIdent.czInfo->czId, ujIdent.qId, ujIdent.ujId); + } + // Delete files where there were parse errors. + for (auto const& ujIdent : ujIdNotFoundErrorList) { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " deleting qId=" << ujIdent.qId << "_ujId=" << ujIdent.ujId + << " due to parse error"); + wbase::FileChannelShared::cleanUpResults(ujIdent.czInfo->czId, ujIdent.qId, ujIdent.ujId); + } +} + +bool WCzarInfo::checkAlive(TIMEPOINT tmMark) { + lock_guard lg(_wciMtx); + if (_alive) { + auto timeSinceContact = tmMark - _lastTouch; + std::chrono::seconds deadTime(wconfig::WorkerConfig::instance()->getCzarDeadTimeSec()); + if (timeSinceContact >= deadTime) { + // Contact with the czar has timed out. + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " czar timeout"); + _alive = false; + // Kill all queries from this czar + auto fMan = Foreman::getForeman(); + if (fMan != nullptr) { + auto queriesAndChunks = fMan->getQueriesAndChunks(); + if (queriesAndChunks != nullptr) { + queriesAndChunks->killAllQueriesFromCzar(czarId); + } + } + } + } + return _alive; +} + +WCzarInfo::Ptr WCzarInfoMap::getWCzarInfo(CzarId czId) { + std::lock_guard lg(_wczMapMtx); + auto iter = _wczMap.find(czId); + if (iter == _wczMap.end()) { + LOGS(_log, LOG_LVL_INFO, cName(__func__) << " new czar contacted " << czId); + auto const newCzInfo = WCzarInfo::create(czId); + _wczMap[czId] = newCzInfo; + return newCzInfo; + } + return iter->second; +} + +} // namespace lsst::qserv::wcontrol diff --git a/src/wcontrol/WCzarInfoMap.h b/src/wcontrol/WCzarInfoMap.h new file mode 100644 index 0000000000..71834276ba --- /dev/null +++ b/src/wcontrol/WCzarInfoMap.h @@ -0,0 +1,138 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project 
(http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_WCONTROL_WCZARINFOMAP_H +#define LSST_QSERV_WCONTROL_WCZARINFOMAP_H + +// System headers +#include +#include +#include +#include + +// Third-party headers + +// Qserv headers +#include "global/clock_defs.h" +#include "global/intTypes.h" + +namespace lsst::qserv::protojson { +class CzarContactInfo; +class WorkerContactInfo; +class WorkerCzarComIssue; +} // namespace lsst::qserv::protojson + +namespace lsst::qserv::wbase { +class UJTransmitCmd; +} + +namespace lsst::qserv::wcontrol { + +class Foreman; + +/// This class is used to send the "/workerczarcomissue" from the worker to the +/// czar and then used by the czar to handle the message; the message itself +/// is made with WorkerCzarComIssue. +/// The general concept is that WorkerCzarComIssue exists on both the worker +/// and the czar and messages keep them in sync. +/// This class is assuming the czarId is correct and there are no duplicate czarIds.
+class WCzarInfo : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + + std::string cName(const char* funcN) { + return std::string("WCzarInfo::") + funcN + " czId=" + std::to_string(czarId); + } + + WCzarInfo() = delete; + ~WCzarInfo() = default; + + static Ptr create(CzarId czarId_) { return Ptr(new WCzarInfo(czarId_)); } + + /// If there were communication issues, start a thread to send the WorkerCzarComIssue message. + void sendWorkerCzarComIssueIfNeeded(std::shared_ptr const& wInfo_, + std::shared_ptr const& czInfo_); + + /// Called by the worker after the czar successfully replied to the original + /// message from the worker. + void czarMsgReceived(TIMEPOINT tm); + + bool isAlive() const { return _alive; } + + /// Check if the czar is still considered to be alive, or it timed out. + bool checkAlive(TIMEPOINT tmMark); + + std::shared_ptr getWorkerCzarComIssue() const { + return _workerCzarComIssue; + } + + CzarId const czarId; + +private: + WCzarInfo(CzarId czarId_); + + void _sendMessage(); + + std::atomic _alive{true}; + TIMEPOINT _lastTouch{CLOCK::now()}; + + /// This class tracks communication problems and prepares a message + /// to inform the czar of the problem. + std::shared_ptr _workerCzarComIssue; + mutable std::mutex _wciMtx; ///< protects all private members. + + /// true when running a thread to send a message to the czar + /// with _sendMessage() + std::atomic _msgThreadRunning{false}; + + /// If the system is working properly, these counts should be zero. If + /// they start climbing, it may indicate a problem that should be dealt with. + /// This is used to track how many times the czar has failed to handle a worker message. + std::atomic _czarSentFailCount{0}; + /// This is used to track how many times there's been a parse error. + std::atomic _parseErrorCount{0}; +}; + +/// Each worker talks to multiple czars and needs a WCzarInfo object for each czar, +/// this class keeps track of those objects. 
+class WCzarInfoMap { +public: + using Ptr = std::shared_ptr; + + std::string cName(const char* funcN) { return std::string("WCzarInfoMap::") + funcN; } + + ~WCzarInfoMap() = default; + + static Ptr create() { return Ptr(new WCzarInfoMap()); } + + /// Return the WCzarInfo ptr associated with czId, creating a new one if needed. + WCzarInfo::Ptr getWCzarInfo(CzarId czId); + +private: + WCzarInfoMap() = default; + + std::map _wczMap; + + mutable std::mutex _wczMapMtx; +}; + +} // namespace lsst::qserv::wcontrol + +#endif // LSST_QSERV_WCONTROL_WCZARINFOMAP_H diff --git a/src/wcontrol/WorkerStats.cc b/src/wcontrol/WorkerStats.cc index 27055bd046..18a60b6a63 100644 --- a/src/wcontrol/WorkerStats.cc +++ b/src/wcontrol/WorkerStats.cc @@ -45,10 +45,10 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wcontrol.WorkerStats"); namespace lsst::qserv::wcontrol { WorkerStats::Ptr WorkerStats::_globalWorkerStats; -util::Mutex WorkerStats::_globalMtx; +MUTEX WorkerStats::_globalMtx; void WorkerStats::setup() { - lock_guard lg(_globalMtx); + lock_guard lg(_globalMtx); if (_globalWorkerStats != nullptr) { throw util::Bug(ERR_LOC, "Error WorkerStats::setup called after global pointer set."); } @@ -70,7 +70,7 @@ WorkerStats::WorkerStats() { } WorkerStats::Ptr WorkerStats::get() { - std::lock_guard lg(_globalMtx); + std::lock_guard lg(_globalMtx); if (_globalWorkerStats == nullptr) { throw util::Bug(ERR_LOC, "Error CzarStats::get called before CzarStats::setup."); } diff --git a/src/wcontrol/WorkerStats.h b/src/wcontrol/WorkerStats.h index d61f450330..afcde1ed9f 100644 --- a/src/wcontrol/WorkerStats.h +++ b/src/wcontrol/WorkerStats.h @@ -77,7 +77,7 @@ class WorkerStats : std::enable_shared_from_this { private: WorkerStats(); static Ptr _globalWorkerStats; ///< Pointer to the global instance. 
- static util::Mutex _globalMtx; ///< Protects `_globalWorkerStats` + static MUTEX _globalMtx; ///< Protects `_globalWorkerStats` std::atomic _queueCount{ 0}; ///< Number of buffers on queues (there are many queues, one per ChannelShared) diff --git a/src/wdb/CMakeLists.txt b/src/wdb/CMakeLists.txt index ba92138265..b76ab9cf26 100644 --- a/src/wdb/CMakeLists.txt +++ b/src/wdb/CMakeLists.txt @@ -1,35 +1,34 @@ add_library(wdb SHARED) -add_dependencies(wdb proto) target_sources(wdb PRIVATE ChunkResource.cc QueryRunner.cc - QuerySql.cc SQLBackend.cc ) -target_include_directories(wdb PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - target_link_libraries(wdb PUBLIC log - XrdSsiLib ) -install( - TARGETS wdb -) +install(TARGETS wdb) FUNCTION(wdb_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc) - target_include_directories(${TEST} PRIVATE - ${XROOTD_INCLUDE_DIRS} - ) target_link_libraries(${TEST} PUBLIC crypto - xrdsvc + global + http + mysql + protojson + sql + util + wbase + wconfig + wcontrol + wdb + wpublish + wsched Boost::unit_test_framework Threads::Threads ) @@ -39,8 +38,4 @@ ENDFUNCTION() wdb_tests( testChunkResource - testQueryRunner - testQuerySql ) - -set_tests_properties(testQueryRunner PROPERTIES WILL_FAIL 1) diff --git a/src/wdb/ChunkResource.cc b/src/wdb/ChunkResource.cc index b131552e29..dfefac392b 100644 --- a/src/wdb/ChunkResource.cc +++ b/src/wdb/ChunkResource.cc @@ -48,32 +48,11 @@ #include "util/Bug.h" #include "util/IterableFormatter.h" #include "wbase/Base.h" -#include "wdb/QuerySql.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wdb.ChunkResource"); -template -class ScScriptBuilder { -public: - ScScriptBuilder(lsst::qserv::wdb::QuerySql& qSql_, std::string const& db, std::string const& table, - std::string const& scColumn, int chunkId) - : qSql(qSql_) { - buildT = (boost::format(lsst::qserv::wbase::CREATE_SUBCHUNK_SCRIPT) % db % table % scColumn % - chunkId % "%1%") - .str(); - cleanT = 
(boost::format(lsst::qserv::wbase::CLEANUP_SUBCHUNK_SCRIPT) % db % table % chunkId % "%1%") - .str(); - } - void operator()(T const& subc) { - qSql.buildList.push_back((boost::format(buildT) % subc).str()); - qSql.cleanupList.push_back((boost::format(cleanT) % subc).str()); - } - std::string buildT; - std::string cleanT; - lsst::qserv::wdb::QuerySql& qSql; -}; } // anonymous namespace namespace lsst::qserv::wdb { @@ -105,7 +84,7 @@ std::ostream& operator<<(std::ostream& os, ChunkResource::Info const& i) { ChunkResource::ChunkResource(ChunkResourceMgr* mgr) : _mgr{mgr} {} ChunkResource::ChunkResource(ChunkResourceMgr* mgr, ChunkResource::Info* info) : _mgr{mgr}, _info{info} { - LOGS(_log, LOG_LVL_DEBUG, "ChunkResource info=" << *info); + LOGS(_log, LOG_LVL_TRACE, "ChunkResource info=" << *info); _mgr->acquireUnit(*_info); } ChunkResource::ChunkResource(ChunkResource const& cr) : _mgr{cr._mgr}, _info{new Info(*cr._info)} { @@ -178,8 +157,8 @@ class ChunkEntry { std::lock_guard lock(_mutex); backend->memLockRequireOwnership(); ++_refCount; // Increase usage count - LOGS(_log, LOG_LVL_DEBUG, - "SubChunk acquire refC=" << _refCount << " db=" << db << " tables[" + LOGS(_log, LOG_LVL_TRACE, + "Subchunk acquire refC=" << _refCount << " db=" << db << " tables[" << util::printable(dbTableSet) << "]" << " sc[" << util::printable(sc) << "]"); for (auto const& dbTbl : dbTableSet) { @@ -215,7 +194,7 @@ class ChunkEntry { std::lock_guard lock(_mutex); backend->memLockRequireOwnership(); StringVector::const_iterator ti, te; - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "SubChunk release refC=" << _refCount << " db=" << db << " dbTableSet[" << util::printable(dbTableSet) << "]" << " sc[" << util::printable(sc) << "]"); @@ -296,7 +275,7 @@ ChunkResourceMgr::Ptr ChunkResourceMgr::newMgr(SQLBackend::Ptr const& backend) { ChunkResource ChunkResourceMgr::acquire(std::string const& db, int chunkId, DbTableSet const& tables) { // Make sure that the chunk is ready. 
(NOP right now.) - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "acquire db=" << db << " chunkId=" << chunkId << " tables=" << util::printable(tables)); ChunkResource cr(this, new ChunkResource::Info(db, chunkId, tables)); return cr; @@ -320,7 +299,7 @@ void ChunkResourceMgr::acquireUnit(ChunkResource::Info const& i) { Map& map = _getMap(i.db); // Select db ChunkEntry& ce = _getChunkEntry(map, i.chunkId); // Actually acquire - LOGS(_log, LOG_LVL_DEBUG, "acquireUnit info=" << i); + LOGS(_log, LOG_LVL_TRACE, "acquireUnit info=" << i); ce.acquire(i.db, i.tables, i.subChunkIds, _backend); } diff --git a/src/wdb/ChunkResource.h b/src/wdb/ChunkResource.h index f030c23f37..f60014ae70 100644 --- a/src/wdb/ChunkResource.h +++ b/src/wdb/ChunkResource.h @@ -50,14 +50,9 @@ #include "wdb/SQLBackend.h" // Forward declarations -namespace lsst::qserv { -namespace proto { -class TaskMsg_Fragment; -} -namespace wdb { +namespace lsst::qserv::wdb { class Task; -} -} // namespace lsst::qserv +} // namespace lsst::qserv::wdb namespace lsst::qserv::wdb { diff --git a/src/wdb/QueryRunner.cc b/src/wdb/QueryRunner.cc index 06a2498e8e..8e018e8d4f 100644 --- a/src/wdb/QueryRunner.cc +++ b/src/wdb/QueryRunner.cc @@ -24,8 +24,7 @@ * @file * * @brief QueryRunner instances perform single-shot query execution with the - * result reflected in the db state or returned via a SendChannel. Works with - * new XrdSsi API. + * result reflected in the db state or returned via a SendChannel. * * @author Daniel L. 
Wang, SLAC; John Gates, SLAC */ @@ -38,7 +37,6 @@ #include // Third-party headers -#include #include // Class header @@ -52,8 +50,6 @@ #include "mysql/MySqlConfig.h" #include "mysql/MySqlConnection.h" #include "mysql/SchemaFactory.h" -#include "proto/ProtoHeaderWrap.h" -#include "proto/worker.pb.h" #include "sql/Schema.h" #include "sql/SqlErrorObject.h" #include "util/Bug.h" @@ -65,6 +61,7 @@ #include "util/threadSafe.h" #include "wbase/Base.h" #include "wbase/FileChannelShared.h" +#include "wbase/UberJobData.h" #include "wconfig/WorkerConfig.h" #include "wcontrol/SqlConnMgr.h" #include "wdb/ChunkResource.h" @@ -115,19 +112,19 @@ bool QueryRunner::_initConnection() { if (not _mysqlConn->connect()) { LOGS(_log, LOG_LVL_ERROR, "Unable to connect to MySQL: " << localMySqlConfig); - util::Error error(-1, "Unable to connect to MySQL; " + localMySqlConfig.toString()); - _multiError.push_back(error); + util::Error error(util::Error::WORKER_SQL_CONNECT, util::Error::NONE, + "Unable to connect to MySQL; " + localMySqlConfig.toString()); + _multiError.insert(error); return false; } _task->setMySqlThreadId(_mysqlConn->threadId()); return true; } -bool QueryRunner::runQuery() { - util::InstanceCount ic(to_string(_task->getQueryId()) + "_rq_LDB"); // LockupDB +bool QueryRunner::runQuery(std::string& errMsg) { util::HoldTrack::Mark runQueryMarkA(ERR_LOC, "runQuery " + to_string(_task->getQueryId())); QSERV_LOGCONTEXT_QUERY_JOB(_task->getQueryId(), _task->getJobId()); - LOGS(_log, LOG_LVL_TRACE, __func__ << " tid=" << _task->getIdStr()); + LOGS(_log, LOG_LVL_TRACE, "QueryRunner " << _task->cName(__func__)); // Start tracking the task. auto now = chrono::system_clock::now(); @@ -136,7 +133,7 @@ bool QueryRunner::runQuery() { // Make certain our Task knows that this object is no longer in use when this function exits. 
class Release { public: - Release(wbase::Task::Ptr t, wbase::TaskQueryRunner* tqr, + Release(wbase::Task::Ptr t, QueryRunner* tqr, shared_ptr const& queriesAndChunks) : _t{t}, _tqr{tqr}, _queriesAndChunks(queriesAndChunks) {} ~Release() { @@ -146,40 +143,33 @@ bool QueryRunner::runQuery() { private: wbase::Task::Ptr _t; - wbase::TaskQueryRunner* _tqr; + QueryRunner* _tqr; shared_ptr const _queriesAndChunks; }; Release release(_task, this, _queriesAndChunks); if (_task->checkCancelled()) { - LOGS(_log, LOG_LVL_DEBUG, "runQuery, task was cancelled before it started." << _task->getIdStr()); + LOGS(_log, LOG_LVL_TRACE, "runQuery, task was cancelled before it started." << _task->getIdStr()); + errMsg += "already cancelled"; return false; } - if (_task->checkCancelled()) { - LOGS(_log, LOG_LVL_DEBUG, "runQuery, task was cancelled after locking tables."); - return false; - } - - LOGS(_log, LOG_LVL_INFO, - "Exec in flight for Db=" << _task->getDb() << " sqlConnMgr " << _sqlConnMgr->dump()); // Queries that span multiple tasks should not be high priority for the SqlConMgr as it risks deadlock. bool interactive = _task->getScanInteractive() && !(_task->getSendChannel()->getTaskCount() > 1); wcontrol::SqlConnLock sqlConnLock(*_sqlConnMgr, not interactive, _task->getSendChannel()); + bool connOk = _initConnection(); if (!connOk) { + errMsg += "initConnection failed"; + util::Error err(util::Error::WORKER_SQL_CONNECT, 0, errMsg); + _multiError.insert(err); // Since there's an error, this will be the last transmit from this QueryRunner. - if (!_task->getSendChannel()->buildAndTransmitError(_multiError, _task, _cancelled)) { - LOGS(_log, LOG_LVL_WARN, " Could not report error to czar as sendChannel not accepting msgs."); - } + _task->getSendChannel()->buildAndTransmitError(_multiError, _task, _cancelled); return false; } // Run the query and send the results back. 
- if (!_dispatchChannel()) { - return false; - } - return true; + return _dispatchChannel(errMsg); } MYSQL_RES* QueryRunner::_primeResult(string const& query) { @@ -217,7 +207,7 @@ class ChunkResourceRequest { wbase::Task& _task; }; -bool QueryRunner::_dispatchChannel() { +bool QueryRunner::_dispatchChannel(string& errMsg) { bool erred = false; bool needToFreeRes = false; // set to true once there are results to be freed. // Collect the result in _transmitData. When a reasonable amount of data has been collected, @@ -234,17 +224,19 @@ bool QueryRunner::_dispatchChannel() { // Ideally, hold it until moving on to the next chunk. Try to clean up ChunkResource code. auto taskSched = _task->getTaskScheduler(); - if (!_cancelled && !_task->getSendChannel()->isDead()) { + if (!_cancelled && !_task->checkCancelled()) { string const& query = _task->getQueryString(); util::Timer primeT; primeT.start(); _task->queryExecutionStarted(); + LOGS(_log, LOG_LVL_TRACE, "QueryRunner " << _task->cName(__func__) << " sql start"); MYSQL_RES* res = _primeResult(query); // This runs the SQL query, throws SqlErrorObj on failure. 
+ LOGS(_log, LOG_LVL_TRACE, "QueryRunner " << _task->cName(__func__) << " sql end"); primeT.stop(); needToFreeRes = true; if (taskSched != nullptr) { taskSched->histTimeOfRunningTasks->addEntry(primeT.getElapsed()); - LOGS(_log, LOG_LVL_DEBUG, "QR " << taskSched->histTimeOfRunningTasks->getString("run")); + LOGS(_log, LOG_LVL_TRACE, "QR " << taskSched->histTimeOfRunningTasks->getString("run")); } else { LOGS(_log, LOG_LVL_ERROR, "QR runtaskSched == nullptr"); } @@ -266,9 +258,11 @@ bool QueryRunner::_dispatchChannel() { } } } catch (sql::SqlErrorObject const& e) { - LOGS(_log, LOG_LVL_ERROR, "dispatchChannel " << e.errMsg() << " " << _task->getIdStr()); - util::Error worker_err(e.errNo(), e.errMsg()); - _multiError.push_back(worker_err); + errMsg = e.errMsg() + " " + errMsg; + LOGS(_log, LOG_LVL_ERROR, "dispatchChannel " << errMsg << " " << _task->getIdStr()); + util::Error worker_err(util::Error::WORKER_SQL, e.errNo(), {_task->getChunkId()}, {_task->getJobId()}, + errMsg); + _multiError.insert(worker_err); erred = true; } @@ -285,10 +279,7 @@ bool QueryRunner::_dispatchChannel() { erred = true; // Send results. This needs to happen after the error check. // If any errors were found, send an error back. - if (!_task->getSendChannel()->buildAndTransmitError(_multiError, _task, _cancelled)) { - LOGS(_log, LOG_LVL_WARN, - " Could not report error to czar as sendChannel not accepting msgs." << _task->getIdStr()); - } + _task->getSendChannel()->buildAndTransmitError(_multiError, _task, _cancelled); } return !erred; } @@ -296,16 +287,20 @@ bool QueryRunner::_dispatchChannel() { void QueryRunner::cancel() { // QueryRunner::cancel() should only be called by Task::cancel() // to keep the bookkeeping straight. 
- LOGS(_log, LOG_LVL_WARN, "Trying QueryRunner::cancel() call"); - util::HoldTrack::Mark mark(ERR_LOC, "QR cancel() QID=" + _task->getIdStr()); - _cancelled = true; + LOGS(_log, LOG_LVL_TRACE, "Trying QueryRunner::cancel() call " << _task->getIdStr()); + + bool alreadyCancelled = _cancelled.exchange(true); + if (alreadyCancelled) { + LOGS(_log, LOG_LVL_WARN, "already cancelled" << _task->getIdStr()); + return; + } if (_mysqlConn == nullptr) { - LOGS(_log, LOG_LVL_WARN, "QueryRunner::cancel() no MysqlConn"); + LOGS(_log, LOG_LVL_TRACE, "QueryRunner::cancel() no MysqlConn"); } else { switch (_mysqlConn->cancel()) { case -1: - LOGS(_log, LOG_LVL_WARN, "QueryRunner::cancel() NOP"); + LOGS(_log, LOG_LVL_ERROR, "QueryRunner::cancel() NOP"); break; case 0: LOGS(_log, LOG_LVL_WARN, "QueryRunner::cancel() success"); diff --git a/src/wdb/QueryRunner.h b/src/wdb/QueryRunner.h index 91e83db11a..9b27a3df92 100644 --- a/src/wdb/QueryRunner.h +++ b/src/wdb/QueryRunner.h @@ -27,8 +27,7 @@ * @file * * @brief QueryAction instances perform single-shot query execution with the - * result reflected in the db state or returned via a SendChannel. Works with - * new XrdSsi API. + * result reflected in the db state or returned via a SendChannel. * * @author Daniel L. Wang, SLAC */ @@ -40,7 +39,6 @@ // Qserv headers #include "mysql/MySqlConfig.h" #include "mysql/MySqlConnection.h" -#include "qmeta/types.h" #include "util/MultiError.h" #include "wbase/Task.h" #include "wdb/ChunkResource.h" @@ -55,9 +53,10 @@ class QueriesAndChunks; namespace lsst::qserv::wdb { -/// On the worker, run a query related to a Task, writing the results to a table or supplied SendChannel. +/// On the worker, run a query related to a Task, hold the resources needed to run the query, +/// and write the results to the supplied SendChannel. 
/// -class QueryRunner : public wbase::TaskQueryRunner, public std::enable_shared_from_this { +class QueryRunner : public std::enable_shared_from_this { public: using Ptr = std::shared_ptr; static QueryRunner::Ptr newQueryRunner( @@ -69,15 +68,15 @@ class QueryRunner : public wbase::TaskQueryRunner, public std::enable_shared_fro QueryRunner& operator=(QueryRunner const&) = delete; virtual ~QueryRunner() = default; - bool runQuery() override; + /// errMsg is used to pass information to the end user that may help explain why the query failed. + bool runQuery(std::string& errMsg); /// Cancel the action (in-progress). This should only be called /// by Task::cancel(), so if this needs to be cancelled elsewhere, /// call Task::cancel(). /// This should kill an in progress SQL command. - /// It also tries to unblock `_streamBuf` to keep the thread - /// from being blocked forever. - void cancel() override; + /// Repeated calls to cancel() must be harmless. + void cancel(); protected: QueryRunner(wbase::Task::Ptr const& task, ChunkResourceMgr::Ptr const& chunkResourceMgr, @@ -87,9 +86,10 @@ class QueryRunner : public wbase::TaskQueryRunner, public std::enable_shared_fro private: bool _initConnection(); + void _setDb(); /// Dispatch with output sent through a SendChannel - bool _dispatchChannel(); + bool _dispatchChannel(std::string& errStr); MYSQL_RES* _primeResult(std::string const& query); ///< Obtain a result handle for a query. wbase::Task::Ptr const _task; ///< Actual task diff --git a/src/wdb/QuerySql.cc b/src/wdb/QuerySql.cc deleted file mode 100644 index 7dd1279578..0000000000 --- a/src/wdb/QuerySql.cc +++ /dev/null @@ -1,133 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2012-2015 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -/** - * @file - * - * @brief QuerySql is a bundle of SQL statements that represent an accepted - * query's generated SQL. - * - * FIXME: Unfinished infrastructure for passing subchunk table name to worker. - * - * @author Daniel L. Wang, SLAC - */ - -// Class header -#include "wdb/QuerySql.h" - -// System headers -#include - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "global/constants.h" -#include "global/DbTable.h" -#include "proto/worker.pb.h" -#include "wbase/Base.h" - -namespace { - -LOG_LOGGER _log = LOG_GET("lsst.qserv.wdb.QuerySql"); - -template -class ScScriptBuilder { -public: - ScScriptBuilder(lsst::qserv::wdb::QuerySql& qSql_, std::string const& db, std::string const& table, - std::string const& scColumn, int chunkId) - : qSql(qSql_) { - buildT = (boost::format(lsst::qserv::wbase::CREATE_SUBCHUNK_SCRIPT) % db % table % scColumn % - chunkId % "%1%") - .str(); - cleanT = (boost::format(lsst::qserv::wbase::CLEANUP_SUBCHUNK_SCRIPT) % db % table % chunkId % "%1%") - .str(); - } - void operator()(T const& subc) { - qSql.buildList.push_back((boost::format(buildT) % subc).str()); - qSql.cleanupList.push_back((boost::format(cleanT) % subc).str()); - } - std::string buildT; - std::string cleanT; - lsst::qserv::wdb::QuerySql& qSql; -}; -} // anonymous namespace - -namespace 
lsst::qserv::wdb { - -//////////////////////////////////////////////////////////////////////// -// QuerySql ostream friend -//////////////////////////////////////////////////////////////////////// -std::ostream& operator<<(std::ostream& os, QuerySql const& q) { - os << "QuerySql(bu="; - std::copy(q.buildList.begin(), q.buildList.end(), std::ostream_iterator(os, ",")); - os << "; ex="; - std::copy(q.executeList.begin(), q.executeList.end(), std::ostream_iterator(os, ",")); - os << "; cl="; - std::copy(q.cleanupList.begin(), q.cleanupList.end(), std::ostream_iterator(os, ",")); - os << ")"; - return os; -} - -//////////////////////////////////////////////////////////////////////// -// QuerySql constructor -//////////////////////////////////////////////////////////////////////// -QuerySql::QuerySql(std::string const& db, int chunkId, proto::TaskMsg_Fragment const& f, bool needCreate, - std::string const& defaultResultTable) { - std::string resultTable; - if (f.has_resulttable()) { - resultTable = f.resulttable(); - } else { - resultTable = defaultResultTable; - } - assert(!resultTable.empty()); - - // Create executable statement. 
- // Obsolete when results marshalling is implemented - std::stringstream ss; - for (int i = 0; i < f.query_size(); ++i) { - if (needCreate) { - ss << "CREATE TABLE " + resultTable + " "; - needCreate = false; - } else { - ss << "INSERT INTO " + resultTable + " "; - } - ss << f.query(i); - executeList.push_back(ss.str()); - ss.str(""); - } - - if (f.has_subchunks()) { - proto::TaskMsg_Subchunk const& sc = f.subchunks(); - for (int i = 0; i < sc.dbtbl_size(); ++i) { - DbTable dbTable(sc.dbtbl(i).db(), sc.dbtbl(i).tbl()); - LOGS(_log, LOG_LVL_DEBUG, "Building subchunks for table=" << dbTable << " chunkId=" << chunkId); - ScScriptBuilder scb(*this, dbTable.db, dbTable.table, SUB_CHUNK_COLUMN, chunkId); - for (int i = 0; i < sc.id_size(); ++i) { - scb(sc.id(i)); - } - } - } -} - -} // namespace lsst::qserv::wdb diff --git a/src/wdb/QuerySql.h b/src/wdb/QuerySql.h deleted file mode 100644 index cfc2e48bf9..0000000000 --- a/src/wdb/QuerySql.h +++ /dev/null @@ -1,72 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013-2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ - -#ifndef LSST_QSERV_WDB_QUERYSQL_H -#define LSST_QSERV_WDB_QUERYSQL_H -/** - * @file - * - * @brief QuerySql is a bundle of SQL statements that represent an accepted - * query's generated SQL. - * - * @author Daniel L. Wang, SLAC - */ - -// System headers -#include -#include -#include -#include - -// Forward declarations -namespace lsst::qserv { -namespace proto { -class TaskMsg_Fragment; -} -namespace wdb { -class Task; -} -} // namespace lsst::qserv - -namespace lsst::qserv::wdb { - -class QuerySql { -public: - typedef std::shared_ptr Ptr; - typedef std::deque StringDeque; - typedef lsst::qserv::proto::TaskMsg_Fragment Fragment; - - QuerySql() {} - QuerySql(std::string const& db, int chunkId, proto::TaskMsg_Fragment const& f, bool needCreate, - std::string const& defaultResultTable); - - StringDeque buildList; - StringDeque executeList; // Consider using SqlFragmenter to break this up into fragments. - StringDeque cleanupList; - struct Batch; - friend std::ostream& operator<<(std::ostream& os, QuerySql const& q); -}; - -} // namespace lsst::qserv::wdb - -#endif // LSST_QSERV_WDB_QUERYSQL_H diff --git a/src/wdb/QuerySql_Batch.h b/src/wdb/QuerySql_Batch.h deleted file mode 100644 index bec2783500..0000000000 --- a/src/wdb/QuerySql_Batch.h +++ /dev/null @@ -1,82 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013-2014 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -#ifndef LSST_QSERV_WDB_QUERYSQL_BATCH_H -#define LSST_QSERV_WDB_QUERYSQL_BATCH_H -/** - * @file - * - * @brief QuerySql::Batch is the actual bundling portion of a QuerySql object. - * - * @author Daniel L. Wang, SLAC - */ - -// System headers -#include -#include - -// Local headers -#include "wdb/QuerySql.h" - -namespace lsst::qserv::wdb { - -struct QuerySql::Batch { - // Default to 10 SQL statements at a time. - // Idea: Could add statements according to some cost metric(a - // simple one) or to a certain overall query string length - Batch(std::string const& name_, QuerySql::StringDeque const& sequence_, int batchSize_ = 10) - : name(name_), batchSize(batchSize_), pos(0) { - for (QuerySql::StringDeque::const_iterator i = sequence_.begin(); i != sequence_.end(); ++i) { - std::string::const_iterator last = i->begin() + (i->length() - 1); - if (';' == *last) { // Clip trailing semicolon which - // is added during batching. 
- sequence.push_back(std::string(i->begin(), last)); - } else { - sequence.push_back(*i); - } - } - } - bool isDone() const { return sequence.empty() || (static_cast(pos) >= sequence.size()); } - std::string current() const { - std::ostringstream os; - QuerySql::StringDeque::const_iterator begin; - assert((unsigned)pos < sequence.size()); // caller should have checked isDone() - begin = sequence.begin() + pos; - if (sequence.size() < static_cast(pos + batchSize)) { - std::copy(begin, sequence.end(), std::ostream_iterator(os, ";\n")); - } else { - std::copy(begin, begin + batchSize, std::ostream_iterator(os, ";\n")); - } - return os.str(); - } - void next() { pos += batchSize; } - - std::string name; - QuerySql::StringDeque sequence; - QuerySql::StringDeque::size_type batchSize; - QuerySql::StringDeque::size_type pos; -}; - -} // namespace lsst::qserv::wdb - -#endif // LSST_QSERV_WDB_QUERYSQL_BATCH_H diff --git a/src/wdb/SQLBackend.cc b/src/wdb/SQLBackend.cc index 53c0670132..3f9ea0fdb4 100644 --- a/src/wdb/SQLBackend.cc +++ b/src/wdb/SQLBackend.cc @@ -241,8 +241,8 @@ void SQLBackend::_memLockRelease() { /// Exit the program immediately to reduce minimize possible problems. void SQLBackend::_exitDueToConflict(const std::string& msg) { - // This will likely not be clean exit, but clean exit is impossible - // with xrootd anyway. + // This will likely not be a clean exit. + // TODO:Maybe try for a clean exit by calling WorkerMain::terminate(). _lockConflict = true; LOGS(_log, LOG_LVL_ERROR, msg); exit(EXIT_FAILURE); diff --git a/src/wdb/testQueryRunner.cc b/src/wdb/testQueryRunner.cc deleted file mode 100644 index 319d4252b2..0000000000 --- a/src/wdb/testQueryRunner.cc +++ /dev/null @@ -1,136 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/** - * @brief Simple testing for class QueryRunner - * Requires some setup, and assumes some access to a mysqld - * - * @author Daniel L. Wang, SLAC - */ - -// Qserv headers -#include "mysql/MySqlConfig.h" -#include "proto/worker.pb.h" -#include "wbase/FileChannelShared.h" -#include "wbase/Task.h" -#include "wconfig/WorkerConfig.h" -#include "wcontrol/SqlConnMgr.h" -#include "wdb/ChunkResource.h" -#include "wdb/QueryRunner.h" -#include "wpublish/QueriesAndChunks.h" - -// Boost unit test header -#define BOOST_TEST_MODULE QueryRunner -#include - -using namespace std; - -namespace test = boost::test_tools; -namespace gio = google::protobuf::io; -namespace util = lsst::qserv::util; - -using lsst::qserv::mysql::MySqlConfig; -using lsst::qserv::mysql::MySqlConnection; - -using lsst::qserv::proto::TaskMsg; -using lsst::qserv::proto::TaskMsg_Fragment; -using lsst::qserv::proto::TaskMsg_Subchunk; - -using lsst::qserv::wbase::FileChannelShared; -using lsst::qserv::wbase::SendChannel; -using lsst::qserv::wbase::Task; -using lsst::qserv::wconfig::WorkerConfig; -using lsst::qserv::wcontrol::SqlConnMgr; -using lsst::qserv::wdb::ChunkResource; -using lsst::qserv::wdb::ChunkResourceMgr; -using lsst::qserv::wdb::FakeBackend; -using lsst::qserv::wdb::QueryRunner; -using lsst::qserv::wpublish::QueriesAndChunks; - -struct 
Fixture { - shared_ptr newTaskMsg() { - shared_ptr t = make_shared(); - t->set_chunkid(3240); // hardcoded - t->set_db("LSST"); // hardcoded - auto scanTbl = t->add_scantable(); - scanTbl->set_db("LSST"); - scanTbl->set_table("Object"); - scanTbl->set_lockinmemory(false); - scanTbl->set_scanrating(1); - lsst::qserv::proto::TaskMsg::Fragment* f = t->add_fragment(); - f->add_query("SELECT AVG(yFlux_PS) from LSST.Object_3240"); - return t; - } - - MySqlConfig newMySqlConfig() { - string user = "qsmaster"; - string password = ""; - string socket = "SET ME HERE"; - MySqlConfig mySqlConfig(user, password, socket); - if (not MySqlConnection::checkConnection(mySqlConfig)) { - throw runtime_error("Unable to connect to MySQL database with params: " + mySqlConfig.toString()); - } - return mySqlConfig; - } - shared_ptr queriesAndChunks() { - bool resetForTesting = true; - int maxTasksBooted = 5; - int maxDarkTasks = 25; - return QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(120), maxTasksBooted, - maxDarkTasks, resetForTesting); - } -}; - -BOOST_FIXTURE_TEST_SUITE(Basic, Fixture) - -BOOST_AUTO_TEST_CASE(Simple) { - WorkerConfig::create(); - shared_ptr msg(newTaskMsg()); - shared_ptr sendC(SendChannel::newNopChannel()); - auto sc = FileChannelShared::create(sendC, msg->czarid()); - FakeBackend::Ptr backend = make_shared(); - shared_ptr crm = ChunkResourceMgr::newMgr(backend); - SqlConnMgr::Ptr sqlConnMgr = make_shared(20, 15); - auto const queries = queriesAndChunks(); - auto taskVect = Task::createTasks(msg, sc, crm, newMySqlConfig(), sqlConnMgr, queries); - Task::Ptr task = taskVect[0]; - QueryRunner::Ptr a(QueryRunner::newQueryRunner(task, crm, newMySqlConfig(), sqlConnMgr, queries)); - BOOST_CHECK(a->runQuery()); -} - -BOOST_AUTO_TEST_CASE(Output) { - WorkerConfig::create(); - string out; - shared_ptr msg(newTaskMsg()); - shared_ptr sendC(SendChannel::newStringChannel(out)); - auto sc = FileChannelShared::create(sendC, msg->czarid()); - FakeBackend::Ptr 
backend = make_shared(); - shared_ptr crm = ChunkResourceMgr::newMgr(backend); - SqlConnMgr::Ptr sqlConnMgr = make_shared(20, 15); - auto const queries = queriesAndChunks(); - auto taskVect = Task::createTasks(msg, sc, crm, newMySqlConfig(), sqlConnMgr, queries); - Task::Ptr task = taskVect[0]; - QueryRunner::Ptr a(QueryRunner::newQueryRunner(task, crm, newMySqlConfig(), sqlConnMgr, queries)); - BOOST_CHECK(a->runQuery()); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/wdb/testQuerySql.cc b/src/wdb/testQuerySql.cc deleted file mode 100644 index 5d7cd46071..0000000000 --- a/src/wdb/testQuerySql.cc +++ /dev/null @@ -1,99 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013-2015 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/** - * @brief Simple testing for class QuerySql - * - * @author Daniel L. 
Wang, SLAC - */ - -// Third-party headers - -// Qserv headers -#include "proto/worker.pb.h" -#include "wdb/QuerySql.h" -#include "wdb/QuerySql_Batch.h" - -// Boost unit test header -#define BOOST_TEST_MODULE QuerySql_1 -#include - -namespace test = boost::test_tools; - -using lsst::qserv::proto::TaskMsg_Fragment; -using lsst::qserv::proto::TaskMsg_Subchunk; -using lsst::qserv::wdb::QuerySql; - -struct Fixture { - Fixture() { - defaultDb = "Winter"; - defaultResult = "myResult"; - } - ~Fixture() {} - - TaskMsg_Fragment makeFragment() { - TaskMsg_Fragment f; - // "Real" subchunk query text should include - // pre-substituted subchunk query text. - f.add_query("SELECT o1.*, o2.* FROM Object_1001 o1, Object_1001 o2;"); - f.set_resulttable("fragResult"); - TaskMsg_Subchunk sc; - sc.set_database("obsolete"); - lsst::qserv::proto::TaskMsg_Subchunk_DbTbl* dbTbl = sc.add_dbtbl(); - dbTbl->set_db(defaultDb); - dbTbl->set_tbl("Object"); - sc.add_id(1111); - sc.add_id(1222); - f.mutable_subchunks()->CopyFrom(sc); - return f; - } - - void printQsql(QuerySql const& q) { std::cout << "qsql=" << q << std::endl; } - std::string defaultDb; - std::string defaultResult; -}; - -BOOST_FIXTURE_TEST_SUITE(QuerySqlSuite, Fixture) - -BOOST_AUTO_TEST_CASE(Basic) { - std::shared_ptr qSql; - TaskMsg_Fragment frag = makeFragment(); - qSql = std::make_shared(defaultDb, 1001, frag, true, defaultResult); - BOOST_CHECK(qSql.get()); - printQsql(*qSql); -} - -BOOST_AUTO_TEST_CASE(QueryBatch) { - std::shared_ptr qSql; - TaskMsg_Fragment frag = makeFragment(); - qSql = std::make_shared(defaultDb, 1001, frag, true, defaultResult); - BOOST_CHECK(qSql.get()); - - QuerySql::Batch build("QueryBuildSub", qSql->buildList); - QuerySql::Batch& batch = build; - while (!batch.isDone()) { - std::string piece = batch.current(); - batch.next(); - } -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/wmain/CMakeLists.txt b/src/wmain/CMakeLists.txt new file mode 100644 index 0000000000..b54266fb16 --- /dev/null +++ 
b/src/wmain/CMakeLists.txt @@ -0,0 +1,42 @@ +add_library(workerm SHARED) + +target_sources(workerm PRIVATE + WorkerMain.cc +) + +target_link_libraries(workerm PUBLIC + boost_filesystem + boost_system + global + http + log + mysql + protojson + sql + wbase + wcomms + wconfig + wcontrol + wdb + wpublish + wsched + boost_program_options +) + +install(TARGETS workerm) + +function(WORKERM_UTILS) + foreach(UTIL IN ITEMS ${ARGV}) + add_executable(${UTIL}) + target_sources(${UTIL} PRIVATE ${UTIL}.cc) + target_link_libraries(${UTIL} PRIVATE + global + workerm + ) + install(TARGETS ${UTIL}) + endforeach() +endfunction() + +workerm_utils( + qserv-worker-http +) diff --git a/src/xrdsvc/SsiService.cc b/src/wmain/WorkerMain.cc similarity index 76% rename from src/xrdsvc/SsiService.cc rename to src/wmain/WorkerMain.cc index b13a32402c..be31601b6d 100644 --- a/src/xrdsvc/SsiService.cc +++ b/src/wmain/WorkerMain.cc @@ -22,7 +22,7 @@ */ // Class header -#include "xrdsvc/SsiService.h" +#include "wmain/WorkerMain.h" // System headers #include @@ -39,7 +39,6 @@ // Third-party headers #include -#include "XrdSsi/XrdSsiLogger.hh" // LSST headers #include "lsst/log/Log.h" @@ -55,45 +54,41 @@ #include "sql/SqlConnectionFactory.h" #include "util/common.h" #include "util/FileMonitor.h" -#include "util/HoldTrack.h" #include "wbase/Base.h" #include "wbase/FileChannelShared.h" #include "wconfig/WorkerConfig.h" #include "wconfig/WorkerConfigError.h" #include "wcontrol/Foreman.h" #include "wcontrol/SqlConnMgr.h" +#include "wcomms/HttpSvc.h" #include "wpublish/ChunkInventory.h" #include "wsched/BlendScheduler.h" #include "wsched/FifoScheduler.h" #include "wsched/GroupScheduler.h" #include "wsched/ScanScheduler.h" -#include "xrdsvc/HttpSvc.h" -#include "xrdsvc/SsiRequest.h" -#include "xrdsvc/XrdName.h" +#include "wcomms/HttpSvc.h" using namespace lsst::qserv; using namespace nlohmann; using namespace std; using namespace std::literals; -class XrdPosixCallBack; // Forward. 
- namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.SsiService"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.wmain.WorkerMain"); // add LWP to MDC in log messages void initMDC() { LOG_MDC("LWP", to_string(lsst::log::lwpID())); } int dummyInitMDC = LOG_MDC_INIT(initMDC); -std::shared_ptr makeChunkInventory(mysql::MySqlConfig const& mySqlConfig) { - xrdsvc::XrdName x; +std::shared_ptr makeChunkInventory(string const& workerName, + mysql::MySqlConfig const& mySqlConfig) { if (!mySqlConfig.dbName.empty()) { LOGS(_log, LOG_LVL_FATAL, "dbName must be empty to prevent accidental context"); throw runtime_error("dbName must be empty to prevent accidental context"); } auto conn = sql::SqlConnectionFactory::make(mySqlConfig); assert(conn); - auto inventory = make_shared(x.getName(), conn); + auto inventory = make_shared(workerName, conn); ostringstream os; os << "Paths exported: "; inventory->dbgPrint(os); @@ -101,64 +96,33 @@ std::shared_ptr makeChunkInventory(mysql::MySqlConfig return inventory; } -/** - * This function will keep periodically updating worker's info in the Replication - * System's Registry. - * @param id The unique identifier of a worker to be registered. - * @param dataPort The port number of the HTTP server of the worker for handling - * result file retrieval/deletion requests. - * @note The thread will terminate the process if the registraton request to the Registry - * was explicitly denied by the service. This means the application may be misconfigured. - * Transient communication errors when attempting to connect or send requests to - * the Registry will be posted into the log stream and ignored. 
- */ -void registryUpdateLoop(string const& id, uint16_t const dataPort) { - auto const workerConfig = wconfig::WorkerConfig::instance(); - auto const method = http::Method::POST; - string const url = "http://" + workerConfig->replicationRegistryHost() + ":" + - to_string(workerConfig->replicationRegistryPort()) + "/qserv-worker"; - vector const headers = {"Content-Type: application/json"}; - string const hostFqdn = util::get_current_host_fqdn_wait(600); - json const request = json::object({{"version", http::MetaModule::version}, - {"instance_id", workerConfig->replicationInstanceId()}, - {"auth_key", workerConfig->replicationAuthKey()}, - {"worker", - {{"name", id}, - {"management-port", workerConfig->replicationHttpPort()}, - {"data-port", dataPort}, - {"host-name", hostFqdn}}}}); - string const requestContext = - "SsiService: '" + http::method2string(method) + "' request to '" + url + "'"; - http::Client client(method, url, request.dump(), headers); - while (true) { - try { - json const response = client.readAsJson(); - if (0 == response.at("success").get()) { - string const error = response.at("error").get(); - LOGS(_log, LOG_LVL_WARN, requestContext + " was denied, error: '" + error + "'."); - } - } catch (exception const& ex) { - LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); - } - this_thread::sleep_for(chrono::seconds(max(1U, workerConfig->replicationRegistryHearbeatIvalSec()))); - } -} - } // namespace -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wmain { -SsiService::SsiService(XrdSsiLogger* log) { - LOGS(_log, LOG_LVL_DEBUG, "SsiService starting..."); +std::weak_ptr WorkerMain::_globalWorkerMain; +std::atomic WorkerMain::_setup{false}; - util::HoldTrack::setup(10min); +WorkerMain::Ptr WorkerMain::setup() { + if (_setup.exchange(true)) { + throw util::Bug(ERR_LOC, "WorkerMain already setup when setup called again"); + } + auto ptr = Ptr(new WorkerMain()); + _globalWorkerMain = ptr; + return ptr; +} - auto const 
mySqlConfig = wconfig::WorkerConfig::instance()->getMySqlConfig(); - if (not mysql::MySqlConnection::checkConnection(mySqlConfig)) { - LOGS(_log, LOG_LVL_FATAL, "Unable to connect to MySQL using configuration:" << mySqlConfig); - throw wconfig::WorkerConfigError("Unable to connect to MySQL"); +std::shared_ptr WorkerMain::get() { + auto ptr = _globalWorkerMain.lock(); + if (ptr == nullptr) { + throw std::runtime_error("_globalWorkerMain is null"); } + return ptr; +} + +WorkerMain::WorkerMain() { auto const workerConfig = wconfig::WorkerConfig::instance(); + auto const mySqlConfig = workerConfig->getMySqlConfig(); // Set thread pool size. unsigned int poolSize = ranges::max({wsched::BlendScheduler::getMinPoolSize(), @@ -173,11 +137,11 @@ SsiService::SsiService(XrdSsiLogger* log) { workerConfig->getMaxGroupSize(), wsched::SchedulerBase::getMaxPriority()); - int const fastest = lsst::qserv::proto::ScanInfo::Rating::FASTEST; - int const fast = lsst::qserv::proto::ScanInfo::Rating::FAST; - int const medium = lsst::qserv::proto::ScanInfo::Rating::MEDIUM; - int const slow = lsst::qserv::proto::ScanInfo::Rating::SLOW; - int const slowest = lsst::qserv::proto::ScanInfo::Rating::SLOWEST; + int const fastest = lsst::qserv::protojson::ScanInfo::Rating::FASTEST; + int const fast = lsst::qserv::protojson::ScanInfo::Rating::FAST; + int const medium = lsst::qserv::protojson::ScanInfo::Rating::MEDIUM; + int const slow = lsst::qserv::protojson::ScanInfo::Rating::SLOW; + int const slowest = lsst::qserv::protojson::ScanInfo::Rating::SLOWEST; double fastScanMaxMinutes = (double)workerConfig->getScanMaxMinutesFast(); double medScanMaxMinutes = (double)workerConfig->getScanMaxMinutesMed(); double slowScanMaxMinutes = (double)workerConfig->getScanMaxMinutesSlow(); @@ -207,7 +171,7 @@ SsiService::SsiService(XrdSsiLogger* log) { false); wsched::BlendScheduler::Ptr blendSched = make_shared( "BlendSched", queries, maxThread, group, snail, scanSchedulers); - 
blendSched->setPrioritizeByInFlight(false); // TODO: set in configuration file. + blendSched->setPrioritizeByInFlight(workerConfig->getPrioritizeByInFlight()); queries->setBlendScheduler(blendSched); unsigned int requiredTasksCompleted = workerConfig->getRequiredTasksCompleted(); @@ -219,8 +183,14 @@ SsiService::SsiService(XrdSsiLogger* log) { LOGS(_log, LOG_LVL_WARN, "config sqlConnMgr" << *sqlConnMgr); LOGS(_log, LOG_LVL_WARN, "maxPoolThreads=" << maxPoolThreads); - _foreman = make_shared(blendSched, poolSize, maxPoolThreads, mySqlConfig, queries, - ::makeChunkInventory(mySqlConfig), sqlConnMgr); + int qPoolSize = workerConfig->getQPoolSize(); + int maxPriority = workerConfig->getQPoolMaxPriority(); + string vectRunSizesStr = workerConfig->getQPoolRunSizes(); + string vectMinRunningSizesStr = workerConfig->getQPoolMinRunningSizes(); + + _foreman = wcontrol::Foreman::create(blendSched, poolSize, maxPoolThreads, mySqlConfig, queries, + ::makeChunkInventory(_name, mySqlConfig), sqlConnMgr, qPoolSize, + maxPriority, vectRunSizesStr, vectMinRunningSizesStr); // Watch to see if the log configuration is changed. // If LSST_LOG_CONFIG is not defined, there's no good way to know what log @@ -244,30 +214,77 @@ SsiService::SsiService(XrdSsiLogger* log) { // Start the control server for processing worker management requests sent // by the Replication System. Update the port number in the configuration // in case if the server is run on the dynamically allocated port. - _controlHttpSvc = HttpSvc::create(_foreman, workerConfig->replicationHttpPort(), - workerConfig->replicationNumHttpThreads()); + _controlHttpSvc = wcomms::HttpSvc::create(_foreman, workerConfig->replicationHttpPort(), + workerConfig->getCzarComNumHttpThreads()); auto const port = _controlHttpSvc->start(); workerConfig->setReplicationHttpPort(port); // Begin periodically updating worker's status in the Replication System's registry // in the detached thread. 
This will continue before the application gets terminated. - thread registryUpdateThread(::registryUpdateLoop, _foreman->chunkInventory()->id(), _foreman->httpPort()); - registryUpdateThread.detach(); + thread registryUpdateThread(&WorkerMain::_registryUpdateLoop, this); + _registryUpdateThread = move(registryUpdateThread); +} + +void WorkerMain::waitForTerminate() { + unique_lock uniq(_terminateMtx); + _terminateCv.wait(uniq, [this]() -> bool { return _terminate; }); } -SsiService::~SsiService() { - LOGS(_log, LOG_LVL_DEBUG, "SsiService dying."); +void WorkerMain::terminate() { + { + lock_guard lck(_terminateMtx); + if (_terminate.exchange(true)) return; + } + _terminateCv.notify_all(); _controlHttpSvc->stop(); } -void SsiService::ProcessRequest(XrdSsiRequest& reqRef, XrdSsiResource& resRef) { - LOGS(_log, LOG_LVL_DEBUG, "Got request call where rName is: " << resRef.rName); - auto request = SsiRequest::newSsiRequest(resRef.rName, _foreman); +WorkerMain::~WorkerMain() { + LOGS(_log, LOG_LVL_INFO, "WorkerMain shutdown."); + terminate(); + _registryUpdateThread.join(); +} - // Continue execution in the session object as SSI gave us a new thread. - // Object deletes itself when finished is called. - // - request->execute(reqRef); +/** + * This function will keep periodically updating worker's info in the Replication + * System's Registry. + * @param id The unique identifier of a worker to be registered. + * @note The thread will terminate the process if the registraton request to the Registry + * was explicitly denied by the service. This means the application may be misconfigured. + * Transient communication errors when attempting to connect or send requests to + * the Registry will be posted into the log stream and ignored. 
+ */ +void WorkerMain::_registryUpdateLoop() { + string const id = _foreman->chunkInventory()->id(); + auto const workerConfig = wconfig::WorkerConfig::instance(); + auto const method = http::Method::POST; + string const url = "http://" + workerConfig->replicationRegistryHost() + ":" + + to_string(workerConfig->replicationRegistryPort()) + "/qserv-worker"; + vector const headers = {"Content-Type: application/json"}; + string const hostFqdn = util::get_current_host_fqdn_wait(600); + json const request = json::object({{"version", http::MetaModule::version}, + {"instance_id", workerConfig->replicationInstanceId()}, + {"auth_key", workerConfig->replicationAuthKey()}, + {"worker", + {{"name", id}, + {"management-port", workerConfig->replicationHttpPort()}, + {"data-port", _foreman->httpPort()}, + {"host-name", hostFqdn}}}}); + string const requestContext = + "WorkerMain: '" + http::method2string(method) + "' request to '" + url + "'"; + http::Client client(method, url, request.dump(), headers); + while (!_terminate) { + try { + json const response = client.readAsJson(); + if (0 == response.at("success").get()) { + string const error = response.at("error").get(); + LOGS(_log, LOG_LVL_WARN, requestContext + " was denied, error: '" + error + "'."); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); + } + this_thread::sleep_for(chrono::seconds(max(1U, workerConfig->replicationRegistryHearbeatIvalSec()))); + } } -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wmain diff --git a/src/xrdsvc/SsiService.h b/src/wmain/WorkerMain.h similarity index 50% rename from src/xrdsvc/SsiService.h rename to src/wmain/WorkerMain.h index f3ef7ab104..aca01ce893 100644 --- a/src/xrdsvc/SsiService.h +++ b/src/wmain/WorkerMain.h @@ -21,18 +21,16 @@ * see . 
*/ -#ifndef LSST_QSERV_XRDSVC_SSISERVICE_H -#define LSST_QSERV_XRDSVC_SSISERVICE_H +#ifndef LSST_QSERV_WMAIN_WORKERMAIN_H +#define LSST_QSERV_WMAIN_WORKERMAIN_H // System headers +#include +#include #include +#include // Third-party headers -#include "XrdSsi/XrdSsiResource.hh" -#include "XrdSsi/XrdSsiService.hh" - -// Forward declarations -class XrdSsiLogger; namespace lsst::qserv::util { class FileMonitor; @@ -42,27 +40,45 @@ namespace lsst::qserv::wcontrol { class Foreman; } // namespace lsst::qserv::wcontrol -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wcomms { class HttpSvc; -} // namespace lsst::qserv::xrdsvc +} // namespace lsst::qserv::wcomms -namespace lsst::qserv::xrdsvc { +namespace lsst::qserv::wmain { -/// SsiService is an XrdSsiService implementation that implements a Qserv query -/// worker services -class SsiService : public XrdSsiService { +class WorkerMain { public: - /** Build a SsiService object - * @param log xrdssi logger - * @note take ownership of logger for now - */ - SsiService(XrdSsiLogger* log); - virtual ~SsiService(); + using Ptr = std::shared_ptr; + + /// Returns a pointer to the global instance. + /// @throw std::runtime_error if global pointer is null. + static std::shared_ptr get(); + static Ptr setup(); + + ~WorkerMain(); - /// Called by SSI framework to handle new requests - void ProcessRequest(XrdSsiRequest& reqRef, XrdSsiResource& resRef) override; + std::string getName() const { return _name; } + + /// End WorkerMain, calling this multiple times is harmless. + void terminate(); + void waitForTerminate(); private: + WorkerMain(); + + void _registryUpdateLoop(); + std::thread _registryUpdateThread; + + /// Weak pointer to allow global access without complicating lifetime issues. + static std::weak_ptr _globalWorkerMain; + + /// There should only be one WorkerMain, this prevents more than + /// one from being created. + static std::atomic _setup; + + /// Worker name, used in some database lookups. 
+ std::string _name{"worker"}; + // The Foreman contains essential structures for adding and running tasks. std::shared_ptr _foreman; @@ -70,9 +86,13 @@ class SsiService : public XrdSsiService { std::shared_ptr _logFileMonitor; /// The HTTP server processing worker management requests. - std::shared_ptr _controlHttpSvc; -}; + std::shared_ptr _controlHttpSvc; -} // namespace lsst::qserv::xrdsvc + /// Set to true when the program should terminate. + std::atomic _terminate{false}; + std::mutex _terminateMtx; + std::condition_variable _terminateCv; +}; -#endif // LSST_QSERV_XRDSVC_SSISERVICE_H +} // namespace lsst::qserv::wmain +#endif // LSST_QSERV_WMAIN_WORKERMAIN_H diff --git a/src/wmain/qserv-worker-http.cc b/src/wmain/qserv-worker-http.cc new file mode 100644 index 0000000000..1e4b2ff770 --- /dev/null +++ b/src/wmain/qserv-worker-http.cc @@ -0,0 +1,77 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +/** + * The CPP-HTTPLIB-based frontend for Czar. 
+ */ + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "boost/program_options.hpp" + +// Qserv headers +#include "wmain/WorkerMain.cc" + +using namespace std; +namespace po = boost::program_options; +namespace qserv = lsst::qserv; + +namespace { +char const* const context = "[WORKER]"; +} // namespace + +int main(int argc, char* argv[]) { + po::options_description desc("", 120); + desc.add_options()("help,h", "Print this help message and exit."); + desc.add_options()("config,c", po::value()->default_value("/config-etc/qserv-worker.cfg"), + "The configuration file."); + + po::variables_map vm; + po::store(po::parse_command_line(argc, const_cast(argv), desc), vm); + po::notify(vm); + + string const configFilePath = vm["config"].as(); + + try { + cout << ::context << " Starting worker\n" + << " Configuration file: " << configFilePath << "\n" + << endl; + + auto const workerConfig = wconfig::WorkerConfig::create(configFilePath); + + // Lifetime of WorkerMain is controlled by wwMn. 
+ auto wwMn = wmain::WorkerMain::setup(); + + wwMn->waitForTerminate(); + cout << ::context << " stopping worker" << endl; + } catch (exception const& ex) { + cerr << ::context << " The application failed, exception: " << ex.what() << endl; + return 1; + } + + return 0; +} diff --git a/src/wpublish/CMakeLists.txt b/src/wpublish/CMakeLists.txt index 47ac6a8512..069b87e1c5 100644 --- a/src/wpublish/CMakeLists.txt +++ b/src/wpublish/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(wpublish SHARED) -add_dependencies(wpublish proto) target_sources(wpublish PRIVATE ChunkInventory.cc @@ -7,32 +6,30 @@ target_sources(wpublish PRIVATE QueryStatistics.cc ) -target_include_directories(wpublish PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - -install( - TARGETS wpublish -) - target_link_libraries(wpublish PUBLIC log - proto protobuf - XrdSsiLib - XrdCl ) +install(TARGETS wpublish) + add_executable(testChunkInventory testChunkInventory.cc) target_link_libraries(testChunkInventory PUBLIC - xrdsvc + global + http + mysql + protojson + sql + util + wbase + wconfig + wcontrol + wdb + wpublish + wsched Boost::unit_test_framework Threads::Threads ) add_test(NAME testChunkInventory COMMAND testChunkInventory) - -install( - TARGETS wpublish -) diff --git a/src/wpublish/ChunkInventory.cc b/src/wpublish/ChunkInventory.cc index eb112303ab..085b114289 100644 --- a/src/wpublish/ChunkInventory.cc +++ b/src/wpublish/ChunkInventory.cc @@ -60,8 +60,6 @@ void fetchDbs(string const& instanceName, SqlConnection& sc, C& dbs) { shared_ptr resultP; // TODO we probably want a more elegant backoff mechanism than this. - // However, normally xrootd will fail & exit here if it can't connect so - // this is maybe just a little bit better than that. 
while (true) { LOGS(_log, LOG_LVL_DEBUG, "Launching query: " << query); resultP = sc.getQueryIter(query); @@ -133,22 +131,6 @@ void fetchId(string const& instanceName, SqlConnection& sc, string& id) { LOGS(_log, LOG_LVL_WARN, "ChunkInventory couldn't find any a unique identifier of the worker"); } -class Validator : public lsst::qserv::ResourceUnit::Checker { -public: - Validator(lsst::qserv::wpublish::ChunkInventory& c) : chunkInventory(c) {} - virtual bool operator()(lsst::qserv::ResourceUnit const& ru) { - switch (ru.unitType()) { - case lsst::qserv::ResourceUnit::DBCHUNK: - return chunkInventory.has(ru.db(), ru.chunk()); - case lsst::qserv::ResourceUnit::QUERY: - return true; - default: - return false; - } - } - lsst::qserv::wpublish::ChunkInventory& chunkInventory; -}; - } // anonymous namespace namespace lsst::qserv::wpublish { @@ -284,10 +266,6 @@ bool ChunkInventory::has(string const& db, int chunk) const { return true; } -shared_ptr ChunkInventory::newValidator() { - return shared_ptr(new Validator(*this)); -} - void ChunkInventory::dbgPrint(ostream& os) const { lock_guard lock(_mtx); diff --git a/src/wpublish/QueriesAndChunks.cc b/src/wpublish/QueriesAndChunks.cc index 6381d08260..0f539ae6dc 100644 --- a/src/wpublish/QueriesAndChunks.cc +++ b/src/wpublish/QueriesAndChunks.cc @@ -93,11 +93,21 @@ QueriesAndChunks::QueriesAndChunks(chrono::seconds deadAfter, chrono::seconds ex } auto rExamine = [this]() { + int examineAfterSeconds = _examineAfter.count(); + int seconds = 0; while (_loopExamine) { - this_thread::sleep_for(_examineAfter); - if (_loopExamine) examineAll(); + // Check frequently so unit tests finish in less time, + // as they aren't waiting for a 5 minute sleep to finish. 
+ this_thread::sleep_for(chrono::seconds(2)); + seconds++; + if (_loopExamine && seconds > examineAfterSeconds) { + examineAll(); + seconds = 0; + examineAfterSeconds = _examineAfter.count(); + } } }; + thread te(rExamine); _examineThread = move(te); } @@ -119,21 +129,40 @@ void QueriesAndChunks::setBlendScheduler(shared_ptr cons void QueriesAndChunks::setRequiredTasksCompleted(unsigned int value) { _requiredTasksCompleted = value; } -/// Add statistics for the Task, creating a QueryStatistics object if needed. -void QueriesAndChunks::addTask(wbase::Task::Ptr const& task) { - auto qid = task->getQueryId(); - unique_lock guardStats(_queryStatsMapMtx); - auto itr = _queryStatsMap.find(qid); +QueryStatistics::Ptr QueriesAndChunks::_addQueryId(QueryId qId, CzarId czarId) { + auto itr = _queryStatsMap.find(qId); QueryStatistics::Ptr stats; if (_queryStatsMap.end() == itr) { - stats = QueryStatistics::create(qid); - _queryStatsMap[qid] = stats; + stats = QueryStatistics::create(qId, czarId); + _queryStatsMap[qId] = stats; } else { stats = itr->second; } - guardStats.unlock(); + return stats; +} + +QueryStatistics::Ptr QueriesAndChunks::addQueryId(QueryId qId, CzarId czarId) { + unique_lock guardStats(_queryStatsMapMtx); + return _addQueryId(qId, czarId); +} + +/// Add statistics for the Task, creating a QueryStatistics object if needed. +void QueriesAndChunks::addTask(wbase::Task::Ptr const& task) { + auto qid = task->getQueryId(); + auto czId = task->getCzarId(); + auto stats = addQueryId(qid, czId); stats->addTask(task); - task->setQueryStatistics(stats); +} +void QueriesAndChunks::addTasks(vector const& tasks, + std::vector& cmds) { + unique_lock guardStats(_queryStatsMapMtx); + for (auto const& task : tasks) { + auto qid = task->getQueryId(); + auto czId = task->getCzarId(); + auto stats = _addQueryId(qid, czId); + stats->addTask(task); + cmds.push_back(task); + } } /// Update statistics for the Task that was just queued. 
@@ -169,11 +198,7 @@ void QueriesAndChunks::finishedTask(wbase::Task::Ptr const& task) { QueryId qId = task->getQueryId(); QueryStatistics::Ptr stats = getStats(qId); if (stats != nullptr) { - bool mostlyDead = stats->addTaskCompleted(now, taskDuration); - if (mostlyDead) { - lock_guard gd(_newlyDeadMtx); - (*_newlyDeadQueries)[qId] = stats; - } + stats->addTaskCompleted(now, taskDuration); } if (task->isBooted()) { // _bootedTaskTracker is only interested in system load. @@ -181,68 +206,73 @@ void QueriesAndChunks::finishedTask(wbase::Task::Ptr const& task) { // on system load and needs to be removed from the tracker. _bootedTaskTracker.removeTask(task); } - _finishedTaskForChunk(task, taskDuration); } /// Update statistics for the Task that finished and the chunk it was querying. void QueriesAndChunks::_finishedTaskForChunk(wbase::Task::Ptr const& task, double minutes) { - unique_lock ul(_chunkMtx); - pair ele(task->getChunkId(), nullptr); - auto res = _chunkStats.insert(ele); - if (res.second) { - res.first->second = make_shared(task->getChunkId()); - } - auto ptr = res.first->second; - ul.unlock(); - proto::ScanInfo& scanInfo = task->getScanInfo(); + ChunkStatistics::Ptr chunkStats = getChunkStatistics(task->getChunkId()); + protojson::ScanInfo::Ptr scanInfo = task->getScanInfo(); string tblName; - if (!scanInfo.infoTables.empty()) { - proto::ScanTableInfo& sti = scanInfo.infoTables.at(0); + if (!scanInfo->infoTables.empty()) { + protojson::ScanTableInfo& sti = scanInfo->infoTables.at(0); tblName = ChunkTableStats::makeTableName(sti.db, sti.table); } - ChunkTableStats::Ptr tableStats = ptr->add(tblName, minutes); + ChunkTableStats::Ptr tableStats = chunkStats->addTable(tblName, minutes); +} + +ChunkStatistics::Ptr QueriesAndChunks::getChunkStatistics(int chunkId) { + lock_guard chunkMapLck(_chunkMtx); + pair ele(chunkId, nullptr); + auto res = _dbChunkStats.insert(ele); + if (res.second) { + // New nullptr `ele` was inserted. 
+ res.first->second = ChunkStatistics::create(chunkId); + } + return res.first->second; } /// Go through the list of possibly dead queries and remove those that are too old. void QueriesAndChunks::removeDead() { - vector dList; + vector deadList; + size_t deadQueriesSizeStart = 0; + size_t deadQueriesSizeEnd = 0; + size_t queryStatsMapSize = 0; auto now = chrono::system_clock::now(); { - shared_ptr newlyDead; - { - lock_guard gnd(_newlyDeadMtx); - newlyDead = _newlyDeadQueries; - _newlyDeadQueries.reset(new DeadQueriesType); - } - + // Remove queries that have not been touched for a while. lock_guard gd(_deadMtx); - // Copy newlyDead into dead. - for (auto const& elem : *newlyDead) { - _deadQueries[elem.first] = elem.second; - } - LOGS(_log, LOG_LVL_DEBUG, "QueriesAndChunks::removeDead deadQueries size=" << _deadQueries.size()); + deadQueriesSizeStart = _deadQueries.size(); auto iter = _deadQueries.begin(); while (iter != _deadQueries.end()) { auto const& statPtr = iter->second; if (statPtr->isDead(_deadAfter, now)) { LOGS(_log, LOG_LVL_TRACE, "QueriesAndChunks::removeDead added to list"); - dList.push_back(statPtr); - iter = _deadQueries.erase(iter); - } else { - ++iter; + deadList.push_back(statPtr); } + iter = _deadQueries.erase(iter); } } - for (auto const& dead : dList) { + for (auto const& dead : deadList) { removeDead(dead); } - if (LOG_CHECK_LVL(_log, LOG_LVL_DEBUG)) { - lock_guard gdend(_deadMtx); - LOGS(_log, LOG_LVL_DEBUG, - "QueriesAndChunks::removeDead end deadQueries size=" << _deadQueries.size()); + + /// Find mostly dead queries and add them to the dead map to be checked next time. 
+ { + lock_guard g(_queryStatsMapMtx); + lock_guard gd(_deadMtx); + for (auto const& [qId, statPtr] : _queryStatsMap) { + if (statPtr->isMostlyDead()) { + _deadQueries[qId] = statPtr; + } + } + deadQueriesSizeEnd = _deadQueries.size(); + queryStatsMapSize = _queryStatsMap.size(); } + LOGS(_log, LOG_LVL_INFO, + "removeDead queryStatsMapSize=" << queryStatsMapSize << " deadQueriesSize start=" + << deadQueriesSizeStart << " end=" << deadQueriesSizeEnd); } /// Remove a statistics for a user query. @@ -257,7 +287,7 @@ void QueriesAndChunks::removeDead(QueryStatistics::Ptr const& queryStats) { _queryStatsMap.erase(qId); } -QueryStatistics::Ptr QueriesAndChunks::getStats(QueryId const& qId) const { +QueryStatistics::Ptr QueriesAndChunks::getStats(QueryId qId) const { lock_guard lockG(_queryStatsMapMtx); return _getStats(qId); } @@ -324,8 +354,8 @@ void QueriesAndChunks::examineAll() { } double schedMaxTime = sched->getMaxTimeMinutes(); // Get max time for scheduler // Get the slowest scan table in task. 
- auto begin = task->getScanInfo().infoTables.begin(); - if (begin == task->getScanInfo().infoTables.end()) { + auto begin = task->getScanInfo()->infoTables.begin(); + if (begin == task->getScanInfo()->infoTables.end()) { continue; } string const& slowestTable = begin->db + ":" + begin->table; @@ -386,6 +416,28 @@ void QueriesAndChunks::examineAll() { LOGS(_log, LOG_LVL_DEBUG, "QueriesAndChunks::examineAll end"); } +void QueriesAndChunks::buildCancelledAndDeletedLists( + CzarId czarId, std::map const& qIdFiles, bool keepFiles, + std::vector>& cancelledList, + std::vector>& deleteList) { + // see wcomms::HttpWorkerCzarModule::_handleQueryStatus + unique_lock guardStats(_queryStatsMapMtx); + for (auto const& [dkQid, dkTm] : qIdFiles) { + auto qStats = _addQueryId(dkQid, czarId); + if (qStats != nullptr) { + auto uqInfo = qStats->getUserQueryInfo(); + if (uqInfo != nullptr) { + if (!uqInfo->getCancelledByCzar()) { + cancelledList.push_back(uqInfo); + } + if (!keepFiles) { + deleteList.push_back(uqInfo); + } + } + } + } +} + nlohmann::json QueriesAndChunks::statusToJson(wbase::TaskSelector const& taskSelector) const { nlohmann::json status = nlohmann::json::object(); { @@ -397,11 +449,15 @@ nlohmann::json QueriesAndChunks::statusToJson(wbase::TaskSelector const& taskSel status["blend_scheduler"] = bSched->statusToJsonBlend(); } } + /// TODO: This just shouldn't be a part of this at all. It's accrues data over time + /// so frequent updates don't help and it's big. "histograms" is the worst offender, + /// a similar argument could be made for "tasks" as this is just too + /// much information to send every couple of seconds. 
DM-55247 status["query_stats"] = nlohmann::json::object(); + if (true) return status; // TODO: DM-55247 lock_guard g(_queryStatsMapMtx); - for (auto&& itr : _queryStatsMap) { - string const qId = to_string(itr.first); // forcing string type for the json object key - QueryStatistics::Ptr const& qStats = itr.second; + for (auto const& [queryId, qStats] : _queryStatsMap) { + string const qId = to_string(queryId); // forcing string type for the json object key status["query_stats"][qId]["histograms"] = qStats->getJsonHist(); status["query_stats"][qId]["tasks"] = qStats->getJsonTasks(taskSelector); } @@ -430,7 +486,7 @@ QueriesAndChunks::ScanTableSumsMap QueriesAndChunks::_calcScanTableSums() { vector chks; { lock_guard g(_chunkMtx); - for (auto const& ele : _chunkStats) { + for (auto const& ele : _dbChunkStats) { auto const& chk = ele.second; chks.push_back(chk); } @@ -442,8 +498,8 @@ QueriesAndChunks::ScanTableSumsMap QueriesAndChunks::_calcScanTableSums() { // in this chunk. for (auto const& chunkStats : chks) { auto chunkId = chunkStats->_chunkId; - lock_guard lock(chunkStats->_tStatsMtx); - for (auto const& ele : chunkStats->_tableStats) { + lock_guard lock(chunkStats->_tStatsMapMtx); + for (auto const& ele : chunkStats->_tableStatsMap) { auto const& tblName = ele.first; if (!tblName.empty()) { auto& sTSums = scanTblSums[tblName]; @@ -672,10 +728,46 @@ vector QueriesAndChunks::removeQueryFrom(QueryId const& qId, return removedList; } +void QueriesAndChunks::killAllQueriesFromCzar(CzarId czarId) { + std::map qsMap; + { + lock_guard lgQsm(_queryStatsMapMtx); + qsMap = _queryStatsMap; + } + + for (auto const& [qsKey, qsPtr] : qsMap) { + if (qsPtr != nullptr) { + auto uqInfo = qsPtr->getUserQueryInfo(); + if (uqInfo != nullptr && uqInfo->getCzarId() == czarId) { + uqInfo->cancelAllUberJobs(); + } + } + } +} + +protojson::ChunkUseCountAnswerMsg::DbChunkCountMapPtr QueriesAndChunks::getDbChunkCountMap() const { + auto dbChunkCountMap = make_shared(); + + int 
totalUseCount = 0; + lock_guard chunkLock(_chunkMtx); + for (auto const& [chunkId, chunkStats] : _dbChunkStats) { + std::shared_ptr const chunkDbStatsMap = chunkStats->getDbStatsMapCopy(); + for (auto const& [dbName, dbStats] : *chunkDbStatsMap) { + int useCount = dbStats->getUseCount(); + (*dbChunkCountMap)[dbName][chunkId] = useCount; + totalUseCount += useCount; + } + } + LOGS(_log, LOG_LVL_INFO, + string(__func__) << " dbChunkStats size=" << _dbChunkStats.size() + << " totalUseCount=" << totalUseCount); + return dbChunkCountMap; +} + ostream& operator<<(ostream& os, QueriesAndChunks const& qc) { lock_guard g(qc._chunkMtx); os << "Chunks("; - for (auto const& ele : qc._chunkStats) { + for (auto const& ele : qc._dbChunkStats) { os << *(ele.second) << ";"; } os << ")"; @@ -684,10 +776,10 @@ ostream& operator<<(ostream& os, QueriesAndChunks const& qc) { /// Add the duration to the statistics for the table. Create a statistics object if needed. /// @return the statistics for the table. -ChunkTableStats::Ptr ChunkStatistics::add(string const& scanTableName, double minutes) { +ChunkTableStats::Ptr ChunkStatistics::addTable(string const& scanTableName, double minutes) { pair ele(scanTableName, nullptr); - unique_lock ul(_tStatsMtx); - auto res = _tableStats.insert(ele); + unique_lock ul(_tStatsMapMtx); + auto res = _tableStatsMap.insert(ele); auto iter = res.first; if (res.second) { iter->second = make_shared(_chunkId, scanTableName); @@ -700,24 +792,62 @@ ChunkTableStats::Ptr ChunkStatistics::add(string const& scanTableName, double mi /// @return the statistics for a table. nullptr if the table is not found. 
ChunkTableStats::Ptr ChunkStatistics::getStats(string const& scanTableName) const { - lock_guard g(_tStatsMtx); - auto iter = _tableStats.find(scanTableName); - if (iter != _tableStats.end()) { + lock_guard g(_tStatsMapMtx); + auto iter = _tableStatsMap.find(scanTableName); + if (iter != _tableStatsMap.end()) { return iter->second; } return nullptr; } ostream& operator<<(ostream& os, ChunkStatistics const& cs) { - lock_guard g(cs._tStatsMtx); - os << "ChunkStatsistics(" << cs._chunkId << "("; - for (auto const& ele : cs._tableStats) { + lock_guard g(cs._tStatsMapMtx); + os << "ChunkStatisistics(" << cs._chunkId << "("; + for (auto const& ele : cs._tableStatsMap) { os << *(ele.second) << ";"; } os << ")"; return os; } +void ChunkStatistics::incrDbUseCount(string const& dbName) { + ChunkDbStats::Ptr dbStats; + lock_guard dbStatsMapLock(_dbStatsMapMtx); + auto iter = _dbStatsMap.find(dbName); + if (iter == _dbStatsMap.end()) { + dbStats = ChunkDbStats::create(_chunkId, dbName); + _dbStatsMap[dbName] = dbStats; + } else { + dbStats = iter->second; + } + dbStats->incrUseCount(1); +} + +void ChunkStatistics::decrDbUseCount(string const& dbName) { + ChunkDbStats::Ptr dbStats; + std::lock_guard dbStatsMapLock(_dbStatsMapMtx); + auto iter = _dbStatsMap.find(dbName); + if (iter != _dbStatsMap.end()) { + dbStats = iter->second; + int count = dbStats->incrUseCount(-1); + // Delete entry if <= 0. When else would it get deleted? + if (count <= 0) { + _dbStatsMap.erase(iter); + } + } else { + LOGS(_log, LOG_LVL_WARN, __func__ << " decrDbUseCount could not find dbName=" << dbName); + } +} + +std::shared_ptr ChunkStatistics::getDbStatsMapCopy() const { + auto copy = make_shared(); + lock_guard dbStatsMapLock(_dbStatsMapMtx); + for (auto const& [key, val] : _dbStatsMap) { + (*copy)[key] = val; + } + return copy; +} + /// Use the duration of the last Task completed to adjust the average completion time. 
void ChunkTableStats::addTaskFinished(double minutes) { lock_guard g(_dataMtx); @@ -727,7 +857,7 @@ void ChunkTableStats::addTaskFinished(double minutes) { } else { _data.avgCompletionTime = minutes; } - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "ChkId=" << _chunkId << ":tbl=" << _scanTableName << " completed=" << _data.tasksCompleted << " avgCompletionTime=" << _data.avgCompletionTime); } diff --git a/src/wpublish/QueriesAndChunks.h b/src/wpublish/QueriesAndChunks.h index a51e1d24d2..be5f5ddab6 100644 --- a/src/wpublish/QueriesAndChunks.h +++ b/src/wpublish/QueriesAndChunks.h @@ -39,10 +39,16 @@ // Qserv headers #include "global/intTypes.h" +#include "protojson/ChunkUseCountAnswerMsg.h" +#include "protojson/WorkerQueryStatusData.h" #include "wbase/Task.h" #include "wpublish/QueryStatistics.h" // Forward declarations +namespace lsst::qserv::protojson { +class CzarContactInfo; +} // namespace lsst::qserv::protojson + namespace lsst::qserv::wbase { struct TaskSelector; } // namespace lsst::qserv::wbase @@ -102,26 +108,72 @@ class ChunkTableStats { double _weightSum = _weightAvg + _weightNew; ///< denominator }; +/// Store statistics for a specific database in a chunk, generally just usage counts. In this case, usage +/// counts include the number of Tasks that intend use or are using this chunk for this database. +/// No mutex is required as long as there's only one atomic and all other members are constant. +class ChunkDbStats { +public: + using Ptr = std::shared_ptr; + + static Ptr create(int chunkId, std::string const& dbName) { + return Ptr(new ChunkDbStats(chunkId, dbName)); + } + + int incrUseCount(int incr) { return _useCount += incr; } + int getUseCount() const { return _useCount; } + +private: + ChunkDbStats(int chunkId, std::string const& dbName) : _chunkId{chunkId}, _dbName{dbName} {} + + int const _chunkId; + std::string const _dbName; + /// Number of Tasks that will use or are using this chunk for this database. 
+ std::atomic _useCount{0}; +}; + +typedef std::map + ChunkDbStatsMap; ///< Map of database name to database statistics for a chunk. + /// Statistics for one chunk, including scan table statistics. -class ChunkStatistics { +class ChunkStatistics : public std::enable_shared_from_this { public: using Ptr = std::shared_ptr; - ChunkStatistics(int chunkId) : _chunkId{chunkId} {} + static Ptr create(int chunkId) { return Ptr(new ChunkStatistics(chunkId)); } - ChunkTableStats::Ptr add(std::string const& scanTableName, double duration); + ChunkTableStats::Ptr addTable(std::string const& scanTableName, double duration); ChunkTableStats::Ptr getStats(std::string const& scanTableName) const; + void incrDbUseCount(std::string const& dbName); + void decrDbUseCount(std::string const& dbName); + + /// @return a copy of the database statistics map. + /// The map contains pointers to the original ChunkDbStats objects. + std::shared_ptr getDbStatsMapCopy() const; + + /// @return a RAII object that will help keep count of the Tasks needing this chunk+database. + wbase::TaskUseRAII::Ptr getTaskUseRAII(std::string const& dbName) { + return wbase::TaskUseRAII::create(shared_from_this(), dbName); + } + friend QueriesAndChunks; friend std::ostream& operator<<(std::ostream& os, ChunkStatistics const& cs); private: + ChunkStatistics(int chunkId) : _chunkId{chunkId} {} + int const _chunkId; - mutable std::mutex _tStatsMtx; ///< protects _tableStats; + mutable std::mutex _tStatsMapMtx; ///< protects _tableStatsMap; /// Map of chunk scan table statistics indexed by slowest scan table name in query. - std::map _tableStats; + std::map _tableStatsMap; + + /// Map of chunk use count by database name. + ChunkDbStatsMap _dbStatsMap; + mutable std::mutex _dbStatsMapMtx; ///< protects _dbStatsMap; }; +typedef std::map ChunkStatisticsMap; ///< Map of chunk id to chunk statistics. + /// This class tracks the tasks that have been booted from their scheduler and are /// still running. 
The tasks are grouped by their related QueryId. class BootedTaskTracker { @@ -163,6 +215,8 @@ class BootedTaskTracker { mutable std::mutex _bootedMapMtx; ///< protects `_bootedMap`. }; +/// This class is used to store information (statistical and other) about UserQueries and +/// Chunks on this worker. class QueriesAndChunks { public: using Ptr = std::shared_ptr; @@ -190,17 +244,44 @@ class QueriesAndChunks { std::vector removeQueryFrom(QueryId const& qId, std::shared_ptr const& sched); + + /// Identify and remove dead QueryStatistics objects from the map. + /// Please see QueryStatistics for more information about its life cycle. void removeDead(); void removeDead(QueryStatistics::Ptr const& queryStats); - /// Return the statistics for a user query. - QueryStatistics::Ptr getStats(QueryId const& qId) const; + /// Return the statistics for a user query, may be nullptr, + /// in many cases addQueryId() may be preferable if + /// new information is being added to the returned object. + /// @see addQueryId() + QueryStatistics::Ptr getStats(QueryId qId) const; + + /// @see _addQueryId + QueryStatistics::Ptr addQueryId(QueryId qId, CzarId czarId); + + /// Build lists of cancelled and deleted user queries based on the provided `qIdFiles` and `keepFiles` + /// flag. + /// @param qIdFiles - A map of files IDs to be cancelled or deleted. + /// @param keepfiles - If true, none of the files will be added to the delete list. + /// @param cancelledList - A vector to be populated with user queries that should be cancelled. + /// @param deleteList - A vector to be populated with user queries that should be deleted. 
+ void buildCancelledAndDeletedLists(CzarId czarId, std::map const& qIdFiles, + bool keepFiles, + std::vector>& cancelledList, + std::vector>& deleteList); void addTask(wbase::Task::Ptr const& task); + void addTasks(std::vector const& tasks, std::vector& cmds); void queuedTask(wbase::Task::Ptr const& task); void startedTask(wbase::Task::Ptr const& task); void finishedTask(wbase::Task::Ptr const& task); + /// Return the ChunkStatistics object for the specified chunkId, creating it if needed. + ChunkStatistics::Ptr getChunkStatistics(int chunkId); + + /// Return an independent DbChunkCountMap object based on _dbChunkStats. + protojson::ChunkUseCountAnswerMsg::DbChunkCountMapPtr getDbChunkCountMap() const; + /// Examine all running Tasks and boot Tasks that are taking too long and /// move user queries that are too slow to the snail scan. /// This is expected to be called maybe once every 5 minutes. @@ -234,12 +315,24 @@ class QueriesAndChunks { }; using ScanTableSumsMap = std::map; + /// If the worker believes this czar has died, it calls this to stop + /// all Tasks associated with that czar. + void killAllQueriesFromCzar(CzarId czarId); + friend std::ostream& operator<<(std::ostream& os, QueriesAndChunks const& qc); private: static Ptr _globalQueriesAndChunks; QueriesAndChunks(std::chrono::seconds deadAfter, std::chrono::seconds examineAfter); + /// Return the statistics for a user query, creating if needed. + /// Since it is possible to get messages out of order, there + /// are several case where something like a cancellation + /// message arrives before any tasks have been created. + /// @see getStats() + /// _queryStatsMapMtx must be locked before calling. + QueryStatistics::Ptr _addQueryId(QueryId qId, CzarId czarId); + /// @return the statistics for a user query. /// _queryStatsMtx must be locked before calling. 
QueryStatistics::Ptr _getStats(QueryId const& qId) const; @@ -274,7 +367,8 @@ class QueriesAndChunks { std::map _queryStatsMap; ///< Map of Query stats indexed by QueryId. mutable std::mutex _chunkMtx; - std::map _chunkStats; ///< Map of Chunk stats indexed by chunk id. + /// Map of ChunkStatistics indexed by chunkId. + ChunkStatisticsMap _dbChunkStats; std::weak_ptr _blendSched; ///< Pointer to the BlendScheduler. @@ -285,11 +379,10 @@ class QueriesAndChunks { /// A user query must be complete and inactive this long before it can be considered dead. std::chrono::seconds _deadAfter = std::chrono::minutes(5); - std::mutex _deadMtx; ///< Protects _deadQueries. - std::mutex _newlyDeadMtx; ///< Protects _newlyDeadQueries. + /// Protects _deadQueries, lock after locking _queryStatsMapMtx if both are needed. + std::mutex _deadMtx; using DeadQueriesType = std::map; DeadQueriesType _deadQueries; ///< Map of user queries that might be dead. - std::shared_ptr _newlyDeadQueries{new DeadQueriesType()}; // Members for running a separate thread to examine all the running Tasks on the scan schedulers // and remove those that are taking too long (boot them). If too many Tasks in a single user query diff --git a/src/wpublish/QueryStatistics.cc b/src/wpublish/QueryStatistics.cc index 576effdee2..3c0930bdac 100644 --- a/src/wpublish/QueryStatistics.cc +++ b/src/wpublish/QueryStatistics.cc @@ -50,7 +50,10 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wpublish.QueriesAndChunks"); namespace lsst::qserv::wpublish { -QueryStatistics::QueryStatistics(QueryId const& qId_) : creationTime(CLOCK::now()), queryId(qId_) { +QueryStatistics::QueryStatistics(QueryId qId_, CzarId czarId_) + : creationTime(CLOCK::now()), + queryId(qId_), + _userQueryInfo(wbase::UserQueryInfo::create(qId_, czarId_)) { /// For all of the histograms, all entries should be kept at least until the work is finished. 
string qidStr = to_string(queryId); _histSizePerTask = util::Histogram::Ptr(new util::Histogram( @@ -204,13 +207,12 @@ void QueryStatistics::setQueryBooted(bool booted, TIMEPOINT now) { _queryBootedTime = now; } -bool QueryStatistics::addTaskCompleted(TIMEPOINT const now, double const taskDuration) { +void QueryStatistics::addTaskCompleted(TIMEPOINT const now, double const taskDuration) { lock_guard gs(_qStatsMtx); _touched = now; _tasksRunning -= 1; _tasksCompleted += 1; _totalTimeMinutes += taskDuration; - return _isMostlyDead(); } vector QueryStatistics::getRunningTasks() const { diff --git a/src/wpublish/QueryStatistics.h b/src/wpublish/QueryStatistics.h index dc26a9da4c..48f05f3c68 100644 --- a/src/wpublish/QueryStatistics.h +++ b/src/wpublish/QueryStatistics.h @@ -39,25 +39,33 @@ // Qserv headers #include "global/intTypes.h" +#include "util/InstanceCount.h" #include "wbase/Task.h" #include "wsched/SchedulerBase.h" namespace lsst::qserv::wbase { -class Histogram; -} +class UserQueryInfo; +} // namespace lsst::qserv::wbase // This header declarations namespace lsst::qserv::wpublish { -/// Statistics for a single user query. -/// This class stores some statistics for each Task in the user query on this worker. +/// Statistics and information for a single user query. +/// This class stores some statistics for each Task in the user query on this worker +/// as well as information for the query as a whole, including cancellation status. +/// It has a complicated life cycle in that it will live on for a while even after +/// all Tasks are completed. This allows the data to be accessed for a while after +/// the query is done and that the data can be used with new incoming UberJobs. +/// Late arriving UberJobs can be caused by network issues or a different worker +/// dying and work being redistributed. +/// Please see isMostlyDead(), isDead(), and QueriesAndChunks::removeDead(). 
class QueryStatistics { public: using Ptr = std::shared_ptr; /// Force shared_ptr creation for data integrity. - static Ptr create(QueryId const& queryId) { - return std::shared_ptr(new QueryStatistics(queryId)); + static Ptr create(QueryId queryId_, CzarId czarId_) { + return std::shared_ptr(new QueryStatistics(queryId_, czarId_)); } QueryStatistics() = delete; @@ -73,8 +81,16 @@ class QueryStatistics { return _queryBooted; } + std::shared_ptr getUserQueryInfo() const { return _userQueryInfo; } + void setQueryBooted(bool booted, TIMEPOINT now); + /// Return true if all tasks are completed. + bool isMostlyDead() const { + std::lock_guard gs(_qStatsMtx); + return _isMostlyDead(); + } + /// Add statistics related to the running of the query in the task. /// If there are subchunks in the user query, several Tasks may be needed for one chunk. /// @param runTimeSeconds - How long it took to run the query. @@ -95,7 +111,7 @@ class QueryStatistics { void addTask(TIMEPOINT const now); void addTaskRunning(TIMEPOINT const now); - bool addTaskCompleted(TIMEPOINT const now, double const taskDuration); + void addTaskCompleted(TIMEPOINT const now, double const taskDuration); void addTaskBooted() { std::lock_guard guard(_qStatsMtx); _tasksBooted += 1; @@ -167,7 +183,7 @@ class QueryStatistics { friend std::ostream& operator<<(std::ostream& os, QueryStatistics const& q); private: - explicit QueryStatistics(QueryId const& queryId); + explicit QueryStatistics(QueryId queryId, CzarId czarId); bool _isMostlyDead() const; mutable std::mutex _qStatsMtx; @@ -194,6 +210,9 @@ class QueryStatistics { std::shared_ptr _histRowsPerTask; ///< Histogram of rows per Task. SchedTasksInfoMap _taskSchedInfoMap; ///< Map of task information ordered by scheduler name. + + /// Contains information common to all Tasks in this user query. 
+ std::shared_ptr const _userQueryInfo; }; } // namespace lsst::qserv::wpublish diff --git a/src/wsched/BlendScheduler.cc b/src/wsched/BlendScheduler.cc index 3e9babc06a..06c90af3ae 100644 --- a/src/wsched/BlendScheduler.cc +++ b/src/wsched/BlendScheduler.cc @@ -44,7 +44,6 @@ // Qserv headers #include "global/LogContext.h" -#include "proto/worker.pb.h" #include "util/Bug.h" #include "util/EventThread.h" #include "util/Timer.h" @@ -95,8 +94,9 @@ BlendScheduler::BlendScheduler(string const& name, wpublish::QueriesAndChunks::P _scanSnail->setDefaultPosition(position++); assert(_schedulers.size() >= 2); // Must have at least _group and _scanSnail in the list. _sortScanSchedulers(); + LOGS(_log, LOG_LVL_INFO, "BlendScheduler _schedMaxThreads=" << _schedMaxThreads); for (auto const& sched : _schedulers) { - LOGS(_log, LOG_LVL_DEBUG, "Scheduler " << _name << " found scheduler " << sched->getName()); + LOGS(_log, LOG_LVL_INFO, "Scheduler " << _name << " found scheduler " << sched->getName()); } } @@ -110,8 +110,8 @@ void BlendScheduler::_sortScanSchedulers() { if (a == _scanSnail) return false; if (b == _scanSnail) return true; - // base on the number of scans in flight. if (_prioritizeByInFlight) { + // More scans in flight means lower priority. 
auto aInFlight = a->getInFlight() - a->getPriority(); auto bInFlight = b->getInFlight() - b->getPriority(); if (aInFlight < bInFlight) return true; @@ -131,7 +131,15 @@ void BlendScheduler::_sortScanSchedulers() { str += sched->getName() + ", "; } } - LOGS(_log, LOG_LVL_DEBUG, str); + LOGS(_log, LOG_LVL_TRACE, str); +} + +void BlendScheduler::queTaskLoad(util::Command::Ptr const& cmd) { + { + lock_guard guardA(util::CommandQueue::_mx); + _taskLoadQueue.push_back(cmd); + } + notify(false); } void BlendScheduler::queCmd(util::Command::Ptr const& cmd) { @@ -158,9 +166,10 @@ void BlendScheduler::queCmd(std::vector const& cmds) { throw util::Bug(ERR_LOC, "BlendScheduler::queCmd cmds.size() > 1 when no task was set."); } { - util::LockGuardTimed guard(util::CommandQueue::_mx, "BlendScheduler::queCmd a"); + lock_guard guardA(util::CommandQueue::_mx); _ctrlCmdQueue.queCmd(cmd); } + notify(true); // notify all=true continue; } @@ -169,31 +178,31 @@ void BlendScheduler::queCmd(std::vector const& cmds) { QSERV_LOGCONTEXT_QUERY_JOB(task->getQueryId(), task->getJobId()); } - util::LockGuardTimed guard(util::CommandQueue::_mx, "BlendScheduler::queCmd b"); + lock_guard guardB(util::CommandQueue::_mx); // Check for scan tables. The information for all tasks should be the same // as they all belong to the same query, so only examine the first task. if (first) { first = false; - auto const& scanTables = task->getScanInfo().infoTables; + auto const& scanTables = task->getScanInfo()->infoTables; bool interactive = task->getScanInteractive(); if (scanTables.size() <= 0 || interactive) { // If there are no scan tables, no point in putting on a shared scan. 
- LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "Blend chose group scanTables.size=" << scanTables.size() << " interactive=" << interactive); onInteractive = true; targSched = _group; } else { onInteractive = false; - int scanPriority = task->getScanInfo().scanRating; - if (LOG_CHECK_LVL(_log, LOG_LVL_DEBUG)) { + int scanPriority = task->getScanInfo()->scanRating; + if (LOG_CHECK_LVL(_log, LOG_LVL_TRACE)) { ostringstream ss; ss << "Blend chose scan for priority=" << scanPriority << " : "; for (auto scanTbl : scanTables) { ss << scanTbl.db + "." + scanTbl.table + " "; } - LOGS(_log, LOG_LVL_DEBUG, ss.str()); + LOGS(_log, LOG_LVL_TRACE, ss.str()); } { // Find the scheduler responsible for this 'scanPriority'. lock_guard lg(_schedMtx); @@ -224,7 +233,7 @@ void BlendScheduler::queCmd(std::vector const& cmds) { task->setTaskScheduler(targSched); _queries->queuedTask(task); taskCmds.push_back(task); - LOGS(_log, LOG_LVL_INFO, + LOGS(_log, LOG_LVL_TRACE, "BlendScheduler::queCmd added tid=" << task->getIdStr() << " sched=" << targSched->getName()); } @@ -232,7 +241,7 @@ void BlendScheduler::queCmd(std::vector const& cmds) { _logSchedulers(); if (!taskCmds.empty()) { - LOGS(_log, LOG_LVL_DEBUG, "Blend queCmd"); + LOGS(_log, LOG_LVL_TRACE, "Blend queCmd"); targSched->queCmd(taskCmds); if (queryStats) { queryStats->tasksAddedToScheduler(targSched, taskCmds.size()); @@ -245,13 +254,14 @@ void BlendScheduler::queCmd(std::vector const& cmds) { void BlendScheduler::commandStart(util::Command::Ptr const& cmd) { auto t = dynamic_pointer_cast(cmd); if (t == nullptr) { - LOGS(_log, LOG_LVL_ERROR, "BlendScheduler::commandStart cmd failed conversion"); + // This happens with loader and control commands. 
+ LOGS(_log, LOG_LVL_TRACE, "BlendScheduler::commandStart cmd not a Task"); return; } QSERV_LOGCONTEXT_QUERY_JOB(t->getQueryId(), t->getJobId()); - LOGS(_log, LOG_LVL_DEBUG, "BlendScheduler::commandStart"); + LOGS(_log, LOG_LVL_TRACE, "BlendScheduler::commandStart"); wcontrol::Scheduler::Ptr s = dynamic_pointer_cast(t->getTaskScheduler()); if (s != nullptr) { s->commandStart(t); @@ -264,22 +274,24 @@ void BlendScheduler::commandStart(util::Command::Ptr const& cmd) { void BlendScheduler::commandFinish(util::Command::Ptr const& cmd) { auto t = dynamic_pointer_cast(cmd); if (t == nullptr) { - LOGS(_log, LOG_LVL_WARN, "BlendScheduler::commandFinish cmd failed conversion"); + LOGS(_log, LOG_LVL_TRACE, "BlendScheduler::commandFinish cmd is not a Task"); return; } QSERV_LOGCONTEXT_QUERY_JOB(t->getQueryId(), t->getJobId()); wcontrol::Scheduler::Ptr s = dynamic_pointer_cast(t->getTaskScheduler()); - LOGS(_log, LOG_LVL_DEBUG, "BlendScheduler::commandFinish"); + LOGS(_log, LOG_LVL_TRACE, "BlendScheduler::commandFinish"); if (s != nullptr) { s->commandFinish(t); } else { LOGS(_log, LOG_LVL_ERROR, "BlendScheduler::commandFinish scheduler not found"); } _infoChanged = true; - _logChunkStatus(); - notify(true); // notify all=true + if (LOG_CHECK_LVL(_log, LOG_LVL_TRACE)) { + _logChunkStatus(); + } + notify(false); // notify one } bool BlendScheduler::ready() { @@ -300,22 +312,27 @@ bool BlendScheduler::_ready() { ostringstream os; bool ready = false; + if (_taskLoadQueue.size() > 0) { + ready = true; + return ready; + } + // _readSched points to the scheduler with a ready task until that // task has been retrieved by getCmd(). 
if (_readySched != nullptr) { ready = true; } - - // Get the total number of threads schedulers want reserved - int availableThreads = calcAvailableTheads(); bool changed = _infoChanged.exchange(false); if (!ready) { lock_guard lg(_schedMtx); + + // Get the total number of threads schedulers want reserved + int availableThreads = _calcAvailableTheads(); for (auto const& sched : _schedulers) { availableThreads = sched->applyAvailableThreads(availableThreads); ready = sched->ready(); - if (changed && LOG_CHECK_LVL(_log, LOG_LVL_DEBUG)) { + if (changed && LOG_CHECK_LVL(_log, LOG_LVL_TRACE)) { os << sched->getName() << "(r=" << ready << " sz=" << sched->getSize() << " fl=" << sched->getInFlight() << " avail=" << availableThreads << ") "; } @@ -326,73 +343,70 @@ bool BlendScheduler::_ready() { } } - // IF nothing ready on the schedulers, check if the thread pool size should be changed. + // If nothing ready on the schedulers, check if the thread pool size should be changed. if (!ready) { ready = _ctrlCmdQueue.ready(); } if (changed) { - LOGS(_log, LOG_LVL_DEBUG, getName() << "_ready() " << os.str()); + LOGS(_log, LOG_LVL_TRACE, getName() << "_ready() " << os.str()); } return ready; } +atomic logChunkLimiter = 0; + util::Command::Ptr BlendScheduler::getCmd(bool wait) { - util::Timer timeToLock; - util::Timer timeHeld; util::Command::Ptr cmd; - double totalTimeHeld = 0.0; bool ready = false; { - timeToLock.start(); unique_lock lock(util::CommandQueue::_mx); - timeToLock.stop(); - timeHeld.start(); if (wait) { - // util::CommandQueue::_cv.wait(lock, [this](){return _ready();}); - while (!_ready()) { - timeHeld.stop(); - totalTimeHeld += timeHeld.getElapsed(); - util::CommandQueue::_cv.wait(lock); - timeHeld.start(); - } + util::CommandQueue::_cv.wait(lock, [this]() { return _ready(); }); ready = true; } else { ready = _ready(); } + if (ready && _taskLoadQueue.size() > 0) { + cmd = _taskLoadQueue.front(); + _taskLoadQueue.pop_front(); + notify(false); + return cmd; + } + 
_logSchedulers(); // Try to get a command from the schedulers if (ready && (_readySched != nullptr)) { cmd = _readySched->getCmd(false); if (cmd != nullptr) { - wbase::Task::Ptr task = dynamic_pointer_cast(cmd); - LOGS(_log, LOG_LVL_DEBUG, - "Blend getCmd() using cmd from " << _readySched->getName() << " chunkId=" - << task->getChunkId() << " QID=" << task->getIdStr()); + _sortScanSchedulers(); + if (LOG_CHECK_LVL(_log, LOG_LVL_TRACE)) { + wbase::Task::Ptr task = dynamic_pointer_cast(cmd); + LOGS(_log, LOG_LVL_TRACE, + "Blend getCmd() using cmd from " << _readySched->getName() + << " chunkId=" << task->getChunkId() + << " QID=" << task->getIdStr()); + } } _readySched.reset(); - _sortScanSchedulers(); } + } - if (cmd == nullptr) { - // The scheduler didn't have anything, see if there's anything on the control queue, - // which could change the size of the pool. - cmd = _ctrlCmdQueue.getCmd(); - } + if (cmd == nullptr) { + // The scheduler didn't have anything, see if there's anything on the control queue, + // which could change the size of the pool. + cmd = _ctrlCmdQueue.getCmd(); } + if (cmd != nullptr) { _infoChanged = true; - _logChunkStatus(); + if (LOG_CHECK_LVL(_log, LOG_LVL_TRACE) || (logChunkLimiter++ % 100 == 0)) { + _logChunkStatus(); + } notify(false); // notify all=false } // returning nullptr is acceptable. - timeHeld.stop(); - totalTimeHeld += timeHeld.getElapsed(); - LOGS(_log, LOG_LVL_DEBUG, - "lockTime BlendScheduler::getCmd ready toLock=" << timeToLock.getElapsed() - << " held=" << timeHeld.getElapsed() - << " totalHeld=" << totalTimeHeld); return cmd; } @@ -406,18 +420,22 @@ int BlendScheduler::_getAdjustedMaxThreads(int oldAdjMax, int inFlight) { return newAdjMax; } -/// @return the number of threads that are not reserved by any sub-scheduler. int BlendScheduler::calcAvailableTheads() { + lock_guard lck(_schedMtx); + return _calcAvailableTheads(); +} + +/// @return the number of threads that are not reserved by any sub-scheduler. 
+int BlendScheduler::_calcAvailableTheads() { int reserve = 0; { - lock_guard lg(_schedMtx); for (auto const& sched : _schedulers) { reserve += sched->desiredThreadReserve(); } } int available = _schedMaxThreads - reserve; if (available < 0) { - LOGS(_log, LOG_LVL_DEBUG, "calcAvailableTheads negative available=" << available); + LOGS(_log, LOG_LVL_TRACE, "calcAvailableTheads negative available=" << available); } return available; } diff --git a/src/wsched/BlendScheduler.h b/src/wsched/BlendScheduler.h index 1ee2b65956..5507c755fd 100644 --- a/src/wsched/BlendScheduler.h +++ b/src/wsched/BlendScheduler.h @@ -105,6 +105,8 @@ class BlendScheduler : public wsched::SchedulerBase { BlendScheduler& operator=(BlendScheduler const&) = delete; ~BlendScheduler() override = default; + void queTaskLoad(util::Command::Ptr const& cmd); + void queCmd(util::Command::Ptr const& cmd) override; void queCmd(std::vector const& cmds) override; util::Command::Ptr getCmd(bool wait) override; @@ -118,8 +120,6 @@ class BlendScheduler : public wsched::SchedulerBase { bool ready() override; int applyAvailableThreads(int tempMax) override { return tempMax; } //< does nothing - int calcAvailableTheads(); - bool isScanSnail(SchedulerBase::Ptr const& scan); int moveUserQueryToSnail(QueryId qId, SchedulerBase::Ptr const& source); int moveUserQuery(QueryId qId, SchedulerBase::Ptr const& source, SchedulerBase::Ptr const& destination); @@ -132,13 +132,19 @@ class BlendScheduler : public wsched::SchedulerBase { /// Do nothing, the schedulers this class manages keep their own statistics. void recordPerformanceData() override {}; + int calcAvailableTheads(); + private: int _getAdjustedMaxThreads(int oldAdjMax, int inFlight); bool _ready(); void _sortScanSchedulers(); void _logChunkStatus(); void _logSchedulers(); + + /// _schedMtx must be locked before calling. + int _calcAvailableTheads(); ControlCommandQueue _ctrlCmdQueue; ///< Needed for changing thread pool size. 
+ std::deque _taskLoadQueue; int _schedMaxThreads; ///< maximum number of threads that can run. @@ -152,8 +158,8 @@ class BlendScheduler : public wsched::SchedulerBase { wpublish::QueriesAndChunks::Ptr _queries; /// UserQuery statistics. - std::atomic _prioritizeByInFlight{ - false}; // Schedulers with more tasks inflight get lower priority. + /// Schedulers with more tasks inflight get lower priority. + std::atomic _prioritizeByInFlight{false}; SchedulerBase::Ptr _readySched; //< Pointer to the scheduler with a ready task. /// Record performance data when this value is less than now(), and then this value us increased diff --git a/src/wsched/CMakeLists.txt b/src/wsched/CMakeLists.txt index c05eb03ba3..d7632b8f36 100644 --- a/src/wsched/CMakeLists.txt +++ b/src/wsched/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(wsched SHARED) -add_dependencies(wsched proto) target_sources(wsched PRIVATE BlendScheduler.cc @@ -13,19 +12,23 @@ target_link_libraries(wsched PUBLIC log ) -install( - TARGETS wsched -) +install(TARGETS wsched) add_executable(testSchedulers testSchedulers.cc) -target_include_directories(testSchedulers PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - target_link_libraries(testSchedulers PUBLIC + global + http + mysql + protojson + sql + util + wbase + wconfig + wcontrol + wdb + wpublish wsched - xrdsvc Boost::unit_test_framework Threads::Threads ) diff --git a/src/wsched/ChunkTasksQueue.cc b/src/wsched/ChunkTasksQueue.cc index b872e34d07..bf1d4a96f7 100644 --- a/src/wsched/ChunkTasksQueue.cc +++ b/src/wsched/ChunkTasksQueue.cc @@ -37,27 +37,24 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wsched.ChunkTasksQueue"); namespace lsst::qserv::wsched { -/// Queue a Task with other tasks on the same chunk. +/// Queue tasks from an uberjob. 
void ChunkTasksQueue::queueTask(std::vector const& tasks) { std::lock_guard lg(_mapMx); auto iter = _chunkMap.end(); + int prevChunkId = -1; // invalid chunkId number for (auto const& task : tasks) { int chunkId = task->getChunkId(); - if (iter != _chunkMap.end() && iter->first != chunkId) { - LOGS(_log, LOG_LVL_ERROR, - "All tasks grouped together must be on the same chunk." << " chunkA=" << iter->first - << " chunkB=" << chunkId); - throw util::Bug(ERR_LOC, "ChunkTasksQueue::queueTask mismatched chunkIds"); - } - /// If it's the first time through, or the chunkId is different than the previous one, then - /// find the correct ChunkTask. - if (iter == _chunkMap.end() || iter->first != chunkId) { + // If it's the first time through, or the chunkId is different than the previous one, then + // find the correct ChunkTask. UberJobs are constructed in a way that makes it likely + // that subchunks for the same chunk will be grouped together in `tasks`. + if (iter == _chunkMap.end() || prevChunkId != chunkId) { + prevChunkId = chunkId; iter = _chunkMap.find(chunkId); if (iter == _chunkMap.end()) { // Correct ChunkTask wasn't found, make a new one. std::pair ele(chunkId, std::make_shared(chunkId)); auto res = _chunkMap.insert(ele); // insert should fail if the key already exists. - LOGS(_log, LOG_LVL_DEBUG, " queueTask chunk=" << chunkId << " created=" << res.second); + LOGS(_log, LOG_LVL_TRACE, " queueTask chunk=" << chunkId << " created=" << res.second); iter = res.first; } } @@ -98,7 +95,7 @@ bool ChunkTasksQueue::_ready(bool useFlexibleLock) { // If the _activeChunk is invalid, start at the beginning. if (_activeChunk == _chunkMap.end()) { - LOGS(_log, LOG_LVL_INFO, "ChunkTasksQueue::_ready _activeChunk invalid, reset"); + LOGS(_log, LOG_LVL_DEBUG, "ChunkTasksQueue::_ready _activeChunk invalid, reset"); _activeChunk = _chunkMap.begin(); _activeChunk->second->setActive(); // Flag tasks on active so new Tasks added wont be run. 
} @@ -111,7 +108,7 @@ bool ChunkTasksQueue::_ready(bool useFlexibleLock) { // Should the active chunk be advanced? if (_activeChunk->second->readyToAdvance()) { - LOGS(_log, LOG_LVL_DEBUG, "ChunkTasksQueue::_ready advancing chunk"); + LOGS(_log, LOG_LVL_TRACE, "ChunkTasksQueue::_ready advancing chunk"); auto newActive = _activeChunk; ++newActive; if (newActive == _chunkMap.end()) { @@ -140,7 +137,7 @@ bool ChunkTasksQueue::_ready(bool useFlexibleLock) { // Advance through chunks until READY found, or until entire list scanned. auto iter = _activeChunk; ChunkTasks::ReadyState chunkState = iter->second->ready(useFlexibleLock); - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "_ready loopA state=" << ChunkTasks::toStr(chunkState) << " iter=" << iter->first << " " << iter->second->cInfo()); while (chunkState != ChunkTasks::ReadyState::READY) { @@ -161,9 +158,10 @@ bool ChunkTasksQueue::_ready(bool useFlexibleLock) { } chunkState = iter->second->ready(useFlexibleLock); } - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "_ready loopB state=" << ChunkTasks::toStr(chunkState) << " iter=" << iter->first << " " << iter->second->cInfo()); + _readyChunk = iter->second; return true; } @@ -309,21 +307,21 @@ void ChunkTasks::queTask(wbase::Task::Ptr const& a) { _activeTasks.push(a); state = "ACTIVE"; } - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "ChunkTasks::queTask tid=" << a->getIdStr() << " chunkId=" << _chunkId << " state=" << state << " active.sz=" << _activeTasks._tasks.size() << " pend.sz=" << _pendingTasks.size() << cInfo()); if (_activeTasks.empty()) { - LOGS(_log, LOG_LVL_DEBUG, "Top of ACTIVE is now: (empty)"); + LOGS(_log, LOG_LVL_TRACE, "Top of ACTIVE is now: (empty)"); } else { - LOGS(_log, LOG_LVL_DEBUG, "Top of ACTIVE is now: " << _activeTasks.top()->getIdStr()); + LOGS(_log, LOG_LVL_TRACE, "Top of ACTIVE is now: " << _activeTasks.top()->getIdStr()); } } /// Set this chunk as the active chunk and move pending jobs to active if needed. 
void ChunkTasks::setActive(bool active) { if (_active != active) { - LOGS(_log, LOG_LVL_DEBUG, "ChunkTasks " << _chunkId << " active changed to " << active); + LOGS(_log, LOG_LVL_TRACE, "ChunkTasks " << _chunkId << " active changed to " << active); if (_active && !active) { movePendingToActive(); } @@ -334,7 +332,7 @@ void ChunkTasks::setActive(bool active) { /// Move all pending Tasks to the active heap. void ChunkTasks::movePendingToActive() { for (auto const& t : _pendingTasks) { - LOGS(_log, LOG_LVL_DEBUG, "ChunkTasks " << _chunkId << " pending->active " << t->getIdStr()); + LOGS(_log, LOG_LVL_TRACE, "ChunkTasks " << _chunkId << " pending->active " << t->getIdStr()); _activeTasks.push(t); } _pendingTasks.clear(); @@ -347,8 +345,7 @@ bool ChunkTasks::empty() const { return _activeTasks.empty() && _pendingTasks.em bool ChunkTasks::readyToAdvance() { // There is a rare case where _activeTasks and _inFlightTasks are empty but _readyTask in not null. bool advance = _activeTasks.empty() && _inFlightTasks.empty() && _readyTask == nullptr; - auto logLvl = (advance) ? 
LOG_LVL_INFO : LOG_LVL_TRACE; - LOGS(_log, logLvl, + LOGS(_log, LOG_LVL_TRACE, "ChunkTasks::readyToAdvance chunkId=" << _chunkId << " _activeTasks.sz=" << _activeTasks.size() << " _inFlightTasks.sz=" << _inFlightTasks.size() << " _readyTask==null=" << (_readyTask == nullptr) @@ -403,19 +400,27 @@ wbase::Task::Ptr ChunkTasks::getTask(bool useFlexibleLock) { void ChunkTasks::taskComplete(wbase::Task::Ptr const& task) { _inFlightTasks.erase(task.get()); } -std::string ChunkTasks::cInfo() const { +std::string ChunkTasks::cInfo(bool listTasks) const { std::stringstream os; os << " cInfo(chkId=" << _chunkId << " act=" << _active << " readyTask=" << _readyTask << " inF=" << _inFlightTasks.size() << " (act=" << _activeTasks.size() << " "; - for (auto const& tsk : _activeTasks._tasks) { - os << tsk->getIdStr() << ", "; + if (listTasks) { + for (auto const& tsk : _activeTasks._tasks) { + os << tsk->getIdStr() << ", "; + } + } else { + os << "..."; } + os << ") (pend.sz=" << _pendingTasks.size() << " "; - for (auto const& tsk : _pendingTasks) { - os << tsk->getIdStr() << ", "; + if (listTasks) { + for (auto const& tsk : _pendingTasks) { + os << tsk->getIdStr() << ", "; + } + } else { + os << "..."; } os << "))"; - return os.str(); } diff --git a/src/wsched/ChunkTasksQueue.h b/src/wsched/ChunkTasksQueue.h index b485ee0e12..a29c57e1fb 100644 --- a/src/wsched/ChunkTasksQueue.h +++ b/src/wsched/ChunkTasksQueue.h @@ -71,7 +71,7 @@ class ChunkTasks { wbase::Task::Ptr removeTask(wbase::Task::Ptr const& task); /// @return a string describing this instance for the log file. - std::string cInfo() const; + std::string cInfo(bool listTasks = false) const; /// Class that keeps the slowest tables at the front of the heap. 
class SlowTableHeap { @@ -82,7 +82,7 @@ class ChunkTasks { return false; } // compare scanInfo (slower scans first) - int siComp = x->getScanInfo().compareTables(y->getScanInfo()); + int siComp = x->getScanInfo()->compareTables(*(y->getScanInfo())); return siComp < 0; }; void push(wbase::Task::Ptr const& task); diff --git a/src/wsched/GroupScheduler.cc b/src/wsched/GroupScheduler.cc index 5b5c7da270..904bcb3186 100644 --- a/src/wsched/GroupScheduler.cc +++ b/src/wsched/GroupScheduler.cc @@ -42,7 +42,6 @@ // Qserv headers #include "global/LogContext.h" -#include "proto/worker.pb.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wsched.GroupScheduler"); @@ -116,7 +115,7 @@ void GroupScheduler::_queCmd(util::Command::Ptr const& cmd, bool keepInThisGroup } auto uqCount = _incrCountForUserQuery(t->getQueryId(), 1); LOGS(_log, LOG_LVL_DEBUG, - getName() << " queCmd uqCount=" << uqCount << " rating=" << t->getScanInfo().scanRating + getName() << " queCmd uqCount=" << uqCount << " rating=" << t->getScanInfo()->scanRating << " interactive=" << t->getScanInteractive()); util::CommandQueue::_cv.notify_one(); } diff --git a/src/wsched/ScanScheduler.cc b/src/wsched/ScanScheduler.cc index ce01f5f0d4..b1156b10ef 100644 --- a/src/wsched/ScanScheduler.cc +++ b/src/wsched/ScanScheduler.cc @@ -69,14 +69,17 @@ ScanScheduler::ScanScheduler(string const& name, int maxThreads, int maxReserve, } void ScanScheduler::commandStart(util::Command::Ptr const& cmd) { - wbase::Task::Ptr task = dynamic_pointer_cast(cmd); _infoChanged = true; - if (task == nullptr) { - LOGS(_log, LOG_LVL_WARN, "ScanScheduler::commandStart cmd failed conversion " << getName()); - return; + auto logLvl = LOG_LVL_TRACE; + if (LOG_CHECK_LVL(_log, logLvl)) { + wbase::Task::Ptr task = dynamic_pointer_cast(cmd); + if (task == nullptr) { + LOGS(_log, LOG_LVL_WARN, "ScanScheduler::commandStart cmd failed conversion " << getName()); + return; + } + QSERV_LOGCONTEXT_QUERY_JOB(task->getQueryId(), task->getJobId()); + 
LOGS(_log, logLvl, "commandStart " << getName() << " task=" << task->getIdStr()); } - QSERV_LOGCONTEXT_QUERY_JOB(task->getQueryId(), task->getJobId()); - LOGS(_log, LOG_LVL_DEBUG, "commandStart " << getName() << " task=" << task->getIdStr()); // task was registered Inflight when getCmd() was called. } @@ -96,11 +99,11 @@ void ScanScheduler::commandFinish(util::Command::Ptr const& cmd) { lock_guard guard(util::CommandQueue::_mx); --_inFlight; ++_recentlyCompleted; - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "commandFinish " << getName() << " inFlight=" << _inFlight << " " << task->getIdStr()); _decrChunkTaskCount(task->getChunkId()); } - LOGS(_log, LOG_LVL_DEBUG, "tskEnd chunk=" << task->getChunkId() << " " << task->getIdStr()); + LOGS(_log, LOG_LVL_TRACE, "tskEnd chunk=" << task->getChunkId() << " " << task->getIdStr()); // Whenever a Task finishes, sleeping threads need to check if resources // are available to run new Tasks. _cv.notify_one(); @@ -112,6 +115,11 @@ bool ScanScheduler::ready() { return _ready(); } +string ScanScheduler::getRatingStr() const { + string const str = "min=" + to_string(_minRating) + " max=" + to_string(_maxRating); + return str; +} + /// Precondition: _mx is locked /// Returns true if there is a Task ready to go and we aren't up against any limits. 
bool ScanScheduler::_ready() { @@ -119,7 +127,7 @@ bool ScanScheduler::_ready() { if (_infoChanged) { _infoChanged = false; logStuff = true; - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, getName() << " ScanScheduler::_ready " << " inFlight=" << _inFlight << " maxThreads=" << _maxThreads << " adj=" << _maxThreadsAdj << " activeChunks=" << getActiveChunkCount() @@ -127,7 +135,7 @@ bool ScanScheduler::_ready() { } if (_inFlight >= maxInFlight()) { if (logStuff) { - LOGS(_log, LOG_LVL_DEBUG, getName() << " ScanScheduler::_ready too many in flight " << _inFlight); + LOGS(_log, LOG_LVL_TRACE, getName() << " ScanScheduler::_ready too many in flight " << _inFlight); } return false; } @@ -156,7 +164,7 @@ util::Command::Ptr ScanScheduler::getCmd(bool wait) { if (task != nullptr) { ++_inFlight; // in flight as soon as it is off the queue. QSERV_LOGCONTEXT_QUERY_JOB(task->getQueryId(), task->getJobId()); - LOGS(_log, LOG_LVL_DEBUG, + LOGS(_log, LOG_LVL_TRACE, "getCmd " << getName() << " tskStart chunk=" << task->getChunkId() << " tid=" << task->getIdStr() << " inflight=" << _inFlight << _taskQueue->queueInfo()); _infoChanged = true; @@ -182,36 +190,35 @@ void ScanScheduler::queCmd(vector const& cmds) { int jid = 0; // Convert to a vector of tasks for (auto const& cmd : cmds) { - wbase::Task::Ptr t = dynamic_pointer_cast(cmd); - if (t == nullptr) { + wbase::Task::Ptr tsk = dynamic_pointer_cast(cmd); + if (tsk == nullptr) { throw util::Bug(ERR_LOC, getName() + " queCmd could not be converted to Task or was nullptr"); } if (first) { first = false; - qid = t->getQueryId(); - jid = t->getJobId(); + qid = tsk->getQueryId(); + jid = tsk->getJobId(); QSERV_LOGCONTEXT_QUERY_JOB(qid, jid); } else { - if (qid != t->getQueryId() || jid != t->getJobId()) { - LOGS(_log, LOG_LVL_ERROR, - " mismatch multiple query/job ids in single queCmd " - << " expected QID=" << qid << " got=" << t->getQueryId() - << " expected JID=" << jid << " got=" << t->getJobId()); + if (qid != 
tsk->getQueryId()) { + string eMsg("Mismatch multiple query/job ids in single queCmd "); + eMsg += " expected QID=" + to_string(qid) + " got=" + to_string(tsk->getQueryId()); + eMsg += " expected JID=" + to_string(qid) + " got=" + to_string(tsk->getJobId()); + LOGS(_log, LOG_LVL_ERROR, eMsg); // This could cause difficult to detect problems later on. - throw util::Bug(ERR_LOC, "Mismatch multiple query/job ids in single queCmd"); + throw util::Bug(ERR_LOC, eMsg); return; } } - tasks.push_back(t); - LOGS(_log, LOG_LVL_INFO, getName() << " queCmd " << t->getIdStr()); + + tasks.push_back(tsk); + LOGS(_log, LOG_LVL_TRACE, getName() << " queCmd " << tsk->getIdStr()); } // Queue the tasks { lock_guard lock(util::CommandQueue::_mx); auto uqCount = _incrCountForUserQuery(qid, tasks.size()); - LOGS(_log, LOG_LVL_DEBUG, - getName() << " queCmd " - << " uqCount=" << uqCount); + LOGS(_log, LOG_LVL_TRACE, getName() << " queCmd " << " uqCount=" << uqCount); _taskQueue->queueTask(tasks); _infoChanged = true; } diff --git a/src/wsched/ScanScheduler.h b/src/wsched/ScanScheduler.h index e1739c7afe..7f65276def 100644 --- a/src/wsched/ScanScheduler.h +++ b/src/wsched/ScanScheduler.h @@ -68,7 +68,8 @@ class ScanScheduler : public SchedulerBase { util::Command::Ptr getCmd(bool wait) override; void commandStart(util::Command::Ptr const& cmd) override; void commandFinish(util::Command::Ptr const& cmd) override; - bool isRatingInRange(int rating) { return _minRating <= rating && rating <= _maxRating; } + bool isRatingInRange(int rating) const { return _minRating <= rating && rating <= _maxRating; } + std::string getRatingStr() const; // SchedulerBase overrides bool ready() override; diff --git a/src/wsched/SchedulerBase.cc b/src/wsched/SchedulerBase.cc index c3981f64d5..c755a0a973 100644 --- a/src/wsched/SchedulerBase.cc +++ b/src/wsched/SchedulerBase.cc @@ -39,6 +39,32 @@ using namespace std; namespace lsst::qserv::wsched { +SchedulerBase::SchedulerBase(std::string const& name, int 
maxThreads, int maxReserve, int maxActiveChunks, + int priority) + : _name{name}, + _maxReserve{maxReserve}, + _maxReserveDefault{maxReserve}, + _maxThreads{maxThreads}, + _maxThreadsAdj{maxThreads}, + _priority{priority}, + _priorityDefault{priority} { + setMaxActiveChunks(maxActiveChunks); + + using namespace std::chrono_literals; + std::vector bucketMaxVals{0.01, 0.1, 1}; + size_t maxSize = 10; + _histQueuedTasks = std::make_shared("queuedTasks", bucketMaxVals, 1h, maxSize); + _histRunningTasks = std::make_shared("runningTasks", bucketMaxVals, 1h, maxSize); + _histTransmittingTasks = + std::make_shared("transmittingTasks", bucketMaxVals, 1h, maxSize); + _histRecentlyCompletedTasks = + std::make_shared("recentlyCompletedTasks", bucketMaxVals, 1h, maxSize); + + LOGS(_log, LOG_LVL_INFO, + "Scheduler name=" << name << " maxThreads=" << _maxThreads << " maxThreads=" << _maxThreads + << " priority=" << _priority); +} + /// Set priority to use when starting next chunk. void SchedulerBase::setPriority(int priority) { _priority = priority; } @@ -61,7 +87,7 @@ int SchedulerBase::_decrCountForUserQuery(QueryId queryId) { count = --(iter->second); if (count <= 0) { _userQueryCounts.erase(iter); - LOGS(_log, LOG_LVL_DEBUG, queryId << " uqCount=0, erased"); + LOGS(_log, LOG_LVL_TRACE, queryId << " uqCount=0, erased"); } } return count; diff --git a/src/wsched/SchedulerBase.h b/src/wsched/SchedulerBase.h index 7f6e9047b3..36e63023e8 100644 --- a/src/wsched/SchedulerBase.h +++ b/src/wsched/SchedulerBase.h @@ -48,29 +48,7 @@ class SchedulerBase : public wcontrol::Scheduler { static int getMaxPriority() { return 1000000000; } - SchedulerBase(std::string const& name, int maxThreads, int maxReserve, int maxActiveChunks, int priority) - : _name{name}, - _maxReserve{maxReserve}, - _maxReserveDefault{maxReserve}, - _maxThreads{maxThreads}, - _maxThreadsAdj{maxThreads}, - _priority{priority}, - _priorityDefault{priority} { - setMaxActiveChunks(maxActiveChunks); - - using namespace 
std::chrono_literals; - // TODO: DM-??? set values from configuration, change values at runtime. - std::vector bucketMaxVals{0.01, 0.1, 1}; - size_t maxSize = 10; - _histQueuedTasks = - std::make_shared("queuedTasks", bucketMaxVals, 1h, maxSize); - _histRunningTasks = - std::make_shared("runningTasks", bucketMaxVals, 1h, maxSize); - _histTransmittingTasks = - std::make_shared("transmittingTasks", bucketMaxVals, 1h, maxSize); - _histRecentlyCompletedTasks = std::make_shared("recentlyCompletedTasks", - bucketMaxVals, 1h, maxSize); - } + SchedulerBase(std::string const& name, int maxThreads, int maxReserve, int maxActiveChunks, int priority); virtual ~SchedulerBase() {} SchedulerBase(SchedulerBase const&) = delete; SchedulerBase& operator=(SchedulerBase const&) = delete; diff --git a/src/wsched/testSchedulers.cc b/src/wsched/testSchedulers.cc index f27b7269c9..dd7cb141bc 100644 --- a/src/wsched/testSchedulers.cc +++ b/src/wsched/testSchedulers.cc @@ -32,12 +32,12 @@ // Qserv headers #include "mysql/MySqlConfig.h" -#include "proto/ScanTableInfo.h" -#include "proto/worker.pb.h" +#include "protojson/ScanTableInfo.h" #include "util/Command.h" #include "util/EventThread.h" #include "wbase/FileChannelShared.h" #include "wbase/Task.h" +#include "wbase/UberJobData.h" #include "wconfig/WorkerConfig.h" #include "wcontrol/SqlConnMgr.h" #include "wpublish/QueriesAndChunks.h" @@ -60,10 +60,14 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wsched.testSchedulers"); using namespace std; using lsst::qserv::mysql::MySqlConfig; -using lsst::qserv::proto::TaskMsg; +using lsst::qserv::protojson::ScanInfo::Rating::FAST; +using lsst::qserv::protojson::ScanInfo::Rating::FASTEST; +using lsst::qserv::protojson::ScanInfo::Rating::MEDIUM; +using lsst::qserv::protojson::ScanInfo::Rating::SLOW; using lsst::qserv::wbase::FileChannelShared; using lsst::qserv::wbase::SendChannel; using lsst::qserv::wbase::Task; +using lsst::qserv::wbase::UberJobData; using lsst::qserv::wconfig::WorkerConfig; using 
lsst::qserv::wcontrol::SqlConnMgr; using lsst::qserv::wdb::ChunkResourceMgr; @@ -83,74 +87,130 @@ auto workerCfg = lsst::qserv::wconfig::WorkerConfig::create(); std::vector locSendSharedPtrs; -Task::Ptr makeTask(std::shared_ptr tm, shared_ptr const& queries) { +lsst::qserv::protojson::ScanInfo::Ptr makeScanInfoFastest() { + auto info = lsst::qserv::protojson::ScanInfo::create(); + info->scanRating = FASTEST; + int const sRating = FAST; + string const db = "moose"; + string const table = "Object"; + bool const lockInMem = true; + info->infoTables.emplace_back(db, table, lockInMem, sRating); + return info; +} + +lsst::qserv::protojson::ScanInfo::Ptr makeScanInfoFast(string const& slowestTableName = string("")) { + auto info = lsst::qserv::protojson::ScanInfo::create(); + info->scanRating = FAST; + int const sRating = FAST; + string const db = "moose"; + string const table = "Object"; + bool const lockInMem = true; + info->infoTables.emplace_back(db, table, lockInMem, sRating); + info->sortTablesSlowestFirst(); + if (slowestTableName != string("")) { + info->infoTables[0].table = slowestTableName; + } + return info; +} + +lsst::qserv::protojson::ScanInfo::Ptr makeScanInfoMedium(string const& slowestTableName = string("")) { + auto info = lsst::qserv::protojson::ScanInfo::create(); + info->scanRating = MEDIUM; + string const db = "moose"; + bool const lockInMem = true; + info->infoTables.emplace_back(db, "Object", lockInMem, FAST); + info->infoTables.emplace_back(db, "Source", lockInMem, MEDIUM); + info->sortTablesSlowestFirst(); + if (slowestTableName != string("")) { + info->infoTables[0].table = slowestTableName; + } + return info; +} + +lsst::qserv::protojson::ScanInfo::Ptr makeScanInfoSlow(string const& slowestTableName = string("")) { + auto info = lsst::qserv::protojson::ScanInfo::create(); + info->scanRating = SLOW; + string const db = "moose"; + bool const lockInMem = true; + info->infoTables.emplace_back(db, "Object", lockInMem, FAST); + 
info->infoTables.emplace_back(db, "Source", lockInMem, MEDIUM); + info->infoTables.emplace_back(db, "ForcedSource", lockInMem, SLOW); + info->sortTablesSlowestFirst(); + if (slowestTableName != string("")) { + info->infoTables[0].table = slowestTableName; + } + return info; +} + +UberJobData::Ptr makeUberJobData(uint64_t queryId, + std::shared_ptr const& scanInfo, + bool scanInteractive, QueriesAndChunks::Ptr const& queriesAndChunks) { + auto ujd = UberJobData::create(7, // UberJobId + "cz1", // czarName + 11, // czarId, + "aHost", // czarHost + 3333, // czarPort + queryId, + 0, // rowLimit + 5000000000, // maxTableSizeBytes + scanInfo, + scanInteractive, // scanInteractive + "worker_13", // workerId, + nullptr, // std::shared_ptr const& foreman + queriesAndChunks, + "whatever" // authKey + ); + return ujd; +} + +Task::Ptr makeTask(UberJobData::Ptr const& ujData, int jobId, int chunkId, int fragmentNumber, + size_t templateId, bool hasSubchunks, int subchunkId, + vector const& fragSubTables, + vector const& fragSubchunkIds, shared_ptr const& sc, + std::shared_ptr const& queryStats, + std::shared_ptr const& queriesAndChunks) { WorkerConfig::create(); - auto sendC = std::make_shared(); - auto sc = FileChannelShared::create(sendC, tm->czarid()); - locSendSharedPtrs.push_back(sc); - auto taskVect = Task::createTasks(tm, sc, crm, mySqlConfig, sqlConnMgr, queries); - Task::Ptr task = taskVect[0]; + string const db = ujData->getScanInfo()->infoTables[0].db; + int const attemptCount = 0; + Task::Ptr task = shared_ptr(new Task(ujData, jobId, attemptCount, chunkId, fragmentNumber, + templateId, hasSubchunks, subchunkId, db, fragSubTables, + fragSubchunkIds, queryStats)); return task; } struct SchedulerFixture { - typedef std::shared_ptr TaskMsgPtr; - SchedulerFixture(void) { counter = 20; } ~SchedulerFixture(void) {} - void addSomeFragments(TaskMsgPtr const& t, int numberOfFragments) { - for (int i = 0; i < numberOfFragments; ++i) { - TaskMsg::Fragment* f = 
t->add_fragment(); - f->add_query("Hello, this is a query."); - f->mutable_subchunks()->add_id(100 + i); - f->set_resulttable("r_341"); - } - } - - TaskMsgPtr newTaskMsg(int seq, lsst::qserv::QueryId qId, int jobId) { - TaskMsgPtr t = std::make_shared(); - t->set_queryid(qId); - t->set_jobid(jobId); - t->set_chunkid(seq); - t->set_czarid(1); - t->set_db("elephant"); - addSomeFragments(t, 3); - t->set_scaninteractive(false); - t->set_attemptcount(0); + Task::Ptr makeUTask(int seq, int jobId, UberJobData::Ptr const& ujData, + shared_ptr const& sc, + shared_ptr const& queries) { ++counter; + int const chunkId = seq; + int const fragmentNumber = 0; + size_t const templateId = 0; + bool const hasSubchunks = false; + int const subchunkId = 0; + vector fragSubTables; + vector fragSubchunkIds; + Task::Ptr t = makeTask(ujData, jobId, chunkId, fragmentNumber, templateId, hasSubchunks, subchunkId, + fragSubTables, fragSubchunkIds, sc, queries->getStats(ujData->getQueryId()), + queries); return t; } - TaskMsgPtr newTaskMsgSimple(int seq, lsst::qserv::QueryId qId, int jobId) { - TaskMsgPtr t = std::make_shared(); - t->set_queryid(qId); - t->set_jobid(jobId); - t->set_chunkid(seq); - t->set_czarid(1); - t->set_db("moose"); - t->set_scaninteractive(false); - t->set_attemptcount(0); - addSomeFragments(t, 1); - ++counter; - return t; - } - - TaskMsgPtr newTaskMsgScan(int seq, int priority, lsst::qserv::QueryId qId, int jobId, - std::string const& tableName = "whatever") { - auto taskMsg = newTaskMsg(seq, qId, jobId); - taskMsg->set_scanpriority(priority); - auto sTbl = taskMsg->add_scantable(); - sTbl->set_db("elephant"); - sTbl->set_table(tableName); - sTbl->set_scanrating(priority); - sTbl->set_lockinmemory(true); - return taskMsg; - } - - Task::Ptr queMsgWithChunkId(wsched::GroupScheduler& gs, int chunkId, lsst::qserv::QueryId qId, int jobId, + Task::Ptr queMsgWithChunkId(UberJobData::Ptr const& ujData, wsched::GroupScheduler& gs, int chunkId, + int jobId, shared_ptr const& sc, 
shared_ptr const& queries) { - Task::Ptr t = makeTask(newTaskMsg(chunkId, qId, jobId), queries); + int const fragmentNumber = 0; + size_t const templateId = 0; + bool const hasSubchunks = false; + int const subchunkId = 0; + vector fragSubTables; + vector fragSubchunkIds; + Task::Ptr t = makeTask(ujData, jobId, chunkId, fragmentNumber, templateId, hasSubchunks, subchunkId, + fragSubTables, fragSubchunkIds, sc, queries->getStats(ujData->getQueryId()), + queries); gs.queCmd(t); return t; } @@ -160,17 +220,25 @@ struct SchedulerFixture { BOOST_FIXTURE_TEST_SUITE(SchedulerSuite, SchedulerFixture) +/// examineAfter=0 Don't run _examineThread when 0 +/// deadAfter=1 Consider queries dead if they finished more than 1 second ago. +lsst::qserv::wpublish::QueriesAndChunks::Ptr setupQueries(int maxTasksBooted, int maxDarkTasks, + bool resetForTesting, int deadAfter = 1, + int examineAfter = 0) { + auto qac = lsst::qserv::wpublish::QueriesAndChunks::setupGlobal( + chrono::seconds(deadAfter), chrono::seconds(examineAfter), maxTasksBooted, maxDarkTasks, + resetForTesting); + return qac; +} + struct SchedFixture { - SchedFixture(double maxScanTimeFast, bool examinAllSleep) - : _maxScanTimeFast{maxScanTimeFast}, _examineAllSleep{examinAllSleep} { + SchedFixture(double maxScanTimeFast, lsst::qserv::wpublish::QueriesAndChunks::Ptr const& queries_) + : _maxScanTimeFast(maxScanTimeFast), queries(queries_) { setupQueriesBlend(); } ~SchedFixture() {} void setupQueriesBlend() { - queries = lsst::qserv::wpublish::QueriesAndChunks::setupGlobal( - std::chrono::seconds(1), std::chrono::seconds(_examineAllSleep), maxBootedC, maxDarkTasksC, - resetForTestingC); blend = std::make_shared("blendSched", queries, maxThreads, group, scanSlow, scanSchedulers); group->setDefaultPosition(0); @@ -181,41 +249,45 @@ struct SchedFixture { queries->setRequiredTasksCompleted(1); // Make it easy to set a baseline. 
} - int const fastest = lsst::qserv::proto::ScanInfo::Rating::FASTEST; - int const fast = lsst::qserv::proto::ScanInfo::Rating::FAST; - int const medium = lsst::qserv::proto::ScanInfo::Rating::MEDIUM; - int const slow = lsst::qserv::proto::ScanInfo::Rating::SLOW; - lsst::qserv::QueryId qIdInc{1}; - int maxThreads{9}; int maxActiveChunks{20}; int priority{2}; private: double _maxScanTimeFast{oneHr}; ///< Don't hit time limit in tests. - int _examineAllSleep{0}; ///< Don't run _examineThread when 0 public: wsched::GroupScheduler::Ptr group{ std::make_shared("GroupSched", maxThreads, 2, 3, priority++)}; wsched::ScanScheduler::Ptr scanSlow{std::make_shared( - "ScanSlow", maxThreads, 2, priority++, maxActiveChunks, medium + 1, slow, oneHr)}; + "ScanSlow", maxThreads, 2, priority++, maxActiveChunks, MEDIUM + 1, SLOW, oneHr)}; wsched::ScanScheduler::Ptr scanMed{std::make_shared( - "ScanMed", maxThreads, 2, priority++, maxActiveChunks, fast + 1, medium, oneHr)}; + "ScanMed", maxThreads, 2, priority++, maxActiveChunks, FAST + 1, MEDIUM, oneHr)}; wsched::ScanScheduler::Ptr scanFast{std::make_shared( - "ScanFast", maxThreads, 3, priority++, maxActiveChunks, fastest, fast, _maxScanTimeFast)}; + "ScanFast", maxThreads, 3, priority++, maxActiveChunks, FASTEST, FAST, _maxScanTimeFast)}; std::vector scanSchedulers{scanFast, scanMed}; lsst::qserv::wpublish::QueriesAndChunks::Ptr queries; wsched::BlendScheduler::Ptr blend; }; +void logCmd(lsst::qserv::util::Command::Ptr const& cmd, std::string const& note) { + if (cmd == nullptr) + LOGS(_log, LOG_LVL_WARN, note << " null"); + else + LOGS(_log, LOG_LVL_WARN, note << ":" << cmd->dump()); +} + // TODO: DM-33302 replace this test case BOOST_AUTO_TEST_CASE(Grouping) { - SchedFixture f(60.0, 1); // Values to keep QueriesAndChunk from triggering. 
- LOGS(_log, LOG_LVL_DEBUG, "Test_case grouping"); + + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); // Values to keep QueriesAndChunk from triggering. + // Test grouping by chunkId. Max entries added to a single group set to 3. wsched::GroupScheduler gs{"GroupSchedA", 100, 0, 3, 0}; // chunk Ids @@ -228,25 +300,52 @@ BOOST_AUTO_TEST_CASE(Grouping) { BOOST_CHECK(gs.ready() == false); lsst::qserv::QueryId qIdInc = 1; - Task::Ptr a1 = queMsgWithChunkId(gs, a, qIdInc++, 0, f.queries); + // Either FASTEST scan rating or scanInteractive = true should make the scan interactive. + auto scanInfoFastest = makeScanInfoFastest(); + auto scanInfoFast = makeScanInfoFast(); + bool const scanInteractive = true; + auto ujData_a1 = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + shared_ptr sc = nullptr; + Task::Ptr a1 = queMsgWithChunkId(ujData_a1, gs, a, 0, sc, fixt.queries); BOOST_CHECK(gs.empty() == false); BOOST_CHECK(gs.ready() == true); - Task::Ptr b1 = queMsgWithChunkId(gs, b, qIdInc++, 0, f.queries); - Task::Ptr c1 = queMsgWithChunkId(gs, c, qIdInc++, 0, f.queries); - Task::Ptr b2 = queMsgWithChunkId(gs, b, qIdInc++, 0, f.queries); - Task::Ptr b3 = queMsgWithChunkId(gs, b, qIdInc++, 0, f.queries); - Task::Ptr b4 = queMsgWithChunkId(gs, b, qIdInc++, 0, f.queries); - Task::Ptr a2 = queMsgWithChunkId(gs, a, qIdInc++, 0, f.queries); - Task::Ptr a3 = queMsgWithChunkId(gs, a, qIdInc++, 0, f.queries); - Task::Ptr b5 = queMsgWithChunkId(gs, b, qIdInc++, 0, f.queries); - Task::Ptr d1 = queMsgWithChunkId(gs, d, qIdInc++, 0, f.queries); + auto b1Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr b1 = queMsgWithChunkId(b1Ujd, gs, b, 0, sc, fixt.queries); + + auto c1Ujd = makeUberJobData(qIdInc++, scanInfoFast, scanInteractive, fixt.queries); + Task::Ptr c1 = 
queMsgWithChunkId(c1Ujd, gs, c, 0, sc, fixt.queries); + + auto b2Ujd = makeUberJobData(qIdInc++, scanInfoFastest, false, fixt.queries); + Task::Ptr b2 = queMsgWithChunkId(b2Ujd, gs, b, 0, sc, fixt.queries); + + auto b3Ujd = makeUberJobData(qIdInc++, scanInfoFast, scanInteractive, fixt.queries); + Task::Ptr b3 = queMsgWithChunkId(b3Ujd, gs, b, 0, sc, fixt.queries); + + auto b4Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr b4 = queMsgWithChunkId(b4Ujd, gs, b, 0, sc, fixt.queries); + + auto a2Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr a2 = queMsgWithChunkId(a2Ujd, gs, a, 0, sc, fixt.queries); + + auto a3Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr a3 = queMsgWithChunkId(a3Ujd, gs, a, 0, sc, fixt.queries); + + auto b5Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr b5 = queMsgWithChunkId(b5Ujd, gs, b, 0, sc, fixt.queries); + + auto d1Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr d1 = queMsgWithChunkId(d1Ujd, gs, d, 0, sc, fixt.queries); + BOOST_CHECK(gs.getSize() == 5); BOOST_CHECK(gs.ready() == true); + // Should get all the first 3 'a' commands in order auto aa1 = gs.getCmd(false); auto aa2 = gs.getCmd(false); - Task::Ptr a4 = queMsgWithChunkId(gs, a, qIdInc++, 0, f.queries); // this should get its own group + auto a4Ujd = makeUberJobData(qIdInc++, scanInfoFastest, scanInteractive, fixt.queries); + Task::Ptr a4 = queMsgWithChunkId(a4Ujd, gs, a, 0, sc, fixt.queries); // this should get its own group + auto aa3 = gs.getCmd(false); BOOST_CHECK(a1.get() == aa1.get()); BOOST_CHECK(a2.get() == aa2.get()); @@ -298,15 +397,27 @@ BOOST_AUTO_TEST_CASE(Grouping) { BOOST_AUTO_TEST_CASE(GroupMaxThread) { // Test that maxThreads is meaningful. 
LOGS(_log, LOG_LVL_WARN, "Test_case GroupMaxThread"); - auto queries = QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(300), maxBootedC, - maxDarkTasksC, resetForTestingC); + auto scanInfo = makeScanInfoFastest(); + bool const scanInteractive = true; + shared_ptr sc = nullptr; + + auto queries = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, 1, 300); wsched::GroupScheduler gs{"GroupSchedB", 3, 0, 100, 0}; lsst::qserv::QueryId qIdInc = 1; + int a = 42; - Task::Ptr a1 = queMsgWithChunkId(gs, a, qIdInc++, 0, queries); - Task::Ptr a2 = queMsgWithChunkId(gs, a, qIdInc++, 0, queries); - Task::Ptr a3 = queMsgWithChunkId(gs, a, qIdInc++, 0, queries); - Task::Ptr a4 = queMsgWithChunkId(gs, a, qIdInc++, 0, queries); + auto a1Ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a1 = queMsgWithChunkId(a1Ujd, gs, a, 0, sc, queries); + + auto a2Ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a2 = queMsgWithChunkId(a2Ujd, gs, a, 0, sc, queries); + + auto a3Ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a3 = queMsgWithChunkId(a3Ujd, gs, a, 0, sc, queries); + + auto a4Ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a4 = queMsgWithChunkId(a4Ujd, gs, a, 0, sc, queries); + BOOST_CHECK(gs.ready() == true); auto aa1 = gs.getCmd(false); BOOST_CHECK(a1.get() == aa1.get()); @@ -330,31 +441,33 @@ BOOST_AUTO_TEST_CASE(GroupMaxThread) { BOOST_AUTO_TEST_CASE(ScanScheduleTest) { LOGS(_log, LOG_LVL_DEBUG, "Test_case ScanScheduleTest"); - auto queries = QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(300), maxBootedC, - maxDarkTasksC, resetForTestingC); + + auto queries = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, 1, 300); wsched::ScanScheduler sched{"ScanSchedA", 2, 1, 0, 20, 0, 100, oneHr}; + auto scanInfo = makeScanInfoFast(); + bool const scanInteractive = true; + shared_ptr sc = nullptr; 
lsst::qserv::QueryId qIdInc = 1; - // Test ready state as Tasks added and removed. BOOST_CHECK(sched.ready() == false); - Task::Ptr a38 = makeTask(newTaskMsgScan(38, 0, qIdInc++, 0), queries); + auto ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a38 = makeUTask(38, 0, ujd, sc, queries); sched.queCmd(a38); // Calling read swaps active and pending heaps, putting a38 at the top of the active. BOOST_CHECK(sched.ready() == true); - Task::Ptr a40 = makeTask(newTaskMsgScan(40, 0, qIdInc++, 0), queries); // goes on active + ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a40 = makeUTask(40, 0, ujd, sc, queries); // goes on active sched.queCmd(a40); - // TODO: This needs to be evaluated and removed. - // Making a non-scan message so MemManNone will grant it an empty Handle - Task::Ptr b41 = makeTask(newTaskMsg(41, qIdInc++, 0), queries); // goes on active + ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr b41 = makeUTask(41, 0, ujd, sc, queries); // goes on active sched.queCmd(b41); - // TODO: This needs to be evaluated and removed. - // Making a non-scan message so MemManNone will grant it an empty Handle - Task::Ptr a33 = makeTask(newTaskMsg(33, qIdInc++, 0), queries); // goes on pending. + ujd = makeUberJobData(qIdInc++, scanInfo, scanInteractive, queries); + Task::Ptr a33 = makeUTask(33, 0, ujd, sc, queries); // goes on pending. sched.queCmd(a33); BOOST_CHECK(sched.ready() == true); @@ -398,278 +511,317 @@ BOOST_AUTO_TEST_CASE(BlendScheduleTest) { // TODO: This needs to be evaluated and removed. // In this case, memMan->lock(..) always returns true (really HandleType::ISEMPTY). // ChunkIds matter as they control the order Tasks come off individual schedulers. - SchedFixture f(60.0, 1); // Values to keep QueriesAndChunk from triggering. 
- - BOOST_CHECK(f.blend->ready() == false); - BOOST_CHECK(f.blend->calcAvailableTheads() == 5); + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); // Values to keep QueriesAndChunk from triggering. + + auto scanInfoFastest = makeScanInfoFastest(); + auto scanInfoFast = makeScanInfoFast(); + auto scanInfoMedium = makeScanInfoMedium(); + auto scanInfoSlow = makeScanInfoSlow(); + bool const scanInteractiveT = true; + bool const scanInteractiveF = false; + shared_ptr sc = nullptr; + + BOOST_CHECK(fixt.blend->ready() == false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 5); // Put one message on each scheduler except ScanFast, which gets 2. LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-1 add Tasks"); - Task::Ptr g1 = makeTask(newTaskMsgSimple(40, f.qIdInc++, 0), f.queries); - f.blend->queCmd(g1); - BOOST_CHECK(f.group->getSize() == 1); - BOOST_CHECK(f.blend->ready() == true); - - auto taskMsg = newTaskMsgScan(27, lsst::qserv::proto::ScanInfo::Rating::FAST, f.qIdInc++, 0); - Task::Ptr sF1 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sF1); - BOOST_CHECK(f.scanFast->getSize() == 1); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(40, lsst::qserv::proto::ScanInfo::Rating::FAST, f.qIdInc++, 0); - Task::Ptr sF2 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sF2); - BOOST_CHECK(f.scanFast->getSize() == 2); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(34, lsst::qserv::proto::ScanInfo::Rating::SLOW, f.qIdInc++, 0); - Task::Ptr sS1 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sS1); - BOOST_CHECK(f.scanSlow->getSize() == 1); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(31, lsst::qserv::proto::ScanInfo::Rating::MEDIUM, f.qIdInc++, 0); - Task::Ptr sM1 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sM1); - BOOST_CHECK(f.scanMed->getSize() == 1); - 
BOOST_CHECK(f.blend->ready() == true); - - BOOST_CHECK(f.blend->getSize() == 5); - BOOST_CHECK(f.blend->calcAvailableTheads() == 5); + auto ujd = makeUberJobData(fixt.qIdInc++, scanInfoFastest, scanInteractiveT, fixt.queries); + Task::Ptr g1 = makeUTask(40, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(g1); + BOOST_CHECK(fixt.group->getSize() == 1); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFast, scanInteractiveF, fixt.queries); + Task::Ptr sF1 = makeUTask(27, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sF1); + BOOST_CHECK(fixt.scanFast->getSize() == 1); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFast, scanInteractiveF, fixt.queries); + Task::Ptr sF2 = makeUTask(40, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sF2); + BOOST_CHECK(fixt.scanFast->getSize() == 2); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoSlow, scanInteractiveF, fixt.queries); + Task::Ptr sS1 = makeUTask(34, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sS1); + BOOST_CHECK(fixt.scanSlow->getSize() == 1); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoMedium, scanInteractiveF, fixt.queries); + Task::Ptr sM1 = makeUTask(31, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sM1); + BOOST_CHECK(fixt.scanMed->getSize() == 1); + BOOST_CHECK(fixt.blend->ready() == true); + + BOOST_CHECK(fixt.blend->getSize() == 5); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 5); // Start all the Tasks. LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-1 start all tasks"); // Tasks should come out in order of scheduler priority. 
- auto og1 = f.blend->getCmd(false); + auto og1 = fixt.blend->getCmd(false); BOOST_CHECK(og1.get() == g1.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 4); - auto osF1 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 4); + auto osF1 = fixt.blend->getCmd(false); BOOST_CHECK(osF1.get() == sF1.get()); // sF1 has lower chunkId than sF2 - BOOST_CHECK(f.blend->calcAvailableTheads() == 3); - auto osF2 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 3); + auto osF2 = fixt.blend->getCmd(false); BOOST_CHECK(osF2.get() == sF2.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - auto osM1 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + auto osM1 = fixt.blend->getCmd(false); BOOST_CHECK(osM1.get() == sM1.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 1); - auto osS1 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 1); + auto osS1 = fixt.blend->getCmd(false); BOOST_CHECK(osS1.get() == sS1.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - BOOST_CHECK(f.blend->getSize() == 0); - BOOST_CHECK(f.blend->ready() == false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->getSize() == 0); + BOOST_CHECK(fixt.blend->ready() == false); // All threads should now be in use or reserved, should be able to start one // Task for each scheduler but second Task should remain on queue. 
- Task::Ptr g2 = makeTask(newTaskMsgSimple(41, f.qIdInc++, 0), f.queries); - f.blend->queCmd(g2); - BOOST_CHECK(f.group->getSize() == 1); - BOOST_CHECK(f.blend->getSize() == 1); - BOOST_CHECK(f.blend->ready() == true); - - Task::Ptr g3 = makeTask(newTaskMsgSimple(12, f.qIdInc++, 0), f.queries); - f.blend->queCmd(g3); - BOOST_CHECK(f.group->getSize() == 2); - BOOST_CHECK(f.blend->getSize() == 2); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(70, lsst::qserv::proto::ScanInfo::Rating::FAST, f.qIdInc++, 0); - Task::Ptr sF3 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sF3); - BOOST_CHECK(f.scanFast->getSize() == 1); - BOOST_CHECK(f.blend->getSize() == 3); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(72, lsst::qserv::proto::ScanInfo::Rating::FAST, f.qIdInc++, 0); - Task::Ptr sF4 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sF4); - BOOST_CHECK(f.scanFast->getSize() == 2); - BOOST_CHECK(f.blend->getSize() == 4); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(13, lsst::qserv::proto::ScanInfo::Rating::MEDIUM, f.qIdInc++, 0); - Task::Ptr sM2 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sM2); - BOOST_CHECK(f.scanMed->getSize() == 1); - BOOST_CHECK(f.blend->getSize() == 5); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(15, lsst::qserv::proto::ScanInfo::Rating::MEDIUM, f.qIdInc++, 0); - Task::Ptr sM3 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sM3); - BOOST_CHECK(f.scanMed->getSize() == 2); - BOOST_CHECK(f.blend->getSize() == 6); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(5, lsst::qserv::proto::ScanInfo::Rating::SLOW, f.qIdInc++, 0); - Task::Ptr sS2 = makeTask(taskMsg, f.queries); - f.blend->queCmd(sS2); - BOOST_CHECK(f.scanSlow->getSize() == 1); - BOOST_CHECK(f.blend->getSize() == 7); - BOOST_CHECK(f.blend->ready() == true); - - taskMsg = newTaskMsgScan(6, lsst::qserv::proto::ScanInfo::Rating::SLOW, f.qIdInc++, 0); - Task::Ptr sS3 = 
makeTask(taskMsg, f.queries); - f.blend->queCmd(sS3); - BOOST_CHECK(f.scanSlow->getSize() == 2); - BOOST_CHECK(f.blend->getSize() == 8); - BOOST_CHECK(f.blend->ready() == true); + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFastest, scanInteractiveT, fixt.queries); + Task::Ptr g2 = makeUTask(41, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(g2); + BOOST_CHECK(fixt.group->getSize() == 1); + BOOST_CHECK(fixt.blend->getSize() == 1); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFastest, scanInteractiveT, fixt.queries); + Task::Ptr g3 = makeUTask(12, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(g3); + BOOST_CHECK(fixt.group->getSize() == 2); + BOOST_CHECK(fixt.blend->getSize() == 2); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFast, scanInteractiveF, fixt.queries); + Task::Ptr sF3 = makeUTask(70, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sF3); + BOOST_CHECK(fixt.scanFast->getSize() == 1); + BOOST_CHECK(fixt.blend->getSize() == 3); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoFast, scanInteractiveF, fixt.queries); + Task::Ptr sF4 = makeUTask(72, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sF4); + BOOST_CHECK(fixt.scanFast->getSize() == 2); + BOOST_CHECK(fixt.blend->getSize() == 4); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoMedium, scanInteractiveF, fixt.queries); + Task::Ptr sM2 = makeUTask(13, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sM2); + BOOST_CHECK(fixt.scanMed->getSize() == 1); + BOOST_CHECK(fixt.blend->getSize() == 5); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoMedium, scanInteractiveF, fixt.queries); + Task::Ptr sM3 = makeUTask(15, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sM3); + BOOST_CHECK(fixt.scanMed->getSize() == 2); + BOOST_CHECK(fixt.blend->getSize() == 6); + 
BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoSlow, scanInteractiveF, fixt.queries); + Task::Ptr sS2 = makeUTask(5, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sS2); + BOOST_CHECK(fixt.scanSlow->getSize() == 1); + BOOST_CHECK(fixt.blend->getSize() == 7); + BOOST_CHECK(fixt.blend->ready() == true); + + ujd = makeUberJobData(fixt.qIdInc++, scanInfoSlow, scanInteractiveF, fixt.queries); + Task::Ptr sS3 = makeUTask(6, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(sS3); + BOOST_CHECK(fixt.scanSlow->getSize() == 2); + BOOST_CHECK(fixt.blend->getSize() == 8); + BOOST_CHECK(fixt.blend->ready() == true); // Expect 1 group, 1 fast, 1 medium, and 1 slow in that order - auto og2 = f.blend->getCmd(false); + auto og2 = fixt.blend->getCmd(false); BOOST_CHECK(og2.get() == g2.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - auto osF3 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->ready() == true); + auto osF3 = fixt.blend->getCmd(false); BOOST_CHECK(osF3.get() == sF3.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - BOOST_CHECK(f.blend->ready() == true); - auto osM2 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->ready() == true); + auto osM2 = fixt.blend->getCmd(false); BOOST_CHECK(osM2.get() == sM2.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - BOOST_CHECK(f.blend->ready() == true); - auto osS2 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->ready() == true); + auto osS2 = fixt.blend->getCmd(false); BOOST_CHECK(osS2.get() == sS2.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - BOOST_CHECK(f.blend->getSize() == 4); - BOOST_CHECK(f.blend->ready() == false); // all threads in use + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->getSize() == 4); + 
BOOST_CHECK(fixt.blend->ready() == false); // all threads in use // Finishing a fast Task should allow the last fast Task to go. LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-1 call commandFinish"); - f.blend->commandFinish(osF3); - auto osF4 = f.blend->getCmd(false); + fixt.blend->commandFinish(osF3); + auto osF4 = fixt.blend->getCmd(false); BOOST_CHECK(osF4.get() == sF4.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - BOOST_CHECK(f.blend->ready() == false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + BOOST_CHECK(fixt.blend->ready() == false); // Finishing 2 fast Tasks should allow a group Task to go. - f.blend->commandFinish(osF1); - BOOST_CHECK(f.blend->calcAvailableTheads() == 0); - f.blend->commandFinish(osF2); - BOOST_CHECK(f.blend->calcAvailableTheads() == 1); - auto og3 = f.blend->getCmd(false); + fixt.blend->commandFinish(osF1); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 0); + fixt.blend->commandFinish(osF2); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 1); + auto og3 = fixt.blend->getCmd(false); BOOST_CHECK(og3.get() == g3.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 1); - BOOST_CHECK(f.blend->ready() == false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 1); + BOOST_CHECK(fixt.blend->ready() == false); // Finishing the last fast Task should let a medium Task go. 
- f.blend->commandFinish(osF4); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - auto osM3 = f.blend->getCmd(false); + fixt.blend->commandFinish(osF4); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + auto osM3 = fixt.blend->getCmd(false); BOOST_CHECK(osM3.get() == sM3.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - BOOST_CHECK(f.blend->ready() == false); - BOOST_CHECK(f.blend->getCmd(false) == nullptr); - - // Finishing a group Task should allow a slow Task to got (only remaining Task) - BOOST_CHECK(f.blend->getSize() == 1); - f.blend->commandFinish(og1); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - auto osS3 = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + BOOST_CHECK(fixt.blend->ready() == false); + BOOST_CHECK(fixt.blend->getCmd(false) == nullptr); + + // Finishing a group Task should allow a slow Task to go (only remaining Task) + BOOST_CHECK(fixt.blend->getSize() == 1); + fixt.blend->commandFinish(og1); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + auto osS3 = fixt.blend->getCmd(false); BOOST_CHECK(osS3.get() == sS3.get()); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - BOOST_CHECK(f.blend->getSize() == 0); - BOOST_CHECK(f.blend->ready() == false); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + BOOST_CHECK(fixt.blend->getSize() == 0); + BOOST_CHECK(fixt.blend->ready() == false); // Close out all tasks and check counts. 
LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-1 close out all Tasks"); - f.blend->commandFinish(og2); - BOOST_CHECK(f.blend->calcAvailableTheads() == 2); - BOOST_CHECK(f.blend->getInFlight() == 7); - f.blend->commandFinish(og3); - BOOST_CHECK(f.blend->calcAvailableTheads() == 3); - BOOST_CHECK(f.blend->getInFlight() == 6); - f.blend->commandFinish(osM1); - BOOST_CHECK(f.blend->calcAvailableTheads() == 3); - BOOST_CHECK(f.blend->getInFlight() == 5); - f.blend->commandFinish(osM2); - BOOST_CHECK(f.blend->calcAvailableTheads() == 3); - f.blend->commandFinish(osM3); - BOOST_CHECK(f.blend->calcAvailableTheads() == 4); - f.blend->commandFinish(osS1); - BOOST_CHECK(f.blend->calcAvailableTheads() == 4); - f.blend->commandFinish(osS2); - BOOST_CHECK(f.blend->calcAvailableTheads() == 4); - f.blend->commandFinish(osS3); - BOOST_CHECK(f.blend->calcAvailableTheads() == 5); - BOOST_CHECK(f.blend->getInFlight() == 0); + fixt.blend->commandFinish(og2); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 2); + BOOST_CHECK(fixt.blend->getInFlight() == 7); + fixt.blend->commandFinish(og3); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 3); + BOOST_CHECK(fixt.blend->getInFlight() == 6); + fixt.blend->commandFinish(osM1); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 3); + BOOST_CHECK(fixt.blend->getInFlight() == 5); + fixt.blend->commandFinish(osM2); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 3); + fixt.blend->commandFinish(osM3); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 4); + fixt.blend->commandFinish(osS1); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 4); + fixt.blend->commandFinish(osS2); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 4); + fixt.blend->commandFinish(osS3); + BOOST_CHECK(fixt.blend->calcAvailableTheads() == 5); + BOOST_CHECK(fixt.blend->getInFlight() == 0); LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-1 done"); } BOOST_AUTO_TEST_CASE(BlendScheduleThreadLimitingTest) { LOGS(_log, LOG_LVL_DEBUG, "Test_case 
BlendScheduleThreadLimitingTest"); - SchedFixture f(60.0, 1); // Values to keep QueriesAndChunk from triggering. + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); // Values to keep QueriesAndChunk from triggering. + + auto scanInfoFastest = makeScanInfoFastest(); + auto scanInfoFast = makeScanInfoFast(); + auto scanInfoMedium = makeScanInfoMedium(); + auto scanInfoSlow = makeScanInfoSlow(); + bool const scanInteractiveT = true; + bool const scanInteractiveF = false; + shared_ptr sc = nullptr; + // Test that only 6 threads can be started on a single ScanScheduler // This leaves 3 threads available, 1 for each other scheduler. - BOOST_CHECK(f.blend->ready() == false); + BOOST_CHECK(fixt.blend->ready() == false); std::vector scanTasks; for (int j = 0; j < 7; ++j) { - auto tsk = makeTask(newTaskMsgScan(j, lsst::qserv::proto::ScanInfo::Rating::MEDIUM, f.qIdInc++, 0), - f.queries); - f.blend->queCmd(tsk); + auto ujd = makeUberJobData(fixt.qIdInc++, scanInfoMedium, scanInteractiveF, fixt.queries); + auto tsk = makeUTask(j, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(tsk); if (j < 6) { - BOOST_CHECK(f.blend->ready() == true); - auto cmd = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->ready() == true); + auto cmd = fixt.blend->getCmd(false); BOOST_CHECK(cmd != nullptr); auto task = std::dynamic_pointer_cast(cmd); scanTasks.push_back(task); } if (j == 6) { - BOOST_CHECK(f.blend->ready() == false); - BOOST_CHECK(f.blend->getCmd(false) == nullptr); + BOOST_CHECK(fixt.blend->ready() == false); + BOOST_CHECK(fixt.blend->getCmd(false) == nullptr); } } { // Finishing one task should allow the 7th one to run. 
- f.blend->commandFinish(scanTasks[0]); - BOOST_CHECK(f.blend->ready() == true); - auto cmd = f.blend->getCmd(false); + fixt.blend->commandFinish(scanTasks[0]); + BOOST_CHECK(fixt.blend->ready() == true); + auto cmd = fixt.blend->getCmd(false); BOOST_CHECK(cmd != nullptr); auto task = std::dynamic_pointer_cast(cmd); scanTasks.push_back(task); } // Finish all the scanTasks, scanTasks[0] is already finished. - for (int j = 1; j < 7; ++j) f.blend->commandFinish(scanTasks[j]); - BOOST_CHECK(f.blend->getInFlight() == 0); - BOOST_CHECK(f.blend->ready() == false); + for (int j = 1; j < 7; ++j) fixt.blend->commandFinish(scanTasks[j]); + BOOST_CHECK(fixt.blend->getInFlight() == 0); + BOOST_CHECK(fixt.blend->ready() == false); // Test that only 6 threads can be started on a single GroupScheduler // This leaves 3 threads available, 1 for each other scheduler. std::vector groupTasks; for (int j = 0; j < 7; ++j) { - f.blend->queCmd(makeTask(newTaskMsg(j, f.qIdInc++, 0), f.queries)); + auto ujd = makeUberJobData(fixt.qIdInc++, scanInfoFastest, scanInteractiveT, fixt.queries); + auto tsk = makeUTask(j, 0, ujd, sc, fixt.queries); + fixt.blend->queCmd(tsk); if (j < 6) { - BOOST_CHECK(f.blend->ready() == true); - auto cmd = f.blend->getCmd(false); + BOOST_CHECK(fixt.blend->ready() == true); + auto cmd = fixt.blend->getCmd(false); BOOST_CHECK(cmd != nullptr); auto task = std::dynamic_pointer_cast(cmd); groupTasks.push_back(task); } if (j == 6) { - BOOST_CHECK(f.blend->ready() == false); - BOOST_CHECK(f.blend->getCmd(false) == nullptr); + BOOST_CHECK(fixt.blend->ready() == false); + BOOST_CHECK(fixt.blend->getCmd(false) == nullptr); } } { // Finishing one task should allow the 7th one to run. 
- f.blend->commandFinish(groupTasks[0]); - BOOST_CHECK(f.blend->ready() == true); - auto cmd = f.blend->getCmd(false); + fixt.blend->commandFinish(groupTasks[0]); + BOOST_CHECK(fixt.blend->ready() == true); + auto cmd = fixt.blend->getCmd(false); BOOST_CHECK(cmd != nullptr); auto task = std::dynamic_pointer_cast(cmd); groupTasks.push_back(task); } // Finish all the groupTasks, groupTasks[0] is already finished. - for (int j = 1; j < 7; ++j) f.blend->commandFinish(groupTasks[j]); - BOOST_CHECK(f.blend->getInFlight() == 0); - BOOST_CHECK(f.blend->ready() == false); + for (int j = 1; j < 7; ++j) fixt.blend->commandFinish(groupTasks[j]); + BOOST_CHECK(fixt.blend->getInFlight() == 0); + BOOST_CHECK(fixt.blend->ready() == false); LOGS(_log, LOG_LVL_DEBUG, "BlendScheduleTest-2 done"); } BOOST_AUTO_TEST_CASE(BlendScheduleQueryRemovalTest) { // Test that space is appropriately reserved for each scheduler as Tasks are started and finished. - // TODO: This needs to be evaluated and removed. // In this case, memMan->lock(..) always returns true (really HandleType::ISEMPTY). // ChunkIds matter as they control the order Tasks come off individual schedulers. - SchedFixture f(60.0, 1); // Values to keep QueriesAndChunk from triggering. + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); // Values to keep QueriesAndChunk from triggering. + + auto scanInfoFastest = makeScanInfoFastest(); + auto scanInfoFast = makeScanInfoFast(); + auto scanInfoMedium = makeScanInfoMedium(); + auto scanInfoSlow = makeScanInfoSlow(); + bool const scanInteractiveF = false; + shared_ptr sc = nullptr; + LOGS(_log, LOG_LVL_DEBUG, "Test_case BlendScheduleQueryRemovalTest"); // Add two queries to scanFast scheduler and then move one query to scanSlow. 
int startChunk = 70; @@ -678,30 +830,32 @@ BOOST_AUTO_TEST_CASE(BlendScheduleQueryRemovalTest) { unsigned int jobsB = jobs; std::vector queryATasks; std::vector queryBTasks; - lsst::qserv::QueryId qIdA = f.qIdInc++; - lsst::qserv::QueryId qIdB = f.qIdInc++; + lsst::qserv::QueryId qIdA = fixt.qIdInc++; + lsst::qserv::QueryId qIdB = fixt.qIdInc++; { int jobId = 0; int chunkId = startChunk; + auto ujdA = makeUberJobData(qIdA, scanInfoFast, scanInteractiveF, qac); + auto ujdB = makeUberJobData(qIdB, scanInfoFast, scanInteractiveF, qac); for (unsigned int j = 0; j < jobs; ++j) { - auto taskMsg = newTaskMsgScan(chunkId, lsst::qserv::proto::ScanInfo::Rating::FAST, qIdA, jobId); - Task::Ptr mv = makeTask(taskMsg, f.queries); + Task::Ptr mv = makeUTask(chunkId, jobId, ujdA, sc, fixt.queries); queryATasks.push_back(mv); - f.queries->addTask(mv); - f.blend->queCmd(mv); - taskMsg = newTaskMsgScan(chunkId++, lsst::qserv::proto::ScanInfo::Rating::FAST, qIdB, jobId++); - mv = makeTask(taskMsg, f.queries); + fixt.queries->addTask(mv); + fixt.blend->queCmd(mv); + mv = makeUTask(chunkId, jobId, ujdB, sc, fixt.queries); queryBTasks.push_back(mv); - f.queries->addTask(mv); - f.blend->queCmd(mv); + fixt.queries->addTask(mv); + fixt.blend->queCmd(mv); + chunkId++; + jobId++; } } - BOOST_CHECK(f.scanFast->getSize() == jobs * 2); - BOOST_CHECK(f.scanSlow->getSize() == 0); + BOOST_CHECK(fixt.scanFast->getSize() == jobs * 2); + BOOST_CHECK(fixt.scanSlow->getSize() == 0); // This should run 1 job from one of the queries, but there are no guarantees about which one. // This is to test that moveUserQuery behaves appropriately for running Tasks. - auto poppedTask = f.blend->getCmd(false); + auto poppedTask = fixt.blend->getCmd(false); bool poppedFromA = false; for (auto const& tk : queryATasks) { if (tk == poppedTask) { @@ -714,56 +868,68 @@ BOOST_AUTO_TEST_CASE(BlendScheduleQueryRemovalTest) { else --jobsB; - f.blend->moveUserQuery(qIdA, f.scanFast, f.scanSlow); // move query qIdA to scanSlow. 
- LOGS(_log, LOG_LVL_DEBUG, "fastSize=" << f.scanFast->getSize() << " slowSize=" << f.scanSlow->getSize()); - BOOST_CHECK(f.scanFast->getSize() == jobsB); - BOOST_CHECK(f.scanSlow->getSize() == jobsA); + fixt.blend->moveUserQuery(qIdA, fixt.scanFast, fixt.scanSlow); // move query qIdA to scanSlow. + LOGS(_log, LOG_LVL_DEBUG, + "fastSize=" << fixt.scanFast->getSize() << " slowSize=" << fixt.scanSlow->getSize()); + BOOST_CHECK(fixt.scanFast->getSize() == jobsB); + BOOST_CHECK(fixt.scanSlow->getSize() == jobsA); // Can't use queryATasks[0] for this as it was popped from the queue before the move. auto taskFromA = queryATasks[1]; auto schedForA = std::dynamic_pointer_cast(taskFromA->getTaskScheduler()); LOGS(_log, LOG_LVL_DEBUG, "taskFromA=" << taskFromA->getIdStr() << " sched=" << schedForA->getName()); - BOOST_CHECK(schedForA == f.scanSlow); + BOOST_CHECK(schedForA == fixt.scanSlow); } BOOST_AUTO_TEST_CASE(BlendScheduleQueryBootTaskTest) { // Test if a task is removed if it takes takes too long. // Give the user query 0.1 seconds to run and run it for a second, it should get removed. double tenthOfSecInMinutes = 1.0 / 600.0; // task - SchedFixture f(tenthOfSecInMinutes, 1); // sleep 1 second then check if tasks took too long + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(tenthOfSecInMinutes, qac); // Values to keep QueriesAndChunk from triggering. 
+ + auto scanInfoFastest = makeScanInfoFastest(); + auto scanInfoFast = makeScanInfoFast(); + auto scanInfoMedium = makeScanInfoMedium(); + auto scanInfoSlow = makeScanInfoSlow(); + bool const scanInteractiveF = false; + shared_ptr sc = nullptr; LOGS(_log, LOG_LVL_DEBUG, "Test_case BlendScheduleQueryBootTaskTest"); // Create a thread pool to run task - auto pool = lsst::qserv::util::ThreadPool::newThreadPool(20, 1000, f.blend); + auto pool = lsst::qserv::util::ThreadPool::newThreadPool(20, 1000, fixt.blend); // Create fake data - one query to get a baseline time, another to take too long. // IMPORTANT: the "fast" taskl is needed to establish the baseline in QueriesAndChunks. // Otherwise the next task (the one which is going to be booted from its scheduler) // won't be booted. - int qid = 5; - auto taskMsg = newTaskMsgScan(27, lsst::qserv::proto::ScanInfo::Rating::FAST, qid++, 0); - Task::Ptr task = makeTask(taskMsg, f.queries); + int const qidA = 5; + int const qidB = 6; + auto ujd = makeUberJobData(qidA, scanInfoFast, scanInteractiveF, qac); + Task::Ptr task = makeUTask(27, 0, ujd, sc, fixt.queries); std::atomic running{false}; - auto fastFunc = [&running, &task, queriesAndChunks = f.queries](lsst::qserv::util::CmdData*) { + auto fastFunc = [&running, &task, queriesAndChunks = fixt.queries](lsst::qserv::util::CmdData*) { queriesAndChunks->startedTask(task); std::this_thread::sleep_for(std::chrono::milliseconds(1)); queriesAndChunks->finishedTask(task); running = true; }; task->setUnitTest(fastFunc); - f.queries->addTask(task); - f.blend->queCmd(task); + fixt.queries->addTask(task); + fixt.blend->queCmd(task); while (!running) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } running = false; - // f.queries should now have a baseline for chunk 27. - LOGS(_log, LOG_LVL_DEBUG, "Chunks after fastFunc " << *f.queries); + // fixt.queries should now have a baseline for chunk 27. 
+ LOGS(_log, LOG_LVL_DEBUG, "Chunks after fastFunc " << *fixt.queries); - taskMsg = newTaskMsgScan(27, lsst::qserv::proto::ScanInfo::Rating::FAST, qid, 0); - task = makeTask(taskMsg, f.queries); + ujd = makeUberJobData(qidB, scanInfoFast, scanInteractiveF, qac); + task = makeUTask(27, 0, ujd, sc, fixt.queries); std::atomic slowSleepDone{false}; auto slowFunc = [&running, &slowSleepDone, &task, - queriesAndChunks = f.queries](lsst::qserv::util::CmdData*) { + queriesAndChunks = fixt.queries](lsst::qserv::util::CmdData*) { queriesAndChunks->startedTask(task); running = true; std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -774,13 +940,13 @@ BOOST_AUTO_TEST_CASE(BlendScheduleQueryBootTaskTest) { LOGS(_log, LOG_LVL_DEBUG, "slowFunc end"); }; task->setUnitTest(slowFunc); - f.queries->addTask(task); - auto queryStats = f.queries->getStats(qid); + fixt.queries->addTask(task); + auto queryStats = fixt.queries->getStats(qidA); BOOST_CHECK(queryStats != nullptr); if (queryStats != nullptr) { BOOST_CHECK(queryStats->getTasksBooted() == 0); } - f.blend->queCmd(task); + fixt.blend->queCmd(task); // Wait for slowFunc to start running then wait for slowFunc to finish sleeping. while (!running) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -791,13 +957,13 @@ BOOST_AUTO_TEST_CASE(BlendScheduleQueryBootTaskTest) { // By now the slowFunc query has taken a second, far longer than the 0.1 seconds it was allowed. // examineAll() should boot the query. - LOGS(_log, LOG_LVL_INFO, "Chunks after slowFunc " << *f.queries); - f.queries->examineAll(); + LOGS(_log, LOG_LVL_INFO, "Chunks after slowFunc " << *fixt.queries); + fixt.queries->examineAll(); running = false; // allow slowFunc to exit its loop and finish. - LOGS(_log, LOG_LVL_INFO, "Chunks after examineAll " << *f.queries); + LOGS(_log, LOG_LVL_INFO, "Chunks after examineAll " << *fixt.queries); // Check if the tasks booted value for qid has gone up. 
- queryStats = f.queries->getStats(qid); + queryStats = fixt.queries->getStats(qidB); BOOST_CHECK(queryStats != nullptr); if (queryStats != nullptr) { LOGS(_log, LOG_LVL_INFO, "taskBooted=" << queryStats->getTasksBooted()); @@ -810,44 +976,80 @@ BOOST_AUTO_TEST_CASE(BlendScheduleQueryBootTaskTest) { } BOOST_AUTO_TEST_CASE(SlowTableHeapTest) { - LOGS(_log, LOG_LVL_DEBUG, "Test_case SlowTableHeapTest start"); - auto queries = QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(300), maxBootedC, - maxDarkTasksC, resetForTestingC); + LOGS(_log, LOG_LVL_DEBUG, "Test_case SlowTableHeapTest start, see ScanInfo::compareTables"); + + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); // Values to keep QueriesAndChunk from triggering. + + bool const scanInteractiveF = false; + shared_ptr sc = nullptr; wsched::ChunkTasks::SlowTableHeap heap{}; lsst::qserv::QueryId qIdInc = 1; BOOST_CHECK(heap.empty() == true); - Task::Ptr a1 = makeTask(newTaskMsgScan(7, 3, qIdInc++, 0, "charlie"), queries); + auto scanI = makeScanInfoMedium("charlie"); + auto ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a1 = makeUTask(7, 0, ujd, sc, fixt.queries); heap.push(a1); BOOST_CHECK(heap.top().get() == a1.get()); BOOST_CHECK(heap.empty() == false); - Task::Ptr a2 = makeTask(newTaskMsgScan(7, 3, qIdInc++, 0, "delta"), queries); + scanI = makeScanInfoMedium("delta"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a2 = makeUTask(7, 0, ujd, sc, fixt.queries); heap.push(a2); - BOOST_CHECK(heap.top().get() == a2.get()); + auto hTop = heap.top(); + logCmd(hTop, "hTop a2"); + BOOST_CHECK(hTop.get() == a2.get()); - Task::Ptr a3 = makeTask(newTaskMsgScan(7, 4, qIdInc++, 0, "bravo"), queries); + scanI = makeScanInfoSlow("bravo"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a3 
= makeUTask(7, 0, ujd, sc, fixt.queries); heap.push(a3); + hTop = heap.top(); + logCmd(hTop, "hTop a3 first"); BOOST_CHECK(heap.top().get() == a3.get()); - Task::Ptr a4 = makeTask(newTaskMsgScan(7, 2, qIdInc++, 0, "alpha"), queries); + scanI = makeScanInfoFast("alpha"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a4 = makeUTask(7, 0, ujd, sc, fixt.queries); heap.push(a4); + hTop = heap.top(); + logCmd(hTop, "hTop a3 second"); BOOST_CHECK(heap.top().get() == a3.get()); BOOST_CHECK(heap.size() == 4); - BOOST_CHECK(heap.pop().get() == a3.get()); - BOOST_CHECK(heap.pop().get() == a2.get()); - BOOST_CHECK(heap.pop().get() == a1.get()); - BOOST_CHECK(heap.pop().get() == a4.get()); + auto hPop = heap.pop(); + logCmd(hPop, "hPop expect a3 bravo"); + BOOST_CHECK(hPop.get() == a3.get()); + + hPop = heap.pop(); + logCmd(hPop, "hPop expect a2 delta"); + BOOST_CHECK(hPop.get() == a2.get()); + + hPop = heap.pop(); + logCmd(hPop, "hPop expect a1 charlie"); + BOOST_CHECK(hPop.get() == a1.get()); + + hPop = heap.pop(); + logCmd(hPop, "hPop expect a4 alpha"); + BOOST_CHECK(hPop.get() == a4.get()); BOOST_CHECK(heap.empty() == true); LOGS(_log, LOG_LVL_DEBUG, "SlowTableHeapTest done"); } BOOST_AUTO_TEST_CASE(ChunkTasksTest) { - LOGS(_log, LOG_LVL_DEBUG, "Test_case ChunkTasksTest start"); - auto queries = QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(300), maxBootedC, - maxDarkTasksC, resetForTestingC); + LOGS(_log, LOG_LVL_DEBUG, "Test_case ChunkTasksTest start, see ScanInfo::compareTables"); + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, qac); + shared_ptr sc = nullptr; + bool const scanInteractiveF = false; + int chunkId = 7; wsched::ChunkTasks chunkTasks{chunkId}; lsst::qserv::QueryId qIdInc = 1; @@ -855,21 +1057,29 @@ BOOST_AUTO_TEST_CASE(ChunkTasksTest) { BOOST_CHECK(chunkTasks.empty() == true); 
BOOST_CHECK(chunkTasks.readyToAdvance() == true); - Task::Ptr a1 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "charlie"), queries); + auto scanI = makeScanInfoMedium("charlie"); + auto ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a1 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); chunkTasks.queTask(a1); BOOST_CHECK(chunkTasks.empty() == false); BOOST_CHECK(chunkTasks.readyToAdvance() == false); BOOST_CHECK(chunkTasks.size() == 1); - Task::Ptr a2 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "delta"), queries); + scanI = makeScanInfoMedium("delta"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a2 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); chunkTasks.queTask(a2); BOOST_CHECK(chunkTasks.size() == 2); - Task::Ptr a3 = makeTask(newTaskMsgScan(chunkId, 4, qIdInc++, 0, "bravo"), queries); + scanI = makeScanInfoSlow("bravo"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a3 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); chunkTasks.queTask(a3); BOOST_CHECK(chunkTasks.size() == 3); - Task::Ptr a4 = makeTask(newTaskMsgScan(chunkId, 2, qIdInc++, 0, "alpha"), queries); + scanI = makeScanInfoFast("alpha"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a4 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); chunkTasks.queTask(a4); BOOST_CHECK(chunkTasks.size() == 4); @@ -916,9 +1126,15 @@ BOOST_AUTO_TEST_CASE(ChunkTasksTest) { } BOOST_AUTO_TEST_CASE(ChunkTasksQueueTest) { - LOGS(_log, LOG_LVL_DEBUG, "Test_case ChunkTasksQueueTest start"); - auto queries = QueriesAndChunks::setupGlobal(chrono::seconds(1), chrono::seconds(300), maxBootedC, - maxDarkTasksC, resetForTestingC); + LOGS(_log, LOG_LVL_DEBUG, "Test_case ChunkTasksQueueTest start, see ScanInfo::compareTables"); + + int const deadAfter = 1; + int const examineAfter = 1; + auto qac = setupQueries(maxBootedC, maxDarkTasksC, resetForTestingC, deadAfter, examineAfter); + SchedFixture fixt(60.0, 
qac); + shared_ptr sc = nullptr; + bool const scanInteractiveF = false; + int firstChunkId = 100; int secondChunkId = 150; int chunkId = firstChunkId; @@ -928,15 +1144,25 @@ BOOST_AUTO_TEST_CASE(ChunkTasksQueueTest) { BOOST_CHECK(ctl.empty() == true); BOOST_CHECK(ctl.ready(true) == false); - Task::Ptr a1 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "charlie"), queries); + auto scanI = makeScanInfoMedium("charlie"); + auto ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a1 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(a1); BOOST_CHECK(ctl.empty() == false); - Task::Ptr a2 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "delta"), queries); + scanI = makeScanInfoMedium("delta"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a2 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(a2); - Task::Ptr a3 = makeTask(newTaskMsgScan(chunkId, 4, qIdInc++, 0, "bravo"), queries); + + scanI = makeScanInfoSlow("bravo"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a3 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(a3); - Task::Ptr a4 = makeTask(newTaskMsgScan(chunkId, 2, qIdInc++, 0, "alpha"), queries); + + scanI = makeScanInfoFast("alpha"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr a4 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(a4); BOOST_CHECK(ctl.ready(true) == true); @@ -954,15 +1180,25 @@ BOOST_AUTO_TEST_CASE(ChunkTasksQueueTest) { BOOST_CHECK(ctl.empty() == true); chunkId = secondChunkId; - Task::Ptr b1 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "c"), queries); + scanI = makeScanInfoMedium("c"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr b1 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(b1); BOOST_CHECK(ctl.empty() == false); - Task::Ptr b2 = makeTask(newTaskMsgScan(chunkId, 3, qIdInc++, 0, "d"), queries); + scanI = 
makeScanInfoMedium("d"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr b2 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(b2); - Task::Ptr b3 = makeTask(newTaskMsgScan(chunkId, 4, qIdInc++, 0, "b"), queries); + + scanI = makeScanInfoSlow("b"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr b3 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(b3); - Task::Ptr b4 = makeTask(newTaskMsgScan(chunkId, 2, qIdInc++, 0, "a"), queries); + + scanI = makeScanInfoFast("a"); + ujd = makeUberJobData(qIdInc++, scanI, scanInteractiveF, qac); + Task::Ptr b4 = makeUTask(chunkId, 0, ujd, sc, fixt.queries); ctl.queueTask(b4); ctl.queueTask(a3); ctl.queueTask(a4); diff --git a/src/xrdlog/CMakeLists.txt b/src/xrdlog/CMakeLists.txt deleted file mode 100644 index 011294e0c1..0000000000 --- a/src/xrdlog/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -add_library(xrdlog MODULE) - -target_sources(xrdlog PRIVATE - XrdLogging.cc -) - -target_include_directories(xrdlog PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - -target_link_libraries(xrdlog PUBLIC - log - XrdSsiLib -) - -install( - TARGETS xrdlog DESTINATION ${CMAKE_INSTALL_LIBDIR} -) diff --git a/src/xrdlog/XrdLogging.cc b/src/xrdlog/XrdLogging.cc deleted file mode 100644 index 08fe1468b1..0000000000 --- a/src/xrdlog/XrdLogging.cc +++ /dev/null @@ -1,72 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/// Implement logging hook to route xrootd/cmsd messages to our logger. -/// This source file has no header file. - -// Class header - -// System headers -#include - -// Third party headers -#include "XrdSsi/XrdSsiLogger.hh" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers - -/******************************************************************************/ -/* L o g g i n g I n t e r c e p t H o o k */ -/******************************************************************************/ - -namespace { - -const char* origin; - -void QservLogger(struct timeval const& mtime, unsigned long tID, const char* msg, int mlen) { - static log4cxx::spi::LocationInfo xrdLoc(origin, log4cxx::spi::LocationInfo::calcShortFileName(origin), - "", 0); - static LOG_LOGGER myLog = LOG_GET("lsst.qserv.xrdssi.msgs"); - - if (myLog.isInfoEnabled()) { - while (mlen && msg[mlen - 1] == '\n') --mlen; // strip all trailing newlines - std::string theMsg(msg, mlen); - lsst::log::Log::MDC("LWP", std::to_string(tID)); - myLog.logMsg(log4cxx::Level::getInfo(), xrdLoc, theMsg); - } -} - -XrdSsiLogger::MCB_t& ConfigLog() { - // Set the originator of the messages - origin = (getenv("XRDPROG") ? 
getenv("XRDPROG") : ""); - - // Configure the logging system - LOG_CONFIG(); - - // Return the address the logger to be used - return QservLogger; -} - -bool dummy = XrdSsiLogger::SetMCB(ConfigLog(), XrdSsiLogger::mcbServer); -} // namespace diff --git a/src/xrdreq/CMakeLists.txt b/src/xrdreq/CMakeLists.txt deleted file mode 100644 index f71141ccc7..0000000000 --- a/src/xrdreq/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -add_library(xrdreq SHARED) -add_dependencies(xrdreq proto) - -target_sources(xrdreq PRIVATE - QservRequest.cc - QueryManagementAction.cc - QueryManagementRequest.cc -) - -target_include_directories(xrdreq PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - -target_link_libraries(xrdreq PUBLIC - log - proto - protobuf - XrdSsiLib - XrdCl -) - -install( - TARGETS xrdreq -) - -FUNCTION(XRDREQ_UTILS) - FOREACH(UTIL IN ITEMS ${ARGV}) - add_executable(${UTIL}) - target_sources(${UTIL} PRIVATE ${UTIL}.cc) - target_include_directories(${UTIL} PRIVATE ${XROOTD_INCLUDE_DIRS}) - target_link_libraries(${UTIL} PRIVATE - crypto - pthread - proto - util - global - xrdreq - ) - install(TARGETS ${UTIL}) - ENDFOREACH() -ENDFUNCTION() - -xrdreq_utils( - qserv-query-management -) - -install( - TARGETS xrdreq -) diff --git a/src/xrdreq/QservRequest.cc b/src/xrdreq/QservRequest.cc deleted file mode 100644 index 6310d1c096..0000000000 --- a/src/xrdreq/QservRequest.cc +++ /dev/null @@ -1,216 +0,0 @@ -/* - * LSST Data Management System - * Copyright 2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "xrdreq/QservRequest.h" - -// System headers -#include -#include - -// Qserv headers -#include "lsst/log/Log.h" - -using namespace std; - -namespace { - -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdreq.QservRequest"); - -// Set this parameter to some reasonable default -int const bufInitialSize = 1024; - -} // namespace - -namespace lsst::qserv::xrdreq { - -atomic QservRequest::_numClassInstances(0); - -QservRequest::~QservRequest() { - delete[] _buf; - - --_numClassInstances; - LOGS(_log, LOG_LVL_TRACE, "QservRequest destructed instances: " << _numClassInstances); -} - -QservRequest::QservRequest() - : _bufIncrementSize(bufInitialSize), - _bufSize(0), - _bufCapacity(bufInitialSize), - _buf(new char[bufInitialSize]) { - // This report is used solely for debugging purposes to allow tracking - // potential memory leaks within applications. - ++_numClassInstances; - LOGS(_log, LOG_LVL_TRACE, "QservRequest constructed instances: " << _numClassInstances); -} - -void QservRequest::cancel() { - // This will decrement the reference counter to the pointee at the end of the current - // block regardless of any exceptions that may be thrown below. 
- auto self = move(_refToSelf4keepAlive); - Finished(true); -} - -void QservRequest::setRefToSelf4keepAlive(shared_ptr ptr) { - if ((ptr == nullptr) || (this != ptr.get())) { - stringstream ss; - ss << "QservRequest::" << __func__ << ": the value of " << ptr - << " passed as an argument is not pointing to the current object."; - throw invalid_argument(ss.str()); - } - _refToSelf4keepAlive = ptr; -} - -char* QservRequest::GetRequest(int& dlen) { - // Ask a subclass to serialize its request into the frame buffer - onRequest(_frameBuf); - - // Tell SSI which data and how many bytes to send - dlen = _frameBuf.size(); - return _frameBuf.data(); -} - -bool QservRequest::ProcessResponse(const XrdSsiErrInfo& eInfo, const XrdSsiRespInfo& rInfo) { - string const context = "QservRequest::" + string(__func__) + " "; - - if (eInfo.hasError()) { - // This will decrement the reference counter to the pointee at the end of the current - // block regardless of any exceptions that may be thrown below. - auto self = move(_refToSelf4keepAlive); - - // Copy the argument before sending the upstream notification - // Otherwise the current object may get disposed before we even had - // a chance to notify XRootD/SSI by calling Finished(). - string const errorStr = rInfo.eMsg; - - LOGS(_log, LOG_LVL_ERROR, context << "** FAILED **, error: " << errorStr); - - // Tell XrootD to release all resources associated with this request - Finished(); - - // Notify a subclass on the abnormal condition - // WARNING: This has to be the last call as the object may get deleted - // downstream. 
- onError(errorStr); - return false; - } - LOGS(_log, LOG_LVL_TRACE, - context << " eInfo.rType: " << rInfo.rType << "(" << rInfo.State() << ")" - << ", eInfo.blen: " << rInfo.blen); - - switch (rInfo.rType) { - case XrdSsiRespInfo::isData: - case XrdSsiRespInfo::isStream: - - LOGS(_log, LOG_LVL_TRACE, context << "** REQUESTING RESPONSE DATA **"); - GetResponseData(_buf + _bufSize, _bufIncrementSize); - return true; - - default: - // This will decrement the reference counter to the pointee at the end of the current - // block regardless of any exceptions that may be thrown below. - auto self = move(_refToSelf4keepAlive); - - // Copy the argument before sending the upstream notification - // Otherwise the current object may get disposed before we even had - // a chance to notify XRootD/SSI by calling Finished(). - string const responseType = to_string(rInfo.rType); - - // Tell XrootD to release all resources associated with this request - Finished(); - - // Notify a subclass on the abnormal condition - // WARNING: This has to be the last call as the object may get deleted - // downstream. - onError("QservRequest::ProcessResponse ** ERROR ** unexpected response type: " + responseType); - return false; - } -} - -void QservRequest::ProcessResponseData(const XrdSsiErrInfo& eInfo, char* buff, int blen, bool last) { - string const context = "QservRequest::" + string(__func__) + " "; - - LOGS(_log, LOG_LVL_TRACE, context << "eInfo.isOK: " << eInfo.isOK()); - - if (not eInfo.isOK()) { - // This will decrement the reference counter to the pointee at the end of the current - // block regardless of any exceptions that may be thrown below. - auto self = move(_refToSelf4keepAlive); - - // Copy these arguments before sending the upstream notification. - // Otherwise the current object may get disposed before we even had - // a chance to notify XRootD/SSI by calling Finished(). 
- - string const errorStr = eInfo.Get(); - int const errorNum = eInfo.GetArg(); - - LOGS(_log, LOG_LVL_ERROR, - context << "** FAILED ** eInfo.Get(): " << errorStr << ", eInfo.GetArg(): " << errorNum); - - // Tell XrootD to realease all resources associated with this request - Finished(); - - // Notify a subclass on the ubnormal condition. - // WARNING: This has to be the last call as the object may get deleted - // downstream. - onError(errorStr); - - } else { - LOGS(_log, LOG_LVL_TRACE, context << "blen: " << blen << ", last: " << last); - - // Update the byte counter - _bufSize += blen; - - if (last) { - // This will decrement the reference counter to the pointee at the end of the current - // block regardless of any exceptions that may be thrown below. - auto self = move(_refToSelf4keepAlive); - - // Tell XrootD to release all resources associated with this request - Finished(); - - // Ask a subclass to process the response - // WARNING: This has to be the last call as the object may get deleted - // downstream. - proto::FrameBufferView view(_buf, _bufSize); - onResponse(view); - - } else { - // Double the buffer's capacity and copy over its previous content into the new location - int prevBufCapacity = _bufCapacity; - _bufIncrementSize = prevBufCapacity; - _bufCapacity += _bufIncrementSize; - - char* prevBuf = _buf; - _buf = new char[_bufCapacity]; - - copy(prevBuf, prevBuf + prevBufCapacity, _buf); - - delete[] prevBuf; - - // Keep reading - GetResponseData(_buf + _bufSize, _bufIncrementSize); - } - } -} - -} // namespace lsst::qserv::xrdreq diff --git a/src/xrdreq/QservRequest.h b/src/xrdreq/QservRequest.h deleted file mode 100644 index 4306d91311..0000000000 --- a/src/xrdreq/QservRequest.h +++ /dev/null @@ -1,120 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2011-2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_XRDREQ_QSERV_REQUEST_H -#define LSST_QSERV_XRDREQ_QSERV_REQUEST_H - -// System headers -#include -#include -#include - -// Third party headers -#include "XrdSsi/XrdSsiRequest.hh" - -// Qserv headers -#include "proto/FrameBuffer.h" -#include "proto/worker.pb.h" - -namespace lsst::qserv::xrdreq { - -/** - * Class QservRequest is a base class for a family of the client-side requests - * (classes) to Qserv workers. - */ -class QservRequest : public XrdSsiRequest { -public: - QservRequest(QservRequest const&) = delete; - QservRequest& operator=(QservRequest const&) = delete; - virtual ~QservRequest() override; - - /** - * Do a proper request cancellation to ensure a pointer to the request gets deleted - * after calling XrdSsiRequest::Finished(true). - */ - void cancel(); - -protected: - QservRequest(); - - /** - * Setting a pointer to the object would guarantee that the life expectancy - * of the request be preserved before it's finished/failed and the corresponding - * notifications are sent to a subclass via the virtual methods QservRequest::onResponse() - * or QservRequest::onError(). The pointer will be reset after calling either of - * these methods, or the method QservRequest::cancel(). - * @param ptr The pointer to be set. 
- * @throws std::invalid_argument if the pointer is empty or pointing to a different - * request object. - */ - void setRefToSelf4keepAlive(std::shared_ptr ptr); - - /** - * Serialize a request into the provided buffer. The method is required to be - * provided by a subclass. - * @param buf A request buffer for serializing a request. - */ - virtual void onRequest(proto::FrameBuffer& buf) = 0; - - /** - * Process response from Qserv. The method is required to be provided by a subclass. - * @param view The buffer view for parsing results. - */ - virtual void onResponse(proto::FrameBufferView& view) = 0; - - /** - * Notify a base class about a failure occurred when sending a request data - * or receiving a response. - * @param error A message explaining a reason of the failure. - */ - virtual void onError(std::string const& msg) = 0; - - char* GetRequest(int& dlen) override; - bool ProcessResponse(const XrdSsiErrInfo& eInfo, const XrdSsiRespInfo& rInfo) override; - void ProcessResponseData(const XrdSsiErrInfo& eInfo, char* buff, int blen, bool last) override; - -private: - /// The global counter for the number of instances of any subclasses - static std::atomic _numClassInstances; - - /// Request buffer is prepared by subclasses before sending a request to a worker. - proto::FrameBuffer _frameBuf; - - // Response buffer is updated when receiving a response stream of data from a worker. - - /// The (very first and the) last increment of the capacity of the incoming - /// buffer is used to limit the amount of bytes to be received from a server. - int _bufIncrementSize; - - int _bufSize; ///< actual (meaningful) number of bytes in the incoming buffer - int _bufCapacity; ///< total capacity of the incoming buffer - - char* _buf; ///< buffer for incomming data - - /// The reference to the object is needed to guarantee the life expectency of - /// the request object while the request is still being processed. 
- std::shared_ptr _refToSelf4keepAlive; -}; - -} // namespace lsst::qserv::xrdreq - -#endif // LSST_QSERV_XRDREQ_QSERV_REQUEST_H \ No newline at end of file diff --git a/src/xrdreq/QueryManagementAction.cc b/src/xrdreq/QueryManagementAction.cc deleted file mode 100644 index f63a013b12..0000000000 --- a/src/xrdreq/QueryManagementAction.cc +++ /dev/null @@ -1,137 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ - -// Class header -#include "xrdreq/QueryManagementAction.h" - -// System headers -#include - -// Third party headers -#include "XrdCl/XrdClFile.hh" -#include "XrdCl/XrdClXRootDResponses.hh" -#include "XrdSsi/XrdSsiProvider.hh" -#include "XrdSsi/XrdSsiService.hh" - -// Qserv headers -#include "xrdreq/QueryManagementRequest.h" - -// LSST headers -#include "lsst/log/Log.h" - -/// This C++ symbol is provided by the SSI shared library -extern XrdSsiProvider* XrdSsiProviderClient; - -using namespace std; - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdreq.QueryManagementAction"); - -string xrootdStatus2str(XrdCl::XRootDStatus const& s) { - return "status=" + to_string(s.status) + ", code=" + to_string(s.code) + ", errNo=" + to_string(s.errNo) + - ", message='" + s.GetErrorMessage() + "'"; -} - -/// The RAII wrapper around the silly C pointer to facilitate proper deletion -/// of the object returned by the XROOTD API. -struct LocationInfoRAII { - XrdCl::LocationInfo* locationInfo = nullptr; - ~LocationInfoRAII() { delete locationInfo; } -}; - -} // namespace - -namespace lsst::qserv::xrdreq { - -void QueryManagementAction::notifyAllWorkers(string const& xrootdFrontendUrl, - proto::QueryManagement::Operation op, uint32_t czarId, - QueryId queryId, CallbackType onFinish) { - auto const ptr = shared_ptr(new QueryManagementAction()); - ptr->_notifyAllWorkers(xrootdFrontendUrl, op, czarId, queryId, onFinish); -} - -QueryManagementAction::QueryManagementAction() { - LOGS(_log, LOG_LVL_TRACE, "QueryManagementAction ** CONSTRUCTED **"); -} - -QueryManagementAction::~QueryManagementAction() { - LOGS(_log, LOG_LVL_TRACE, "QueryManagementAction ** DELETED **"); -} - -void QueryManagementAction::_notifyAllWorkers(std::string const& xrootdFrontendUrl, - proto::QueryManagement::Operation op, uint32_t czarId, - QueryId queryId, CallbackType onFinish) { - string const context = "QueryManagementAction::" + string(__func__) + " "; - - // Find all subscribers (worker XROOTD 
servers) serving this special resource. - // Throw an exception if no workers are registered. - ::LocationInfoRAII locationInfoHandler; - string const queryResourceName = "/query"; - XrdCl::FileSystem fileSystem(xrootdFrontendUrl); - XrdCl::XRootDStatus const status = fileSystem.Locate(queryResourceName, XrdCl::OpenFlags::Flags::None, - locationInfoHandler.locationInfo); - if (!status.IsOK()) { - throw runtime_error(context + "failed to locate subscribers for resource " + queryResourceName + - ", " + ::xrootdStatus2str(status)); - } - if (uint32_t const numLocations = locationInfoHandler.locationInfo->GetSize(); numLocations == 0) { - throw runtime_error(context + "no subscribers are serving resource " + queryResourceName); - } else { - // Fill worker addresses as keys into the response object. - for (uint32_t i = 0; i < numLocations; ++i) { - _response[locationInfoHandler.locationInfo->At(i).GetAddress()] = string(); - } - } - - // Send a request to each worker. Note capturing a copy of 'self' to ensure - // the curent object will still existr while the requests will be being processed. 
- auto const self = shared_from_this(); - for (auto itr : _response) { - string const workerAddress = itr.first; - - // Connect to the worker service - XrdSsiErrInfo errInfo; - XrdSsiService* serviceProvider = XrdSsiProviderClient->GetService(errInfo, workerAddress); - if (nullptr == serviceProvider) { - throw runtime_error(context + " failed to contact worker service " + workerAddress + - ", error: " + errInfo.Get()); - } - - // Make and configure the request object - auto request = xrdreq::QueryManagementRequest::create( - op, czarId, queryId, - [self, workerAddress, onFinish](proto::WorkerCommandStatus::Code code, string const& error) { - if (code != proto::WorkerCommandStatus::SUCCESS) { - self->_response[workerAddress] = error; - } - if (++(self->_numWorkerRequestsFinished) == self->_response.size()) { - if (onFinish != nullptr) onFinish(self->_response); - } - }); - - // Initiate request processing - XrdSsiResource resource(queryResourceName); - serviceProvider->ProcessRequest(*request, resource); - } -} - -} // namespace lsst::qserv::xrdreq diff --git a/src/xrdreq/QueryManagementAction.h b/src/xrdreq/QueryManagementAction.h deleted file mode 100644 index f1779cae57..0000000000 --- a/src/xrdreq/QueryManagementAction.h +++ /dev/null @@ -1,96 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_ACTION_H -#define LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_ACTION_H - -// System headers -#include -#include -#include -#include -#include - -// Qserv headers -#include "global/intTypes.h" -#include "proto/worker.pb.h" - -namespace lsst::qserv::xrdreq { - -/** - * Class QueryManagementAction is an interface for managing query completion/cancellation - * at all Qserv workers that are connected as "publishers" to the XROOTD redirector. - */ -class QueryManagementAction : public std::enable_shared_from_this { -public: - /// The reponse type represents errors reported by the workers, where worker - /// names are the keys. And the values are the error messages. Empty strings - /// indicate the succesful completion of the requests. - using Response = std::map; - - /// The callback function type to be used for notifications on the operation completion. - using CallbackType = std::function; - - /** - * The front-end method for initiating the operation at all workers. - * - * @note The only way to track the completion of the requests sent via - * this interface is by providing the callback function. The request delivery - * is not guaranteeded in case if the XROOTD/SSI network will be clogged by - * the heavy traffic. It's safe to call the same operation many times if needed. - * - * @param xrootdFrontendUrl A location of the XROOTD redirector. - * @param op An operation be initiated at the workers. - * @param onFinish The optional callback to be fired upon the completion of - * the requested operation. - * - * @throws std::runtime_error For failures encountered when connecting to - * the manager or initiating the requesed operation. 
- */ - static void notifyAllWorkers(std::string const& xrootdFrontendUrl, proto::QueryManagement::Operation op, - uint32_t czarId, QueryId queryId, CallbackType onFinish = nullptr); - - QueryManagementAction(QueryManagementAction const&) = delete; - QueryManagementAction& operator=(QueryManagementAction const&) = delete; - virtual ~QueryManagementAction(); - -private: - QueryManagementAction(); - - /** - * The actual implementation of the request processor. - * @see QueryManagementAction::notifyAllWorkers() - */ - void _notifyAllWorkers(std::string const& xrootdFrontendUrl, proto::QueryManagement::Operation op, - uint32_t czarId, QueryId queryId, CallbackType onFinish); - - /// The collection of worker responses. - Response _response; - - /// The counter will get incremented as worker responses will be received. - /// User-provided callback function (if any) will be called when all requests - /// will finish (succeed or fail). - std::atomic _numWorkerRequestsFinished{0}; -}; - -} // namespace lsst::qserv::xrdreq - -#endif // LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_ACTION_H diff --git a/src/xrdreq/QueryManagementRequest.cc b/src/xrdreq/QueryManagementRequest.cc deleted file mode 100644 index 82860cdd59..0000000000 --- a/src/xrdreq/QueryManagementRequest.cc +++ /dev/null @@ -1,91 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "xrdreq/QueryManagementRequest.h" - -// LSST headers -#include "lsst/log/Log.h" - -using namespace std; - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdreq.QueryManagementRequest"); -} // namespace - -namespace lsst::qserv::xrdreq { - -QueryManagementRequest::Ptr QueryManagementRequest::create(proto::QueryManagement::Operation op, - uint32_t czarId, QueryId queryId, - QueryManagementRequest::CallbackType onFinish) { - QueryManagementRequest::Ptr ptr(new QueryManagementRequest(op, czarId, queryId, onFinish)); - ptr->setRefToSelf4keepAlive(ptr); - return ptr; -} - -QueryManagementRequest::QueryManagementRequest(proto::QueryManagement::Operation op, uint32_t czarId, - QueryId queryId, QueryManagementRequest::CallbackType onFinish) - : _op(op), _czarId(czarId), _queryId(queryId), _onFinish(onFinish) { - LOGS(_log, LOG_LVL_TRACE, "QueryManagementRequest ** CONSTRUCTED **"); -} - -QueryManagementRequest::~QueryManagementRequest() { - LOGS(_log, LOG_LVL_TRACE, "QueryManagementRequest ** DELETED **"); -} - -void QueryManagementRequest::onRequest(proto::FrameBuffer& buf) { - proto::QueryManagement message; - message.set_op(_op); - message.set_czar_id(_czarId); - message.set_query_id(_queryId); - buf.serialize(message); -} - -void QueryManagementRequest::onResponse(proto::FrameBufferView& view) { - if (nullptr != _onFinish) { - // Clearing the stored callback after finishing the up-stream notification - // has two purposes: - // - // 1. it guaranties (exactly) one time notification - // 2. 
it breaks the up-stream dependency on a caller object if a shared - // pointer to the object was mentioned as the lambda-function's closure - auto onFinish = move(_onFinish); - _onFinish = nullptr; - onFinish(proto::WorkerCommandStatus::SUCCESS, string()); - } -} - -void QueryManagementRequest::onError(string const& error) { - if (nullptr != _onFinish) { - // Clearing the stored callback after finishing the up-stream notification - // has two purposes: - // - // 1. it guaranties (exactly) one time notification - // 2. it breaks the up-stream dependency on a caller object if a shared - // pointer to the object was mentioned as the lambda-function's closure - auto onFinish = move(_onFinish); - _onFinish = nullptr; - onFinish(proto::WorkerCommandStatus::ERROR, error); - } -} - -} // namespace lsst::qserv::xrdreq diff --git a/src/xrdreq/QueryManagementRequest.h b/src/xrdreq/QueryManagementRequest.h deleted file mode 100644 index 9c92fcfe6f..0000000000 --- a/src/xrdreq/QueryManagementRequest.h +++ /dev/null @@ -1,95 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -#ifndef LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_REQUEST_H -#define LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_REQUEST_H - -// System headers -#include -#include -#include - -// Qserv headers -#include "global/intTypes.h" -#include "proto/worker.pb.h" -#include "xrdreq/QservRequest.h" - -namespace lsst::qserv::xrdreq { - -/** - * Class QueryManagementRequest represents requests for managing query - * completion/cancellation at Qserv workers. - * @note No actuall responses are expected from these requests beyond - * the error messages in case of any problems in delivering or processing - * notifications. - */ -class QueryManagementRequest : public QservRequest { -public: - /// The pointer type for instances of the class - typedef std::shared_ptr Ptr; - - /// The callback function type to be used for notifications on - /// the operation completion. - using CallbackType = std::function; // error message (if failed) - - /** - * Static factory method is needed to prevent issues with the lifespan - * and memory management of instances created otherwise (as values or via - * low-level pointers). - * @param op An operation to be initiated. - * @param queryId An uinque identifier of a query affected by the request. - * Note that a cole of the identifier depends on which operation - * was requested. - * @param onFinish (optional) callback function to be called upon the completion - * (successful or not) of the request. 
- * @return the smart pointer to the object of the class - */ - static Ptr create(proto::QueryManagement::Operation op, uint32_t czarId, QueryId queryId, - CallbackType onFinish = nullptr); - - QueryManagementRequest() = delete; - QueryManagementRequest(QueryManagementRequest const&) = delete; - QueryManagementRequest& operator=(QueryManagementRequest const&) = delete; - - virtual ~QueryManagementRequest() override; - -protected: - /// @see QueryManagementRequest::create() - QueryManagementRequest(proto::QueryManagement::Operation op, uint32_t czarId, QueryId queryId, - CallbackType onFinish); - - virtual void onRequest(proto::FrameBuffer& buf) override; - virtual void onResponse(proto::FrameBufferView& view) override; - virtual void onError(std::string const& error) override; - -private: - // Parameters of the object - - proto::QueryManagement::Operation _op = proto::QueryManagement::CANCEL_AFTER_RESTART; - uint32_t _czarId = 0; - QueryId _queryId = 0; - CallbackType _onFinish; -}; - -} // namespace lsst::qserv::xrdreq - -#endif // LSST_QSERV_XRDREQ_QUERY_MANAGEMENT_REQUEST_H diff --git a/src/xrdreq/qserv-query-management.cc b/src/xrdreq/qserv-query-management.cc deleted file mode 100644 index 0e410ff5ec..0000000000 --- a/src/xrdreq/qserv-query-management.cc +++ /dev/null @@ -1,154 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -// System header -#include -#include -#include -#include -#include - -// Third party headers -#include "XrdSsi/XrdSsiProvider.hh" -#include "XrdSsi/XrdSsiService.hh" - -// Qserv headers -#include "global/intTypes.h" -#include "proto/worker.pb.h" -#include "util/BlockPost.h" -#include "util/CmdLineParser.h" -#include "xrdreq/QueryManagementAction.h" -#include "xrdreq/QueryManagementRequest.h" - -/// This C++ symbol is provided by the SSI shared library -extern XrdSsiProvider* XrdSsiProviderClient; - -namespace global = lsst::qserv; -namespace proto = lsst::qserv::proto; -namespace util = lsst::qserv::util; -namespace xrdreq = lsst::qserv::xrdreq; - -using namespace std; - -namespace { - -// Command line parameters - -vector const allowedOperations = {"CANCEL_AFTER_RESTART", "CANCEL", "COMPLETE"}; -proto::QueryManagement::Operation operation = proto::QueryManagement::CANCEL_AFTER_RESTART; -uint32_t czarId; -global::QueryId queryId; -bool allWorkers = false; -string serviceProviderLocation; - -proto::QueryManagement::Operation str2operation(string const& str) { - if (str == "CANCEL_AFTER_RESTART") { - return proto::QueryManagement::CANCEL_AFTER_RESTART; - } else if (str == "CANCEL") { - return proto::QueryManagement::CANCEL; - } else if (str == "COMPLETE") { - return proto::QueryManagement::COMPLETE; - } - throw invalid_argument("error: unknown operation '" + str + "'"); -} - -int test() { - bool finished = false; - if (allWorkers) { - xrdreq::QueryManagementAction::notifyAllWorkers( - serviceProviderLocation, operation, czarId, queryId, - [&finished](xrdreq::QueryManagementAction::Response const& response) { - for (auto itr : response) { - cout << "worker: " << itr.first << " error: " << itr.second << endl; - } - finished = true; - }); - } else { - // Connect to a service provider - XrdSsiErrInfo errInfo; - auto 
serviceProvider = XrdSsiProviderClient->GetService(errInfo, serviceProviderLocation); - if (nullptr == serviceProvider) { - cerr << "failed to contact service provider at: " << serviceProviderLocation - << ", error: " << errInfo.Get() << endl; - return 1; - } - cout << "connected to service provider at: " << serviceProviderLocation << endl; - - // Prepare the request - auto request = xrdreq::QueryManagementRequest::create( - operation, czarId, queryId, - [&finished](proto::WorkerCommandStatus::Code code, string const& error) { - cout << "code=" << proto::WorkerCommandStatus_Code_Name(code) << ", error='" << error - << "'" << endl; - finished = true; - }); - - // Submit the request - XrdSsiResource resource("/query"); - serviceProvider->ProcessRequest(*request, resource); - } - - // Wait before the request will finish or fail - util::BlockPost blockPost(1000, 2000); - while (!finished) { - blockPost.wait(200); - } - return 0; -} -} // namespace - -int main(int argc, const char* const argv[]) { - // Verify that the version of the library that we linked against is - // compatible with the version of the headers we compiled against. - - GOOGLE_PROTOBUF_VERIFY_VERSION; - - // Parse command line parameters - try { - util::CmdLineParser parser( - argc, argv, - "\n" - "Usage:\n" - " \n" - " [--service=]\n" - "\n" - "Flags an options:\n" - " --all-workers - The flag indicating if the operation had to involve all workers.\n" - " --service= - A location of the service provider (default: 'localhost:1094').\n" - "\n" - "Parameters:\n" - " - An operation over the query (queries). 
Allowed values of\n" - " the parameter are: CANCEL_AFTER_RESTART, CANCEL, COMPLETE.\n" - " - The unique identifier of Czar.\n" - " - User query identifier.\n"); - - ::operation = ::str2operation(parser.parameterRestrictedBy(1, ::allowedOperations)); - ::czarId = parser.parameter(2); - ::queryId = parser.parameter(3); - ::allWorkers = parser.flag("all-workers"); - ::serviceProviderLocation = parser.option("service", "localhost:1094"); - - } catch (exception const& ex) { - cerr << ex.what() << endl; - return 1; - } - return ::test(); -} diff --git a/src/xrdsvc/CMakeLists.txt b/src/xrdsvc/CMakeLists.txt deleted file mode 100644 index 9fc8d622d1..0000000000 --- a/src/xrdsvc/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -add_library(xrdsvc SHARED) -add_dependencies(xrdsvc proto) - -target_sources(xrdsvc PRIVATE - ChannelStream.cc - HttpModule.cc - HttpMonitorModule.cc - HttpReplicaMgtModule.cc - HttpSvc.cc - SsiProvider.cc - SsiRequest.cc - SsiService.cc - StreamBuffer.cc -) - -target_include_directories(xrdsvc PRIVATE - ${XROOTD_INCLUDE_DIRS} -) - -target_link_libraries(xrdsvc PUBLIC - boost_filesystem - boost_system - global - log - mysql - http - sql - wbase - wconfig - wcontrol - wpublish - wsched - XrdSsiLib -) - -install( - TARGETS xrdsvc -) diff --git a/src/xrdsvc/ChannelStream.cc b/src/xrdsvc/ChannelStream.cc deleted file mode 100644 index 2c02610b48..0000000000 --- a/src/xrdsvc/ChannelStream.cc +++ /dev/null @@ -1,115 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2016 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "xrdsvc/ChannelStream.h" - -// Third-party headers -#include "boost/utility.hpp" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "global/debugUtil.h" -#include "util/Bug.h" -#include "util/common.h" - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.ChannelStream"); -} - -using namespace std; - -namespace lsst::qserv::xrdsvc { - -/// Provide each Channel stream with a unique identifier. -atomic ChannelStream::_sequenceSource{0}; - -/// Constructor -ChannelStream::ChannelStream() : XrdSsiStream(isActive), _closed(false), _seq(_sequenceSource++) {} - -/// Destructor -ChannelStream::~ChannelStream() { clearMsgs(); } - -/// Push in a data packet -void ChannelStream::append(StreamBuffer::Ptr const &streamBuffer, bool last) { - if (_closed) { - throw util::Bug(ERR_LOC, - "ChannelStream::append: Stream closed, append(...,last=true) already received"); - } - LOGS(_log, LOG_LVL_DEBUG, - "seq=" << _seq << " ChannelStream::append last=" << last << " " - << util::prettyCharBuf(streamBuffer->data, streamBuffer->getSize(), 5)); - { - unique_lock lock(_mutex); - ++_appendCount; - LOGS(_log, LOG_LVL_DEBUG, - "seq=" << to_string(_seq) << " Trying to append message (flowing) appC=" << _appendCount - << " getBC=" << _getBufCount); - _msgs.push_back(streamBuffer); - _closed = last; // if last is true, then we are closed. 
- } - _hasDataCondition.notify_one(); -} - -/// Pull out a data packet as a Buffer object (called by XrdSsi code) -XrdSsiStream::Buffer *ChannelStream::GetBuff(XrdSsiErrInfo &eInfo, int &dlen, bool &last) { - ++_getBufCount; - // This InstanceCount should be fairly quiet as there should only be one at a time. - util::InstanceCount inst("GetBuf seq=" + to_string(_seq)); - unique_lock lock(_mutex); - while (_msgs.empty() && !_closed) { // No msgs, but we aren't done - // wait. - LOGS(_log, LOG_LVL_INFO, "seq=" << _seq << " Waiting, no data ready "); - _hasDataCondition.wait(lock); - } - if (_msgs.empty() && _closed) { - // It's closed and no more msgs are available. - LOGS(_log, LOG_LVL_INFO, "seq=" << _seq << " Not waiting, but closed"); - dlen = 0; - eInfo.Set("Not an active stream", EOPNOTSUPP); - return 0; - } - - StreamBuffer::Ptr sb = _msgs.front(); - dlen = sb->getSize(); - _msgs.pop_front(); - last = _closed && _msgs.empty(); - LOGS(_log, LOG_LVL_INFO, - "seq=" << to_string(_seq) << " returning buffer (" << dlen << ", " << (last ? "(last)" : "(more)") - << ")" - << " getBufCount=" << _getBufCount); - return sb.get(); -} - -void ChannelStream::clearMsgs() { - LOGS(_log, LOG_LVL_DEBUG, "seq=" << to_string(_seq) << " ChannelStream::clearMsgs()"); - unique_lock lock(_mutex); - while (!_msgs.empty()) { - _msgs.front()->Recycle(); - _msgs.pop_front(); - } -} - -} // namespace lsst::qserv::xrdsvc diff --git a/src/xrdsvc/ChannelStream.h b/src/xrdsvc/ChannelStream.h deleted file mode 100644 index ee2de6005d..0000000000 --- a/src/xrdsvc/ChannelStream.h +++ /dev/null @@ -1,75 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_XRDSVC_CHANNELSTREAM_H -#define LSST_QSERV_XRDSVC_CHANNELSTREAM_H - -// System headers -#include -#include -#include -#include - -// qserv headers -#include "xrdsvc/StreamBuffer.h" - -// Third-party headers -#include "XrdSsi/XrdSsiErrInfo.hh" // required by XrdSsiStream -#include "XrdSsi/XrdSsiStream.hh" - -namespace lsst::qserv::xrdsvc { - -/// ChannelStream is an implementation of an XrdSsiStream that accepts -/// SendChannel streamed data. -class ChannelStream : public XrdSsiStream { -public: - ChannelStream(); - virtual ~ChannelStream(); - - /// Push in a data packet - void append(StreamBuffer::Ptr const &StreamBuffer, bool last); - - /// Empty _msgs, calling StreamBuffer::Recycle() where needed. - void clearMsgs(); - - /// Pull out a data packet as a Buffer object (called by XrdSsi code) - Buffer *GetBuff(XrdSsiErrInfo &eInfo, int &dlen, bool &last) override; - - bool closed() const { return _closed; } - - uint64_t getSeq() const { return _seq; } - -private: - bool _closed; ///< Closed to new append() calls? - // Can keep a deque of (buf, bufsize) to reduce copying, if needed. 
- std::deque _msgs; ///< Message queue - std::mutex _mutex; ///< _msgs protection - std::condition_variable _hasDataCondition; ///< _msgs condition - uint64_t const _seq; ///< Unique identifier for this instance. - static std::atomic _sequenceSource; ///< Source of unique identifiers. - std::atomic _appendCount{0}; ///< number of appends - std::atomic _getBufCount{0}; ///< number of buffers -}; - -} // namespace lsst::qserv::xrdsvc - -#endif // LSST_QSERV_XRDSVC_CHANNELSTREAM_H diff --git a/src/xrdsvc/SsiProvider.cc b/src/xrdsvc/SsiProvider.cc deleted file mode 100644 index f7a0684110..0000000000 --- a/src/xrdsvc/SsiProvider.cc +++ /dev/null @@ -1,209 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2016 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/// Implement XrdSsiProviderServer to provide Qserv's SsiService -/// implementation. Link this file when building a plugin to be used as -/// ssi.svclib or oss.statlib. 
- -// Class header -#include "xrdsvc/SsiProvider.h" - -// System headers -#include -#include - -// Third party headers -#include "boost/filesystem.hpp" -#include "XrdSsi/XrdSsiCluster.hh" -#include "XrdSsi/XrdSsiLogger.hh" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "global/ResourceUnit.h" -#include "wconfig/WorkerConfig.h" -#include "wpublish/ChunkInventory.h" -#include "xrdsvc/XrdName.h" - -/******************************************************************************/ -/* G l o b a l s */ -/******************************************************************************/ - -// The following are global sysbols that point to an instance of our provider -// object. The SSI framework looks for these symbols when the shared library -// plug-in is loaded. The framework must find a valid provider object at load -// time or it will refuse to use the shared library. As the library is never -// unloaded, the object does not need to be deleted. -// -XrdSsiProvider* XrdSsiProviderServer = new lsst::qserv::xrdsvc::SsiProviderServer; - -XrdSsiProvider* XrdSsiProviderLookup = XrdSsiProviderServer; - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.SsiProvider"); -} - -/******************************************************************************/ -/* D e s t r u c t o r */ -/******************************************************************************/ - -namespace lsst::qserv::xrdsvc { - -SsiProviderServer::~SsiProviderServer() {} - -/******************************************************************************/ -/* I n i t */ -/******************************************************************************/ - -bool SsiProviderServer::Init(XrdSsiLogger* logP, XrdSsiCluster* clsP, std::string cfgFn, std::string parms, - int argc, char** argv) { - lsst::qserv::xrdsvc::XrdName x; - - if (argc != 2) { - LOGS(_log, LOG_LVL_TRACE, "argc: " << argc); - LOGS(_log, LOG_LVL_FATAL, - "Incorrect xrdssi configuration, launch " - "xrootd with 
option '-+xrdssi /path/to/xrdssi/cfg/file'"); - exit(EXIT_FAILURE); - } - - LOGS(_log, LOG_LVL_DEBUG, "Qserv xrdssi plugin configuration file: " << argv[1]); - - std::string workerConfigFile = argv[1]; - auto const workerConfig = wconfig::WorkerConfig::create(workerConfigFile); - LOGS(_log, LOG_LVL_DEBUG, "Qserv xrdssi plugin configuration: " << *workerConfig); - - // Save the ssi logger as it places messages in another file than our log. - // - _logSsi = logP; - - // Save the cluster object as we will need to use it to inform the cluster - // when chunks come and go. We also can use it to schedule ourselves. The - // object or its absence will indicate whether or not we need to provide - // any service other than QueryResource(). - // - _cmsSsi = clsP; - - // We would process the configuration file (if present), any present - // parameters and the command line arguments. However, at the moment, we - // have nothing of interest in any of these arguments. So, we ignore them. - // - - // Herald our initialization - // - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider initializing..."); - _logSsi->Msg("Qserv", "Provider Initializing"); - - // Initialize the inventory. We need to be able to handle QueryResource() - // calls either in the data provider and the metadata provider (we can be - // either one). - // - _chunkInventory.init(x.getName(), workerConfig->getMySqlConfig()); - - // If we are a data provider (i.e. xrootd) then we need to get the service - // object. It will print the exported paths. Otherwise, we need to print - // them here. This is kludgy and should be corrected when we transition to a - // single shared memory inventory object which should do this by itself. 
- // - if (clsP && clsP->DataContext()) { - _service.reset(new SsiService(logP)); - } else { - std::ostringstream ss; - ss << "Provider valid paths(ci): "; - _chunkInventory.dbgPrint(ss); - LOGS(_log, LOG_LVL_DEBUG, ss.str()); - _logSsi->Msg("Qserv", ss.str().c_str()); - } - - // We have completed full initialization. Return sucess. - // - return true; -} - -/******************************************************************************/ -/* Q u e r y R e s o u r c e */ -/******************************************************************************/ - -XrdSsiProvider::rStat SsiProviderServer::QueryResource(char const* rName, char const* contact) { - // Validate resource name based on its proposed type - - ResourceUnit ru(rName); - if (ru.unitType() == ResourceUnit::DBCHUNK) { - // Extract db and chunk from path and validate result - - // If the chunk exists on our node then tell the caller it is here. - if (_chunkInventory.has(ru.db(), ru.chunk())) { - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider Query " << rName << " present"); - return isPresent; - } - - // Tell the caller we do not have the chunk. 
- LOGS(_log, LOG_LVL_DEBUG, "SsiProvider Query " << rName << " absent"); - return notPresent; - } else if (ru.unitType() == ResourceUnit::QUERY) { - return isPresent; - } - - // Treat other resources as absolute path names of files - boost::filesystem::path const path(rName); - if (path.is_absolute()) { - boost::system::error_code ec; - if (boost::filesystem::exists(path, ec) && !ec.value()) { - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider File Resource " << rName << " recognized"); - return isPresent; - } - } - - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider Query " << rName << " invalid"); - return notPresent; -} - -void SsiProviderServer::ResourceAdded(const char* rName) { - // Handle resource based on its proposed type - - ResourceUnit ru(rName); - if (ru.unitType() == ResourceUnit::DBCHUNK) { - // Extract db and chunk from path and add the resource to the chunk - // inventory - _chunkInventory.add(ru.db(), ru.chunk()); - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider ResourceAdded " << rName); - return; - } - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider ResourceAdded " << rName << " invalid"); -} - -void SsiProviderServer::ResourceRemoved(const char* rName) { - // Handle resource based on its proposed type - - ResourceUnit ru(rName); - if (ru.unitType() == ResourceUnit::DBCHUNK) { - // Extract db and chunk from path and add the resource to the chunk - // inventory - _chunkInventory.remove(ru.db(), ru.chunk()); - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider ResourceRemoved " << rName); - return; - } - LOGS(_log, LOG_LVL_DEBUG, "SsiProvider ResourceRemoved " << rName << " invalid"); -} - -} // namespace lsst::qserv::xrdsvc diff --git a/src/xrdsvc/SsiProvider.h b/src/xrdsvc/SsiProvider.h deleted file mode 100644 index 7969774796..0000000000 --- a/src/xrdsvc/SsiProvider.h +++ /dev/null @@ -1,79 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2015 AURA/LSST. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -/// Implement XrdSsiProviderServer to provide Qserv's SsiService -/// implementation. Link this file when building a plugin to be used as -/// ssi.svclib. - -#ifndef LSST_QSERV_XRDSVC_SSIPROVIDER_H -#define LSST_QSERV_XRDSVC_SSIPROVIDER_H - -// System headers -#include -#include - -// Third-party headers -#include "XrdSsi/XrdSsiProvider.hh" - -// Qserv headers -#include "wpublish/ChunkInventory.h" -#include "xrdsvc/SsiService.h" - -// Forward declarations -class XrdSsiCluster; -class XrdSsiLogger; - -namespace lsst::qserv::xrdsvc { - -class SsiProviderServer : public XrdSsiProvider { -public: - XrdSsiService* GetService(XrdSsiErrInfo& eInfo, std::string const& contact, int oHold = 256) override { - return _service.get(); - } - - wpublish::ChunkInventory& GetChunkInventory() { return _chunkInventory; } - - XrdSsiCluster* GetClusterManager() { return _cmsSsi; } - - bool Init(XrdSsiLogger* logP, XrdSsiCluster* clsP, std::string cfgFn, std::string parms, int argc, - char** argv) override; - - rStat QueryResource(char const* rName, char const* contact = 0) override; - - void ResourceAdded(const char* rName) override; - - void ResourceRemoved(const char* rName) override; - - SsiProviderServer() : _cmsSsi(0), _logSsi(0) {} - virtual ~SsiProviderServer(); - -private: - wpublish::ChunkInventory _chunkInventory; - 
std::unique_ptr _service; - - XrdSsiCluster* _cmsSsi; - XrdSsiLogger* _logSsi; -}; - -} // namespace lsst::qserv::xrdsvc - -#endif // LSST_QSERV_XRDSVC_SSIPROVIDER_H diff --git a/src/xrdsvc/SsiRequest.cc b/src/xrdsvc/SsiRequest.cc deleted file mode 100644 index c1c9da2341..0000000000 --- a/src/xrdsvc/SsiRequest.cc +++ /dev/null @@ -1,389 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015-2016 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ - -// Class header -#include -#include -#include -#include -#include -#include - -// Third-party headers -#include "XrdSsi/XrdSsiRequest.hh" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "global/intTypes.h" -#include "global/LogContext.h" -#include "global/ResourceUnit.h" -#include "proto/FrameBuffer.h" -#include "proto/worker.pb.h" -#include "util/InstanceCount.h" -#include "util/HoldTrack.h" -#include "util/Timer.h" -#include "wbase/FileChannelShared.h" -#include "wbase/TaskState.h" -#include "wbase/Task.h" -#include "wconfig/WorkerConfig.h" -#include "wcontrol/Foreman.h" -#include "wcontrol/ResourceMonitor.h" -#include "wpublish/ChunkInventory.h" -#include "xrdsvc/ChannelStream.h" - -namespace proto = lsst::qserv::proto; -namespace wbase = lsst::qserv::wbase; - -namespace { - -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.SsiRequest"); - -} // namespace - -namespace lsst::qserv::xrdsvc { - -SsiRequest::Ptr SsiRequest::newSsiRequest(std::string const& rname, - std::shared_ptr const& foreman) { - auto req = SsiRequest::Ptr(new SsiRequest(rname, foreman)); - req->_selfKeepAlive = req; - return req; -} - -SsiRequest::SsiRequest(std::string const& rname, std::shared_ptr const& foreman) - : _validator(foreman->chunkInventory()->newValidator()), _foreman(foreman), _resourceName(rname) {} - -SsiRequest::~SsiRequest() { - LOGS(_log, LOG_LVL_DEBUG, "~SsiRequest()"); - UnBindRequest(); -} - -void SsiRequest::reportError(std::string const& errStr) { - LOGS(_log, LOG_LVL_WARN, errStr); - replyError(errStr, EINVAL); - ReleaseRequestBuffer(); -} - -uint64_t countLimiter = 0; // LockupDB - -// Step 4 -/// Called by XrdSsi to actually process a request. 
-void SsiRequest::execute(XrdSsiRequest& req) { - util::Timer t; - LOGS(_log, LOG_LVL_DEBUG, "Execute request, resource=" << _resourceName); - - char* reqData = nullptr; - int reqSize; - t.start(); - reqData = req.GetRequest(reqSize); - t.stop(); - LOGS(_log, LOG_LVL_DEBUG, "GetRequest took " << t.getElapsed() << " seconds"); - - // We bind this object to the request now. This allows us to respond at any - // time (much simpler). Though the manual forgot to say that all pending - // events will be reflected on a different thread the moment we bind the - // request; the fact allows us to use a mutex to serialize the order of - // initialization and possible early cancellation. We protect this code - // with a mutex gaurd which will be released upon exit. - // - std::lock_guard lock(_finMutex); - BindRequest(req); - - ResourceUnit ru(_resourceName); - - // Make sure the requested resource belongs to this worker - if (!(*_validator)(ru)) { - reportError("WARNING: request to the unowned resource detected:" + _resourceName); - return; - } - - auto const sendChannel = std::make_shared(shared_from_this()); - - // Process the request - switch (ru.unitType()) { - case ResourceUnit::DBCHUNK: { - // Increment the counter of the database/chunk resources in use - _foreman->resourceMonitor()->increment(_resourceName); - - // reqData has the entire request, so we can unpack it without waiting for - // more data. 
- LOGS(_log, LOG_LVL_DEBUG, "Decoding TaskMsg of size " << reqSize); - auto taskMsg = std::make_shared(); - if (!taskMsg->ParseFromArray(reqData, reqSize) || !taskMsg->IsInitialized()) { - reportError("Failed to decode TaskMsg on resource db=" + ru.db() + - " chunkId=" + std::to_string(ru.chunk())); - return; - } - - QSERV_LOGCONTEXT_QUERY_JOB(taskMsg->queryid(), taskMsg->jobid()); - - if (!taskMsg->has_db() || !taskMsg->has_chunkid() || (ru.db() != taskMsg->db()) || - (ru.chunk() != taskMsg->chunkid())) { - reportError("Mismatched db/chunk in TaskMsg on resource db=" + ru.db() + - " chunkId=" + std::to_string(ru.chunk())); - return; - } - - if (not(taskMsg->has_queryid() && taskMsg->has_jobid() && taskMsg->has_scaninteractive() && - taskMsg->has_attemptcount() && taskMsg->has_czarid())) { - reportError(std::string("taskMsg missing required field ") + - " queryid:" + std::to_string(taskMsg->has_queryid()) + - " jobid:" + std::to_string(taskMsg->has_jobid()) + - " scaninteractive:" + std::to_string(taskMsg->has_scaninteractive()) + - " attemptcount:" + std::to_string(taskMsg->has_attemptcount()) + - " czarid:" + std::to_string(taskMsg->has_czarid())); - return; - } - _channelShared = wbase::FileChannelShared::create(sendChannel, taskMsg->czarid(), - _foreman->chunkInventory()->id()); - auto const tasks = wbase::Task::createTasks(taskMsg, _channelShared, _foreman->chunkResourceMgr(), - _foreman->mySqlConfig(), _foreman->sqlConnMgr(), - _foreman->queriesAndChunks(), _foreman->httpPort()); - for (auto const& task : tasks) { - _tasks.push_back(task); - } - - // Now that the request is decoded (successfully or not), release the - // xrootd request buffer. To avoid data races, this must happen before - // the task is handed off to another thread for processing, as there is a - // reference to this SsiRequest inside the reply channel for the task, - // and after the call to BindRequest. 
- ReleaseRequestBuffer(); - t.start(); - _foreman->processTasks(tasks); // Queues tasks to be run later. - t.stop(); - LOGS(_log, LOG_LVL_DEBUG, - "Enqueued TaskMsg for " << ru << " in " << t.getElapsed() << " seconds"); - break; - } - case ResourceUnit::QUERY: { - LOGS(_log, LOG_LVL_DEBUG, "Parsing request details for resource=" << _resourceName); - proto::QueryManagement request; - try { - // reqData has the entire request, so we can unpack it without waiting for - // more data. - proto::FrameBufferView view(reqData, reqSize); - view.parse(request); - ReleaseRequestBuffer(); - } catch (proto::FrameBufferError const& ex) { - reportError("Failed to decode a query completion/cancellation command, error: " + - std::string(ex.what())); - break; - } - LOGS(_log, LOG_LVL_DEBUG, - "QueryManagement: op=" << proto::QueryManagement_Operation_Name(request.op()) - << " query_id=" << request.query_id()); - - switch (request.op()) { - case proto::QueryManagement::CANCEL_AFTER_RESTART: - // TODO: locate and cancel the coresponding tasks, remove the tasks - // from the scheduler queues. - wbase::FileChannelShared::cleanUpResultsOnCzarRestart(request.czar_id(), - request.query_id()); - break; - case proto::QueryManagement::CANCEL: - // TODO: locate and cancel the coresponding tasks, remove the tasks - // from the scheduler queues. - wbase::FileChannelShared::cleanUpResults(request.czar_id(), request.query_id()); - break; - case proto::QueryManagement::COMPLETE: - wbase::FileChannelShared::cleanUpResults(request.czar_id(), request.query_id()); - break; - default: - reportError("QueryManagement: op=" + proto::QueryManagement_Operation_Name(request.op()) + - " is not supported by the current implementation."); - return; - } - - // Send back the empty response since no info is expected by a caller - // for this type of requests beyond the usual error notifications (if any). 
- this->reply((char const*)0, 0); - break; - } - default: - reportError("Unexpected unit type '" + std::to_string(ru.unitType()) + - "', resource name: " + _resourceName); - break; - } - - // Note that upon exit the _finMutex will be unlocked allowing Finished() - // to actually do something once everything is actually setup. -} - -/// Called by SSI to free resources. -void SsiRequest::Finished(XrdSsiRequest& req, XrdSsiRespInfo const& rinfo, bool cancel) { // Step 8 - util::HoldTrack::Mark markA(ERR_LOC, "SsiRequest::Finished start"); - if (cancel) { - // Either the czar of xrootd has decided to cancel the Job. - // Try to cancel all of the tasks, if there are any. - for (auto&& wTask : _tasks) { - auto task = wTask.lock(); - if (task != nullptr) { - task->cancel(); - } - } - } - - // This call is sync (blocking). - // client finished retrieving response, or cancelled. - // release response resources (e.g. buf) - // But first we must make sure that request setup completed (i.e execute()) by - // locking _finMutex. - { - std::lock_guard finLock(_finMutex); - // Clean up _stream if it exists and don't add anything new to it either. - _reqFinished = true; - if (_stream != nullptr) { - _stream->clearMsgs(); - } - } - - // This will clear the cyclic dependency: - // FileChannelShared -> ChannelStream -> SsiRequest -> FileChannelShared - // - // TODO: Eliminate xrdsvc::ChannelStream sinve this class seems to be useless - // in the file-based result delivery protocol. - _channelShared.reset(); - - auto keepAlive = freeSelfKeepAlive(); - - // No buffers allocated, so don't need to free. 
- // We can release/unlink the file now - const char* type = ""; - switch (rinfo.rType) { - case XrdSsiRespInfo::isNone: - type = "type=isNone"; - break; - case XrdSsiRespInfo::isData: - type = "type=isData"; - break; - case XrdSsiRespInfo::isError: - type = "type=isError"; - break; - case XrdSsiRespInfo::isFile: - type = "type=isFile"; - break; - case XrdSsiRespInfo::isStream: - type = "type=isStream"; - break; - case XrdSsiRespInfo::isHandle: - type = "type=isHandle"; - break; - } - - // Decrement the counter of the database/chunk resources in use - ResourceUnit ru(_resourceName); - if (ru.unitType() == ResourceUnit::DBCHUNK) { - _foreman->resourceMonitor()->decrement(_resourceName); - } - - // We can't do much other than close the file. - // It should work (on linux) to unlink the file after we open it, though. - // With the optimizer on '-Og', there was a double free for a SsiRequest. - // The likely cause could be keepAlive being optimized out for being unused. - // The problem has not reoccurred since adding keepAlive to the following - // comment, but having code depend on a comment line is ugly in its own way. 
- LOGS(_log, LOG_LVL_DEBUG, "RequestFinished " << type << " " << keepAlive.use_count()); -} - -bool SsiRequest::reply(char const* buf, int bufLen) { - Status s = SetResponse(buf, bufLen); - if (s != XrdSsiResponder::wasPosted) { - LOGS(_log, LOG_LVL_ERROR, "DANGER: Couldn't post response of length=" << bufLen); - return false; - } - return true; -} - -bool SsiRequest::replyError(std::string const& msg, int code) { - Status s = SetErrResponse(msg.c_str(), code); - if (s != XrdSsiResponder::wasPosted) { - LOGS(_log, LOG_LVL_ERROR, "DANGER: Couldn't post error response " << msg); - return false; - } - return true; -} - -bool SsiRequest::replyStream(StreamBuffer::Ptr const& sBuf, bool last) { - LOGS(_log, LOG_LVL_DEBUG, "replyStream, checking stream size=" << sBuf->getSize() << " last=" << last); - - // Normally, XrdSsi would call Recycle() when it is done with sBuf, but if this function - // returns false, then it must call Recycle(). Otherwise, the scheduler will likely - // wedge waiting for the buffer to be released. - std::lock_guard finLock(_finMutex); - if (_reqFinished) { - // Finished() was called, give up. - LOGS(_log, LOG_LVL_ERROR, "replyStream called after reqFinished."); - sBuf->Recycle(); - return false; - } - // Create a stream if needed. - if (!_stream) { - _stream = std::make_shared(); - if (SetResponse(_stream.get()) != XrdSsiResponder::Status::wasPosted) { - LOGS(_log, LOG_LVL_WARN, "SetResponse stream failed, calling Recycle for sBuf"); - // SetResponse return value indicates XrdSsi wont call Recycle(). - sBuf->Recycle(); - return false; - } - } else if (_stream->closed()) { - // XrdSsi isn't going to call Recycle if we wind up here. - LOGS(_log, LOG_LVL_ERROR, "Logic error SsiRequest::replyStream called with stream closed."); - sBuf->Recycle(); - return false; - } - // XrdSsi or Finished() will call Recycle(). 
- LOGS(_log, LOG_LVL_INFO, "SsiRequest::replyStream seq=" << getSeq()); - _stream->append(sBuf, last); - return true; -} - -bool SsiRequest::sendMetadata(const char* buf, int blen) { - Status stat = SetMetadata(buf, blen); - switch (stat) { - case XrdSsiResponder::wasPosted: - return true; - case XrdSsiResponder::notActive: - LOGS(_log, LOG_LVL_ERROR, "failed to " << __func__ << " notActive"); - break; - case XrdSsiResponder::notPosted: - LOGS(_log, LOG_LVL_ERROR, "failed to " << __func__ << " notPosted blen=" << blen); - break; - default: - LOGS(_log, LOG_LVL_ERROR, "failed to " << __func__ << " unkown state blen=" << blen); - } - return false; -} - -SsiRequest::Ptr SsiRequest::freeSelfKeepAlive() { - Ptr keepAlive = std::move(_selfKeepAlive); - return keepAlive; -} - -uint64_t SsiRequest::getSeq() const { - if (_stream == nullptr) return 0; - return _stream->getSeq(); -} - -} // namespace lsst::qserv::xrdsvc diff --git a/src/xrdsvc/SsiRequest.h b/src/xrdsvc/SsiRequest.h deleted file mode 100644 index 3583a0cef9..0000000000 --- a/src/xrdsvc/SsiRequest.h +++ /dev/null @@ -1,128 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2015 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . 
- */ -#ifndef LSST_QSERV_XRDSVC_SSIREQUEST_H -#define LSST_QSERV_XRDSVC_SSIREQUEST_H - -// System headers -#include -#include -#include -#include - -// Third-party headers -#include "XrdSsi/XrdSsiResponder.hh" - -// Qserv headers -#include "global/ResourceUnit.h" -#include "mysql/MySqlConfig.h" -#include "xrdsvc/StreamBuffer.h" - -// Forward declarations -class XrdSsiService; - -namespace lsst::qserv { -namespace wbase { -class FileChannelShared; -class SendChannel; -class Task; -} // namespace wbase -namespace wcontrol { -class Foreman; -} -} // namespace lsst::qserv - -namespace lsst::qserv::xrdsvc { - -class ChannelStream; -class StreamBuffer; - -/// An implementation of XrdSsiResponder that is used by SsiService to provide -/// qserv worker services. The SSI interface encourages such an approach, and -/// object lifetimes are explicitly stated in the documentation which we -/// adhere to using BindRequest() and UnBindRequest() responder methods. -class SsiRequest : public XrdSsiResponder, public std::enable_shared_from_this { -public: - // Smart pointer definitions - - typedef std::shared_ptr ValidatorPtr; - typedef std::shared_ptr Ptr; - - /// Use factory to ensure proper construction for enable_shared_from_this. - static SsiRequest::Ptr newSsiRequest(std::string const& rname, - std::shared_ptr const& processor); - - virtual ~SsiRequest(); - - void execute(XrdSsiRequest& req); - - /** - * Implements the virtual method defined in the base class - * @see XrdSsiResponder::Finished - */ - void Finished(XrdSsiRequest& req, XrdSsiRespInfo const& rinfo, bool cancel = false) override; - - bool isFinished() { return _reqFinished; } - - bool reply(char const* buf, int bufLen); - bool replyError(std::string const& msg, int code); - bool replyStream(StreamBuffer::Ptr const& sbuf, bool last); - - bool sendMetadata(const char* buf, int blen); - - /// Call this to allow object to die after it truly is no longer needed. - /// i.e. It is know Finish() will not be called. 
- /// NOTE: It is important that any non-static SsiRequest member - /// function make a local copy of the returned pointer so that - /// SsiRequest is guaranteed to live to the end of - /// the function call. - Ptr freeSelfKeepAlive(); - - uint64_t getSeq() const; - -private: - /// Constructor (called by the static factory method newSsiRequest) - SsiRequest(std::string const& rname, std::shared_ptr const& processor); - - /// For internal error reporting - void reportError(std::string const& errStr); - -private: - ValidatorPtr _validator; ///< validates request against what's available - std::shared_ptr const _foreman; ///< actual msg processor - - std::mutex _finMutex; ///< Protects execute() from Finish(), _finished, and _stream - std::atomic _reqFinished{false}; ///< set to true when Finished called - std::string _resourceName; ///< chunk identifier - - std::shared_ptr _stream; - std::shared_ptr _channelShared; ///< Must live before Finished() gets called. - std::vector> _tasks; ///< List of tasks for use in cancellation. - - /// Make sure this object exists until Finish() is called. - /// Make a local copy before calling reset() within and non-static member function. - Ptr _selfKeepAlive; -}; - -} // namespace lsst::qserv::xrdsvc - -#endif // LSST_QSERV_XRDSVC_SSIREQUEST_H diff --git a/src/xrdsvc/StreamBuffer.cc b/src/xrdsvc/StreamBuffer.cc deleted file mode 100644 index 8024aa5684..0000000000 --- a/src/xrdsvc/StreamBuffer.cc +++ /dev/null @@ -1,139 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ - -// Class header -#include "xrdsvc/StreamBuffer.h" - -// Third-party headers -#include "boost/utility.hpp" - -// LSST headers -#include "lsst/log/Log.h" - -// Qserv headers -#include "wbase/Task.h" -#include "wcontrol/WorkerStats.h" - -namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.StreamBuffer"); -} - -using namespace std; - -namespace lsst::qserv::xrdsvc { - -// Factory function, because this should be able to delete itself when Recycle() is called. -StreamBuffer::Ptr StreamBuffer::createWithMove(std::string &input, std::shared_ptr const &task) { - Ptr ptr(new StreamBuffer(input, task)); - ptr->_selfKeepAlive = ptr; - return ptr; -} - -StreamBuffer::StreamBuffer(std::string &input, wbase::Task::Ptr const &task) : _task(task) { - _dataStr = std::move(input); - // TODO: try to make 'data' a const char* in xrootd code. - // 'data' is not being changed after being passed, so hopefully not an issue. - //_dataStr will not be used again, but this is ugly. - data = (char *)(_dataStr.data()); - next = 0; - - auto now = CLOCK::now(); - _createdTime = now; - _startTime = now; - _endTime = now; - - _wStats = wcontrol::WorkerStats::get(); - if (_wStats != nullptr) { - _wStats->startQueryRespConcurrentQueued(_createdTime); - } -} - -void StreamBuffer::startTimer() { - auto now = CLOCK::now(); - _startTime = now; - _endTime = now; - - if (_wStats != nullptr) { - _wStats->endQueryRespConcurrentQueued(_createdTime, _startTime); // add time to queued time - } -} - -/// xrdssi calls this to recycle the buffer when finished. 
-void StreamBuffer::Recycle() { - { - std::lock_guard lg(_mtx); - _doneWithThis = true; - } - _cv.notify_all(); - - _endTime = CLOCK::now(); - if (_wStats != nullptr) { - _wStats->endQueryRespConcurrentXrootd(_startTime, _endTime); - } - - if (_task != nullptr) { - auto taskSched = _task->getTaskScheduler(); - if (taskSched != nullptr) { - std::chrono::duration secs = _endTime - _startTime; - taskSched->histTimeOfTransmittingTasks->addEntry(secs.count()); - LOGS(_log, LOG_LVL_TRACE, "Recycle " << taskSched->histTimeOfTransmittingTasks->getJson()); - } else { - LOGS(_log, LOG_LVL_WARN, "Recycle transmit taskSched == nullptr"); - } - } else { - LOGS(_log, LOG_LVL_DEBUG, "Recycle transmit _task == nullptr"); - } - // Effectively reset _selfKeepAlive, and if nobody else was - // referencing this, this object will delete itself when - // this function is done. - // std::move is used instead of reset() as reset() could - // result in _keepalive deleting itself while still in use. - Ptr keepAlive = std::move(_selfKeepAlive); -} - -void StreamBuffer::cancel() { - // Recycle may still need to be called by XrdSsi or there will be a memory - // leak. XrdSsi calling Recycle is beyond what can be controlled here, but - // better a possible leak than corrupted memory or a permanently wedged - // thread in a limited pool. - // In any case, this code having an effect should be extremely rare. - // FUTURE: It would be nice to eliminate this possible memory leak. - // Possible fix, atomic _recycleCalled, create thread - // to check if _recycleCalled == true. If true or 24 hours pass - // use `Ptr keepAlive = std::move(_selfKeepAlive);` to kill the object. - { - std::lock_guard lg(_mtx); - _doneWithThis = true; - _cancelled = true; - } - _cv.notify_all(); -} - -// Wait until recycle is called. 
-bool StreamBuffer::waitForDoneWithThis() { - std::unique_lock uLock(_mtx); - _cv.wait(uLock, [this]() { return _doneWithThis || _cancelled; }); - return !_cancelled; -} - -} // namespace lsst::qserv::xrdsvc diff --git a/src/xrdsvc/StreamBuffer.h b/src/xrdsvc/StreamBuffer.h deleted file mode 100644 index f704e2c70a..0000000000 --- a/src/xrdsvc/StreamBuffer.h +++ /dev/null @@ -1,113 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2014-2018 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_XRDSVC_STREAMBUFFER_H -#define LSST_QSERV_XRDSVC_STREAMBUFFER_H - -// System headers -#include -#include -#include -#include -#include - -// qserv headers -#include "util/InstanceCount.h" - -// Third-party headers -#include "XrdSsi/XrdSsiErrInfo.hh" // required by XrdSsiStream -#include "XrdSsi/XrdSsiStream.hh" - -namespace lsst::qserv { -namespace wbase { -class Task; -} -namespace wcontrol { -class WorkerStats; -} -} // namespace lsst::qserv - -namespace lsst::qserv::xrdsvc { - -/// StreamBuffer is a single use buffer for transferring data packets -/// to XrdSsi. -/// Its notable feature is the Recycle() function, which XrdSsi will -/// promptly call when it no longer needs the buffer. 
-class StreamBuffer : public XrdSsiStream::Buffer { -public: - using Ptr = std::shared_ptr; - - // Copying this would be very confusing for something waiting for Recycle(). - StreamBuffer() = delete; - StreamBuffer(StreamBuffer const &) = delete; - StreamBuffer &operator=(StreamBuffer const &) = delete; - - /// Factory function, because this should be able to delete itself when Recycle() is called. - /// The constructor uses move to avoid copying the string. - static StreamBuffer::Ptr createWithMove(std::string &input, - std::shared_ptr const &task = nullptr); - - size_t getSize() const { return _dataStr.size(); } - - /// Call to recycle the buffer when finished (normally called by XrdSsi). - void Recycle() override; - - /// Wait until Recycle() is called. - /// @return true if there is data in the buffer. - bool waitForDoneWithThis(); - - /// Start the timer that will be stopped when Recycle() is called. - void startTimer(); - - /// Unblock the condition variable on cancel. - void cancel(); - - ~StreamBuffer() override = default; - -private: - /// This constructor will invalidate 'input'. - explicit StreamBuffer(std::string &input, std::shared_ptr const &task); - - /// Pointer to the task for keeping statistics. - /// NOTE: This will be nullptr for many things, so check before using. - std::shared_ptr _task; - std::string _dataStr; - std::mutex _mtx; - std::condition_variable _cv; - bool _doneWithThis = false; - bool _cancelled = false; - Ptr _selfKeepAlive; ///< keep this object alive until after Recycle() is called. - // util::InstanceCount _ic{"StreamBuffer"}; ///< Useful as it indicates amount of waiting for czar. - - std::chrono::time_point _createdTime; ///< Time this instance was created. - std::chrono::time_point - _startTime; ///< Time this instance was handed to xrootd. - std::chrono::time_point - _endTime; ///< Time xrootd was finished with this instance. - /// Pointer for worker statistics. 
- /// NOTE: This will be nullptr for many things, so check before using. - std::shared_ptr _wStats; -}; - -} // namespace lsst::qserv::xrdsvc - -#endif // LSST_QSERV_XRDSVC_STREAMBUFFER_H diff --git a/src/xrdsvc/XrdName.h b/src/xrdsvc/XrdName.h deleted file mode 100644 index 3bc61091f9..0000000000 --- a/src/xrdsvc/XrdName.h +++ /dev/null @@ -1,61 +0,0 @@ -// -*- LSST-C++ -*- -/* - * LSST Data Management System - * Copyright 2013 LSST Corporation. - * - * This product includes software developed by the - * LSST Project (http://www.lsst.org/). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the LSST License Statement and - * the GNU General Public License along with this program. If not, - * see . - */ -#ifndef LSST_QSERV_XRDFS_XRDNAME_H -#define LSST_QSERV_XRDFS_XRDNAME_H - -// System headers -#include -#include -#include - -namespace lsst::qserv::xrdsvc { - -/// XrdName is a small class that helps extract the name of a running xrootd (or -/// cmsd) instance. It does this by checking an environment variable that is -/// specified to be set during initialization of any xrootd/cmsd process. -class XrdName { -public: - XrdName() { - char const* name = std::getenv("XRDNAME"); - _setName(name ? 
name : "unknown"); - } - - std::string const& getName() const { return _name; } - -private: - void _setName(char const* name) { - _name.clear(); - // Discard non alpha-numeric characters other than '_' - for (char const* s = name; *s != '\0'; ++s) { - if (std::isalnum(*s) || *s == '_') { - _name.push_back(*s); - } - } - } - - std::string _name; -}; - -} // namespace lsst::qserv::xrdsvc - -#endif // LSST_QSERV_XRDFS_XRDNAME_H