Skip to content

Commit

Permalink
Implement the run collect-info through Edgeview
Browse files Browse the repository at this point in the history
- Add the implementation for running collect-info through Edgeview
- Edgeview mostly has only read access to the device filesystems,
  and we do not want it to import all the pkgs, replicate the collect-info.sh
  operations, and maintain compatibility between two sets of collect-info
- this implementation has a protocol to set up a request for the debug container
  to start running 'collect-info.sh', and to remove the request file after it is done;
  Edgeview then uses the existing copy-file protocol to transfer the collect-info
  tarball file back to the user's laptop; then Edgeview puts another request for
  the debug container to remove the created tarball file in /persist
- the debug container has added a background task to monitor the requests
- Since collect-info is mostly live info, there is no need to get all the newlogs,
  only fetch the newlogs of the past 10 days.
- rename debug/ssh.sh to debug/debug-tasks.sh to reflect that it no longer
  handles only the ssh task

Signed-off-by: Naiming Shen <naiming@zededa.com>
  • Loading branch information
naiming-zededa committed Jul 31, 2024
1 parent e8a6474 commit 3578fc3
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 26 deletions.
4 changes: 2 additions & 2 deletions pkg/debug/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ RUN mkdir -p /out/var/empty

# tweaking various bit
WORKDIR /out
COPY ssh.sh spec.sh scripts/ ./usr/bin/
COPY debug-tasks.sh spec.sh scripts/ ./usr/bin/
RUN mkdir -p ./etc/ssh ./root/.ssh
RUN chmod 0700 ./root/.ssh
RUN cp /etc/passwd /etc/group ./etc/
Expand All @@ -126,4 +126,4 @@ WORKDIR /

COPY --from=build /out/ /

CMD ["/sbin/tini", "/usr/bin/ssh.sh"]
CMD ["/sbin/tini", "/usr/bin/debug-tasks.sh"]
62 changes: 62 additions & 0 deletions pkg/debug/debug-tasks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/sh

# Entry point of the debug container. It was called 'ssh.sh' until the
# background monitoring task was introduced; the name 'debug-tasks.sh'
# reflects that the container now runs several tasks, not just sshd.

# Relax the kernel restrictions so that linux kernel symbols are accessible
echo 0 > /proc/sys/kernel/kptr_restrict
echo -1 > /proc/sys/kernel/perf_event_paranoid

# Generate the ssh host keys only when none are present under /etc/ssh
host_keys=$(find /etc/ssh -name 'ssh_host_*_key')
if [ -z "$host_keys" ]; then
    ssh-keygen -A >/dev/null 2>/dev/null
fi

# Background monitoring of non-ssh related tasks

# is_collect_info_tarball PATH
# Succeeds only when PATH names a collect-info tarball located directly in
# /persist; collect-info.sh writes its output as
# /persist/eve-info-v<VERSION>-<DATE>.tar.gz.
is_collect_info_tarball() {
    case "$1" in
        /persist/eve-info-v*.tar.gz) ;;
        *) return 1 ;;
    esac
    # '*' in a case pattern also matches '/', so refuse anything that is not
    # a plain file name right below /persist
    # (e.g. '/persist/eve-info-v/../../etc/passwd.tar.gz')
    case "${1#/persist/}" in
        */*) return 1 ;;
    esac
    return 0
}

# monitor_file_and_execute_tasks
# Polls /run/edgeview every 5 seconds, forever, for edgeview request files.
#
# The protocol is:
#  - edgeview creates /run/edgeview/edgeview-request-collect-info; the debug
#    container runs 'collect-info.sh' to generate the tar.gz file and, when
#    the job is done, removes the request file.
#  - edgeview creates /run/edgeview/edgeview-request-remove-tar-gz whose
#    content is the path of the generated tar.gz file; the debug container
#    removes that file and then the request file.
#
# The removal request is only honoured for collect-info tarballs in /persist,
# so that a request file cannot be used to delete arbitrary files.
#
# If additional tasks are needed, this can monitor more request files or
# directories and perform different actions in the future.
monitor_file_and_execute_tasks() {
    while true; do
        if [ -f "/run/edgeview/edgeview-request-collect-info" ]; then
            echo "edgeview request to run collect-info..." > /dev/kmsg
            # the newlog part of the collection, only collect last 10 days
            /usr/bin/collect-info.sh -t 10
            echo "edgeview request collect-info done" > /dev/kmsg
            # remove the request file; -f because it may already be gone
            rm -f /run/edgeview/edgeview-request-collect-info
        fi
        if [ -f "/run/edgeview/edgeview-request-remove-tar-gz" ]; then
            fileToRemove=$(cat /run/edgeview/edgeview-request-remove-tar-gz)
            echo "edgeview request to remove $fileToRemove file" > /dev/kmsg
            if is_collect_info_tarball "$fileToRemove"; then
                if [ -f "$fileToRemove" ]; then
                    rm -f "$fileToRemove"
                fi
            else
                echo "edgeview request to remove $fileToRemove rejected, not a collect-info tarball" > /dev/kmsg
            fi
            # remove the request file; -f because it may already be gone
            rm -f /run/edgeview/edgeview-request-remove-tar-gz
        fi
        sleep 5
    done
}

# Launch the request monitor as a background job
monitor_file_and_execute_tasks &

if [ ! -f "/config/remote_access_disabled" ]; then
    # remote access is allowed: replace this shell with the ssh daemon
    exec /usr/sbin/sshd -D -e
else
    # this message is picked up by newlogd
    echo "Remote access disabled, ssh server not started" > /dev/kmsg
    # there is no sshd to keep the container running, so idle forever
    while :; do
        # sleep for INT_MAX
        sleep inf
    done
fi

24 changes: 21 additions & 3 deletions pkg/debug/scripts/collect-info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# Script version, don't forget to bump up once something is changed

VERSION=24
VERSION=25
# Add required packages here, it will be passed to "apk add".
# Once something added here don't forget to add the same package
# to the Dockerfile ('ENV PKGS' line) of the debug container,
Expand All @@ -20,6 +20,7 @@ INFO_DIR="eve-info-v$VERSION-$DATE"
TARBALL_FILE="/persist/$INFO_DIR.tar.gz"
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")

READ_LOGS_DAYS=
READ_LOGS_DEV=
READ_LOGS_APP=
TAR_WHOLE_SYS=
Expand Down Expand Up @@ -47,11 +48,12 @@ usage()
echo "Read-logs mode:"
echo " -d - read device logs only"
echo " -a APPLICATION-UUID - read specified application logs only"
echo " -t NUMBER-OF-DAYS - read logs from the last NUMBER-OF-DAYS [1-30]"
echo " -j - output logs in json"
exit 1
}

while getopts "vhsa:dj" o; do
while getopts "vhsa:djt:" o; do
case "$o" in
h)
usage
Expand All @@ -63,6 +65,13 @@ while getopts "vhsa:dj" o; do
a)
READ_LOGS_APP="$OPTARG"
;;
t)
READ_LOGS_DAYS="$OPTARG"
if [ "$READ_LOGS_DAYS" -lt 1 ] || [ "$READ_LOGS_DAYS" -gt 30 ]; then
echo "Error: READ_LOGS_DAYS must be between 1 and 30."
exit 1
fi
;;
d)
READ_LOGS_DEV=1
;;
Expand Down Expand Up @@ -157,6 +166,7 @@ fi
# Create temporary dir
echo "- basic setup"
TMP_DIR=$(mktemp -d)
LOG_TMP_DIR="$TMP_DIR/dayslogs"
DIR="$TMP_DIR/$INFO_DIR"
mkdir -p "$DIR"
mkdir -p "$DIR/network"
Expand Down Expand Up @@ -433,9 +443,17 @@ find /sys/kernel/security -name "tpm*" | while read -r TPM; do
fi
done

if [ -n "$READ_LOGS_DAYS" ]; then
mkdir -p "$LOG_TMP_DIR"
# Find and copy log files from /persist/newlog to $LOG_TMP_DIR in previous days
find /persist/newlog -type f -mtime -"$READ_LOGS_DAYS" -exec ln -s {} "$LOG_TMP_DIR" \;
ln -s "$LOG_TMP_DIR" "$DIR/persist-newlog"
else
ln -s /persist/newlog "$DIR/persist-newlog"
fi

ln -s /persist/status "$DIR/persist-status"
ln -s /persist/log "$DIR/persist-log"
ln -s /persist/newlog "$DIR/persist-newlog"
ln -s /persist/netdump "$DIR/persist-netdump"
ln -s /persist/kcrashes "$DIR/persist-kcrashes"
[ -d /persist/memory-monitor/output ] && ln -s /persist/memory-monitor/output "$DIR/persist-memory-monitor-output"
Expand Down
21 changes: 0 additions & 21 deletions pkg/debug/ssh.sh

This file was deleted.

4 changes: 4 additions & 0 deletions pkg/edgeview/src/basics.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func initOpts() {
"watcher",
"zedagent",
"zedclient",
"zedkube",
"zedmanager",
"zedrouter",
"zfsmanager"}
Expand All @@ -115,6 +116,7 @@ func initOpts() {
"app",
"configitem",
"cat",
"collectinfo",
"cp",
"datastore",
"dmesg",
Expand Down Expand Up @@ -686,6 +688,8 @@ func printHelp(opt string) {
// system
case "configitem":
helpOn("configitem", "display the device configitem settings, highlight the non-default values")
case "collectinfo":
helpOn("collectinfo", "collect the device information using collect-info.sh and download a compressed file in tar.gz format")
case "cp":
helpOn("cp/<path>", "copy file from the device to locally mounted directory by specify the path")
helpExample("cp//config/device.cert.pem", "copy the /config/device.cert.pem file to local directory", true)
Expand Down
1 change: 1 addition & 0 deletions pkg/edgeview/src/copyfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
copyTarFiles
copyTechSupport
copyKubeConfig
copyCollectInfo
)

var (
Expand Down
3 changes: 3 additions & 0 deletions pkg/edgeview/src/edge-view.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ func main() {
} else if strings.HasPrefix(pqueryopt, "tar/") {
psysopt = pqueryopt
fstatus.cType = copyTarFiles
} else if strings.HasPrefix(pqueryopt, "collectinfo") {
psysopt = pqueryopt
fstatus.cType = copyCollectInfo
} else {
_, err := checkOpts(pqueryopt, netopts)
if err != nil {
Expand Down
Loading

0 comments on commit 3578fc3

Please sign in to comment.