commit - 8cc7eb12ea0d2cd26be310c5b02e32ac6a47b5a9
commit + 4c9b38eda9a90a091d25041dc45c8669adf9639e
blob - 5217edd9bc9894e37dd088138285c1925d5fe5be
blob + 61d717d8fd989813ca141faf91da108914673579
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
import logging
import os
import pickle
-import sys
from tempfile import SpooledTemporaryFile
+import time
from typing import (
Any,
Callable,
logger = logging.getLogger(__name__)
heads_logger = logger.getChild("refs")
+remote_logger = logger.getChild("remote")
+
+# How often to log messages for long-running operations, in seconds
+LOGGING_INTERVAL = 30
+
+
+def split_lines_and_remainder(buf: bytes) -> Tuple[List[bytes], bytes]:
+ """Get newline-terminated (``b"\\r"`` or ``b"\\n"``) lines from `buf`,
+ and the beginning of the last line if it isn't terminated."""
+
+ lines = buf.splitlines(keepends=True)
+ if not lines:
+ return [], b""
+
+ if buf.endswith((b"\r", b"\n")):
+ # The buffer ended with a newline, everything can be sent as lines
+ return lines, b""
+ else:
+ # The buffer didn't end with a newline, we need to keep the
+ # last bit as the beginning of the next line
+ return lines[:-1], lines[-1]
class RepoRepresentation:
statsd=self.statsd,
)
- def do_progress(msg: bytes) -> None:
- sys.stderr.buffer.write(msg)
- sys.stderr.flush()
+ # Remote logging utilities
+
+ # Number of lines (ending with a carriage return) elided when debug
+ # logging is not enabled
+ remote_lines_elided = 0
+
+ # Timestamp where the last elision was logged
+ last_elision_logged = time.monotonic()
+
+ def maybe_log_elision(force: bool = False):
+ nonlocal remote_lines_elided
+ nonlocal last_elision_logged
+
+ if remote_lines_elided and (
+ force
+ # Always log at least every LOGGING_INTERVAL
+ or time.monotonic() > last_elision_logged + LOGGING_INTERVAL
+ ):
+ remote_logger.info(
+ "%s remote line%s elided",
+ remote_lines_elided,
+ "s" if remote_lines_elided > 1 else "",
+ )
+ remote_lines_elided = 0
+ last_elision_logged = time.monotonic()
+ def log_remote_message(line: bytes):
+ nonlocal remote_lines_elided
+
+ do_debug = remote_logger.isEnabledFor(logging.DEBUG)
+
+ if not line.endswith(b"\n"):
+ # This is a verbose line, ending with a carriage return only
+ if do_debug:
+ if stripped := line.strip():
+ remote_logger.debug(
+ "remote: %s", stripped.decode("utf-8", "backslashreplace")
+ )
+ else:
+ remote_lines_elided += 1
+ maybe_log_elision()
+ else:
+ # This is the last line in the current section, we will always log it
+ maybe_log_elision(force=True)
+ if stripped := line.strip():
+ remote_logger.info(
+ "remote: %s", stripped.decode("utf-8", "backslashreplace")
+ )
+
+ # This buffer keeps the end of what do_remote has received, across
+ # calls, if it happens to be unterminated
+ next_line_buf = b""
+
+ def do_remote(msg: bytes) -> None:
+ nonlocal next_line_buf
+
+ lines, next_line_buf = split_lines_and_remainder(next_line_buf + msg)
+
+ for line in lines:
+ log_remote_message(line)
+
try:
with raise_not_found_repository():
fetch_info = self.fetch_pack_from_origin(
- self.origin.url, base_repo, do_progress
+ self.origin.url, base_repo, do_remote
)
except NotFound:
# NotFound inherits from ValueError and should not be caught
self.dumb = dumb.check_protocol(self.origin.url, self.requests_extra_kwargs)
if not self.dumb:
raise
+ else:
+ # Always log what remains in the next_line_buf, if it's not empty
+ maybe_log_elision(force=True)
+ log_remote_message(next_line_buf)
logger.debug(
"Protocol used for communication: %s", "dumb" if self.dumb else "smart"
blob - 0ced59cf70860949970d8df78a860481e61bea48
blob + 25c460ee36f0f78b3c27962a9d05df9edb660190
--- swh/loader/git/tests/test_loader.py
+++ swh/loader/git/tests/test_loader.py
from requests import HTTPError
from swh.loader.git import converters, dumb
-from swh.loader.git.loader import FetchPackReturn, GitLoader
+from swh.loader.git.loader import FetchPackReturn, GitLoader, split_lines_and_remainder
from swh.loader.git.tests.test_from_disk import SNAPSHOT1, FullGitLoaderTests
from swh.loader.tests import (
assert_last_visit_matches,
f"Pack file too big for repository {repo_url}, "
f"limit is {loader.pack_size_bytes} bytes, current size is {big_size_kib*1024}"
)
+
+
+@pytest.mark.parametrize(
+ "input,output",
+ (
+ (b"", ([], b"")),
+ (b"trailing", ([], b"trailing")),
+ (b"line1\r", ([b"line1\r"], b"")),
+ (b"line1\rtrailing", ([b"line1\r"], b"trailing")),
+ (b"line1\r\ntrailing", ([b"line1\r\n"], b"trailing")),
+ (b"line1\r\nline2\ntrailing", ([b"line1\r\n", b"line2\n"], b"trailing")),
+ (b"line1\r\nline2\nline3\r", ([b"line1\r\n", b"line2\n", b"line3\r"], b"")),
+ ),
+)
+def test_split_lines_and_remainder(input, output):
+ assert split_lines_and_remainder(input) == output