Skip to content

Commit a85abcc

Browse files
committed
data: status: introduce --not-in-remote
Also supports cloud versioning. Fixes #8761
1 parent d2869e2 commit a85abcc

File tree

4 files changed

+76
-4
lines changed

4 files changed

+76
-4
lines changed

dvc/commands/data.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,22 @@
2121

2222
class CmdDataStatus(CmdBase):
2323
COLORS = {
24+
"not_in_remote": "red",
2425
"not_in_cache": "red",
2526
"committed": "green",
2627
"uncommitted": "yellow",
2728
"untracked": "cyan",
2829
}
2930
LABELS = {
31+
"not_in_remote": "Not in remote",
3032
"not_in_cache": "Not in cache",
3133
"committed": "DVC committed changes",
3234
"uncommitted": "DVC uncommitted changes",
3335
"untracked": "Untracked files",
3436
"unchanged": "DVC unchanged files",
3537
}
3638
HINTS = {
39+
"not_in_remote": ('use "dvc push <file>..." to upload files',),
3740
"not_in_cache": ('use "dvc fetch <file>..." to download files',),
3841
"committed": ("git commit the corresponding dvc files to update the repo",),
3942
"uncommitted": (
@@ -114,6 +117,7 @@ def run(self) -> int:
114117
status = self.repo.data_status(
115118
granular=self.args.granular,
116119
untracked_files=self.args.untracked_files,
120+
not_in_remote=self.args.not_in_remote,
117121
)
118122

119123
if not self.args.unchanged:
@@ -239,6 +243,12 @@ def add_parser(subparsers, parent_parser):
239243
nargs="?",
240244
help="Show untracked files.",
241245
)
246+
data_status_parser.add_argument(
247+
"--not-in-remote",
248+
action="store_true",
249+
default=False,
250+
help="Show files missing from remote.",
251+
)
242252
data_status_parser.set_defaults(func=CmdDataStatus)
243253

244254
DATA_LS_HELP = "List data tracked by DVC with its metadata."

dvc/repo/data.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ def _diff(
5959
*,
6060
granular: bool = False,
6161
not_in_cache: bool = False,
62+
not_in_remote: bool = False,
6263
) -> Dict[str, List[str]]:
64+
from dvc_data.index import StorageError
6365
from dvc_data.index.diff import UNCHANGED, UNKNOWN, diff
6466

6567
ret: Dict[str, List[str]] = {}
@@ -102,6 +104,17 @@ def _add_change(typ, change):
102104
# NOTE: emulating previous behaviour
103105
_add_change("not_in_cache", change)
104106

107+
try:
108+
if (
109+
not_in_remote
110+
and change.old
111+
and change.old.hash_info
112+
and not old.storage_map.remote_exists(change.old)
113+
):
114+
_add_change("not_in_remote", change)
115+
except StorageError:
116+
pass
117+
105118
_add_change(change.typ, change)
106119

107120
return ret
@@ -177,6 +190,7 @@ def _diff_head_to_index(
177190

178191
class Status(TypedDict):
179192
not_in_cache: List[str]
193+
not_in_remote: List[str]
180194
committed: Dict[str, List[str]]
181195
uncommitted: Dict[str, List[str]]
182196
untracked: List[str]
@@ -203,12 +217,20 @@ def _transform_git_paths_to_dvc(repo: "Repo", files: Iterable[str]) -> List[str]
203217
return [repo.fs.path.relpath(file, start) for file in files]
204218

205219

206-
def status(repo: "Repo", untracked_files: str = "no", **kwargs: Any) -> Status:
220+
def status(
221+
repo: "Repo",
222+
untracked_files: str = "no",
223+
not_in_remote: bool = False,
224+
**kwargs: Any,
225+
) -> Status:
207226
from dvc.scm import NoSCMError, SCMError
208227

209228
head = kwargs.pop("head", "HEAD")
210-
uncommitted_diff = _diff_index_to_wtree(repo, **kwargs)
211-
not_in_cache = uncommitted_diff.pop("not_in_cache", [])
229+
uncommitted_diff = _diff_index_to_wtree(
230+
repo,
231+
not_in_remote=not_in_remote,
232+
**kwargs,
233+
)
212234
unchanged = set(uncommitted_diff.pop("unchanged", []))
213235

214236
try:
@@ -223,7 +245,8 @@ def status(repo: "Repo", untracked_files: str = "no", **kwargs: Any) -> Status:
223245
untracked = _transform_git_paths_to_dvc(repo, untracked)
224246
# order matters here
225247
return Status(
226-
not_in_cache=not_in_cache,
248+
not_in_cache=uncommitted_diff.pop("not_in_cache", []),
249+
not_in_remote=uncommitted_diff.pop("not_in_remote", []),
227250
committed=committed_diff,
228251
uncommitted=uncommitted_diff,
229252
untracked=untracked,

tests/func/test_data_status.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"uncommitted": {},
1414
"git": {},
1515
"not_in_cache": [],
16+
"not_in_remote": [],
1617
"unchanged": [],
1718
"untracked": [],
1819
}
@@ -81,6 +82,7 @@ def test_directory(M, tmp_dir, dvc, scm):
8182
}
8283

8384
assert dvc.data_status(granular=True, untracked_files="all") == {
85+
**EMPTY_STATUS,
8486
"committed": {
8587
"added": M.unordered(
8688
join("dir", "bar"),
@@ -396,6 +398,42 @@ def test_missing_dir_object_from_index(M, tmp_dir, dvc, scm):
396398
}
397399

398400

401+
def test_missing_remote_cache(M, tmp_dir, dvc, scm, local_remote):
402+
tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
403+
tmp_dir.dvc_gen("foobar", "foobar")
404+
405+
assert dvc.data_status(untracked_files="all", not_in_remote=True) == {
406+
**EMPTY_STATUS,
407+
"untracked": M.unordered("foobar.dvc", "dir.dvc", ".gitignore"),
408+
"committed": {"added": M.unordered("foobar", join("dir", ""))},
409+
"not_in_remote": M.unordered("foobar", join("dir", "")),
410+
"git": M.dict(),
411+
}
412+
413+
assert dvc.data_status(
414+
granular=True, untracked_files="all", not_in_remote=True
415+
) == {
416+
**EMPTY_STATUS,
417+
"untracked": M.unordered("foobar.dvc", "dir.dvc", ".gitignore"),
418+
"committed": {
419+
"added": M.unordered(
420+
"foobar",
421+
join("dir", ""),
422+
join("dir", "foo"),
423+
join("dir", "bar"),
424+
)
425+
},
426+
"uncommitted": {},
427+
"not_in_remote": M.unordered(
428+
"foobar",
429+
join("dir", ""),
430+
join("dir", "foo"),
431+
join("dir", "bar"),
432+
),
433+
"git": M.dict(),
434+
}
435+
436+
399437
def test_root_from_dir_to_file(M, tmp_dir, dvc, scm):
400438
tmp_dir.dvc_gen({"data": {"foo": "foo", "bar": "bar"}})
401439
remove("data")

tests/unit/command/test_data_status.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def test_cli(dvc, mocker, mocked_status):
5151
assert cmd.run() == 0
5252
status.assert_called_once_with(
5353
untracked_files="all",
54+
not_in_remote=False,
5455
granular=True,
5556
)
5657

0 commit comments

Comments
 (0)