Skip to content
This repository was archived by the owner on Jun 4, 2021. It is now read-only.

Commit f8b5ad8

Browse files
authored
Merge pull request #132 from google/upstream-1544753093
Add docker cache directory
2 parents a6f2d42 + eeffdb4 commit f8b5ad8

File tree

5 files changed

+125
-13
lines changed

5 files changed

+125
-13
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ optional arguments:
7171
from DOCKER_CONFIG
7272
--stderrthreshold STDERRTHRESHOLD
7373
Write log events at or above this level to stderr.
74-
7574
```
7675

7776
## pusher.par
@@ -107,7 +106,6 @@ optional arguments:
107106
from DOCKER_CONFIG
108107
--stderrthreshold STDERRTHRESHOLD
109108
Write log events at or above this level to stderr.
110-
111109
```
112110

113111
## importer.par

client/v2_2/save_.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from __future__ import print_function
2020

21+
import errno
2122
import io
2223
import json
2324
import os
@@ -140,8 +141,10 @@ def tarball(name, image,
140141
multi_image_tarball({name: image}, tar, {})
141142

142143

143-
def fast(image, directory,
144-
threads = 1):
144+
def fast(image,
145+
directory,
146+
threads = 1,
147+
cache_directory = None):
145148
"""Produce a FromDisk compatible file layout under the provided directory.
146149
147150
After calling this, the following filesystem will exist:
@@ -162,6 +165,7 @@ def fast(image, directory,
162165
image: a docker image to save.
163166
directory: an existing empty directory under which to save the layout.
164167
threads: the number of threads to use when performing the upload.
168+
cache_directory: directory that stores file cache.
165169
166170
Returns:
167171
A tuple whose first element is the path to the config file, and whose second
@@ -174,6 +178,38 @@ def write_file(name, accessor,
174178
with io.open(name, u'wb') as f:
175179
f.write(accessor(arg))
176180

181+
def write_file_and_store(name, accessor,
182+
arg, cached_layer):
183+
write_file(cached_layer, accessor, arg)
184+
link(cached_layer, name)
185+
186+
def link(source, dest):
187+
"""Creates a symbolic link dest pointing to source.
188+
189+
Unlinks first to remove "old" layers if needed
190+
e.g., image A latest has layers 1, 2 and 3
191+
after a while it has layers 1, 2 and 3'.
192+
Since in both cases the layers are named 001, 002 and 003,
193+
unlinking promises the correct layers are linked in the image directory.
194+
195+
Args:
196+
source: image directory source.
197+
dest: image directory destination.
198+
"""
199+
try:
200+
os.symlink(source, dest)
201+
except OSError as e:
202+
if e.errno == errno.EEXIST:
203+
os.unlink(dest)
204+
os.symlink(source, dest)
205+
else:
206+
raise e
207+
208+
def valid(cached_layer, digest):
209+
with io.open(cached_layer, u'rb') as f:
210+
current_digest = docker_digest.SHA256(f.read(), '')
211+
return current_digest == digest
212+
177213
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
178214
future_to_params = {}
179215
config_file = os.path.join(directory, 'config.json')
@@ -192,18 +228,30 @@ def write_file(name, accessor,
192228
layers = []
193229
for blob in reversed(image.fs_layers()):
194230
# Create a local copy
231+
layer_name = os.path.join(directory, '%03d.tar.gz' % idx)
195232
digest_name = os.path.join(directory, '%03d.sha256' % idx)
233+
# Strip the sha256: prefix
234+
digest = blob[7:].encode('utf8')
196235
f = executor.submit(
197236
write_file,
198237
digest_name,
199-
# Strip the sha256: prefix
200238
lambda blob: blob[7:].encode('utf8'),
201239
blob)
202240
future_to_params[f] = digest_name
203241

204-
layer_name = os.path.join(directory, '%03d.tar.gz' % idx)
205-
f = executor.submit(write_file, layer_name, image.blob, blob)
206-
future_to_params[f] = layer_name
242+
if cache_directory:
243+
# Search for a local cached copy
244+
cached_layer = os.path.join(cache_directory, digest)
245+
if os.path.exists(cached_layer) and valid(cached_layer, digest):
246+
f = executor.submit(link, cached_layer, layer_name)
247+
future_to_params[f] = layer_name
248+
else:
249+
f = executor.submit(write_file_and_store, layer_name, image.blob,
250+
blob, cached_layer)
251+
future_to_params[f] = layer_name
252+
else:
253+
f = executor.submit(write_file, layer_name, image.blob, blob)
254+
future_to_params[f] = layer_name
207255

208256
layers.append((digest_name, layer_name))
209257
idx += 1

puller_test.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
# Trick to chase the symlink before the docker build.
2020
cp -f puller.par puller2.par
2121

22+
timing=-1
23+
2224
# Test pulling an image by just invoking the puller
2325
function test_puller() {
2426
local image=$1
@@ -81,6 +83,53 @@ function test_image() {
8183
test_base "${image}" python2.7 gcr.io/cloud-builders/bazel
8284
}
8385

86+
function test_puller_with_cache() {
87+
local image=$1
88+
89+
# Test it in our current environment.
90+
puller.par --name="${image}" --directory=/tmp/ --cache=/tmp/containerregistry_docker_cache_dir
91+
}
92+
function test_image_with_cache() {
93+
local image=$1
94+
95+
test_image_with_timing "${image}"
96+
local first_pull_timing=$timing
97+
echo "TIMING: ${image} - First pull took ${first_pull_timing} seconds"
98+
99+
test_image_with_timing "${image}"
100+
local second_pull_timing=$timing
101+
echo "TIMING: ${image} - Second pull took ${second_pull_timing} seconds"
102+
# TODO - is there a better way to test that the cache was used besides asserting that first_pull > second_pull?
103+
}
104+
105+
function test_image_with_timing() {
106+
local image=$1
107+
108+
echo "TESTING: ${image}"
109+
local pull_start=$(date +%s)
110+
test_puller_with_cache "${image}"
111+
local pull_end=$(date +%s)
112+
timing=$(($pull_end-$pull_start))
113+
114+
test_base "${image}" python2.7 python:2.7
115+
test_base "${image}" python2.7 gcr.io/cloud-builders/bazel
116+
}
117+
118+
function clear_cache_directory() {
119+
rm -fr /tmp/containerregistry_docker_cache_dir
120+
}
121+
122+
function create_cache_directory() {
123+
mkdir -p /tmp/containerregistry_docker_cache_dir
124+
}
125+
126+
clear_cache_directory
127+
128+
create_cache_directory
129+
130+
# Test pulling with cache
131+
test_image_with_cache gcr.io/google-appengine/python:latest
132+
84133
# Test pulling a trivial image.
85134
test_image gcr.io/google-containers/pause:2.0
86135

@@ -125,3 +174,6 @@ test_puller_multiplatform index.docker.io/library/busybox:1.29.3 \
125174
# TODO: add multiplatform test cases on --os-features and --features
126175

127176
# TODO(user): Add an authenticated pull test.
177+
178+
clear_cache_directory
179+

tools/fast_puller_.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
action='store',
6161
help='The path to the directory where the client configuration files are '
6262
'located. Overiddes the value from DOCKER_CONFIG')
63+
parser.add_argument(
64+
'--cache', action='store', help='Image\'s files cache directory.')
6365

6466
_THREADS = 8
6567

@@ -108,20 +110,32 @@ def main():
108110
platform = platform_args.FromArgs(args)
109111
# pytype: disable=wrong-arg-types
110112
with img_list.resolve(platform) as default_child:
111-
save.fast(default_child, args.directory, threads=_THREADS)
113+
save.fast(
114+
default_child,
115+
args.directory,
116+
threads=_THREADS,
117+
cache_directory=args.cache)
112118
return
113119
# pytype: enable=wrong-arg-types
114120

115121
logging.info('Pulling v2.2 image from %r ...', name)
116122
with v2_2_image.FromRegistry(name, creds, transport, accept) as v2_2_img:
117123
if v2_2_img.exists():
118-
save.fast(v2_2_img, args.directory, threads=_THREADS)
124+
save.fast(
125+
v2_2_img,
126+
args.directory,
127+
threads=_THREADS,
128+
cache_directory=args.cache)
119129
return
120130

121131
logging.info('Pulling v2 image from %r ...', name)
122132
with v2_image.FromRegistry(name, creds, transport) as v2_img:
123133
with v2_compat.V22FromV2(v2_img) as v2_2_img:
124-
save.fast(v2_2_img, args.directory, threads=_THREADS)
134+
save.fast(
135+
v2_2_img,
136+
args.directory,
137+
threads=_THREADS,
138+
cache_directory=args.cache)
125139
return
126140
# pylint: disable=broad-except
127141
except Exception as e:

transform/v2_2/metadata_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def Override(data,
160160

161161
output['config'] = defaults.get('config', {})
162162

163-
# pytype: disable=attribute-error
163+
# pytype: disable=attribute-error,unsupported-operands
164164
if options.entrypoint:
165165
output['config']['Entrypoint'] = options.entrypoint
166166
if options.cmd:
@@ -204,7 +204,7 @@ def Override(data,
204204

205205
if options.workdir:
206206
output['config']['WorkingDir'] = options.workdir
207-
# pytype: enable=attribute-error
207+
# pytype: enable=attribute-error,unsupported-operands
208208

209209
# diff_ids are ordered from bottom-most to top-most
210210
diff_ids = defaults.get('rootfs', {}).get('diff_ids', [])

0 commit comments

Comments
 (0)