First, install caffe as being explained in my other post here.
Googlenet Model
Download the bvlc_googlenet.caffemodel from https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
and put it in
caffe/models/bvlc_googlenet/
PIP
1 |
sudo apt install python-pip |
IPython, scipy, Jupyter, protobuf, scikit-image
Always install in the user space with –user
1 |
pip install --user IPython==5.0 scipy Jupyter protobuf scikit-image |
Running jupyter notebook
1 2 |
export PYTHONPATH=/home/behnam/usr/python:$PYTHONPATH jupyter notebook |
open a new notebook and paste the following into it and correct the “model_path” and
img = np.float32(PIL.Image.open(‘/home/behnam/Downloads/fractal.jpg’)) according to your setup.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# imports and basic notebook setup from cStringIO import StringIO import numpy as np import scipy.ndimage as nd import PIL.Image from IPython.display import clear_output, Image, display from google.protobuf import text_format import caffe # If your GPU supports CUDA and Caffe was built with CUDA support, # uncomment the following to run Caffe operations on the GPU. caffe.set_mode_gpu() caffe.set_device(0) # select GPU device if multiple devices exist def showarray(a, fmt='jpeg'): a = np.uint8(np.clip(a, 0, 255)) f = StringIO() PIL.Image.fromarray(a).save(f, fmt) display(Image(data=f.getvalue())) #model_path = '../caffe/models/bvlc_googlenet/' # substitute your path here model_path = '/home/behnam/.softwares/caffe/models/bvlc_googlenet/' # substitute your path here net_fn = model_path + 'deploy.prototxt' param_fn = model_path + 'bvlc_googlenet.caffemodel' # Patching model to be able to compute gradients. # Note that you can also manually add "force_backward: true" line to "deploy.prototxt". model = caffe.io.caffe_pb2.NetParameter() text_format.Merge(open(net_fn).read(), model) model.force_backward = True open('tmp.prototxt', 'w').write(str(model)) net = caffe.Classifier('tmp.prototxt', param_fn, mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB # a couple of utility functions for converting to and from Caffe's input image layout def preprocess(net, img): return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data'] def deprocess(net, img): return np.dstack((img + net.transformer.mean['data'])[::-1]) def objective_L2(dst): dst.diff[:] = dst.data def make_step(net, step_size=1.5, end='inception_4c/output', jitter=32, clip=True, objective=objective_L2): '''Basic gradient ascent step.''' src = net.blobs['data'] # input image is stored in Net's 'data' blob dst = net.blobs[end] ox, oy = np.random.randint(-jitter, jitter+1, 2) src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift net.forward(end=end) objective(dst) # specify the optimization objective net.backward(start=end) g = src.diff[0] # apply normalized ascent step to the input image src.data[:] += step_size/np.abs(g).mean() * g src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image if clip: bias = net.transformer.mean['data'] src.data[:] = np.clip(src.data, -bias, 255-bias) def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4, end='inception_4c/output', clip=True, **step_params): # prepare base images for all octaves octaves = [preprocess(net, base_img)] for i in xrange(octave_n-1): octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1)) src = net.blobs['data'] detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details for octave, octave_base in enumerate(octaves[::-1]): h, w = octave_base.shape[-2:] if octave > 0: # upscale details from the previous octave h1, w1 = detail.shape[-2:] detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1) src.reshape(1,3,h,w) # resize the network's input image size src.data[0] = octave_base+detail for i in xrange(iter_n): make_step(net, end=end, clip=clip, **step_params) # visualization vis = deprocess(net, src.data[0]) if not clip: # adjust image contrast if clipping is disabled vis = vis*(255.0/np.percentile(vis, 99.98)) showarray(vis) print octave, i, end, vis.shape clear_output(wait=True) # extract details produced on the current octave detail = src.data[0]-octave_base # returning the resulting image return deprocess(net, src.data[0]) img = np.float32(PIL.Image.open('/home/behnam/Downloads/fractal.jpg')) #img = np.float32(PIL.Image.open('/home/behnam/Downloads/sky1024px.jpg')) showarray(img) !mkdir frames frame = img frame_i = 0 h, w = frame.shape[:2] s = 0.05 # scale coefficient for i in xrange(100): frame = deepdream(net, frame) PIL.Image.fromarray(np.uint8(frame)).save("frames/%04d.jpg"%frame_i) frame = nd.affine_transform(frame, [1-s,1-s,1], [h*s/2,w*s/2,0], order=1) frame_i += 1 Image(filename='frames/0029.jpg') |