Skip to content

Commit

Permalink
week02
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Apr 18, 2023
1 parent 8d22ea8 commit c78290c
Show file tree
Hide file tree
Showing 6 changed files with 2,366 additions and 0 deletions.
31 changes: 31 additions & 0 deletions week02_autodiff/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[__slides__](https://yadi.sk/i/eRVlESjqlIPBGw)


## Materials

- __In English:__
* Deep learning frameworks - [video](https://www.youtube.com/watch?v=Vf_-OkqbwPo)
* [PyTorch tutorial](https://www.youtube.com/watch?v=VMcRWYEKmhw)
* [Tensorflow tutorial](https://www.youtube.com/watch?v=FQ660T4uu7k)

- __In Russian:__
* [PyTorch tutorial](https://yadi.sk/i/O3mQ76u43So3h9) __recommended__
* [Tensorflow tutorial](https://www.youtube.com/watch?v=FQ660T4uu7k) (English only for now. Links are welcome)

## More on DL frameworks
- A lecture on nonlinearities, initializations and other tricks in deep learning (karpathy) - [video](https://www.youtube.com/watch?v=GUtlrDbHhJM)
- A lecture on activations, recap of adaptive SGD and dropout (karpathy) - [video](https://www.youtube.com/watch?v=KaR4lIdI1MQ)
- [a deep learning neophyte cheat sheet](http://www.kdnuggets.com/2016/03/must-know-tips-deep-learning-part-1.html)
- [bonus video] Deep learning philosophy: [our humble take](https://www.youtube.com/watch?v=9qyE1Ev1Xdw) (english)
- [reading] on weight initialization: [blog post](http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization)
- [reading] pretty much all the module 1 of http://cs231n.github.io/


## Practice

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yandexdataschool/Practical_DL/blob/spring33/week02_autodiff/seminar_pytorch.ipynb)

As usual, go to `seminar_pytorch.ipynb` and follow instructions from there. You will also need to pass `homework_pytorch.ipynb` for full score.

__Alternative (TensorFlow):__ a similar tutorial for TensorFlow is provided in `tensorflow.ipynb`. From now on, you *can* submit assignments in any framework - but you will have to do some extra engineering in that case. However, unless you're already proficient with PyTorch, we recommend you stick to it.

499 changes: 499 additions & 0 deletions week02_autodiff/homework.ipynb

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions week02_autodiff/mnist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import sys
import os
import time

import numpy as np

# Module docstring, assigned explicitly because it comes after the imports;
# it credits the upstream Lasagne example this file was taken from.
__doc__="""taken from https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py"""

def load_dataset():
    """Load MNIST as NumPy arrays, downloading the archives if they are missing.

    The four gzip archives are looked up (and, when absent, downloaded) under
    their canonical file names relative to the current working directory.

    Returns:
        A tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
        Image arrays are float32 with shape ``(examples, 1, 28, 28)`` and
        values in ``[0, 255/256]``; label arrays are 1-D uint8 vectors.
        The last 10000 training examples are split off as the validation set.
    """
    # urlretrieve lives in a different module on Python 2 and Python 3.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve

    import gzip

    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
        """Fetch ``source + filename`` into ``filename`` atomically."""
        print("Downloading %s" % filename)
        # Download under a temporary name first: an interrupted transfer must
        # not leave a truncated archive under the final name, because the
        # os.path.exists() checks below would treat it as a finished download.
        partial = filename + '.part'
        try:
            urlretrieve(source + filename, partial)
            os.rename(partial, filename)
        finally:
            # Only still present if the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    def load_mnist_images(filename):
        """Read an IDX image archive into a float32 (N, 1, 28, 28) array."""
        if not os.path.exists(filename):
            download(filename)
        # Read the inputs in Yann LeCun's binary format; the first 16 bytes
        # are the header and are skipped.
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        # The inputs are flat vectors now; reshape them to monochrome 2D
        # images following the convention (examples, channels, rows, columns).
        data = data.reshape(-1, 1, 28, 28)
        # Convert bytes to float32. Dividing by 256 (not 255) gives the range
        # [0, 255/256], for compatibility with the version provided at
        # http://deeplearning.net/data/mnist/mnist.pkl.gz.
        return data / np.float32(256)

    def load_mnist_labels(filename):
        """Read an IDX label archive into a 1-D uint8 array."""
        if not os.path.exists(filename):
            download(filename)
        # Read the labels in Yann LeCun's binary format; the first 8 bytes
        # are the header and are skipped.
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        # The labels are a vector of integers now, which is what we want.
        return data

    # Download (if needed) and read the training and test images and labels.
    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    # Reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    return X_train, y_train, X_val, y_val, X_test, y_test




52 changes: 52 additions & 0 deletions week02_autodiff/notmnist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os
from glob import glob

import numpy as np
from matplotlib.pyplot import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split


def load_notmnist(path='./notMNIST_small', letters='ABCDEFGHIJ',
                  img_shape=(28, 28), test_size=0.25, one_hot=False):
    """Load the notMNIST_small letter images and split them into train/test.

    If ``path`` does not exist, the archive is fetched with ``wget`` and
    unpacked with ``tar`` in the current working directory (both tools must
    be available). If you have any problems, download and extract it manually.

    Args:
        path: directory containing one sub-directory per letter class.
        letters: classes to keep; a letter's position in this sequence is
            its integer label.
        img_shape: shape every image is resized to.
        test_size: forwarded to ``train_test_split``.
        one_hot: if true, labels are returned as float32 one-hot rows with
            ``len(letters)`` columns instead of integer class indices.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. Images are float32, have a
        singleton axis inserted after the example axis, and are standardized
        with the mean and std of the whole dataset (computed before the split).

    Raises:
        RuntimeError: if downloading or extracting the archive fails.
        ValueError: if no readable image of the requested letters is found.
    """
    def run(command):
        # Deliberately not wrapped in `assert`: assert statements are removed
        # under `python -O`, which would silently skip the command itself.
        status = os.system(command)
        if status != 0:
            raise RuntimeError(
                "command failed with status %s: %s" % (status, command))

    if not os.path.exists(path):
        print("Downloading data...")
        run('wget http://yaroslavvb.com/upload/notMNIST/notMNIST_small.tar.gz')
        print("Extracting ...")
        run('tar -zxvf notMNIST_small.tar.gz > untar_notmnist.log')

    # Map each class name to its integer label. Membership is tested against
    # this dict rather than against `letters`, so that a directory such as
    # "AB" is not accepted as a substring of the default letters string.
    letter_to_i = {letter: i for i, letter in enumerate(letters)}

    data, labels = [], []
    print("Parsing...")
    # Sorted so that the seeded split below does not depend on the
    # filesystem's directory listing order.
    for img_path in sorted(glob(os.path.join(path, '*/*'))):
        class_i = img_path.split(os.sep)[-2]
        if class_i not in letter_to_i:
            continue
        try:
            image = resize(imread(img_path), img_shape)
        except Exception:
            # Best effort: skip unreadable files, but let KeyboardInterrupt
            # and SystemExit propagate.
            print(
                "found broken img: %s [it's ok if <10 images are broken]" %
                img_path)
            continue
        data.append(image)
        labels.append(letter_to_i[class_i])

    if not data:
        raise ValueError(
            "no readable images for letters %r found under %r"
            % (letters, path))

    # Insert the singleton axis and standardize over the whole dataset.
    data = np.stack(data)[:, None].astype('float32')
    data = (data - np.mean(data)) / np.std(data)

    labels = np.array(labels)

    if one_hot:
        # Width comes from the number of requested classes, so it stays
        # stable even when the highest-indexed letters have no images.
        labels = (np.arange(len(letters))
                  [None, :] == labels[:, None]).astype('float32')

    # split into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=test_size, random_state=42)

    print("Done")
    return X_train, y_train, X_test, y_test
Loading

0 comments on commit c78290c

Please sign in to comment.