forked from yandexdataschool/Practical_DL
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Your Name
committed
Apr 18, 2023
1 parent
8d22ea8
commit c78290c
Showing
6 changed files
with
2,366 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[__slides__](https://yadi.sk/i/eRVlESjqlIPBGw) | ||
|
||
|
||
## Materials | ||
|
||
- __In English:__ | ||
* Deep learning frameworks - [video](https://www.youtube.com/watch?v=Vf_-OkqbwPo) | ||
* [PyTorch tutorial](https://www.youtube.com/watch?v=VMcRWYEKmhw) | ||
* [Tensorflow tutorial](https://www.youtube.com/watch?v=FQ660T4uu7k) | ||
|
||
- __In Russian:__ | ||
* [Pytorch tutorial](https://yadi.sk/i/O3mQ76u43So3h9) __recommended__ | ||
* [Tensorflow tutorial](https://www.youtube.com/watch?v=FQ660T4uu7k) (English only for now; links to a Russian version are welcome) | ||
|
||
## More on DL frameworks | ||
- A lecture on nonlinearities, initializations and other tricks in deep learning (karpathy) - [video](https://www.youtube.com/watch?v=GUtlrDbHhJM) | ||
- A lecture on activations, recap of adaptive SGD and dropout (karpathy) - [video](https://www.youtube.com/watch?v=KaR4lIdI1MQ) | ||
- [a deep learning neophyte cheat sheet](http://www.kdnuggets.com/2016/03/must-know-tips-deep-learning-part-1.html) | ||
- [bonus video] Deep learning philosophy: [our humble take](https://www.youtube.com/watch?v=9qyE1Ev1Xdw) (English) | ||
- [reading] on weight initialization: [blog post](http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization) | ||
- [reading] pretty much all the module 1 of http://cs231n.github.io/ | ||
|
||
|
||
## Practice | ||
|
||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yandexdataschool/Practical_DL/blob/spring33/week02_autodiff/seminar_pytorch.ipynb) | ||
|
||
As usual, go to `seminar_pytorch.ipynb` and follow instructions from there. You will also need to pass `homework_pytorch.ipynb` for full score. | ||
|
||
__Alternative (TensorFlow):__ a similar tutorial for TensorFlow is provided in `tensorflow.ipynb`. From now on, you *can* submit assignments in any framework - but you will have to do some extra engineering in that case. However, unless you're already proficient with PyTorch, we recommend you stick to it. | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import sys | ||
import os | ||
import time | ||
|
||
import numpy as np | ||
|
||
__doc__="""taken from https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py""" | ||
|
||
def load_dataset():
    """Load MNIST as NumPy arrays, downloading any missing archive first.

    The four gzip archives are looked up in, and downloaded to, the current
    working directory.

    Returns:
        A tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
        Image arrays are float32 with shape (examples, 1, 28, 28) and values
        in [0, 255/256]; label arrays are flat uint8 vectors. The validation
        set is the last 10000 examples of the training archive.
    """
    # Pick the matching urlretrieve so the loader runs on Python 2 and 3.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve
    import gzip

    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
        print("Downloading %s" % filename)
        # Fetch into a scratch file and rename only on success: an
        # interrupted transfer must not leave a truncated archive under the
        # final name, where later runs would mistake it for a complete one.
        partial = filename + '.part'
        try:
            urlretrieve(source + filename, partial)
            os.rename(partial, filename)
        finally:
            if os.path.exists(partial):
                os.remove(partial)

    def read_payload(filename, header_size):
        # Return the archive's bytes after its header, fetching it if needed.
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, 'rb') as f:
            return np.frombuffer(f.read(), np.uint8, offset=header_size)

    def load_mnist_images(filename):
        # Image archives carry a 16-byte header in Yann LeCun's format.
        # The flat byte vector becomes monochrome 2D images following the
        # convention (examples, channels, rows, columns).
        pixels = read_payload(filename, 16).reshape(-1, 1, 28, 28)
        # Dividing by 256 (not 255) gives float32 in [0, 255/256], for
        # compatibility with the version provided at
        # http://deeplearning.net/data/mnist/mnist.pkl.gz.
        return pixels / np.float32(256)

    def load_mnist_labels(filename):
        # Label archives carry an 8-byte header; the rest is already the
        # vector of integer class labels we want.
        return read_payload(filename, 8)

    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    # Reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    return X_train, y_train, X_val, y_val, X_test, y_test
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
from glob import glob | ||
|
||
import numpy as np | ||
from matplotlib.pyplot import imread | ||
from skimage.transform import resize | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
def load_notmnist(path='./notMNIST_small', letters='ABCDEFGHIJ',
                  img_shape=(28, 28), test_size=0.25, one_hot=False):
    """Load notMNIST_small images and split them into train and test sets.

    If ``path`` does not exist, the archive is downloaded with ``wget`` and
    unpacked with ``tar`` in the current working directory. Both commands
    always produce ``./notMNIST_small``, whatever ``path`` is. If you have
    any problems, download and extract the archive manually.

    Args:
        path: directory holding one sub-directory of images per class.
        letters: class names to keep; a class's label is its index here.
        img_shape: size every image is resized to.
        test_size: passed through to ``train_test_split``.
        one_hot: if true, labels are returned as float32 one-hot rows with
            ``len(letters)`` columns instead of integer class indices.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. Images are float32,
        standardized with the global mean and standard deviation, and have a
        singleton axis inserted at position 1.

    Raises:
        RuntimeError: if the download or extraction command fails.
        ValueError: if no readable image of the requested classes is found.
    """
    if not os.path.exists(path):
        setup_steps = (
            ("Downloading data...",
             'wget http://yaroslavvb.com/upload/notMNIST/notMNIST_small.tar.gz'),
            ("Extracting ...",
             'tar -zxvf notMNIST_small.tar.gz > untar_notmnist.log'),
        )
        for message, command in setup_steps:
            print(message)
            # An explicit check rather than `assert`: asserts are stripped
            # under `python -O`, which would skip the command entirely.
            if os.system(command) != 0:
                raise RuntimeError("command failed: %s" % command)

    # Build the label map first so class filtering is an exact-name lookup.
    # A plain `in letters` on a string would also accept substrings such
    # as 'AB'.
    letter_to_i = {l: i for i, l in enumerate(letters)}

    data, labels = [], []
    print("Parsing...")
    for img_path in glob(os.path.join(path, '*/*')):
        class_i = img_path.split(os.sep)[-2]
        if class_i not in letter_to_i:
            continue
        try:
            image = resize(imread(img_path), img_shape)
        except Exception:
            # Unreadable files are reported and skipped. Catching Exception
            # rather than BaseException lets Ctrl-C still abort the parse.
            print(
                "found broken img: %s [it's ok if <10 images are broken]" %
                img_path)
        else:
            data.append(image)
            labels.append(letter_to_i[class_i])

    if not data:
        raise ValueError(
            "no readable images for classes %r found under %s"
            % (letters, path))

    # Insert the singleton axis at position 1, then standardize globally.
    data = np.stack(data)[:, None].astype('float32')
    data = (data - np.mean(data)) / np.std(data)

    labels = np.array(labels)

    if one_hot:
        # Width comes from the number of requested classes, so it stays
        # stable even when the last classes have no images on disk.
        labels = (np.arange(len(letters))
                  [None, :] == labels[:, None]).astype('float32')

    # split into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=test_size, random_state=42)

    print("Done")
    return X_train, y_train, X_test, y_test
Oops, something went wrong.