outlace.github.io/theano2.html at master · outlace/outlace.github.io

History

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

<!DOCTYPE html>

<head>

<title>Δ ℚuantitative √ourney | Beginner Tutorial: Theano 2</title>

</head>

<body>

<nav>

<ul>

<li><a href="http://outlace.com/pages/about.html">About</a></li>

<li><a href="http://outlace.com/categories/">Categories</a></li>

<li><a href="http://outlace.com/archives/{slug}/">Archives</a></li>

</ul>

</nav>

<h1><a href="http://outlace.com/">Δ ℚuantitative √ourney</a></h1>

<h2>Science, Math, Statistics, Machine Learning ...</h2>

</div>

</header>

<a href="http://outlace.com/theano2.html" rel="bookmark" title="Permanent Link to &quot;Beginner Tutorial: Theano 2&quot;">Beginner Tutorial: Theano 2</a>

</h2>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h3 id="Beginner-Tutorial:-Neural-Networks-in-Theano">Beginner Tutorial: Neural Networks in Theano<a class="anchor-link" href="#Beginner-Tutorial:-Neural-Networks-in-Theano">¶</a></h3>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h4 id="What-is-Theano-and-why-should-I-use-it?">What is Theano and why should I use it?<a class="anchor-link" href="#What-is-Theano-and-why-should-I-use-it?">¶</a></h4>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Theano is part framework and part library for evaluating and optimizing mathematical expressions. It's popular in the machine learning world because it allows you to build up optimized symbolic computational graphs and the gradients can be automatically computed. Moreover, Theano also supports running code on the GPU. Automatic gradients + GPU sounds pretty nice. I won't be showing you how to run on the GPU because I'm using a Macbook Air and as far as I know, Theano doesn't support or barely supports OpenCL at this time. But you can check out their <a href="http://deeplearning.net/software/theano/tutorial/using_gpu.html">documentation</a> if you have an nVidia GPU ready to go.</p>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h4 id="Summary">Summary<a class="anchor-link" href="#Summary">¶</a></h4><p>As the title suggests, I'm going to show how to build a simple neural network (yep, you guessed it, using our favorite XOR problem..) using Theano. The reason I wrote this post is because I found the existing Theano tutorials to be not simple enough. I'm all about reducing things to fundamentals. Given that, I will not be using all the bells-and-whistles that Theano has to offer and I'm going to be writing code that maximizes for readability. Nonetheless, using what I show here, you should be able to scale up to more complex algorithms.</p>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h4 id="Assumptions">Assumptions<a class="anchor-link" href="#Assumptions">¶</a></h4><p>I assume you know how to write a simple neural network in Python (including training it with gradient descent/backpropagation). I also assume you've at least browsed through the Theano <a href="http://deeplearning.net/software/theano/index.html">documentation</a> and have a feel for what it's about (I didn't do it justice in my explanation of "why Theano" above).</p>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h3 id="Let's-get-started">Let's get started<a class="anchor-link" href="#Let's-get-started">¶</a></h3><p>First, let's import all the goodies we'll need.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">theano</span>

<span class="kn">import</span> <span class="nn">theano.tensor</span> <span class="k">as</span> <span class="nn">T</span>

<span class="kn">import</span> <span class="nn">theano.tensor.nnet</span> <span class="k">as</span> <span class="nn">nnet</span>

<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Before we actually build the neural network, let's just get familiarized with how Theano works. Let's do something really simple, we'll simply ask Theano to give us the derivative of a simple mathematical expression like

$$ f(x) = e^{\sin(x^2)} $$

As you can see, this is an equation of a single variable $x$. So let's use Theano to symbolically define our variable $x$. What do I mean by symbolically? Well, we're going to be building a Theano expression using variables and numbers similar to how we'd write this equation down on paper. We're not actually computing anything yet. Since Theano is a Python library, we define these expression variables as one of many kinds of Theano variable types.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">x</span> <span class="o">=</span> <span class="n">T</span><span class="o">.</span><span class="n">dscalar</span><span class="p">()</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>So dscalar() is a type of Theano variable or data type that is computationally represented as a float64. There are many other data types available (see <a href="http://deeplearning.net/software/theano/library/tensor/basic.html">here</a>), but we're interested in just defining a single variable that is a scalar.</p>

<p>Now let's build out the expression.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Here I've defined our expression that is equivalent to the mathematical one above. <code>fx</code> is now a variable itself that depends on the <code>x</code> variable.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="nb">type</span><span class="p">(</span><span class="n">fx</span><span class="p">)</span> <span class="c1">#just to show you that fx is a theano variable type</span>

</pre></div>

</div>

<pre>theano.tensor.var.TensorVariable</pre>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Okay, so that's nice. What now? Well, now we need to "compile" this expression into a Theano function. Theano will do some magic behind the scenes including building a computational graph, optimizing operations, and compiling to C code to get this to run fast and allow it to compute gradients.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">f</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">function</span><span class="p">(</span><span class="n">inputs</span><span class="o">=</span><span class="p">[</span><span class="n">x</span><span class="p">],</span> <span class="n">outputs</span><span class="o">=</span><span class="p">[</span><span class="n">fx</span><span class="p">])</span>

</pre></div>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell ">

</pre></div>

</div>

<pre>[array(0.602681965908778)]</pre>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>We compiled our <code>fx</code> expression into a Theano function. As you can see, <code>theano.function</code> has two required arguments, inputs and outputs. Our only input is our Theano variable <code>x</code> and our output is our <code>fx</code> expression. Then we ran the f() function, supplying it with the value <code>10</code>, and it accurately spit out the computation. So up until this point we could have just as easily run <code>np.exp(np.sin(100))</code> using numpy and gotten the same result. But that would be an exact, imperative computation and not a symbolic computational graph. Now let's show off Theano's autodifferentiation.</p>

<p>To do that, we'll use <code>T.grad()</code>, which will give us a symbolically differentiated expression of our function; then we pass it to <code>theano.function</code> to compile a new function to call it. <code>wrt</code> stands for 'with respect to', i.e. we're differentiating our expression <code>fx</code> with respect to its variable <code>x</code>.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<span class="n">fprime</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">function</span><span class="p">([</span><span class="n">x</span><span class="p">],</span> <span class="n">fp</span><span class="p">)</span>

</pre></div>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fprime</span><span class="p">(</span><span class="mi">15</span><span class="p">)</span>

</pre></div>

</div>

<pre>array(4.347404090286685)</pre>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>4.347 is indeed the derivative of our expression evaluated at $x=15$ (don't worry, I checked with WolframAlpha). And to be clear, Theano can take the derivative of arbitrarily complex expressions. Don't be fooled by our extremely simple starter expression here. Automatically calculating gradients is a huge help since it saves us the time of having to manually come up with the gradient expressions for whatever neural network we build.</p>

<p>So there you have it. Those are the very basics of Theano. We're going to utilize a few other features of Theano in the neural net we'll build but not much.</p>

<h4 id="Now,-for-an-XOR-neural-network">Now, for an XOR neural network<a class="anchor-link" href="#Now,-for-an-XOR-neural-network">¶</a></h4>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>We're going to symbolically define two Theano variables called <code>x</code> and <code>y</code>. We're going to build our familiar XOR network with 2 input units (+ a bias), 3 hidden units (+ a bias), and 1 output unit. So our <code>x</code> variable will always be a 2-element vector (e.g. [0,1]) and our <code>y</code> variable will always be a scalar and is our expected value for each pair of <code>x</code> values.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<span class="n">y</span> <span class="o">=</span> <span class="n">T</span><span class="o">.</span><span class="n">dscalar</span><span class="p">()</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Now let's define a Python function that will be a matrix multiplier and sigmoid function, so it will accept an <code>x</code> vector (and concatenate in a bias value of 1) and a <code>w</code> weight matrix, multiply them, and then run the result through a sigmoid function. Theano has the sigmoid function built into the <code>nnet</code> module that we imported above. We'll use this function as our basic layer output function.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">w</span><span class="p">):</span>

<span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">theano</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">floatX</span><span class="p">)</span>

<span class="n">new_x</span> <span class="o">=</span> <span class="n">T</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">x</span><span class="p">,</span> <span class="n">b</span><span class="p">])</span>

<span class="n">m</span> <span class="o">=</span> <span class="n">T</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">w</span><span class="o">.</span><span class="n">T</span><span class="p">,</span> <span class="n">new_x</span><span class="p">)</span> <span class="c1">#theta1: 3x3 * x: 3x1 = 3x1 ;;; theta2: 1x4 * 4x1</span>

<span class="n">h</span> <span class="o">=</span> <span class="n">nnet</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">m</span><span class="p">)</span>

<span class="k">return</span> <span class="n">h</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Theano can be a bit touchy. In order to concatenate a scalar value of 1 to our 1-dimensional vector <code>x</code>, we create a numpy array with a single element (<code>1</code>), and explicitly pass in the <code>dtype</code> parameter to make it a float64 and compatible with our Theano vector variable. You'll also notice that Theano provides its own version of many numpy functions, such as the dot product that we're using. Theano can work with numpy but in the end it all has to get converted to Theano types.</p>

<p>This feels a little bit premature, but let's go ahead and implement our gradient descent function. Don't worry, it's very simple. We're just going to have a function that defines a learning rate <code>alpha</code> and accepts a cost/error expression and a weight matrix. It will use Theano's <code>grad()</code> function to compute the gradient of the cost function with respect to the given weight matrix and return an updated weight matrix.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">grad_desc</span><span class="p">(</span><span class="n">cost</span><span class="p">,</span> <span class="n">theta</span><span class="p">):</span>

<span class="n">alpha</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="c1">#learning rate</span>

<span class="k">return</span> <span class="n">theta</span> <span class="o">-</span> <span class="p">(</span><span class="n">alpha</span> <span class="o">*</span> <span class="n">T</span><span class="o">.</span><span class="n">grad</span><span class="p">(</span><span class="n">cost</span><span class="p">,</span> <span class="n">wrt</span><span class="o">=</span><span class="n">theta</span><span class="p">))</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>We're making good progress. At this point we can define our weight matrices and initialize them to random values.

Since our weight matrices will take on definite values, they're not going to be represented as plain Theano variables; instead, they're going to be defined as Theano <em>shared</em> variables. A shared variable is what we use for things we want to give a definite value but we also want to update. Notice that I didn't define the <code>alpha</code> or <code>b</code> (the bias term) as shared variables; I just hard-coded them as strict values because I am never going to update/modify them.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">theta1</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">shared</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">theano</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">floatX</span><span class="p">))</span> <span class="c1"># randomly initialize</span>

<span class="n">theta2</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">shared</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">theano</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">floatX</span><span class="p">))</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>So here we've defined our two weight matrices for our 3 layer network and initialized them using numpy's random class. Again we specifically define the dtype parameter so it will be a float64, compatible with our Theano <code>dscalar</code> and <code>dvector</code> variable types.</p>

<p>Here's where the fun begins. We can start actually doing our computations for each layer in the network. Of course we'll start by computing the hidden layer's output using our previously defined <code>layer</code> function, and pass in the Theano <code>x</code> variable we defined above and our <code>theta1</code> matrix.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">hid1</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">theta1</span><span class="p">)</span> <span class="c1">#hidden layer</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>We can do the same for our final output layer. Notice I use the T.sum() function on the outside, which is the same as numpy's sum(). This is only because Theano will complain if you don't make it explicitly clear that our output is returning a scalar and not a matrix. Our matrix dimensional analysis is sure to return a 1x1 single element vector but we need to convert it to a scalar since we're subtracting <code>y</code> from <code>out1</code> in our cost expression that follows.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">out1</span> <span class="o">=</span> <span class="n">T</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">layer</span><span class="p">(</span><span class="n">hid1</span><span class="p">,</span> <span class="n">theta2</span><span class="p">))</span> <span class="c1">#output layer</span>

<span class="n">fc</span> <span class="o">=</span> <span class="p">(</span><span class="n">out1</span> <span class="o">-</span> <span class="n">y</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span> <span class="c1">#cost expression</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Ahh, almost done. We're going to compile two Theano functions. One will be our cost expression (for training), and the other will be our output layer expression (to run the network forward).</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cost</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">function</span><span class="p">(</span><span class="n">inputs</span><span class="o">=</span><span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">],</span> <span class="n">outputs</span><span class="o">=</span><span class="n">fc</span><span class="p">,</span> <span class="n">updates</span><span class="o">=</span><span class="p">[</span>

<span class="p">(</span><span class="n">theta1</span><span class="p">,</span> <span class="n">grad_desc</span><span class="p">(</span><span class="n">fc</span><span class="p">,</span> <span class="n">theta1</span><span class="p">)),</span>

<span class="p">(</span><span class="n">theta2</span><span class="p">,</span> <span class="n">grad_desc</span><span class="p">(</span><span class="n">fc</span><span class="p">,</span> <span class="n">theta2</span><span class="p">))])</span>

<span class="n">run_forward</span> <span class="o">=</span> <span class="n">theano</span><span class="o">.</span><span class="n">function</span><span class="p">(</span><span class="n">inputs</span><span class="o">=</span><span class="p">[</span><span class="n">x</span><span class="p">],</span> <span class="n">outputs</span><span class="o">=</span><span class="n">out1</span><span class="p">)</span>

</pre></div>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>Our <code>theano.function</code> call looks a bit different than in our first example. Yeah, we have this additional <code>updates</code> parameter. <code>updates</code> allows us to update our shared variables according to an expression. <code>updates</code> expects a list of 2-tuples:</p>

<div class="highlight"><pre><span></span><span class="n">updates</span><span class="o">=</span><span class="p">[(</span><span class="n">shared_variable</span><span class="p">,</span> <span class="n">update_value</span><span class="p">),</span> <span class="o">...</span><span class="p">]</span>

</pre></div>

<p>The second part of each tuple can be an expression or function that returns the new value we want to update the first part to. In our case, we have two shared variables we want to update, <code>theta1</code> and <code>theta2</code> and we want to use our <code>grad_desc</code> function to give us the updated data. Of course our <code>grad_desc</code> function expects two arguments, a cost function and a weight matrix, so we pass those in. <code>fc</code> is our cost expression. So every time we invoke/call the <code>cost</code> function that we've compiled with Theano, it will also update our shared variables according to our <code>grad_desc</code> rule. Pretty convenient!</p>

<p>Additionally, we've compiled a <code>run_forward</code> function just so we can run the network forward and make sure it has trained properly. We don't need to update anything there.</p>

<p>Now let's define our training data and set up a <code>for</code> loop to iterate through our training epochs.</p>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="n">inputs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">],[</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">],[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">],[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]])</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="mi">2</span><span class="p">)</span> <span class="c1">#training data X</span>

<span class="n">exp_y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span> <span class="c1">#training data Y</span>

<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10000</span><span class="p">):</span>

<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">inputs</span><span class="p">)):</span>

<span class="n">cur_cost</span> <span class="o">=</span> <span class="n">cost</span><span class="p">(</span><span class="n">inputs</span><span class="p">[</span><span class="n">k</span><span class="p">],</span> <span class="n">exp_y</span><span class="p">[</span><span class="n">k</span><span class="p">])</span> <span class="c1">#call our Theano-compiled cost function, it will auto update weights</span>

<span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">500</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c1">#only print the cost every 500 epochs/iterations (to save space)</span>

<span class="nb">print</span><span class="p">(</span><span class="s1">'Cost: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">cur_cost</span><span class="p">,))</span>

</pre></div>

</div>

<pre>Cost: 0.6729492014975456

Cost: 0.23521333773509118

Cost: 0.20385060705569344

Cost: 0.09715044753510742

Cost: 0.039259128265329804

Cost: 0.027491611330928263

Cost: 0.013058140670015577

Cost: 0.007656970860067689

Cost: 0.005215440091514665

Cost: 0.0038843551856147704

Cost: 0.003063599050987251

Cost: 0.002513378114127917

Cost: 0.0021217874358153673

Cost: 0.0018303604198688056

Cost: 0.0016058512119977342

Cost: 0.0014280751222236468

Cost: 0.001284121957016395

Cost: 0.0011653769062277865

Cost: 0.0010658859592106108

Cost: 0.000981410600338758

</pre>

</div>

</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell ">

<div class=" highlight hl-ipython3"><pre><span></span><span class="c1">#Training done! Let's test it out</span>

<span class="nb">print</span><span class="p">(</span><span class="n">run_forward</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">]))</span>

<span class="nb">print</span><span class="p">(</span><span class="n">run_forward</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">]))</span>

</pre></div>

</div>

<pre>0.9752392598335232

0.03272599279350485

0.965279382474992

0.030138157640063574

</pre>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<p>It works!</p>

<h4 id="Closing-words">Closing words<a class="anchor-link" href="#Closing-words">¶</a></h4><p>Theano is a pretty robust and complicated library but hopefully this simple introduction helps you get started. I certainly struggled with it before it made sense to me. And clearly using Theano for an XOR neural network is overkill, but its optimization power and GPU utilization really come into play for bigger projects. Nonetheless, not having to think about manually calculating gradients is nice.</p>

<p>Cheers</p>

</div>

</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">

<h4 id="References:">References:<a class="anchor-link" href="#References:">¶</a></h4><ol>

<li><a href="http://deeplearning.net/software/theano/index.html">http://deeplearning.net/software/theano/index.html</a></li>

<li><a href="https://gist.github.com/honnibal/6a9e5ef2921c0214eeeb">https://gist.github.com/honnibal/6a9e5ef2921c0214eeeb</a></li>

</ol>

</div>

var mathjaxscript = document.createElement('script');

mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';

mathjaxscript.type = 'text/javascript';

mathjaxscript.src = '//cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML';

mathjaxscript[(window.opera ? "innerHTML" : "text")] =

"MathJax.Hub.Config({" +

" config: ['MMLorHTML.js']," +

" TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'AMS' } }," +

" jax: ['input/TeX','input/MathML','output/HTML-CSS']," +

" extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +

" displayAlign: 'center'," +

" displayIndent: '0em'," +

" showMathMenu: true," +

" tex2jax: { " +

" inlineMath: [ ['$','$'] ], " +

" displayMath: [ ['$$','$$'] ]," +

" processEscapes: true," +

" preview: 'TeX'," +

" }, " +

" 'HTML-CSS': { " +

" linebreaks: { automatic: true, width: '95% container' }, " +

" styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'black ! important'} }" +

" } " +

"}); ";

(document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);

}

</script>

<a href="http://outlace.com/theano2.html">posted at 00:00</a>

by Brandon Brown

 · <a href="http://outlace.com/category/frameworks/" rel="tag">Frameworks</a>

 <a href="http://outlace.com/tag/theano/" class="tags">Theano</a>

 <a href="http://outlace.com/tag/frameworks/" class="tags">Frameworks</a>

</div>

var disqus_shortname = 'outlace';

(function() {

var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;

dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';

(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);

})();

</script>

</article>

<p>

</footer>

</div>

</div>

var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");

document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));

</script>

try {

var pageTracker = _gat._getTracker("UA-65814776-1");

pageTracker._trackPageview();

} catch(err) {}</script>

</body>

</html>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

theano2.html

theano2.html

Files

theano2.html

Latest commit

History

theano2.html

File metadata and controls