<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta name="generator" content="HTML Tidy for Linux/x86 (vers 11 February 2007), see www.w3.org">
<style type="text/css">
/* Design Credits: Jon Barron and Deepak Pathak and Abhishek Kar and Saurabh Gupta*/
a {
color: #1772d0;
text-decoration:none;
}
a:focus, a:hover {
color: #f09228;
text-decoration:none;
}
body,td,th {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 400
}
heading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 17px; /* 19 */
font-weight: 600 /* 1000 */
}
hr
{
border: 0;
height: 1px;
background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
}
strong {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 600 /* 800 */
}
strongred {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
color: red;
font-size: 16px
}
sectionheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 22px;
font-weight: 600
}
pageheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 38px;
font-weight: 400
}
.ImageBorder
{
border-width: 1px;
border-color: Black;
}
</style>
<link rel="shortcut icon" href="images/apple-touch-ri-logo-white-120x120.png">
<script type="text/javascript" src="js/hidebib.js"></script>
<title>Tairan He</title>
<meta name="Tairan He's Homepage" http-equiv="Content-Type" content="Tairan He's Homepage">
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<!-- Start : Google Analytics Code -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-XXXXX-Y', 'auto');
ga('send', 'pageview');
</script>
<!-- End : Google Analytics Code -->
<!-- Scramble Script by Jeff Donahue -->
<script src="js/scramble.js"></script>
</head>
<body>
<table width="900" border="0" align="center" border="0" cellspacing="0" cellpadding="20">
<tr><td>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20">
<p align="center">
<pageheading>Tairan He 「何泰然」</pageheading><br>
</p>
<tr>
<td width="30%" valign="top"><a href="images/tairan&h1_crop.JPG"><img src="images/tairan&h1_crop.JPG" width="100%" style="border-radius:15px"></a>
<p align=center>
| <a href="data/TairanHe_CV_20241222.pdf">CV</a> |
<a href="mailto:tairanh@andrew.cmu.edu">Email</a> |
<a href="https://scholar.google.com/citations?user=TVWH2U8AAAAJ">Google Scholar</a> |
<br/>
| <a href="https://github.com/TairanHe">Github</a> |
<a href="https://www.linkedin.com/in/tairan-he-41a904294/">LinkedIn</a> |
<a href="https://space.bilibili.com/14145636">Bilibili</a> |
</p>
<p align="center" style="margin-top:-8px;"><iframe id="twitter-widget-0" scrolling="no" frameborder="0" allowtransparency="true" allowfullscreen="true" class="twitter-follow-button twitter-follow-button-rendered" style="position: static; visibility: visible; width: 156px; height: 20px;" title="Twitter Follow Button" src="https://platform.twitter.com/widgets/follow_button.2f70fb173b9000da126c79afe2098f02.en.html#dnt=false&id=twitter-widget-0&lang=en&screen_name=TairanHe99&show_count=false&show_screen_name=true&size=m&time=1706734206165" data-screen-name=""></iframe><script async="" src="https://platform.twitter.com/widgets.js" charset="utf-8"></script></p>
</td>
<td width="70%" valign="top" align="justify">
<p>I am a second-year Ph.D. student at the <a href="https://www.ri.cmu.edu">Robotics Institute</a> at <a href="https://www.cmu.edu">Carnegie Mellon University</a>, advised by <a href="http://www.gshi.me">Guanya Shi</a> and <a href="https://www.cs.cmu.edu/~cliu6/">Changliu Liu</a>. I am also a member of the <a href="https://research.nvidia.com/labs/gear/">NVIDIA GEAR group</a> led by <a href="https://jimfan.me/">Jim Fan</a> and <a href="https://yukezhu.me/">Yuke Zhu</a>. My research is supported by the CMU RI Presidential Fellowship and the NVIDIA Graduate Fellowship.
</p>
<p>I received my Bachelor's degree in computer science from <a href="http://en.sjtu.edu.cn">Shanghai Jiao Tong University</a>, where I was advised by <a href="http://wnzhang.net">Weinan Zhang</a>. I also spent time at <a href="https://www.microsoft.com/en-us/research/lab/microsoft-research-asia/">Microsoft Research Asia</a>.
</p>
<!-- <p>Goal: challenge conventional notions of what robots can achieve, develop robots that improves everyone's life. Focus: developing intelligent robots being able to do useful tasks with <u>intelligence, generalizability, agility and safety</u>. Method: learning-based methods that scale with the computation and data. Robots: Mobile robots, legged robots, robotic manipulators, and humanoid robots.
</p> -->
<p><strong>Goal:</strong> Robots that improve everyone's life.</p>
<p><strong>Focus:</strong> How to build the <u>data flywheel for robotics</u> to unlock human-level athletic skills and semantic intelligence? How to make robots perform useful tasks with <u>adaptability, generalizability, agility, and safety</u>?</p>
<p><strong>Method:</strong> Utilizing learning-based methods that scale with computation and data.</p>
<p><strong>Robots:</strong> I love working on humanoids and aim to make them capable of doing everything I can do—and more.</p>
<p>Email: tairanh [AT] andrew.cmu.edu
</p>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> News</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr><td>
<ul>
<li>[12/2024] Received NVIDIA Graduate Fellowship. Thanks, NVIDIA!</li>
<li>[11/2024] Received CMU RI Presidential Fellowship. Thanks, CMU!</li>
<li>[07/2024] <a href="https://agile-but-safe.github.io/">ABS</a> is selected as the <a href="https://roboticsconference.org/2024/program/awards/">Outstanding Student Paper Award Finalist at RSS 2024</a>!</li>
<li>[04/2024] Invited talk at <a href="https://www.techbeat.net/talk-info?id=864">TechBeat</a>.</li>
<!-- <a href="javascript:toggleblock('news')">---- show more ----</a>
<div id="news" style="display:none">
<li>[11/2024] Received CMU RI Presidential Fellowship. Thanks, CMU!</li>
</div> -->
</ul>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Publications</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td width="40%" valign="top" align="center"><a href="https://hover-versatile-humanoid.github.io/">
<video playsinline autoplay loop muted src="images/hover/HOVER-Teaser-preview-720.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://hover-versatile-humanoid.github.io/" id="HOVER">
<heading>HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots</heading></a><br>
Tairan He*, Wenli Xiao*, Toru Lin, Zhengyi Luo, Zhenjia Xu, Zhenyu Jiang, Jan Kautz, Changliu Liu, Guanya Shi, Xiaolong Wang, Linxi "Jim" Fan†, Yuke Zhu† <br>
2024<br>
</p>
<div class="paper" id="hover">
<a href="https://hover-versatile-humanoid.github.io/">webpage</a> |
<a href="https://hover-versatile-humanoid.github.io/resources/HOVER_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('hover_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('hover')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2410.21229">arXiv</a>
<p align="justify"> <i id="hover_abs">Humanoid whole-body control requires adapting to diverse tasks such as navigation, loco-manipulation, and tabletop manipulation, each demanding a different mode of control. For example, navigation relies on root velocity tracking, while tabletop manipulation prioritizes upper-body joint angle tracking. Existing approaches typically train individual policies tailored to a specific command space, limiting their transferability across modes. We present the key insight that full-body kinematic motion imitation can serve as a common abstraction for all these tasks and provide general-purpose motor skills for learning multiple modes of whole-body control. Building on this, we propose HOVER (Humanoid Versatile Controller), a multi-mode policy distillation framework that consolidates diverse control modes into a unified policy. HOVER enables seamless transitions between control modes while preserving the distinct advantages of each, offering a robust and scalable solution for humanoid control across a wide range of modes. By eliminating the need for policy retraining for each control mode, our approach improves efficiency and flexibility for future humanoid applications.</i></p>
<pre xml:space="preserve">
@article{he2024hover,
title={HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots},
author={He, Tairan and Xiao, Wenli and Lin, Toru and Luo, Zhengyi and Xu, Zhenjia and Jiang, Zhenyu and Kautz, Jan and Liu, Changliu and Shi, Guanya and Wang, Xiaolong and Fan, Linxi and Zhu, Yuke},
journal={arXiv preprint arXiv:2410.21229},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://omni.human2humanoid.com/">
<video playsinline autoplay loop muted src="images/omnih2o/Preview-OmniH2O.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://omni.human2humanoid.com/" id="OmniH2O">
<heading>OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning</heading></a><br>
Tairan He*, Zhengyi Luo*, Xialin He*, Wenli Xiao, Chong Zhang, Weinan Zhang, Kris Kitani, Changliu Liu, Guanya Shi <br>
CoRL 2024<br>
</p>
<div class="paper" id="omnih2o">
<a href="https://omni.human2humanoid.com/">webpage</a> |
<a href="https://omni.human2humanoid.com/resources/OmniH2O_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('omnih2o_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('omnih2o')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2406.08858">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/human2humanoid">code</a> |
<a href="https://www.youtube.com/watch?v=ofgxZHv0GMk">video</a> |
<a href="https://spectrum.ieee.org/video-friday-drone-vs-flying-canoe">media (ieee spectrum)</a>
<p align="justify"> <i id="omnih2o_abs">We present OmniH2O (Omni Human-to-Humanoid), a learning-based system for whole-body humanoid teleoperation and autonomy. Using kinematic pose as a universal control interface, OmniH2O enables various ways for a human to control a full-sized humanoid with dexterous hands, including using real-time teleoperation through VR headset, verbal instruction, and RGB camera. OmniH2O also enables full autonomy by learning from teleoperated demonstrations or integrating with frontier models such as GPT-4. OmniH2O demonstrates versatility and dexterity in various real-world whole-body tasks through teleoperation or autonomy, such as playing multiple sports, moving and manipulating objects, and interacting with humans. We develop an RL-based sim-to-real pipeline, which involves large-scale retargeting and augmentation of human motion datasets, learning a real-world deployable policy with sparse sensor input by imitating a privileged teacher policy, and reward designs to enhance robustness and stability. We release the first humanoid whole-body control dataset, OmniH2O-6, containing six everyday tasks, and demonstrate humanoid whole-body skill learning from teleoperated datasets.</i></p>
<pre xml:space="preserve">
@article{he2024omnih2o,
title={OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning},
author={He, Tairan and Luo, Zhengyi and He, Xialin and Xiao, Wenli and Zhang, Chong and Zhang, Weinan and Kitani, Kris and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2406.08858},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://lecar-lab.github.io/wococo/">
<video playsinline autoplay loop muted src="images/wococo/wococo-preview.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://lecar-lab.github.io/wococo/" id="WoCoCo">
<heading>WoCoCo: Learning Whole-Body Humanoid Control with Sequential Contacts</heading></a><br>
Chong Zhang*, Wenli Xiao*, Tairan He, Guanya Shi <br>
CoRL 2024 <b style="color:rgb(255, 100, 100);">(Oral)</b><br>
</p>
<div class="paper" id="wococo">
<a href="https://lecar-lab.github.io/wococo/">webpage</a> |
<a href="https://arxiv.org/pdf/2406.06005">pdf</a> |
<a href="javascript:toggleblock('wococo_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('wococo')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2406.06005">arXiv</a> |
<a href="https://www.youtube.com/watch?v=L18X-QbXqPI&ab_channel=LeCARLabatCMU">teaser video</a> |
<a href="https://www.youtube.com/watch?v=_S6DNhPDuTw&t=1s&ab_channel=LeCARLabatCMU">introduction video</a> |
<a href="https://spectrum.ieee.org/video-friday-drone-vs-flying-canoe">media (ieee spectrum)</a>
<p align="justify"> <i id="wococo_abs">Humanoid activities involving sequential contacts are crucial for complex robotic interactions and operations in the real world and are traditionally solved by model-based motion planning, which is time-consuming and often relies on simplified dynamics models. Although model-free reinforcement learning (RL) has become a powerful tool for versatile and robust whole-body humanoid control, it still requires tedious task-specific tuning and state machine design and suffers from long-horizon exploration issues in tasks involving contact sequences. In this work, we propose WoCoCo (Whole-Body Control with Sequential Contacts), a unified framework to learn whole-body humanoid control with sequential contacts by naturally decomposing the tasks into separate contact stages. Such decomposition facilitates simple and general policy learning pipelines through task-agnostic reward and sim-to-real designs, requiring only one or two task-related terms to be specified for each task. We demonstrated that end-to-end RL-based controllers trained with WoCoCo enable four challenging whole-body humanoid tasks involving diverse contact sequences in the real world without any motion priors: 1) versatile parkour jumping, 2) box loco-manipulation, 3) dynamic clap-and-tap dancing, and 4) cliffside climbing. We further show that WoCoCo is a general framework beyond humanoid by applying it in 22-DoF dinosaur robot loco-manipulation tasks.</i></p>
<pre xml:space="preserve">
@article{zhang2024wococo,
title={WoCoCo: Learning Whole-Body Humanoid Control with Sequential Contacts},
author={Zhang, Chong and Xiao, Wenli and He, Tairan and Shi, Guanya},
journal={arXiv preprint arXiv:2406.06005},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://human2humanoid.com/">
<video playsinline autoplay loop muted src="images/h2o/h2o-preview.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://human2humanoid.com/" id="H2O">
<heading>Learning Human-to-Humanoid Real-Time Whole-Body Teleoperation</heading></a><br>
Tairan He*, Zhengyi Luo*, Wenli Xiao, Chong Zhang, Kris Kitani, Changliu Liu, Guanya Shi <br>
IROS 2024 <b style="color:rgb(255, 100, 100);">(Oral Presentation)</b><br>
ICRA 2024 Agile Robotics Workshop (Spotlight)<br>
</p>
<div class="paper" id="h2o">
<a href="https://human2humanoid.com/">webpage</a> |
<a href="https://human2humanoid.com/resources/H2O_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('h2o_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('h2o')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2403.04436">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/human2humanoid">code</a> |
<a href="https://www.youtube.com/watch?v=0W4N2q7xtcQ&ab_channel=LeCARLabatCMU">video</a> |
<a href="https://spectrum.ieee.org/video-friday-human-to-humanoid">media (ieee spectrum)</a>
<p align="justify"> <i id="h2o_abs">We present <span style="color: Red;">H</span>uman <span style="color: Red;">to</span> Human<span style="color: Red;">o</span>id (<strong>H2O</strong>), a reinforcement learning (RL) based framework that enables real-time whole-body teleoperation of a full-sized humanoid robot with only an RGB camera. To create a large-scale retargeted motion dataset of human movements for humanoid robots, we propose a scalable ''sim-to-data" process to filter and pick feasible motions using a privileged motion imitator. Afterwards, we train a robust real-time humanoid motion imitator in simulation using these refined motions and transfer it to the real humanoid robot in a zero-shot manner. We successfully achieve teleoperation of dynamic whole-body motions in real-world scenarios, including walking, back jumping, kicking, turning, waving, pushing, boxing, etc. To the best of our knowledge, this is the first demonstration to achieve learning-based real-time whole-body humanoid teleoperation.</i></p>
<pre xml:space="preserve">
@article{he2024learning,
title={Learning human-to-humanoid real-time whole-body teleoperation},
author={He, Tairan and Luo, Zhengyi and Xiao, Wenli and Zhang, Chong and Kitani, Kris and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2403.04436},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://agile-but-safe.github.io/">
<video playsinline autoplay loop muted src="images/agile-but-safe/abs-gif-preview-long.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://agile-but-safe.github.io/" id="AGILE-BUT-SAFE">
<heading>Agile But Safe: Learning Collision-Free High-Speed Legged Locomotion</heading></a><br>
Tairan He*, Chong Zhang*, Wenli Xiao, Guanqi He, Changliu Liu, Guanya Shi<br>
RSS 2024 <b style="color:rgb(255, 100, 100);">(Outstanding Student Paper Award Finalist - Top 3)</b><br>
ICRA 2024 Agile Robotics Workshop (Spotlight)<br>
</p>
<div class="paper" id="agile-but-safe">
<a href="https://agile-but-safe.github.io/">webpage</a> |
<a href="https://arxiv.org/pdf/2401.17583.pdf">pdf</a> |
<a href="javascript:toggleblock('agile-but-safe_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('agile-but-safe')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2401.17583">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/ABS">code</a> |
<a href="https://www.youtube.com/watch?v=elWwPn5IhjA">real-world demo</a> |
<a href="https://www.youtube.com/watch?v=oyMf-yaB2d0">video story</a> |
<a href="https://spectrum.ieee.org/video-friday-agile-but-safe">media (ieee spectrum)</a>
<p align="justify"> <i id="agile-but-safe_abs">Legged robots navigating cluttered environments must be jointly agile for efficient task execution and safe to avoid collisions with obstacles or humans. Existing studies either develop conservative controllers (< 1.0 m/s) to ensure safety, or focus on agility without considering potentially fatal collisions. This paper introduces Agile But Safe (ABS), a learning-based control framework that enables agile and collision-free locomotion for quadrupedal robots. ABS involves an agile policy to execute agile motor skills amidst obstacles and a recovery policy to prevent failures, collaboratively achieving high-speed and collision-free navigation. The policy switch in ABS is governed by a learned control-theoretic reach-avoid value network, which also guides the recovery policy as an objective function, thereby safeguarding the robot in a closed loop. The training process involves the learning of the agile policy, the reach-avoid value network, the recovery policy, and an exteroception representation network, all in simulation. These trained modules can be directly deployed in the real world with onboard sensing and computation, leading to high-speed and collision-free navigation in confined indoor and outdoor spaces with both static and dynamic obstacles.</i></p>
<pre xml:space="preserve">
@article{he2024agile,
title={Agile but safe: Learning collision-free high-speed legged locomotion},
author={He, Tairan and Zhang, Chong and Xiao, Wenli and He, Guanqi and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2401.17583},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://sites.google.com/view/safe-deep-policy-adaptation">
<video playsinline autoplay loop muted src="images/safedpa/SafeDPA-showoff.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://sites.google.com/view/safe-deep-policy-adaptation" id="SAFEDPA">
<heading>Safe Deep Policy Adaptation</heading></a><br>
Wenli Xiao*, Tairan He*, John Dolan, Guanya Shi<br>
ICRA 2024<br>
CoRL 2023 Deployable Workshop<br>
<!-- <b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 3)</b> -->
</p>
<div class="paper" id="safedpa">
<a href="https://sites.google.com/view/safe-deep-policy-adaptation">webpage</a> |
<a href="https://arxiv.org/pdf/2310.08602.pdf">pdf</a> |
<a href="javascript:toggleblock('safedpa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('safedpa')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2310.08602">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/SafeDPA">code</a> |
<a href="https://www.youtube.com/watch?v=PkyRzlRQVbE">video</a>
<p align="justify"> <i id="safedpa_abs">A critical goal of autonomy and artificial intelligence is enabling autonomous robots to rapidly adapt in dynamic and uncertain environments. Classic adaptive control and safe control provide stability and safety guarantees but are limited to specific system classes. In contrast, policy adaptation based on reinforcement learning (RL) offers versatility and generalizability but presents safety and robustness challenges. We propose SafeDPA, a novel RL and control framework that simultaneously tackles the problems of policy adaptation and safe reinforcement learning. SafeDPA jointly learns adaptive policy and dynamics models in simulation, predicts environment configurations, and fine-tunes dynamics models with few-shot real-world data. A safety filter based on the Control Barrier Function (CBF) on top of the RL policy is introduced to ensure safety during real-world deployment. We provide theoretical safety guarantees of SafeDPA and show the robustness of SafeDPA against learning errors and extra perturbations. Comprehensive experiments on (1) classic control problems (Inverted Pendulum), (2) simulation benchmarks (Safety Gym), and (3) a real-world agile robotics platform (RC Car) demonstrate great superiority of SafeDPA in both safety and task performance, over state-of-the-art baselines. Particularly, SafeDPA demonstrates notable generalizability, achieving a 300% increase in safety rate compared to the baselines, under unseen disturbances in real-world experiments.</i></p>
<pre xml:space="preserve">
@article{xiao2023safe,
title={Safe Deep Policy Adaptation},
author={Xiao, Wenli and He, Tairan and Dolan, John and Shi, Guanya},
journal={arXiv preprint arXiv:2310.08602},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/abs/2310.03379">
<video playsinline autoplay loop muted src="images/acs/ACS-Video.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2310.03379" id="ACS">
<heading>Progressive Adaptive Chance-Constrained Safeguards for Reinforcement Learning</heading></a><br>
Zhaorun Chen, Binhao Chen, Tairan He, Liang Gong, Chengliang Liu<br>
IROS 2024 <b style="color:rgb(255, 100, 100);">(Oral Pitch)</b><br>
</p>
<div class="paper" id="acs">
<!-- <a href="https://manipulation-locomotion.github.io">webpage</a> | -->
<a href="https://arxiv.org/pdf/2310.03379.pdf">pdf</a> |
<a href="javascript:toggleblock('acs_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('acs')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2310.03379">arXiv</a>
<p align="justify"> <i id="acs_abs">An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective and requires immense engineering to support coordination between the arm and legs, error can propagate across modules causing non-smooth unnatural motions. It is also biological implausible where there is evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning. We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups.</i></p>
<pre xml:space="preserve">
@article{chen2023progressive,
title={Progressive Adaptive Chance-Constrained Safeguards for Reinforcement Learning},
author={Chen, Zhaorun and Chen, Binhao and He, Tairan and Gong, Liang and Liu, Chengliang},
journal={arXiv preprint arXiv:2310.03379},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2302.03122">
<img src="images/saferl_survey/saferl_survey.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="SAFERL_SURVEY">
<heading>State-wise Safe Reinforcement Learning: A Survey</heading></a><br>
Weiye Zhao, Tairan He, Rui Chen, Tianhao Wei, Changliu Liu<br>
IJCAI 2023<br>
</p>
<div class="paper" id="saferl_survey">
<a href="https://arxiv.org/pdf/2302.03122.pdf">pdf</a> |
<a href="javascript:toggleblock('saferl_survey_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('saferl_survey')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2302.03122">arXiv</a>
<p align="justify"> <i id="saferl_survey_abs">Despite the tremendous success of Reinforcement Learning (RL) algorithms in simulation environments, applying RL to real-world applications still faces many challenges. A major concern is safety, in another word, constraint satisfaction. State-wise constraints are one of the most common constraints in real-world applications and one of the most challenging constraints in Safe RL. Enforcing state-wise constraints is necessary and essential to many challenging tasks such as autonomous driving, robot manipulation. This paper provides a comprehensive review of existing approaches that address state-wise constraints in RL. Under the framework of State-wise Constrained Markov Decision Process (SCMDP), we will discuss the connections, differences, and trade-offs of existing approaches in terms of (i) safety guarantee and scalability, (ii) safety and reward performance, and (iii) safety after convergence and during training. We also summarize limitations of current methods and discuss potential future directions.</i></p>
<pre xml:space="preserve">
@inproceedings{ijcai2023p763,
title = {State-wise Safe Reinforcement Learning: A Survey},
author = {Zhao, Weiye and He, Tairan and Chen, Rui and Wei, Tianhao and Liu, Changliu},
booktitle = {Proceedings of the Thirty-Second International Joint Conference on
Artificial Intelligence, {IJCAI-23}},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
editor = {Edith Elkind},
pages = {6814--6822},
year = {2023},
month = {8},
note = {Survey Track},
doi = {10.24963/ijcai.2023/763},
url = {https://doi.org/10.24963/ijcai.2023/763},
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://sites.google.com/view/patchail/">
<video playsinline autoplay loop muted src="images/patchail/PatchAIL-Allplay-3.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://sites.google.com/view/patchail/" id="PATCHAIL">
<heading>Visual Imitation Learning with Patch Rewards</heading></a><br>
Minghuan Liu, Tairan He, Weinan Zhang, Shuicheng Yan, Zhongwen Xu <br>
ICLR 2023<br>
</p>
<div class="paper" id="patchail">
<a href="https://sites.google.com/view/patchail/">webpage</a> |
<a href="https://arxiv.org/pdf/2302.00965.pdf">pdf</a> |
<a href="javascript:toggleblock('patchail_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('patchail')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2302.00965">arXiv</a> |
<a href="https://github.com/sail-sg/PatchAIL">code</a>
<p align="justify"> <i id="patchail_abs">Visual imitation learning enables reinforcement learning agents to learn to be- have from expert visual demonstrations such as videos or image sequences, with- out explicit, well-defined rewards. Previous research either adopted supervised learning techniques or induce simple and coarse scalar rewards from pixels, ne- glecting the dense information contained in the image demonstrations. In this work, we propose to measure the expertise of various local regions of image sam- ples, or called patches, and recover multi-dimensional patch rewards accordingly. Patch reward is a more precise rewarding characterization that serves as a fine- grained expertise measurement and visual explainability tool. Specifically, we present Adversarial Imitation Learning with Patch Rewards (PatchAIL), which employs a patch-based discriminator to measure the expertise of different local parts from given images and provide patch rewards. The patch-based knowledge is also used to regularize the aggregated reward and stabilize the training. We evaluate our method on DeepMind Control Suite and Atari tasks. The experiment results have demonstrated that PatchAIL outperforms baseline methods and pro- vides valuable interpretations for visual demonstrations.</i></p>
<pre xml:space="preserve">
@article{liu2023visual,
title={Visual imitation learning with patch rewards},
author={Liu, Minghuan and He, Tairan and Zhang, Weinan and Yan, Shuicheng and Xu, Zhongwen},
journal={arXiv preprint arXiv:2302.00965},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2209.09134">
<img src="images/sisos/sisos.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="SISOS">
<heading>Safety Index Synthesis via Sum-of-Squares Programming</heading></a><br>
Weiye Zhao*, Tairan He*, Tianhao Wei, Simin Liu, Changliu Liu<br>
ACC 2023<br>
</p>
<div class="paper" id="sisos">
<a href="https://arxiv.org/pdf/2209.09134.pdf">pdf</a> |
<a href="javascript:toggleblock('sisos_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('sisos')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2209.09134">arXiv</a>
<p align="justify"> <i id="sisos_abs">Control systems often need to satisfy strict safety requirements. Safety index provides a handy way to evaluate the safety level of the system and derive the resulting safe control policies. However, designing safety index functions under control limits is difficult and requires a great amount of expert knowledge. This paper proposes a framework for synthesizing the safety index for general control systems using sum-of-squares programming. Our approach is to show that ensuring the non-emptiness of safe control on the safe set boundary is equivalent to a local manifold positiveness problem. We then prove that this problem is equivalent to sum-of-squares programming via the Positivstellensatz of algebraic geometry. We validate the proposed method on robot arms with different degrees of freedom and ground vehicles. The results show that the synthesized safety index guarantees safety and our method is effective even in high-dimensional robot systems.</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2023safety,
title={Safety index synthesis via sum-of-squares programming},
author={Zhao, Weiye and He, Tairan and Wei, Tianhao and Liu, Simin and Liu, Changliu},
booktitle={2023 American Control Conference (ACC)},
pages={732--737},
year={2023},
organization={IEEE}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2210.01041">
<img src="images/uaissa/uaissa.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="UAISSA">
<heading>Probabilistic Safeguard for Reinforcement Learning Using Safety Index Guided Gaussian Process Models</heading></a><br>
Weiye Zhao*, Tairan He*, Changliu Liu<br>
L4DC 2023<br>
</p>
<div class="paper" id="uaissa">
<a href="https://arxiv.org/pdf/2210.01041.pdf">pdf</a> |
<a href="javascript:toggleblock('uaissa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('uaissa')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.01041">arXiv</a>
<p align="justify"> <i id="uaissa_abs">Safety is one of the biggest concerns to applying reinforcement learning (RL) to the physical world. In its core part, it is challenging to ensure RL agents persistently satisfy a hard state constraint without white-box or black-box dynamics models. This paper presents an integrated model learning and safe control framework to safeguard any agent, where its dynamics are learned as Gaussian processes. The proposed theory provides (i) a novel method to construct an offline dataset for model learning that best achieves safety requirements; (ii) a parameterization rule for safety index to ensure the existence of safe control; (iii) a safety guarantee in terms of probabilistic forward invariance when the model is learned using the aforementioned dataset. Simulation results show that our framework guarantees almost zero safety violation on various continuous control tasks.</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2023probabilistic,
title={Probabilistic safeguard for reinforcement learning using safety index guided gaussian process models},
author={Zhao, Weiye and He, Tairan and Liu, Changliu},
booktitle={Learning for Dynamics and Control Conference},
pages={783--796},
year={2023},
organization={PMLR}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2302.03122">
<img src="images/autocost/autocost.png" alt="sym" width="80%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="AUTOCOST">
<heading>AutoCost: Evolving Intrinsic Cost for Zero-violation Reinforcement Learning</heading></a><br>
Tairan He, Weiye Zhao, Changliu Liu<br>
AAAI 2023<br>
</p>
<div class="paper" id="autocost">
<a href="https://arxiv.org/pdf/2301.10339.pdf">pdf</a> |
<a href="javascript:toggleblock('autocost_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('autocost')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2301.10339">arXiv</a>
<p align="justify"> <i id="autocost_abs">Safety is a critical hurdle that limits the application of deep reinforcement learning (RL) to real-world control tasks. To this end, constrained reinforcement learning leverages cost functions to improve safety in constrained Markov decision processes. However, such constrained RL methods fail to achieve zero violation even when the cost limit is zero. This paper analyzes the reason for such failure, which suggests that a proper cost function plays an important role in constrained RL. Inspired by the analysis, we propose AutoCost, a simple yet effective framework that automatically searches for cost functions that help constrained RL to achieve zero-violation performance. We validate the proposed method and the searched cost function on the safe RL benchmark Safety Gym. We compare the performance of augmented agents that use our cost function to provide additive intrinsic costs with baseline agents that use the same policy learners but with only extrinsic costs. Results show that the converged policies with intrinsic costs in all environments achieve zero constraint violation and comparable performance with baselines.</i></p>
<pre xml:space="preserve">
@article{he2023autocost,
title={Autocost: Evolving intrinsic cost for zero-violation reinforcement learning},
author={He, Tairan and Zhao, Weiye and Liu, Changliu},
journal={arXiv preprint arXiv:2301.10339},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://seqml.github.io/a2ls/">
<img src="images/a2ls/a2ls.png" alt="sym" width="100%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://seqml.github.io/a2ls/" id="A2LS">
<heading>Reinforcement Learning with Automated Auxiliary Loss Search</heading></a><br>
Tairan He, Yuge Zhang, Kan Ren, Minghuan Liu, Che Wang, Weinan Zhang, Yuqing Yang, Dongsheng Li<br>
NeurIPS 2022<br>
</p>
<div class="paper" id="a2ls">
<a href="https://seqml.github.io/a2ls/">webpage</a> |
<a href="https://arxiv.org/pdf/2210.06041.pdf">pdf</a> |
<a href="javascript:toggleblock('a2ls_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('a2ls')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.06041">arXiv</a> |
<a href="https://github.com/microsoft/autorl-research/tree/main/a2ls">code</a>
<p align="justify"> <i id="a2ls_abs">A good state representation is crucial to solving complicated reinforcement learning (RL) challenges. Many recent works focus on designing auxiliary losses for learning informative representations. Unfortunately, these handcrafted objectives rely heavily on expert knowledge and may be sub-optimal. In this paper, we propose a principled and universal method for learning better representations with auxiliary loss functions, named Automated Auxiliary Loss Search (A2LS), which automatically searches for top-performing auxiliary loss functions for RL. Specifically, based on the collected trajectory data, we define a general auxiliary loss space of size 7.5×1020 and explore the space with an efficient evolutionary search strategy. Empirical results show that the discovered auxiliary loss (namely, A2-winner) significantly improves the performance on both high-dimensional (image) and low-dimensional (vector) unseen tasks with much higher efficiency, showing promising generalization ability to different settings and even different benchmark domains. We conduct a statistical analysis to reveal the relations between patterns of auxiliary losses and RL performance.</i></p>
<pre xml:space="preserve">
@inproceedings{he2022reinforcement,
title={Reinforcement Learning with Automated Auxiliary Loss Search},
author={He, Tairan and Zhang, Yuge and Ren, Kan and Liu, Minghuan and Wang, Che and Zhang, Weinan and Yang, Yuqing and Li, Dongsheng},
booktitle={Advances in Neural Information Processing Systems},
year={2022}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://proceedings.mlr.press/v164/zhao22a.html">
<video playsinline autoplay loop muted src="images/issa/Comparison.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="55%" valign="top">
<p><a href="https://proceedings.mlr.press/v164/zhao22a.html" id="ISSA">
<heading>Model-free Safe Control for Zero-Violation Reinforcement Learning</heading></a><br>
Weiye Zhao, Tairan He, Changliu Liu<br>
CoRL 2021<br>
</p>
<div class="paper" id="issa">
<a href="https://proceedings.mlr.press/v164/zhao22a/zhao22a.pdf">pdf</a> |
<a href="javascript:toggleblock('issa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('issa')" class="togglebib">bibtex</a> |
<a href="https://openreview.net/forum?id=UGp6FDaxB0f">openreview</a> |
<a href="https://github.com/TairanHe/ISSA">code</a>
<p align="justify"> <i id="issa_abs">While deep reinforcement learning (DRL) has impressive performance in a variety of continuous control tasks, one critical hurdle that limits the application of DRL to physical world is the lack of safety guarantees. It is challenging for DRL agents to persistently satisfy a hard state constraint (known as the safety specification) during training. On the other hand, safe control methods with safety guarantees have been extensively studied. However, to synthesize safe control, these methods require explicit analytical models of the dynamic system; but these models are usually not available in DRL. This paper presents a model-free safe control strategy to synthesize safeguards for DRL agents, which will ensure zero safety violation during training. In particular, we present an implicit safe set algorithm, which synthesizes the safety index (also called the barrier certificate) and the subsequent safe control law only by querying a black-box dynamic function (e.g., a digital twin simulator). The theoretical results indicate the implicit safe set algorithm guarantees forward invariance and finite-time convergence to the safe set. We validate the proposed method on the state-of-the-art safety benchmark Safety Gym. Results show that the proposed method achieves zero safety violation and gains 95 cumulative reward compared to state-of-the-art safe DRL methods. Moreover, it can easily scale to high-dimensional systems.
</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2021model,
title={Model-free safe control for zero-violation reinforcement learning},
author={Zhao, Weiye and He, Tairan and Liu, Changliu},
booktitle={5th Annual Conference on Robot Learning},
year={2021}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/abs/2004.09395">
<video playsinline autoplay loop muted src="images/ebil/ebil_heat_40.mp4" poster="./images/loading-icon.gif" alt="sym" width="80%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="EBIL">
<heading>Energy-Based Imitation Learning</heading></a><br>
Minghuan Liu, Tairan He, Minkai Xu, Weinan Zhang <br>
AAMAS 2021 <b style="color:rgb(255, 100, 100);">(Oral)</b><br>
</p>
<div class="paper" id="ebil">
<a href="https://arxiv.org/pdf/2004.09395.pdf">pdf</a> |
<a href="javascript:toggleblock('ebil_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('ebil')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2004.09395">arXiv</a> |
<a href="https://github.com/apexrl/EBIL-torch">code</a>
<p align="justify"> <i id="ebil_abs">A good state representation is crucial to solving complicated reinforcement learning (RL) challenges. Many recent works focus on designing auxiliary losses for learning informative representations. Unfortunately, these handcrafted objectives rely heavily on expert knowledge and may be sub-optimal. In this paper, we propose a principled and universal method for learning better representations with auxiliary loss functions, named Automated Auxiliary Loss Search (A2LS), which automatically searches for top-performing auxiliary loss functions for RL. Specifically, based on the collected trajectory data, we define a general auxiliary loss space of size 7.5×1020 and explore the space with an efficient evolutionary search strategy. Empirical results show that the discovered auxiliary loss (namely, A2-winner) significantly improves the performance on both high-dimensional (image) and low-dimensional (vector) unseen tasks with much higher efficiency, showing promising generalization ability to different settings and even different benchmark domains. We conduct a statistical analysis to reveal the relations between patterns of auxiliary losses and RL performance.</i></p>
<pre xml:space="preserve">
@inproceedings{liu2021energy,
title={Energy-Based Imitation Learning},
author={Liu, Minghuan and He, Tairan and Xu, Minkai and Zhang, Weinan},
booktitle={Proceedings of the 20th International Conference on Autonomous Agents and Multiagent Systems},
year={2021}
}
</pre>
</div>
</td>
</tr>
<!-- <tr>
<td width="40%" valign="top" align="center"><a href="https://manipulation-locomotion.github.io">
<video playsinline autoplay loop muted src="images/wbc-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://manipulation-locomotion.github.io" id="MANIPLOCO">
<heading>Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion</heading></a><br>
Tairan He*, Xuxin Cheng*, Deepak Pathak<br>
CoRL 2022 (Oral)<br>
<b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 4)</b>
</p>
<div class="paper" id="maniploco">
<a href="https://manipulation-locomotion.github.io">webpage</a> |
<a href="https://arxiv.org/pdf/2210.10044.pdf">pdf</a> |
<a href="javascript:toggleblock('maniploco_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('maniploco')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.10044">arXiv</a> |
<a href="https://openreview.net/forum?id=zldI4UpuG7v">OpenReview</a> |
<a href="https://www.youtube.com/watch?v=i9EdPl8uJUA">video</a>
<p align="justify"> <i id="maniploco_abs">An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective and requires immense engineering to support coordination between the arm and legs, error can propagate across modules causing non-smooth unnatural motions. It is also biological implausible where there is evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning. We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2022deep,
author = {Fu, Zipeng and Cheng, Xuxin and
Pathak, Deepak},
title = {Deep Whole-Body Control: Learning a Unified Policy
for Manipulation and Locomotion},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2022}
}
</pre>
</div>
</td>
</tr> -->
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Projects</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td width="40%" valign="top" align="center">
<a href="https://www.bilibili.com/video/BV1Rp4y187ZJ">
<img src="images/wkfg/wkfgicon.png" alt="sym" width="70%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://www.bilibili.com/video/BV1Rp4y187ZJ" id="AUTOCOST">
<heading>SJTU Anonymous Forum 「无可奉告」</heading></a><br>
</p>
<div class="paper" id="autocost">
<a href="https://github.com/TairanHe/SJTU-Anonymous_Forum"> Android Code</a> |
<a href="https://github.com/oscardhc/Forum"> iOS Code</a> |
<a href="http://wukefenggao.cn"> Project Page</a> |
<a href="https://www.bilibili.com/video/BV1Rp4y187ZJ"> Farewell Video</a>
<p align="justify"> <i id="wkfg_abs">A carefree forum platform for SJTUers sharing and talking with anonymous identity. More than <font color="red"><em><strong>10000+</strong></em></font> users used「无可奉告」in the SJTU campus.</i></p>
</div>
</td>
</tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Reviewer Service</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td style="padding:20px;width:100%;vertical-align:middle">
<!-- International Joint Conference on Artificial Intelligence <b>(IJCAI)</b> 2024
<br> -->
International Conference on Machine Learning <b>(ICML)</b>, 2024
<br>
International Conference on Learning Representations <b>(ICLR)</b>, 2024
<br>
IEEE Conference on Decision and Control <b>(CDC)</b>, 2023
<br>
Conference on Neural Information Processing Systems <b>(NeurIPS)</b>, 2023
<br>
Learning for Dynamics &amp; Control Conference <b>(L4DC)</b>, 2023
<br>
AAAI Conference on Artificial Intelligence <b>(AAAI)</b>, 2023, 2024, 2025
<br>
Conference on Robot Learning <b>(CoRL)</b>, 2022, 2023, 2024
</td>
</tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20">
<tbody>
<tr>
<td style="padding:0px">
<br>
<br>
<div>
<script type="text/javascript" id="clustrmaps" src="//cdn.clustrmaps.com/map_v2.js?cl=080808&w=350&t=tt&d=Biz007_Pw8FVsAWycLRoKM_5XR_da9ccb8qGNbWVwnk&co=ffffff&cmo=3acc3a&cmn=ff5353&ct=808080"></script>
<!-- <a target="_top" href="http://clustrmaps.com/site/1acpn?utm_source=widget&utm_campaign=widget_ctr" id="clustrmaps-widget-v2" class="clustrmaps-map-control" style="width: 300px;">
--> </div>
</td>
</tr>
</tbody>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="2">
<tr><td><br><p align="right">
Website template from <a href="http://www.cs.berkeley.edu/~barron/">here</a> and <a href="http://www.cs.cmu.edu/~dpathak/">here</a>
</p></td></tr>
</table>
</td></tr>
</table>
<script xml:space="preserve" language="JavaScript">
hideallbibs();
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('material_review_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('ieee_iot_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('acm_turc_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('aog_mcts_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('pragmatics_marl_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('collab_marl_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('rma_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('energyloco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('navloco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('wococo_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('omnih2o_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('hover_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('h2o_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('agile-but-safe_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('safedpa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('acs_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('saferl_survey_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('patchail_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('sisos_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('uaissa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('autocost_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('a2ls_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('issa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('ebil_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('maniploco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('parkour_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('mobile_aloha_abs');
</script>
</body>
</html>