<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta name="generator" content="HTML Tidy for Linux/x86 (vers 11 February 2007), see www.w3.org">
<style type="text/css">
/* Design Credits: Jon Barron and Deepak Pathak and Abhishek Kar and Saurabh Gupta*/
a {
color: #1772d0;
text-decoration:none;
}
a:focus, a:hover {
color: #f09228;
text-decoration:none;
}
body,td,th {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 400
}
heading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 17px; /* 19 */
font-weight: 600 /* 1000 */
}
hr
{
border: 0;
height: 1px;
background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
}
strong {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 600 /* 800 */
}
strongred {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
color: red;
font-size: 16px
}
sectionheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 22px;
font-weight: 600
}
pageheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 38px;
font-weight: 400
}
.ImageBorder
{
border-width: 1px;
border-color: Black;
}
</style>
<link rel="shortcut icon" href="images/apple-touch-ri-logo-white-120x120.png">
<script type="text/javascript" src="js/hidebib.js"></script>
<title>Tairan He</title>
<meta name="Tairan He's Homepage" http-equiv="Content-Type" content="Tairan He's Homepage">
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<!-- Start : Google Analytics Code -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-XXXXX-Y', 'auto');
ga('send', 'pageview');
</script>
<!-- End : Google Analytics Code -->
<!-- Scramble Script by Jeff Donahue -->
<script src="js/scramble.js"></script>
</head>
<body>
<table width="900" border="0" align="center" border="0" cellspacing="0" cellpadding="20">
<tr><td>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20">
<p align="center">
<pageheading>Tairan He 「何泰然」</pageheading><br>
</p>
<tr>
<td width="30%" valign="top"><a href="images/tairan&h1_crop.JPG"><img src="images/tairan&h1_crop.JPG" width="100%" style="border-radius:15px"></a>
<p align=center>
| <a href="data/TairanHe_CV_20241222.pdf">CV</a> |
<a href="mailto:tairanh@andrew.cmu.edu">Email</a> |
<a href="https://scholar.google.com/citations?user=TVWH2U8AAAAJ">Google Scholar</a> |
<br/>
| <a href="https://github.com/TairanHe">Github</a> |
<a href="https://www.linkedin.com/in/tairan-he-41a904294/">LinkedIn</a> |
<a href="https://space.bilibili.com/14145636">Bilibili</a> |
</p>
<p align="center" style="margin-top:-8px;"><iframe id="twitter-widget-0" scrolling="no" frameborder="0" allowtransparency="true" allowfullscreen="true" class="twitter-follow-button twitter-follow-button-rendered" style="position: static; visibility: visible; width: 156px; height: 20px;" title="Twitter Follow Button" src="https://platform.twitter.com/widgets/follow_button.2f70fb173b9000da126c79afe2098f02.en.html#dnt=false&id=twitter-widget-0&lang=en&screen_name=TairanHe99&show_count=false&show_screen_name=true&size=m&time=1706734206165" data-screen-name=""></iframe><script async="" src="https://platform.twitter.com/widgets.js" charset="utf-8"></script></p>
</td>
<td width="70%" valign="top" align="justify">
<p>I am a second-year Ph.D. student at the <a href="https://www.ri.cmu.edu">Robotics Institute</a> at <a href="https://www.cmu.edu">Carnegie Mellon University</a>, advised by <a href="http://www.gshi.me">Guanya Shi</a> and <a href="https://www.cs.cmu.edu/~cliu6/">Changliu Liu</a>. I am also a member of the <a href="https://research.nvidia.com/labs/gear/">NVIDIA GEAR group</a> led by <a href="https://jimfan.me/">Jim Fan</a> and <a href="https://yukezhu.me/">Yuke Zhu</a>. My research is supported by the CMU RI Presidential Fellowship and the NVIDIA Graduate Fellowship.
</p>
<p>I received my Bachelor's degree in computer science from <a href="http://en.sjtu.edu.cn">Shanghai Jiao Tong University</a>, where I was advised by <a href="http://wnzhang.net">Weinan Zhang</a>. I also spent time at <a href="https://www.microsoft.com/en-us/research/lab/microsoft-research-asia/">Microsoft Research Asia</a>.
</p>
<!-- <p>Goal: challenge conventional notions of what robots can achieve, develop robots that improves everyone's life. Focus: developing intelligent robots being able to do useful tasks with <u>intelligence, generalizability, agility and safety</u>. Method: learning-based methods that scale with the computation and data. Robots: Mobile robots, legged robots, robotic manipulators, and humanoid robots.
</p> -->
<p><strong>Goal:</strong> Robots that improve everyone's life.</p>
<p><strong>Focus:</strong> How to build the <u>data flywheel for robotics</u> to unlock human-level athletic skills and semantic intelligence? How to make robots perform useful tasks with <u>adaptability, generalizability, agility, and safety</u>?</p>
<p><strong>Method:</strong> Utilizing learning-based methods that scale with computation and data.</p>
<p><strong>Robots:</strong> I love working on humanoids and aim to make them capable of doing everything I can do—and more.</p>
<p>Email: tairanh [AT] andrew.cmu.edu
</p>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> News</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr><td>
<ul>
<li>[12/2024] Received NVIDIA Graduate Fellowship. Thanks, NVIDIA!</li>
<li>[11/2024] Received CMU RI Presidential Fellowship. Thanks, CMU!</li>
<li>[07/2024] <a href="https://agile-but-safe.github.io/">ABS</a> is selected as the <a href="https://roboticsconference.org/2024/program/awards/">Outstanding Student Paper Award Finalist at RSS 2024</a>!</li>
<li>[04/2024] Invited talk at <a href="https://www.techbeat.net/talk-info?id=864">TechBeat</a>.</li>
<!-- <a href="javascript:toggleblock('news')">---- show more ----</a>
<div id="news" style="display:none">
<li>[11/2024] Received CMU RI Presidential Fellowship. Thanks, CMU!</li>
</div> -->
</ul>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Publications</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td width="40%" valign="top" align="center"><a href="https://hover-versatile-humanoid.github.io/">
<video playsinline autoplay loop muted src="images/hover/HOVER-Teaser-preview-720.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://hover-versatile-humanoid.github.io/" id="HOVER">
<heading>HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots</heading></a><br>
Tairan He*, Wenli Xiao*, Toru Lin, Zhengyi Luo, Zhenjia Xu, Zhenyu Jiang, Jan Kautz, Changliu Liu, Guanya Shi, Xiaolong Wang, Linxi "Jim" Fan†, Yuke Zhu† <br>
2024<br>
</p>
<div class="paper" id="hover">
<a href="https://hover-versatile-humanoid.github.io/">webpage</a> |
<a href="https://hover-versatile-humanoid.github.io/resources/HOVER_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('hover_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('hover')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2410.21229">arXiv</a>
<p align="justify"> <i id="hover_abs">Humanoid whole-body control requires adapting to diverse tasks such as navigation, loco-manipulation, and tabletop manipulation, each demanding a different mode of control. For example, navigation relies on root velocity tracking, while tabletop manipulation prioritizes upper-body joint angle tracking. Existing approaches typically train individual policies tailored to a specific command space, limiting their transferability across modes. We present the key insight that full-body kinematic motion imitation can serve as a common abstraction for all these tasks and provide general-purpose motor skills for learning multiple modes of whole-body control. Building on this, we propose HOVER (Humanoid Versatile Controller), a multi-mode policy distillation framework that consolidates diverse control modes into a unified policy. HOVER enables seamless transitions between control modes while preserving the distinct advantages of each, offering a robust and scalable solution for humanoid control across a wide range of modes. By eliminating the need for policy retraining for each control mode, our approach improves efficiency and flexibility for future humanoid applications.</i></p>
<pre xml:space="preserve">
@article{he2024hover,
title={HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots},
author={He, Tairan and Xiao, Wenli and Lin, Toru and Luo, Zhengyi and Xu, Zhenjia and Jiang, Zhenyu and Kautz, Jan and Liu, Changliu and Shi, Guanya and Wang, Xiaolong and Fan, Linxi and Zhu, Yuke},
journal={arXiv preprint arXiv:2410.21229},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://omni.human2humanoid.com/">
<video playsinline autoplay loop muted src="images/omnih2o/Preview-OmniH2O.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://omni.human2humanoid.com/" id="OmniH2O">
<heading>OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning</heading></a><br>
Tairan He*, Zhengyi Luo*, Xialin He*, Wenli Xiao, Chong Zhang, Weinan Zhang, Kris Kitani, Changliu Liu, Guanya Shi <br>
CoRL 2024<br>
</p>
<div class="paper" id="omnih2o">
<a href="https://omni.human2humanoid.com/">webpage</a> |
<a href="https://omni.human2humanoid.com/resources/OmniH2O_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('omnih2o_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('omnih2o')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2406.08858">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/human2humanoid">code</a> |
<a href="https://www.youtube.com/watch?v=ofgxZHv0GMk">video</a> |
<a href="https://spectrum.ieee.org/video-friday-drone-vs-flying-canoe">media (ieee spectrum)</a>
<p align="justify"> <i id="omnih2o_abs">We present OmniH2O (Omni Human-to-Humanoid), a learning-based system for whole-body humanoid teleoperation and autonomy. Using kinematic pose as a universal control interface, OmniH2O enables various ways for a human to control a full-sized humanoid with dexterous hands, including using real-time teleoperation through VR headset, verbal instruction, and RGB camera. OmniH2O also enables full autonomy by learning from teleoperated demonstrations or integrating with frontier models such as GPT-4. OmniH2O demonstrates versatility and dexterity in various real-world whole-body tasks through teleoperation or autonomy, such as playing multiple sports, moving and manipulating objects, and interacting with humans. We develop an RL-based sim-to-real pipeline, which involves large-scale retargeting and augmentation of human motion datasets, learning a real-world deployable policy with sparse sensor input by imitating a privileged teacher policy, and reward designs to enhance robustness and stability. We release the first humanoid whole-body control dataset, OmniH2O-6, containing six everyday tasks, and demonstrate humanoid whole-body skill learning from teleoperated datasets.</i></p>
<pre xml:space="preserve">
@article{he2024omnih2o,
title={OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning},
author={He, Tairan and Luo, Zhengyi and He, Xialin and Xiao, Wenli and Zhang, Chong and Zhang, Weinan and Kitani, Kris and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2406.08858},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://lecar-lab.github.io/wococo/">
<video playsinline autoplay loop muted src="images/wococo/wococo-preview.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://lecar-lab.github.io/wococo/" id="WoCoCo">
<heading>WoCoCo: Learning Whole-Body Humanoid Control with Sequential Contacts</heading></a><br>
Chong Zhang*, Wenli Xiao*, Tairan He, Guanya Shi <br>
CoRL 2024 <b style="color:rgb(255, 100, 100);">(Oral)</b><br>
</p>
<div class="paper" id="wococo">
<a href="https://lecar-lab.github.io/wococo/">webpage</a> |
<a href="https://arxiv.org/pdf/2406.06005">pdf</a> |
<a href="javascript:toggleblock('wococo_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('wococo')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2406.06005">arXiv</a> |
<a href="https://www.youtube.com/watch?v=L18X-QbXqPI&ab_channel=LeCARLabatCMU">teaser video</a> |
<a href="https://www.youtube.com/watch?v=_S6DNhPDuTw&t=1s&ab_channel=LeCARLabatCMU">introduction video</a> |
<a href="https://spectrum.ieee.org/video-friday-drone-vs-flying-canoe">media (ieee spectrum)</a>
<p align="justify"> <i id="wococo_abs">Humanoid activities involving sequential contacts are crucial for complex robotic interactions and operations in the real world and are traditionally solved by model-based motion planning, which is time-consuming and often relies on simplified dynamics models. Although model-free reinforcement learning (RL) has become a powerful tool for versatile and robust whole-body humanoid control, it still requires tedious task-specific tuning and state machine design and suffers from long-horizon exploration issues in tasks involving contact sequences. In this work, we propose WoCoCo (Whole-Body Control with Sequential Contacts), a unified framework to learn whole-body humanoid control with sequential contacts by naturally decomposing the tasks into separate contact stages. Such decomposition facilitates simple and general policy learning pipelines through task-agnostic reward and sim-to-real designs, requiring only one or two task-related terms to be specified for each task. We demonstrated that end-to-end RL-based controllers trained with WoCoCo enable four challenging whole-body humanoid tasks involving diverse contact sequences in the real world without any motion priors: 1) versatile parkour jumping, 2) box loco-manipulation, 3) dynamic clap-and-tap dancing, and 4) cliffside climbing. We further show that WoCoCo is a general framework beyond humanoid by applying it in 22-DoF dinosaur robot loco-manipulation tasks.</i></p>
<pre xml:space="preserve">
@article{zhang2024wococo,
title={WoCoCo: Learning Whole-Body Humanoid Control with Sequential Contacts},
author={Zhang, Chong and Xiao, Wenli and He, Tairan and Shi, Guanya},
journal={arXiv preprint arXiv:2406.06005},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://human2humanoid.com/">
<video playsinline autoplay loop muted src="images/h2o/h2o-preview.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://human2humanoid.com/" id="H2O">
<heading>Learning Human-to-Humanoid Real-Time Whole-Body Teleoperation</heading></a><br>
Tairan He*, Zhengyi Luo*, Wenli Xiao, Chong Zhang, Kris Kitani, Changliu Liu, Guanya Shi <br>
IROS 2024 <b style="color:rgb(255, 100, 100);">(Oral Presentation)</b><br>
ICRA 2024 Agile Robotics Workshop (Spotlight)<br>
</p>
<div class="paper" id="h2o">
<a href="https://human2humanoid.com/">webpage</a> |
<a href="https://human2humanoid.com/resources/H2O_paper.pdf">pdf</a> |
<a href="javascript:toggleblock('h2o_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('h2o')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2403.04436">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/human2humanoid">code</a> |
<a href="https://www.youtube.com/watch?v=0W4N2q7xtcQ&ab_channel=LeCARLabatCMU">video</a> |
<a href="https://spectrum.ieee.org/video-friday-human-to-humanoid">media (ieee spectrum)</a>
<p align="justify"> <i id="h2o_abs">We present <span style="color: Red;">H</span>uman <span style="color: Red;">to</span> Human<span style="color: Red;">o</span>id (<strong>H2O</strong>), a reinforcement learning (RL) based framework that enables real-time whole-body teleoperation of a full-sized humanoid robot with only an RGB camera. To create a large-scale retargeted motion dataset of human movements for humanoid robots, we propose a scalable ''sim-to-data" process to filter and pick feasible motions using a privileged motion imitator. Afterwards, we train a robust real-time humanoid motion imitator in simulation using these refined motions and transfer it to the real humanoid robot in a zero-shot manner. We successfully achieve teleoperation of dynamic whole-body motions in real-world scenarios, including walking, back jumping, kicking, turning, waving, pushing, boxing, etc. To the best of our knowledge, this is the first demonstration to achieve learning-based real-time whole-body humanoid teleoperation.</i></p>
<pre xml:space="preserve">
@article{he2024learning,
title={Learning human-to-humanoid real-time whole-body teleoperation},
author={He, Tairan and Luo, Zhengyi and Xiao, Wenli and Zhang, Chong and Kitani, Kris and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2403.04436},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://agile-but-safe.github.io/">
<video playsinline autoplay loop muted src="images/agile-but-safe/abs-gif-preview-long.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://agile-but-safe.github.io/" id="AGILE-BUT-SAFE">
<heading>Agile But Safe: Learning Collision-Free High-Speed Legged Locomotion</heading></a><br>
Tairan He*, Chong Zhang*, Wenli Xiao, Guanqi He, Changliu Liu, Guanya Shi<br>
RSS 2024 <b style="color:rgb(255, 100, 100);">(Outstanding Student Paper Award Finalist - Top 3)</b><br>
ICRA 2024 Agile Robotics Workshop (Spotlight)<br>
</p>
<div class="paper" id="agile-but-safe">
<a href="https://agile-but-safe.github.io/">webpage</a> |
<a href="https://arxiv.org/pdf/2401.17583.pdf">pdf</a> |
<a href="javascript:toggleblock('agile-but-safe_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('agile-but-safe')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2401.17583">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/ABS">code</a> |
<a href="https://www.youtube.com/watch?v=elWwPn5IhjA">real-world demo</a> |
<a href="https://www.youtube.com/watch?v=oyMf-yaB2d0">video story</a> |
<a href="https://spectrum.ieee.org/video-friday-agile-but-safe">media (ieee spectrum)</a>
<p align="justify"> <i id="agile-but-safe_abs">Legged robots navigating cluttered environments must be jointly agile for efficient task execution and safe to avoid collisions with obstacles or humans. Existing studies either develop conservative controllers (< 1.0 m/s) to ensure safety, or focus on agility without considering potentially fatal collisions. This paper introduces Agile But Safe (ABS), a learning-based control framework that enables agile and collision-free locomotion for quadrupedal robots. ABS involves an agile policy to execute agile motor skills amidst obstacles and a recovery policy to prevent failures, collaboratively achieving high-speed and collision-free navigation. The policy switch in ABS is governed by a learned control-theoretic reach-avoid value network, which also guides the recovery policy as an objective function, thereby safeguarding the robot in a closed loop. The training process involves the learning of the agile policy, the reach-avoid value network, the recovery policy, and an exteroception representation network, all in simulation. These trained modules can be directly deployed in the real world with onboard sensing and computation, leading to high-speed and collision-free navigation in confined indoor and outdoor spaces with both static and dynamic obstacles.</i></p>
<pre xml:space="preserve">
@article{he2024agile,
title={Agile but safe: Learning collision-free high-speed legged locomotion},
author={He, Tairan and Zhang, Chong and Xiao, Wenli and He, Guanqi and Liu, Changliu and Shi, Guanya},
journal={arXiv preprint arXiv:2401.17583},
year={2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://sites.google.com/view/safe-deep-policy-adaptation">
<video playsinline autoplay loop muted src="images/safedpa/SafeDPA-showoff.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://sites.google.com/view/safe-deep-policy-adaptation" id="SAFEDPA">
<heading>Safe Deep Policy Adaptation</heading></a><br>
Wenli Xiao*, Tairan He*, John Dolan, Guanya Shi<br>
ICRA 2024<br>
CoRL 2023 Deployable Workshop<br>
<!-- <b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 3)</b> -->
</p>
<div class="paper" id="safedpa">
<a href="https://sites.google.com/view/safe-deep-policy-adaptation">webpage</a> |
<a href="https://arxiv.org/pdf/2310.08602.pdf">pdf</a> |
<a href="javascript:toggleblock('safedpa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('safedpa')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2310.08602">arXiv</a> |
<a href="https://github.com/LeCAR-Lab/SafeDPA">code</a> |
<a href="https://www.youtube.com/watch?v=PkyRzlRQVbE">video</a>
<p align="justify"> <i id="safedpa_abs">A critical goal of autonomy and artificial intelligence is enabling autonomous robots to rapidly adapt in dynamic and uncertain environments. Classic adaptive control and safe control provide stability and safety guarantees but are limited to specific system classes. In contrast, policy adaptation based on reinforcement learning (RL) offers versatility and generalizability but presents safety and robustness challenges. We propose SafeDPA, a novel RL and control framework that simultaneously tackles the problems of policy adaptation and safe reinforcement learning. SafeDPA jointly learns adaptive policy and dynamics models in simulation, predicts environment configurations, and fine-tunes dynamics models with few-shot real-world data. A safety filter based on the Control Barrier Function (CBF) on top of the RL policy is introduced to ensure safety during real-world deployment. We provide theoretical safety guarantees of SafeDPA and show the robustness of SafeDPA against learning errors and extra perturbations. Comprehensive experiments on (1) classic control problems (Inverted Pendulum), (2) simulation benchmarks (Safety Gym), and (3) a real-world agile robotics platform (RC Car) demonstrate great superiority of SafeDPA in both safety and task performance, over state-of-the-art baselines. Particularly, SafeDPA demonstrates notable generalizability, achieving a 300% increase in safety rate compared to the baselines, under unseen disturbances in real-world experiments.</i></p>
<pre xml:space="preserve">
@article{xiao2023safe,
title={Safe Deep Policy Adaptation},
author={Xiao, Wenli and He, Tairan and Dolan, John and Shi, Guanya},
journal={arXiv preprint arXiv:2310.08602},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/abs/2310.03379">
<video playsinline autoplay loop muted src="images/acs/ACS-Video.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2310.03379" id="ACS">
<heading>Progressive Adaptive Chance-Constrained Safeguards for Reinforcement Learning</heading></a><br>
Zhaorun Chen, Binhao Chen, Tairan He, Liang Gong, Chengliang Liu<br>
IROS 2024 <b style="color:rgb(255, 100, 100);">(Oral Pitch)</b><br>
</p>
<div class="paper" id="acs">
<!-- <a href="https://manipulation-locomotion.github.io">webpage</a> | -->
<a href="https://arxiv.org/pdf/2310.03379.pdf">pdf</a> |
<a href="javascript:toggleblock('acs_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('acs')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2310.03379">arXiv</a>
<p align="justify"> <i id="acs_abs">An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective and requires immense engineering to support coordination between the arm and legs, error can propagate across modules causing non-smooth unnatural motions. It is also biological implausible where there is evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning. We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups.</i></p>
<pre xml:space="preserve">
@article{chen2023progressive,
title={Progressive Adaptive Chance-Constrained Safeguards for Reinforcement Learning},
author={Chen, Zhaorun and Chen, Binhao and He, Tairan and Gong, Liang and Liu, Chengliang},
journal={arXiv preprint arXiv:2310.03379},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2302.03122">
<img src="images/saferl_survey/saferl_survey.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="SAFERL_SURVEY">
<heading>State-wise Safe Reinforcement Learning: A Survey</heading></a><br>
Weiye Zhao, Tairan He, Rui Chen, Tianhao Wei, Changliu Liu<br>
IJCAI 2023<br>
</p>
<div class="paper" id="saferl_survey">
<a href="https://arxiv.org/pdf/2302.03122.pdf">pdf</a> |
<a href="javascript:toggleblock('saferl_survey_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('saferl_survey')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2302.03122">arXiv</a>
<p align="justify"> <i id="saferl_survey_abs">Despite the tremendous success of Reinforcement Learning (RL) algorithms in simulation environments, applying RL to real-world applications still faces many challenges. A major concern is safety, in another word, constraint satisfaction. State-wise constraints are one of the most common constraints in real-world applications and one of the most challenging constraints in Safe RL. Enforcing state-wise constraints is necessary and essential to many challenging tasks such as autonomous driving, robot manipulation. This paper provides a comprehensive review of existing approaches that address state-wise constraints in RL. Under the framework of State-wise Constrained Markov Decision Process (SCMDP), we will discuss the connections, differences, and trade-offs of existing approaches in terms of (i) safety guarantee and scalability, (ii) safety and reward performance, and (iii) safety after convergence and during training. We also summarize limitations of current methods and discuss potential future directions.</i></p>
<pre xml:space="preserve">
@inproceedings{ijcai2023p763,
title = {State-wise Safe Reinforcement Learning: A Survey},
author = {Zhao, Weiye and He, Tairan and Chen, Rui and Wei, Tianhao and Liu, Changliu},
booktitle = {Proceedings of the Thirty-Second International Joint Conference on
Artificial Intelligence, {IJCAI-23}},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
editor = {Edith Elkind},
pages = {6814--6822},
year = {2023},
month = {8},
note = {Survey Track},
doi = {10.24963/ijcai.2023/763},
url = {https://doi.org/10.24963/ijcai.2023/763},
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://sites.google.com/view/patchail/">
<video playsinline autoplay loop muted src="images/patchail/PatchAIL-Allplay-3.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://sites.google.com/view/patchail/" id="PATCHAIL">
<heading>Visual Imitation Learning with Patch Rewards</heading></a><br>
Minghuan Liu, Tairan He, Weinan Zhang, Shuicheng Yan, Zhongwen Xu <br>
ICLR 2023<br>
</p>
<div class="paper" id="patchail">
<a href="https://sites.google.com/view/patchail/">webpage</a> |
<a href="https://arxiv.org/pdf/2302.00965.pdf">pdf</a> |
<a href="javascript:toggleblock('patchail_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('patchail')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2302.00965">arXiv</a> |
<a href="https://github.com/sail-sg/PatchAIL">code</a>
<p align="justify"> <i id="patchail_abs">Visual imitation learning enables reinforcement learning agents to learn to be- have from expert visual demonstrations such as videos or image sequences, with- out explicit, well-defined rewards. Previous research either adopted supervised learning techniques or induce simple and coarse scalar rewards from pixels, ne- glecting the dense information contained in the image demonstrations. In this work, we propose to measure the expertise of various local regions of image sam- ples, or called patches, and recover multi-dimensional patch rewards accordingly. Patch reward is a more precise rewarding characterization that serves as a fine- grained expertise measurement and visual explainability tool. Specifically, we present Adversarial Imitation Learning with Patch Rewards (PatchAIL), which employs a patch-based discriminator to measure the expertise of different local parts from given images and provide patch rewards. The patch-based knowledge is also used to regularize the aggregated reward and stabilize the training. We evaluate our method on DeepMind Control Suite and Atari tasks. The experiment results have demonstrated that PatchAIL outperforms baseline methods and pro- vides valuable interpretations for visual demonstrations.</i></p>
<pre xml:space="preserve">
@article{liu2023visual,
title={Visual imitation learning with patch rewards},
author={Liu, Minghuan and He, Tairan and Zhang, Weinan and Yan, Shuicheng and Xu, Zhongwen},
journal={arXiv preprint arXiv:2302.00965},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2209.09134">
<img src="images/sisos/sisos.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="SISOS">
<heading>Safety Index Synthesis via Sum-of-Squares Programming</heading></a><br>
Weiye Zhao*, Tairan He*, Tianhao Wei, Simin Liu, Changliu Liu<br>
ACC 2023<br>
</p>
<div class="paper" id="sisos">
<a href="https://arxiv.org/pdf/2209.09134.pdf">pdf</a> |
<a href="javascript:toggleblock('sisos_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('sisos')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2209.09134">arXiv</a>
<p align="justify"> <i id="sisos_abs">Control systems often need to satisfy strict safety requirements. Safety index provides a handy way to evaluate the safety level of the system and derive the resulting safe control policies. However, designing safety index functions under control limits is difficult and requires a great amount of expert knowledge. This paper proposes a framework for synthesizing the safety index for general control systems using sum-of-squares programming. Our approach is to show that ensuring the non-emptiness of safe control on the safe set boundary is equivalent to a local manifold positiveness problem. We then prove that this problem is equivalent to sum-of-squares programming via the Positivstellensatz of algebraic geometry. We validate the proposed method on robot arms with different degrees of freedom and ground vehicles. The results show that the synthesized safety index guarantees safety and our method is effective even in high-dimensional robot systems.</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2023safety,
title={Safety index synthesis via sum-of-squares programming},
author={Zhao, Weiye and He, Tairan and Wei, Tianhao and Liu, Simin and Liu, Changliu},
booktitle={2023 American Control Conference (ACC)},
pages={732--737},
year={2023},
organization={IEEE}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2210.01041">
<img src="images/uaissa/uaissa.png" alt="sym" width="90%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="UAISSA">
<heading>Probabilistic Safeguard for Reinforcement Learning Using Safety Index Guided Gaussian Process Models</heading></a><br>
Weiye Zhao*, Tairan He*, Changliu Liu<br>
L4DC 2023<br>
</p>
<div class="paper" id="uaissa">
<a href="https://arxiv.org/pdf/2210.01041.pdf">pdf</a> |
<a href="javascript:toggleblock('uaissa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('uaissa')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.01041">arXiv</a>
<p align="justify"> <i id="uaissa_abs">Safety is one of the biggest concerns to applying reinforcement learning (RL) to the physical world. In its core part, it is challenging to ensure RL agents persistently satisfy a hard state constraint without white-box or black-box dynamics models. This paper presents an integrated model learning and safe control framework to safeguard any agent, where its dynamics are learned as Gaussian processes. The proposed theory provides (i) a novel method to construct an offline dataset for model learning that best achieves safety requirements; (ii) a parameterization rule for safety index to ensure the existence of safe control; (iii) a safety guarantee in terms of probabilistic forward invariance when the model is learned using the aforementioned dataset. Simulation results show that our framework guarantees almost zero safety violation on various continuous control tasks.</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2023probabilistic,
title={Probabilistic safeguard for reinforcement learning using safety index guided gaussian process models},
author={Zhao, Weiye and He, Tairan and Liu, Changliu},
booktitle={Learning for Dynamics and Control Conference},
pages={783--796},
year={2023},
organization={PMLR}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://arxiv.org/abs/2302.03122">
<img src="images/autocost/autocost.png" alt="sym" width="80%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="AUTOCOST">
<heading>AutoCost: Evolving Intrinsic Cost for Zero-violation Reinforcement Learning</heading></a><br>
Tairan He, Weiye Zhao, Changliu Liu<br>
AAAI 2023<br>
</p>
<div class="paper" id="autocost">
<a href="https://arxiv.org/pdf/2301.10339.pdf">pdf</a> |
<a href="javascript:toggleblock('autocost_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('autocost')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2301.10339">arXiv</a>
<p align="justify"> <i id="autocost_abs">Safety is a critical hurdle that limits the application of deep reinforcement learning (RL) to real-world control tasks. To this end, constrained reinforcement learning leverages cost functions to improve safety in constrained Markov decision processes. However, such constrained RL methods fail to achieve zero violation even when the cost limit is zero. This paper analyzes the reason for such failure, which suggests that a proper cost function plays an important role in constrained RL. Inspired by the analysis, we propose AutoCost, a simple yet effective framework that automatically searches for cost functions that help constrained RL to achieve zero-violation performance. We validate the proposed method and the searched cost function on the safe RL benchmark Safety Gym. We compare the performance of augmented agents that use our cost function to provide additive intrinsic costs with baseline agents that use the same policy learners but with only extrinsic costs. Results show that the converged policies with intrinsic costs in all environments achieve zero constraint violation and comparable performance with baselines.</i></p>
<pre xml:space="preserve">
@article{he2023autocost,
title={Autocost: Evolving intrinsic cost for zero-violation reinforcement learning},
author={He, Tairan and Zhao, Weiye and Liu, Changliu},
journal={arXiv preprint arXiv:2301.10339},
year={2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center">
<a href="https://seqml.github.io/a2ls/">
<img src="images/a2ls/a2ls.png" alt="sym" width="100%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://seqml.github.io/a2ls/" id="A2LS">
<heading>Reinforcement Learning with Automated Auxiliary Loss Search</heading></a><br>
Tairan He, Yuge Zhang, Kan Ren, Minghuan Liu, Che Wang, Weinan Zhang, Yuqing Yang, Dongsheng Li<br>
NeurIPS 2022<br>
</p>
<div class="paper" id="a2ls">
<a href="https://seqml.github.io/a2ls/">webpage</a> |
<a href="https://arxiv.org/pdf/2210.06041.pdf">pdf</a> |
<a href="javascript:toggleblock('a2ls_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('a2ls')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.06041">arXiv</a> |
<a href="https://github.com/microsoft/autorl-research/tree/main/a2ls">code</a>
<p align="justify"> <i id="a2ls_abs">A good state representation is crucial to solving complicated reinforcement learning (RL) challenges. Many recent works focus on designing auxiliary losses for learning informative representations. Unfortunately, these handcrafted objectives rely heavily on expert knowledge and may be sub-optimal. In this paper, we propose a principled and universal method for learning better representations with auxiliary loss functions, named Automated Auxiliary Loss Search (A2LS), which automatically searches for top-performing auxiliary loss functions for RL. Specifically, based on the collected trajectory data, we define a general auxiliary loss space of size 7.5×1020 and explore the space with an efficient evolutionary search strategy. Empirical results show that the discovered auxiliary loss (namely, A2-winner) significantly improves the performance on both high-dimensional (image) and low-dimensional (vector) unseen tasks with much higher efficiency, showing promising generalization ability to different settings and even different benchmark domains. We conduct a statistical analysis to reveal the relations between patterns of auxiliary losses and RL performance.</i></p>
<pre xml:space="preserve">
@inproceedings{he2022reinforcement,
title={Reinforcement Learning with Automated Auxiliary Loss Search},
author={He, Tairan and Zhang, Yuge and Ren, Kan and Liu, Minghuan and Wang, Che and Zhang, Weinan and Yang, Yuqing and Li, Dongsheng},
booktitle={Advances in Neural Information Processing Systems},
year={2022}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://proceedings.mlr.press/v164/zhao22a.html">
<video playsinline autoplay loop muted src="images/issa/Comparison.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="55%" valign="top">
<p><a href="https://proceedings.mlr.press/v164/zhao22a.html" id="ISSA">
<heading>Model-free Safe Control for Zero-Violation Reinforcement Learning</heading></a><br>
Weiye Zhao, Tairan He, Changliu Liu<br>
CoRL 2021<br>
</p>
<div class="paper" id="issa">
<a href="https://proceedings.mlr.press/v164/zhao22a/zhao22a.pdf">pdf</a> |
<a href="javascript:toggleblock('issa_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('issa')" class="togglebib">bibtex</a> |
<a href="https://openreview.net/forum?id=UGp6FDaxB0f">openreview</a> |
<a href="https://github.com/TairanHe/ISSA">code</a>
<p align="justify"> <i id="issa_abs">While deep reinforcement learning (DRL) has impressive performance in a variety of continuous control tasks, one critical hurdle that limits the application of DRL to physical world is the lack of safety guarantees. It is challenging for DRL agents to persistently satisfy a hard state constraint (known as the safety specification) during training. On the other hand, safe control methods with safety guarantees have been extensively studied. However, to synthesize safe control, these methods require explicit analytical models of the dynamic system; but these models are usually not available in DRL. This paper presents a model-free safe control strategy to synthesize safeguards for DRL agents, which will ensure zero safety violation during training. In particular, we present an implicit safe set algorithm, which synthesizes the safety index (also called the barrier certificate) and the subsequent safe control law only by querying a black-box dynamic function (e.g., a digital twin simulator). The theoretical results indicate the implicit safe set algorithm guarantees forward invariance and finite-time convergence to the safe set. We validate the proposed method on the state-of-the-art safety benchmark Safety Gym. Results show that the proposed method achieves zero safety violation and gains 95 cumulative reward compared to state-of-the-art safe DRL methods. Moreover, it can easily scale to high-dimensional systems.
</i></p>
<pre xml:space="preserve">
@inproceedings{zhao2021model,
title={Model-free safe control for zero-violation reinforcement learning},
author={Zhao, Weiye and He, Tairan and Liu, Changliu},
booktitle={5th Annual Conference on Robot Learning},
year={2021}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/abs/2004.09395">
<video playsinline autoplay loop muted src="images/ebil/ebil_heat_40.mp4" poster="./images/loading-icon.gif" alt="sym" width="80%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2302.03122" id="EBIL">
<heading>Energy-Based Imitation Learning</heading></a><br>
Minghuan Liu, Tairan He, Minkai Xu, Weinan Zhang <br>
AAMAS 2021 <b style="color:rgb(255, 100, 100);">(Oral)</b><br>
</p>
<div class="paper" id="ebil">
<a href="https://arxiv.org/pdf/2004.09395.pdf">pdf</a> |
<a href="javascript:toggleblock('ebil_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('ebil')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2004.09395">arXiv</a> |
<a href="https://github.com/apexrl/EBIL-torch">code</a>
<p align="justify"> <i id="ebil_abs">A good state representation is crucial to solving complicated reinforcement learning (RL) challenges. Many recent works focus on designing auxiliary losses for learning informative representations. Unfortunately, these handcrafted objectives rely heavily on expert knowledge and may be sub-optimal. In this paper, we propose a principled and universal method for learning better representations with auxiliary loss functions, named Automated Auxiliary Loss Search (A2LS), which automatically searches for top-performing auxiliary loss functions for RL. Specifically, based on the collected trajectory data, we define a general auxiliary loss space of size 7.5×1020 and explore the space with an efficient evolutionary search strategy. Empirical results show that the discovered auxiliary loss (namely, A2-winner) significantly improves the performance on both high-dimensional (image) and low-dimensional (vector) unseen tasks with much higher efficiency, showing promising generalization ability to different settings and even different benchmark domains. We conduct a statistical analysis to reveal the relations between patterns of auxiliary losses and RL performance.</i></p>
<pre xml:space="preserve">
@inproceedings{liu2021energy,
title={Energy-Based Imitation Learning},
author={Liu, Minghuan and He, Tairan and Xu, Minkai and Zhang, Weinan},
booktitle={Proceedings of the 20th International Conference on Autonomous Agents and Multiagent Systems},
year={2021}
}
</pre>
</div>
</td>
</tr>
<!-- <tr>
<td width="40%" valign="top" align="center"><a href="https://manipulation-locomotion.github.io">
<video playsinline autoplay loop muted src="images/wbc-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://manipulation-locomotion.github.io" id="MANIPLOCO">
<heading>Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion</heading></a><br>
Tairan He*, Xuxin Cheng*, Deepak Pathak<br>
CoRL 2022 (Oral)<br>
<b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 4)</b>
</p>
<div class="paper" id="maniploco">
<a href="https://manipulation-locomotion.github.io">webpage</a> |
<a href="https://arxiv.org/pdf/2210.10044.pdf">pdf</a> |
<a href="javascript:toggleblock('maniploco_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('maniploco')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.10044">arXiv</a> |
<a href="https://openreview.net/forum?id=zldI4UpuG7v">OpenReview</a> |
<a href="https://www.youtube.com/watch?v=i9EdPl8uJUA">video</a>
<p align="justify"> <i id="maniploco_abs">An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective and requires immense engineering to support coordination between the arm and legs, error can propagate across modules causing non-smooth unnatural motions. It is also biological implausible where there is evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning. We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2022deep,
author = {Fu, Zipeng and Cheng, Xuxin and
Pathak, Deepak},
title = {Deep Whole-Body Control: Learning a Unified Policy
for Manipulation and Locomotion},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2022}
}
</pre>
</div>
</td>
</tr> -->
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Projects</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td width="40%" valign="top" align="center">
<a href="https://www.bilibili.com/video/BV1Rp4y187ZJ">
<img src="images/wkfg/wkfgicon.png" alt="sym" width="70%" style="padding-top:0px; padding-bottom:0px; border-radius:15px; height: auto;">
</a>
</td>
<td width="60%" valign="top">
<p><a href="https://www.bilibili.com/video/BV1Rp4y187ZJ" id="AUTOCOST">
<heading>SJTU Anonymous Forum 「无可奉告」</heading></a><br>
</p>
<div class="paper" id="autocost">
<a href="https://github.com/TairanHe/SJTU-Anonymous_Forum"> Android Code</a> |
<a href="https://github.com/oscardhc/Forum"> iOS Code</a> |
<a href="http://wukefenggao.cn"> Project Page</a> |
<a href="https://www.bilibili.com/video/BV1Rp4y187ZJ"> Farewell Video</a>
<p align="justify"> <i id="wkfg_abs">A carefree forum platform for SJTUers sharing and talking with anonymous identity. More than <font color="red"><em><strong>10000+</strong></em></font> users used「无可奉告」in the SJTU campus.</i></p>
</div>
</td>
</tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Reviewer Service</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td style="padding:20px;width:100%;vertical-align:middle">
<!-- International Joint Conference on Artificial Intelligence <b>(IJCAI)</b> 2024
<br> -->
International Conference on Machine Learning <b>(ICML)</b>, 2024
<br>
International Conference on Learning Representations <b>(ICLR)</b>, 2024
<br>
IEEE Conference on Decision and Control <b>(CDC)</b>, 2023
<br>
Conference on Neural Information Processing Systems <b>(NeurIPS)</b>, 2023
<br>
Learning for Dynamics &amp; Control Conference <b>(L4DC)</b>, 2023
<br>
AAAI Conference on Artificial Intelligence <b>(AAAI)</b>, 2023, 2024, 2025
<br>
Conference on Robot Learning <b>(CoRL)</b>, 2022, 2023, 2024
</td>
</tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20">
<tbody>
<tr>
<td style="padding:0px">
<br>
<br>
<div>
<script type="text/javascript" id="clustrmaps" src="//cdn.clustrmaps.com/map_v2.js?cl=080808&w=350&t=tt&d=Biz007_Pw8FVsAWycLRoKM_5XR_da9ccb8qGNbWVwnk&co=ffffff&cmo=3acc3a&cmn=ff5353&ct=808080"></script>
<!-- <a target="_top" href="http://clustrmaps.com/site/1acpn?utm_source=widget&utm_campaign=widget_ctr" id="clustrmaps-widget-v2" class="clustrmaps-map-control" style="width: 300px;">
--> </div>
</td>
</tr>
</tbody>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="2">
<tr><td><br><p align="right">
Website template from <a href="http://www.cs.berkeley.edu/~barron/">here</a> and <a href="http://www.cs.cmu.edu/~dpathak/">here</a>
</p></td></tr>
</table>
</td></tr>
</table>
<script xml:space="preserve" language="JavaScript">
hideallbibs();
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('material_review_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('ieee_iot_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('acm_turc_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('aog_mcts_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('pragmatics_marl_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('collab_marl_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('rma_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('energyloco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('navloco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('wococo_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('omnih2o_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('hover_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('h2o_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('agile-but-safe_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('safedpa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('acs_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('saferl_survey_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('patchail_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('sisos_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('uaissa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('autocost_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('a2ls_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('issa_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('ebil_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('maniploco_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('parkour_abs');
</script>
<script xml:space="preserve" language="JavaScript">
hideblock('mobile_aloha_abs');
</script>
</body>
</html>