diff --git a/README.md b/README.md index f33cf738..8894b81a 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ async function main() { label.innerText = report.text; }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, /*engineConfig=*/{ initProgressCallback: initProgressCallback } ); @@ -53,10 +53,10 @@ async function main() { main(); ``` -Note that if you need to separate the instantiation of `webllm.Engine` from loading a model, you could substitute +Note that if you need to separate the instantiation of `webllm.MLCEngine` from loading a model, you could substitute ```typescript -const engine: webllm.EngineInterface = await webllm.CreateEngine( +const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, /*engineConfig=*/{ initProgressCallback: initProgressCallback } ); @@ -65,7 +65,7 @@ const engine: webllm.EngineInterface = await webllm.CreateEngine( with the equivalent ```typescript -const engine: webllm.EngineInterface = new webllm.Engine(); +const engine: webllm.MLCEngineInterface = new webllm.MLCEngine(); engine.setInitProgressCallback(initProgressCallback); await engine.reload(selectedModel, chatConfig, appConfig); ``` @@ -81,7 +81,7 @@ async function main() { console.log(report.text); }; const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; - const engine = await webllm.CreateEngine( + const engine = await webllm.CreateMLCEngine( selectedModel, {initProgressCallback: initProgressCallback} ); @@ -105,31 +105,31 @@ WebLLM comes with API support for WebWorker so you can hook the generation process into a separate worker thread so that the compute in the webworker won't disrupt the UI. -We first create a worker script that created a Engine and +We first create a worker script that created a MLCEngine and hook it up to a handler that handles requests. ```typescript // worker.ts -import { EngineWorkerHandler, Engine } from "@mlc-ai/web-llm"; +import { MLCEngineWorkerHandler, MLCEngine } from "@mlc-ai/web-llm"; -// Hookup an Engine to a worker handler -const engine = new Engine(); -const handler = new EngineWorkerHandler(engine); +// Hookup an MLCEngine to a worker handler +const engine = new MLCEngine(); +const handler = new MLCEngineWorkerHandler(engine); self.onmessage = (msg: MessageEvent) => { handler.onmessage(msg); }; ``` -Then in the main logic, we create a `WebWorkerEngine` that -implements the same `EngineInterface`. The rest of the logic remains the same. +Then in the main logic, we create a `WebWorkerMLCEngine` that +implements the same `MLCEngineInterface`. The rest of the logic remains the same. ```typescript // main.ts import * as webllm from "@mlc-ai/web-llm"; async function main() { - // Use a WebWorkerEngine instead of Engine here - const engine: webllm.EngineInterface = await webllm.CreateWebWorkerEngine( + // Use a WebWorkerMLCEngine instead of MLCEngine here + const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( /*worker=*/new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } @@ -147,29 +147,29 @@ WebLLM comes with API support for ServiceWorker so you can hook the generation p into a service worker to avoid reloading the model in every page visit and optimize your application's offline experience. 
-We first create a service worker script that created a Engine and hook it up to a handler +We first create a service worker script that creates an MLCEngine and hooks it up to a handler that handles requests when the service worker is ready. ```typescript // sw.ts import { - ServiceWorkerEngineHandler, - EngineInterface, - Engine, + ServiceWorkerMLCEngineHandler, + MLCEngineInterface, + MLCEngine, } from "@mlc-ai/web-llm"; -const engine: EngineInterface = new Engine(); -let handler: ServiceWorkerEngineHandler; +const engine: MLCEngineInterface = new MLCEngine(); +let handler: ServiceWorkerMLCEngineHandler; self.addEventListener("activate", function (event) { - handler = new ServiceWorkerEngineHandler(engine); + handler = new ServiceWorkerMLCEngineHandler(engine); console.log("Service Worker is ready") }); ``` Then in the main logic, we register the service worker and then create the engine using -`CreateServiceWorkerEngine` function. The rest of the logic remains the same. +the `CreateServiceWorkerMLCEngine` function. The rest of the logic remains the same. ```typescript // main.ts @@ -180,8 +180,8 @@ if ("serviceWorker" in navigator) { ); } -const engine: webllm.EngineInterface = - await webllm.CreateServiceWorkerEngine( +const engine: webllm.MLCEngineInterface = + await webllm.CreateServiceWorkerMLCEngine( /*modelId=*/selectedModel, /*engineConfig=*/{ initProgressCallback: initProgressCallback } ); @@ -208,7 +208,7 @@ WebLLM is designed to be fully compatible with [OpenAI API](https://platform.ope ## Model Support We export all supported models in `webllm.prebuiltAppConfig`, where you can see a list of models -that you can simply call `const engine: webllm.EngineInterface = await webllm.CreateEngine(anyModel)` with. +that you can simply call `const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(anyModel)` with. Prebuilt models include: - Llama-2 - Llama-3 @@ -257,7 +257,7 @@ async main() { // and cache it in the browser cache // The chat will also load the model library from "/url/to/myllama3b.wasm", // assuming that it is compatible to the model in myLlamaUrl. - const engine = await webllm.CreateEngine( + const engine = await webllm.CreateMLCEngine( "MyLlama-3b-v1-q4f32_0", /*engineConfig=*/{ chatOpts: chatOpts, appConfig: appConfig } ); diff --git a/examples/cache-usage/src/cache_usage.ts b/examples/cache-usage/src/cache_usage.ts index 7a498df8..e9dc7af6 100644 --- a/examples/cache-usage/src/cache_usage.ts +++ b/examples/cache-usage/src/cache_usage.ts @@ -25,7 +25,7 @@ async function main() { // 1.
This triggers downloading and caching the model with either Cache or IndexedDB Cache const selectedModel = "Phi2-q4f16_1" - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( "Phi2-q4f16_1", { initProgressCallback: initProgressCallback, appConfig: appConfig } ); diff --git a/examples/chrome-extension-webgpu-service-worker/src/background.ts b/examples/chrome-extension-webgpu-service-worker/src/background.ts index 4932fbf9..4b467347 100644 --- a/examples/chrome-extension-webgpu-service-worker/src/background.ts +++ b/examples/chrome-extension-webgpu-service-worker/src/background.ts @@ -1,13 +1,13 @@ -import { ExtensionServiceWorkerEngineHandler, Engine } from "@mlc-ai/web-llm"; +import { ExtensionServiceWorkerMLCEngineHandler, MLCEngine } from "@mlc-ai/web-llm"; // Hookup an engine to a service worker handler -const engine = new Engine(); +const engine = new MLCEngine(); let handler; chrome.runtime.onConnect.addListener(function (port) { console.assert(port.name === "web_llm_service_worker"); if (handler === undefined) { - handler = new ExtensionServiceWorkerEngineHandler(engine, port); + handler = new ExtensionServiceWorkerMLCEngineHandler(engine, port); } else { handler.setPort(port); } diff --git a/examples/chrome-extension-webgpu-service-worker/src/popup.ts b/examples/chrome-extension-webgpu-service-worker/src/popup.ts index 55adc5b1..630486ce 100644 --- a/examples/chrome-extension-webgpu-service-worker/src/popup.ts +++ b/examples/chrome-extension-webgpu-service-worker/src/popup.ts @@ -7,8 +7,8 @@ import "./popup.css"; import { ChatCompletionMessageParam, - CreateExtensionServiceWorkerEngine, - EngineInterface, + CreateExtensionServiceWorkerMLCEngine, + MLCEngineInterface, InitProgressReport, } from "@mlc-ai/web-llm"; import { prebuiltAppConfig } from "@mlc-ai/web-llm"; @@ -36,7 +36,7 @@ const progressBar: ProgressBar = new Line("#loadingContainer", { svgStyle: { width: "100%", height: "100%" }, }); -/***************** Web-LLM Engine Configuration *****************/ +/***************** Web-LLM MLCEngine Configuration *****************/ const initProgressCallback = (report: InitProgressReport) => { progressBar.animate(report.progress, { duration: 50, @@ -46,7 +46,7 @@ const initProgressCallback = (report: InitProgressReport) => { } }; -const engine: EngineInterface = await CreateExtensionServiceWorkerEngine( +const engine: MLCEngineInterface = await CreateExtensionServiceWorkerMLCEngine( "Mistral-7B-Instruct-v0.2-q4f16_1", { initProgressCallback: initProgressCallback } ); diff --git a/examples/chrome-extension/src/popup.ts b/examples/chrome-extension/src/popup.ts index 081febff..ce17b70e 100644 --- a/examples/chrome-extension/src/popup.ts +++ b/examples/chrome-extension/src/popup.ts @@ -6,7 +6,7 @@ import './popup.css'; -import { EngineInterface, InitProgressReport, CreateEngine, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { MLCEngineInterface, InitProgressReport, CreateMLCEngine, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; import { ProgressBar, Line } from "progressbar.js"; const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); @@ -43,7 +43,7 @@ const initProgressCallback = (report: InitProgressReport) => { // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; const selectedModel = "Mistral-7B-Instruct-v0.2-q4f16_1"; -const engine: EngineInterface = await CreateEngine( +const engine: MLCEngineInterface = await CreateMLCEngine( selectedModel, { 
initProgressCallback: initProgressCallback } ); diff --git a/examples/function-calling/src/function_calling.ts b/examples/function-calling/src/function_calling.ts index 72318531..21b9605a 100644 --- a/examples/function-calling/src/function_calling.ts +++ b/examples/function-calling/src/function_calling.ts @@ -24,7 +24,7 @@ async function main() { setLabel("init-label", report.text); }; const selectedModel = "gorilla-openfunctions-v2-q4f16_1" - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { appConfig: myAppConfig, initProgressCallback: initProgressCallback } ); diff --git a/examples/get-started-web-worker/src/main.ts b/examples/get-started-web-worker/src/main.ts index 3a6c1ad4..0c89d7a8 100644 --- a/examples/get-started-web-worker/src/main.ts +++ b/examples/get-started-web-worker/src/main.ts @@ -19,7 +19,7 @@ async function mainNonStreaming() { }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateWebWorkerEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } @@ -59,7 +59,7 @@ async function mainStreaming() { }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateWebWorkerEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } diff --git a/examples/get-started-web-worker/src/worker.ts b/examples/get-started-web-worker/src/worker.ts index 508d60b9..06d9709a 100644 --- a/examples/get-started-web-worker/src/worker.ts +++ b/examples/get-started-web-worker/src/worker.ts @@ -1,8 +1,8 @@ -import { EngineWorkerHandler, Engine } from "@mlc-ai/web-llm"; +import { MLCEngineWorkerHandler, MLCEngine } from "@mlc-ai/web-llm"; // Hookup an engine to a worker handler -const engine = new Engine(); -const handler = new EngineWorkerHandler(engine); +const engine = new MLCEngine(); +const handler = new MLCEngineWorkerHandler(engine); self.onmessage = (msg: MessageEvent) => { handler.onmessage(msg); }; diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index a34e8cd1..9b39ef68 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -14,7 +14,7 @@ async function main() { }; // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback } ); @@ -29,7 +29,7 @@ async function main() { // }, // ] // }; - // const engine: webllm.EngineInterface = await webllm.CreateEngine( + // const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( // selectedModel, // { appConfig: appConfig, initProgressCallback: initProgressCallback } // ); @@ -56,7 +56,7 @@ async function main() { console.log(reply0); console.log(await engine.runtimeStatsText()); - // To change model, either create a new engine via `CreateEngine()`, or call `engine.reload(modelId)` + // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)` } main(); diff --git 
a/examples/json-mode/src/json_mode.ts b/examples/json-mode/src/json_mode.ts index b8894f34..d36e7240 100644 --- a/examples/json-mode/src/json_mode.ts +++ b/examples/json-mode/src/json_mode.ts @@ -13,7 +13,7 @@ async function main() { setLabel("init-label", report.text); }; const selectedModel = "Llama-2-7b-chat-hf-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback } ); diff --git a/examples/json-schema/src/json_schema.ts b/examples/json-schema/src/json_schema.ts index f8074cef..8738af92 100644 --- a/examples/json-schema/src/json_schema.ts +++ b/examples/json-schema/src/json_schema.ts @@ -37,7 +37,7 @@ async function simpleStructuredTextExample() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( "Llama-2-7b-chat-hf-q4f16_1", { initProgressCallback: initProgressCallback } ); @@ -104,7 +104,7 @@ async function harryPotterExample() { setLabel("init-label", report.text); }; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( "Llama-2-7b-chat-hf-q4f16_1", { initProgressCallback: initProgressCallback } ); @@ -171,7 +171,7 @@ async function functionCallingExample() { }; const selectedModel = "Hermes-2-Pro-Mistral-7B-q4f16_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback, diff --git a/examples/logit-processor/src/logit_processor.ts b/examples/logit-processor/src/logit_processor.ts index 01d3373a..7941e24f 100644 --- a/examples/logit-processor/src/logit_processor.ts +++ b/examples/logit-processor/src/logit_processor.ts @@ -21,12 +21,12 @@ async function main() { const logitProcessorRegistry = new Map(); logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); - let engine: webllm.EngineInterface; + let engine: webllm.MLCEngineInterface; // Depending on whether we use a web worker, the code is slightly different if (USE_WEB_WORKER) { // see worker.ts on how LogitProcessor plays a role there - engine = await webllm.CreateWebWorkerEngine( + engine = await webllm.CreateWebWorkerMLCEngine( new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } @@ -35,7 +35,7 @@ async function main() { { initProgressCallback: initProgressCallback } ); } else { - engine = await webllm.CreateEngine( + engine = await webllm.CreateMLCEngine( "Phi2-q4f32_1", { initProgressCallback: initProgressCallback, diff --git a/examples/logit-processor/src/worker.ts b/examples/logit-processor/src/worker.ts index aaa7aba9..ec7f6d44 100644 --- a/examples/logit-processor/src/worker.ts +++ b/examples/logit-processor/src/worker.ts @@ -8,9 +8,9 @@ const myLogitProcessor = new MyLogitProcessor(); const logitProcessorRegistry = new Map(); logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); -const engine = new webllm.Engine(); +const engine = new webllm.MLCEngine(); engine.setLogitProcessorRegistry(logitProcessorRegistry); -const handler = new webllm.EngineWorkerHandler(engine); +const handler = new webllm.MLCEngineWorkerHandler(engine); self.onmessage = (msg: MessageEvent) => { handler.onmessage(msg); }; diff --git 
a/examples/multi-round-chat/src/multi_round_chat.ts b/examples/multi-round-chat/src/multi_round_chat.ts index d059d519..15d69ac4 100644 --- a/examples/multi-round-chat/src/multi_round_chat.ts +++ b/examples/multi-round-chat/src/multi_round_chat.ts @@ -18,7 +18,7 @@ async function main() { setLabel("init-label", report.text); }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback } ); diff --git a/examples/next-simple-chat/src/utils/chat_component.tsx b/examples/next-simple-chat/src/utils/chat_component.tsx index 85a76000..c39643eb 100644 --- a/examples/next-simple-chat/src/utils/chat_component.tsx +++ b/examples/next-simple-chat/src/utils/chat_component.tsx @@ -1,5 +1,5 @@ import { useState } from "react"; -import { Engine } from "@mlc-ai/web-llm"; +import { MLCEngine } from "@mlc-ai/web-llm"; import ChatUI from "~/utils/chat_ui"; const ChatComponent = () => { @@ -8,7 +8,7 @@ const ChatComponent = () => { ); const [prompt, setPrompt] = useState(""); const [runtimeStats, setRuntimeStats] = useState(""); - const [chat_ui] = useState(new ChatUI(new Engine())); + const [chat_ui] = useState(new ChatUI(new MLCEngine())); const updateMessage = (kind: string, text: string, append: boolean) => { if (kind == "init") { text = "[System Initalize] " + text; diff --git a/examples/next-simple-chat/src/utils/chat_ui.ts b/examples/next-simple-chat/src/utils/chat_ui.ts index f75d4545..d8856ba3 100644 --- a/examples/next-simple-chat/src/utils/chat_ui.ts +++ b/examples/next-simple-chat/src/utils/chat_ui.ts @@ -1,7 +1,7 @@ -import { EngineInterface, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { MLCEngineInterface, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; export default class ChatUI { - private engine: EngineInterface; + private engine: MLCEngineInterface; private chatLoaded = false; private requestInProgress = false; // We use a request chain to ensure that @@ -9,7 +9,7 @@ export default class ChatUI { private chatRequestChain: Promise = Promise.resolve(); private chatHistory: ChatCompletionMessageParam[] = []; - constructor(engine: EngineInterface) { + constructor(engine: MLCEngineInterface) { this.engine = engine; } /** diff --git a/examples/seed-to-reproduce/src/seed.ts b/examples/seed-to-reproduce/src/seed.ts index deacfc6b..c4eb3109 100644 --- a/examples/seed-to-reproduce/src/seed.ts +++ b/examples/seed-to-reproduce/src/seed.ts @@ -19,7 +19,7 @@ async function main() { setLabel("init-label", report.text); }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback } ); diff --git a/examples/service-worker/package.json b/examples/service-worker/package.json index 54677cdc..7a4de406 100644 --- a/examples/service-worker/package.json +++ b/examples/service-worker/package.json @@ -3,8 +3,8 @@ "version": "0.1.0", "private": true, "scripts": { - "start": "parcel src/index.html --port 3000", - "build": "parcel build src/index.html --dist-dir lib" + "start": "rm -rf .parcel-cache && parcel src/index.html --port 3000", + "build": "rm -rf .parcel-cache && parcel build src/index.html --dist-dir lib" }, "devDependencies": { "buffer": "^6.0.3", diff --git a/examples/service-worker/src/main.ts 
b/examples/service-worker/src/main.ts index 73ca6648..437cc58b 100644 --- a/examples/service-worker/src/main.ts +++ b/examples/service-worker/src/main.ts @@ -39,8 +39,8 @@ async function mainNonStreaming() { }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = - await webllm.CreateServiceWorkerEngine(selectedModel, { + const engine: webllm.MLCEngineInterface = + await webllm.CreateServiceWorkerMLCEngine(selectedModel, { initProgressCallback: initProgressCallback, }); @@ -77,8 +77,8 @@ async function mainStreaming() { }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.ServiceWorkerEngine = - await webllm.CreateServiceWorkerEngine(selectedModel, { + const engine: webllm.ServiceWorkerMLCEngine = + await webllm.CreateServiceWorkerMLCEngine(selectedModel, { initProgressCallback: initProgressCallback, }); diff --git a/examples/service-worker/src/sw.ts b/examples/service-worker/src/sw.ts index 86c70497..93936006 100644 --- a/examples/service-worker/src/sw.ts +++ b/examples/service-worker/src/sw.ts @@ -1,13 +1,13 @@ import { - ServiceWorkerEngineHandler, - EngineInterface, - Engine, + ServiceWorkerMLCEngineHandler, + MLCEngineInterface, + MLCEngine, } from "@mlc-ai/web-llm"; -const engine: EngineInterface = new Engine(); -let handler: ServiceWorkerEngineHandler; +const engine: MLCEngineInterface = new MLCEngine(); +let handler: ServiceWorkerMLCEngineHandler; self.addEventListener("activate", function (event) { - handler = new ServiceWorkerEngineHandler(engine); + handler = new ServiceWorkerMLCEngineHandler(engine); console.log("Web-LLM Service Worker Activated") }); diff --git a/examples/simple-chat-upload/src/simple_chat.ts b/examples/simple-chat-upload/src/simple_chat.ts index e0ad419f..86889e58 100644 --- a/examples/simple-chat-upload/src/simple_chat.ts +++ b/examples/simple-chat-upload/src/simple_chat.ts @@ -13,7 +13,7 @@ class ChatUI { private uiChat: HTMLElement; private uiChatInput: HTMLInputElement; private uiChatInfoLabel: HTMLLabelElement; - private engine: webllm.EngineInterface | webllm.WebWorkerEngine; + private engine: webllm.MLCEngineInterface | webllm.WebWorkerMLCEngine; private config: webllm.AppConfig = appConfig; private selectedModel: string; private chatLoaded = false; @@ -27,7 +27,7 @@ class ChatUI { * An asynchronous factory constructor since we need to await getMaxStorageBufferBindingSize(); * this is not allowed in a constructor (which cannot be asynchronous). 
*/ - public static CreateAsync = async (engine: webllm.EngineInterface) => { + public static CreateAsync = async (engine: webllm.MLCEngineInterface) => { const chatUI = new ChatUI(); chatUI.engine = engine; // get the elements @@ -286,16 +286,16 @@ class ChatUI { } const useWebWorker = appConfig.use_web_worker; -let engine: webllm.EngineInterface; +let engine: webllm.MLCEngineInterface; -// Here we do not use `CreateEngine()` but instantiate an engine that is not loaded with model +// Here we do not use `CreateMLCEngine()` but instantiate an engine that is not loaded with model if (useWebWorker) { - engine = new webllm.WebWorkerEngine(new Worker( + engine = new webllm.WebWorkerMLCEngine(new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } )); } else { - engine = new webllm.Engine(); + engine = new webllm.MLCEngine(); } ChatUI.CreateAsync(engine); diff --git a/examples/simple-chat-upload/src/worker.ts b/examples/simple-chat-upload/src/worker.ts index 3cb03508..a83ac98f 100644 --- a/examples/simple-chat-upload/src/worker.ts +++ b/examples/simple-chat-upload/src/worker.ts @@ -1,8 +1,8 @@ // Serve the engine workload through web worker -import { EngineWorkerHandler, Engine } from "@mlc-ai/web-llm"; +import { MLCEngineWorkerHandler, MLCEngine } from "@mlc-ai/web-llm"; -const engine = new Engine(); -const handler = new EngineWorkerHandler(engine); +const engine = new MLCEngine(); +const handler = new MLCEngineWorkerHandler(engine); self.onmessage = (msg: MessageEvent) => { handler.onmessage(msg); }; diff --git a/examples/simple-chat/src/simple_chat.ts b/examples/simple-chat/src/simple_chat.ts index f17c32a6..ed45bd7b 100644 --- a/examples/simple-chat/src/simple_chat.ts +++ b/examples/simple-chat/src/simple_chat.ts @@ -13,7 +13,7 @@ class ChatUI { private uiChat: HTMLElement; private uiChatInput: HTMLInputElement; private uiChatInfoLabel: HTMLLabelElement; - private engine: webllm.EngineInterface | webllm.WebWorkerEngine; + private engine: webllm.MLCEngineInterface | webllm.WebWorkerMLCEngine; private config: webllm.AppConfig = appConfig; private selectedModel: string; private chatLoaded = false; @@ -27,7 +27,7 @@ class ChatUI { * An asynchronous factory constructor since we need to await getMaxStorageBufferBindingSize(); * this is not allowed in a constructor (which cannot be asynchronous). 
*/ - public static CreateAsync = async (engine: webllm.EngineInterface) => { + public static CreateAsync = async (engine: webllm.MLCEngineInterface) => { const chatUI = new ChatUI(); chatUI.engine = engine; // get the elements @@ -305,15 +305,15 @@ class ChatUI { } const useWebWorker = appConfig.use_web_worker; -let engine: webllm.EngineInterface; +let engine: webllm.MLCEngineInterface; -// Here we do not use `CreateEngine()` but instantiate an engine that is not loaded with model +// Here we do not use `CreateMLCEngine()` but instantiate an engine that is not loaded with model if (useWebWorker) { - engine = new webllm.WebWorkerEngine(new Worker( + engine = new webllm.WebWorkerMLCEngine(new Worker( new URL('./worker.ts', import.meta.url), { type: 'module' } )); } else { - engine = new webllm.Engine(); + engine = new webllm.MLCEngine(); } ChatUI.CreateAsync(engine); diff --git a/examples/simple-chat/src/worker.ts b/examples/simple-chat/src/worker.ts index 3cb03508..a83ac98f 100644 --- a/examples/simple-chat/src/worker.ts +++ b/examples/simple-chat/src/worker.ts @@ -1,8 +1,8 @@ // Serve the engine workload through web worker -import { EngineWorkerHandler, Engine } from "@mlc-ai/web-llm"; +import { MLCEngineWorkerHandler, MLCEngine } from "@mlc-ai/web-llm"; -const engine = new Engine(); -const handler = new EngineWorkerHandler(engine); +const engine = new MLCEngine(); +const handler = new MLCEngineWorkerHandler(engine); self.onmessage = (msg: MessageEvent) => { handler.onmessage(msg); }; diff --git a/examples/streaming/src/streaming.ts b/examples/streaming/src/streaming.ts index 300e6def..a2b7bbd5 100644 --- a/examples/streaming/src/streaming.ts +++ b/examples/streaming/src/streaming.ts @@ -16,7 +16,7 @@ async function main() { setLabel("init-label", report.text); }; const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.EngineInterface = await webllm.CreateEngine( + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback } ); diff --git a/package-lock.json b/package-lock.json index cba8664e..600781a7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@mlc-ai/web-llm", - "version": "0.2.35", + "version": "0.2.36", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mlc-ai/web-llm", - "version": "0.2.35", + "version": "0.2.36", "license": "Apache-2.0", "devDependencies": { "@mlc-ai/web-tokenizers": "^0.1.3", diff --git a/src/config.ts b/src/config.ts index 74edc44d..9469f1af 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,17 +76,17 @@ export interface ChatConfig { export interface ChatOptions extends Partial { } /** - * Optional configurations for `CreateEngine()` and `CreateWebWorkerEngine()`. + * Optional configurations for `CreateMLCEngine()` and `CreateWebWorkerMLCEngine()`. * * chatOpts: To optionally override the `mlc-chat-config.json` of `modelId`. * appConfig: Configure the app, including the list of models and whether to use IndexedDB cache. * initProgressCallback: A callback for showing the progress of loading the model. * logitProcessorRegistry: A register for stateful logit processors, see `webllm.LogitProcessor`. * - * @note All fields are optional, and `logitProcessorRegistry` is only used for `CreateEngine()` - * not `CreateWebWorkerEngine()`. + * @note All fields are optional, and `logitProcessorRegistry` is only used for `CreateMLCEngine()` + * not `CreateWebWorkerMLCEngine()`. 
*/ -export interface EngineConfig { +export interface MLCEngineConfig { chatOpts?: ChatOptions, appConfig?: AppConfig, initProgressCallback?: InitProgressCallback, diff --git a/src/engine.ts b/src/engine.ts index 22d26b77..7962eb2e 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -9,7 +9,7 @@ import { GenerationConfig, postInitAndCheckGenerationConfigValues, Role, - EngineConfig, + MLCEngineConfig, } from "./config"; import { LLMChatPipeline } from "./llm_chat"; import { @@ -27,7 +27,7 @@ import { import * as ChatCompletionAPI from "./openai_api_protocols/index"; import { InitProgressCallback, - EngineInterface, + MLCEngineInterface, GenerateProgressCallback, LogitProcessor } from "./types"; @@ -35,22 +35,22 @@ import { Conversation, compareConversationObject, getConversation } from "./conv /** - * Creates `Engine`, and loads `modelId` onto WebGPU. + * Creates `MLCEngine`, and loads `modelId` onto WebGPU. * - * Equivalent to `new webllm.Engine().reload(...)`. + * Equivalent to `new webllm.MLCEngine().reload(...)`. * * @param modelId The model to load, needs to either be in `webllm.prebuiltAppConfig`, or in * `engineConfig.appConfig`. - * @param engineConfig Optionally configures the engine, see `webllm.EngineConfig`. - * @returns An initialized `WebLLM.Engine` with `modelId` loaded. - * @throws Throws error when device lost (mostly due to OOM); users should re-call `CreateEngine()`, + * @param engineConfig Optionally configures the engine, see `webllm.MLCEngineConfig`. + * @returns An initialized `WebLLM.MLCEngine` with `modelId` loaded. + * @throws Throws error when device lost (mostly due to OOM); users should re-call `CreateMLCEngine()`, * potentially with a smaller model or smaller context window size. */ -export async function CreateEngine( +export async function CreateMLCEngine( modelId: string, - engineConfig?: EngineConfig, -): Promise { - const engine = new Engine(); + engineConfig?: MLCEngineConfig, +): Promise { + const engine = new MLCEngine(); engine.setInitProgressCallback(engineConfig?.initProgressCallback); engine.setLogitProcessorRegistry(engineConfig?.logitProcessorRegistry); await engine.reload(modelId, engineConfig?.chatOpts, engineConfig?.appConfig); @@ -58,11 +58,11 @@ export async function CreateEngine( } /** - * The main interface of Engine, which loads a model and performs tasks. + * The main interface of MLCEngine, which loads a model and performs tasks. * - * You can either initialize one with `webllm.CreateEngine(modelId)`, or `webllm.Engine().reload(modelId)`. + * You can either initialize one with `webllm.CreateMLCEngine(modelId)`, or `webllm.MLCEngine().reload(modelId)`. */ -export class Engine implements EngineInterface { +export class MLCEngine implements MLCEngineInterface { public chat: API.Chat; private currentModelId?: string = undefined; // Model current loaded, undefined if nothing is loaded @@ -319,7 +319,7 @@ export class Engine implements EngineInterface { return cntr; } - async function _getChunk(thisModule: Engine): Promise { + async function _getChunk(thisModule: MLCEngine): Promise { // Remove the replacement character (U+FFFD) from the response to handle emojis. // Each emoji is made up of multiples of 4 tokens; when truncated, it is displayed as �, so // we skip this delta until a full emoji is rendered @@ -409,7 +409,7 @@ export class Engine implements EngineInterface { ): Promise | ChatCompletion> { // 0. 
Preprocess inputs if (!this.currentModelId) { - throw new Error("Please call `Engine.reload(model)` first, or initialize with CreateEngine()."); + throw new Error("Please call `MLCEngine.reload(model)` first, or initialize with CreateMLCEngine()."); } ChatCompletionAPI.postInitAndCheckFields(request); const genConfig: GenerationConfig = { diff --git a/src/extension_service_worker.ts b/src/extension_service_worker.ts index ee210da7..f4f93d52 100644 --- a/src/extension_service_worker.ts +++ b/src/extension_service_worker.ts @@ -1,11 +1,11 @@ import * as tvmjs from "tvmjs"; -import { AppConfig, ChatOptions, EngineConfig } from "./config"; +import { AppConfig, ChatOptions, MLCEngineConfig } from "./config"; import { ReloadParams, WorkerRequest } from "./message"; -import { EngineInterface } from "./types"; +import { MLCEngineInterface } from "./types"; import { ChatWorker, - EngineWorkerHandler, - WebWorkerEngine, + MLCEngineWorkerHandler, + WebWorkerMLCEngine, PostMessageHandler, } from "./web_worker"; import { areAppConfigsEqual, areChatOptionsEqual } from "./utils"; @@ -40,23 +40,23 @@ export class PortPostMessageHandler implements PostMessageHandler { * * @example * - * const engine = new Engine(); + * const engine = new MLCEngine(); * let handler; * chrome.runtime.onConnect.addListener(function (port) { * if (handler === undefined) { - * handler = new ServiceWorkerEngineHandler(engine, port); + * handler = new ServiceWorkerMLCEngineHandler(engine, port); * } else { * handler.setPort(port); * } * port.onMessage.addListener(handler.onmessage.bind(handler)); * }); */ -export class ServiceWorkerEngineHandler extends EngineWorkerHandler { +export class ServiceWorkerMLCEngineHandler extends MLCEngineWorkerHandler { modelId?: string; chatOpts?: ChatOptions; appConfig?: AppConfig; - constructor(engine: EngineInterface, port: chrome.runtime.Port) { + constructor(engine: MLCEngineInterface, port: chrome.runtime.Port) { let portHandler = new PortPostMessageHandler(port); super(engine, portHandler); @@ -124,31 +124,31 @@ export class ServiceWorkerEngineHandler extends EngineWorkerHandler { } /** - * Create a ServiceWorkerEngine. + * Create a ServiceWorkerMLCEngine. * * @param modelId The model to load, needs to either be in `webllm.prebuiltAppConfig`, or in * `engineConfig.appConfig`. - * @param engineConfig Optionally configures the engine, see `webllm.EngineConfig` for more. + * @param engineConfig Optionally configures the engine, see `webllm.MLCEngineConfig` for more. * @param keepAliveMs The interval to send keep alive messages to the service worker. * See [Service worker lifecycle](https://developer.chrome.com/docs/extensions/develop/concepts/service-workers/lifecycle#idle-shutdown) * The default is 10s. - * @returns An initialized `WebLLM.ServiceWorkerEngine` with `modelId` loaded. + * @returns An initialized `WebLLM.ServiceWorkerMLCEngine` with `modelId` loaded. 
*/ -export async function CreateServiceWorkerEngine( +export async function CreateServiceWorkerMLCEngine( modelId: string, - engineConfig?: EngineConfig, + engineConfig?: MLCEngineConfig, keepAliveMs: number = 10000 -): Promise { - const serviceWorkerEngine = new ServiceWorkerEngine(keepAliveMs); - serviceWorkerEngine.setInitProgressCallback( +): Promise { + const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(keepAliveMs); + serviceWorkerMLCEngine.setInitProgressCallback( engineConfig?.initProgressCallback ); - await serviceWorkerEngine.init( + await serviceWorkerMLCEngine.init( modelId, engineConfig?.chatOpts, engineConfig?.appConfig ); - return serviceWorkerEngine; + return serviceWorkerMLCEngine; } class PortAdapter implements ChatWorker { @@ -183,9 +183,9 @@ class PortAdapter implements ChatWorker { } /** - * A client of Engine that exposes the same interface + * A client of MLCEngine that exposes the same interface */ -export class ServiceWorkerEngine extends WebWorkerEngine { +export class ServiceWorkerMLCEngine extends WebWorkerMLCEngine { port: chrome.runtime.Port; constructor(keepAliveMs: number = 10000) { diff --git a/src/index.ts b/src/index.ts index c1951c47..0de3e8de 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,7 +2,7 @@ export { ModelRecord, AppConfig, ChatOptions, - EngineConfig, + MLCEngineConfig, GenerationConfig, prebuiltAppConfig, modelVersion, @@ -13,13 +13,13 @@ export { export { InitProgressCallback, InitProgressReport, - EngineInterface, + MLCEngineInterface, LogitProcessor, } from "./types"; export { - Engine, - CreateEngine, + MLCEngine, + CreateMLCEngine, } from "./engine"; export { @@ -27,9 +27,9 @@ export { } from "./cache_util"; export { - EngineWorkerHandler, - WebWorkerEngine, - CreateWebWorkerEngine + MLCEngineWorkerHandler, + WebWorkerMLCEngine, + CreateWebWorkerMLCEngine } from "./web_worker"; export { @@ -39,15 +39,15 @@ export { } from "./message" export { - ServiceWorkerEngineHandler, - ServiceWorkerEngine, - CreateServiceWorkerEngine, + ServiceWorkerMLCEngineHandler, + ServiceWorkerMLCEngine, + CreateServiceWorkerMLCEngine, } from "./service_worker"; export { - ServiceWorkerEngineHandler as ExtensionServiceWorkerEngineHandler, - ServiceWorkerEngine as ExtensionServiceWorkerEngine, - CreateServiceWorkerEngine as CreateExtensionServiceWorkerEngine, + ServiceWorkerMLCEngineHandler as ExtensionServiceWorkerMLCEngineHandler, + ServiceWorkerMLCEngine as ExtensionServiceWorkerMLCEngine, + CreateServiceWorkerMLCEngine as CreateExtensionServiceWorkerMLCEngine, } from './extension_service_worker' export * from './openai_api_protocols/index'; diff --git a/src/openai_api_protocols/apis.ts b/src/openai_api_protocols/apis.ts index 679e25ba..d4064c25 100644 --- a/src/openai_api_protocols/apis.ts +++ b/src/openai_api_protocols/apis.ts @@ -1,11 +1,11 @@ -import { EngineInterface } from "../types"; +import { MLCEngineInterface } from "../types"; import { Completions } from "./chat_completion"; export class Chat { - private engine: EngineInterface; + private engine: MLCEngineInterface; completions: Completions; - constructor(engine: EngineInterface) { + constructor(engine: MLCEngineInterface) { this.engine = engine; this.completions = new Completions(this.engine); } diff --git a/src/openai_api_protocols/chat_completion.ts b/src/openai_api_protocols/chat_completion.ts index 957d48eb..6716436a 100644 --- a/src/openai_api_protocols/chat_completion.ts +++ b/src/openai_api_protocols/chat_completion.ts @@ -15,14 +15,14 @@ * limitations under the License. 
*/ -import { EngineInterface } from "../types"; +import { MLCEngineInterface } from "../types"; /* eslint-disable @typescript-eslint/no-namespace */ export class Completions { - private engine: EngineInterface; + private engine: MLCEngineInterface; - constructor(engine: EngineInterface) { + constructor(engine: MLCEngineInterface) { this.engine = engine; } @@ -200,7 +200,7 @@ export interface ChatCompletionRequestBase { /** * Model to carry out this API. * - * @note Not supported. Instead call `CreateEngine(model)` or `engine.reload(model)` instead. + * @note Not supported. Instead call `CreateMLCEngine(model)` or `engine.reload(model)` instead. */ model?: string | null; } diff --git a/src/service_worker.ts b/src/service_worker.ts index c2310cd4..920a5aa1 100644 --- a/src/service_worker.ts +++ b/src/service_worker.ts @@ -1,8 +1,8 @@ import * as tvmjs from "tvmjs"; -import { AppConfig, ChatOptions, EngineConfig, ModelRecord } from "./config"; +import { AppConfig, ChatOptions, MLCEngineConfig, ModelRecord } from "./config"; import { ReloadParams, WorkerRequest, WorkerResponse } from "./message"; -import { EngineInterface, InitProgressReport } from "./types"; -import { EngineWorkerHandler, WebWorkerEngine, ChatWorker } from "./web_worker"; +import { MLCEngineInterface, InitProgressReport } from "./types"; +import { MLCEngineWorkerHandler, WebWorkerMLCEngine, ChatWorker } from "./web_worker"; import { areAppConfigsEqual, areChatOptionsEqual } from "./utils"; /* Service Worker Script */ @@ -14,18 +14,18 @@ type IServiceWorker = globalThis.ServiceWorker; * * @example * - * const engine = new Engine(); + * const engine = new MLCEngine(); * let handler; * chrome.runtime.onConnect.addListener(function (port) { * if (handler === undefined) { - * handler = new ServiceWorkerEngineHandler(engine, port); + * handler = new ServiceWorkerMLCEngineHandler(engine, port); * } else { * handler.setPort(port); * } * port.onMessage.addListener(handler.onmessage.bind(handler)); * }); */ -export class ServiceWorkerEngineHandler extends EngineWorkerHandler { +export class ServiceWorkerMLCEngineHandler extends MLCEngineWorkerHandler { modelId?: string; chatOpts?: ChatOptions; appConfig?: AppConfig; @@ -36,10 +36,10 @@ export class ServiceWorkerEngineHandler extends EngineWorkerHandler { >(); private initReuqestUuid?: string; - constructor(engine: EngineInterface) { + constructor(engine: MLCEngineInterface) { if (!self || !("addEventListener" in self)) { throw new Error( - "ServiceWorkerGlobalScope is not defined. ServiceWorkerEngineHandler must be created in service worker script." + "ServiceWorkerGlobalScope is not defined. ServiceWorkerMLCEngineHandler must be created in service worker script." ); } const postMessageHandler = { @@ -156,7 +156,7 @@ export class ServiceWorker implements ChatWorker { this.serviceWorker = serviceWorker; } - // ServiceWorkerEngine will later overwrite this + // ServiceWorkerMLCEngine will later overwrite this onmessage() {} postMessage(message: WorkerRequest) { @@ -173,38 +173,38 @@ export class ServiceWorker implements ChatWorker { } /** - * Create a ServiceWorkerEngine. + * Create a ServiceWorkerMLCEngine. * * @param modelId The model to load, needs to either be in `webllm.prebuiltAppConfig`, or in * `engineConfig.appConfig`. - * @param engineConfig Optionally configures the engine, see `webllm.EngineConfig` for more. - * @returns An initialized `WebLLM.ServiceWorkerEngine` with `modelId` loaded. 
+ * @param engineConfig Optionally configures the engine, see `webllm.MLCEngineConfig` for more. + * @returns An initialized `WebLLM.ServiceWorkerMLCEngine` with `modelId` loaded. */ -export async function CreateServiceWorkerEngine( +export async function CreateServiceWorkerMLCEngine( modelId: string, - engineConfig?: EngineConfig -): Promise { + engineConfig?: MLCEngineConfig +): Promise { if (!("serviceWorker" in navigator)) { throw new Error("Service worker API is not available"); } const registration = await (navigator.serviceWorker as ServiceWorkerContainer) .ready; - const serviceWorkerEngine = new ServiceWorkerEngine(registration.active!); - serviceWorkerEngine.setInitProgressCallback( + const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(registration.active!); + serviceWorkerMLCEngine.setInitProgressCallback( engineConfig?.initProgressCallback ); - await serviceWorkerEngine.init( + await serviceWorkerMLCEngine.init( modelId, engineConfig?.chatOpts, engineConfig?.appConfig ); - return serviceWorkerEngine; + return serviceWorkerMLCEngine; } /** - * A client of Engine that exposes the same interface + * A client of MLCEngine that exposes the same interface */ -export class ServiceWorkerEngine extends WebWorkerEngine { +export class ServiceWorkerMLCEngine extends WebWorkerMLCEngine { missedHeatbeat = 0; constructor(worker: IServiceWorker, keepAliveMs = 10000) { @@ -227,7 +227,7 @@ export class ServiceWorkerEngine extends WebWorkerEngine { } catch (err: any) { // This is expected to throw if user has multiple windows open if (!err.message.startsWith("return from a unknown uuid")) { - console.error("CreateWebServiceWorkerEngine.onmessage", err); + console.error("CreateWebServiceWorkerMLCEngine.onmessage", err); } } } diff --git a/src/types.ts b/src/types.ts index dd0b78dc..d1127102 100644 --- a/src/types.ts +++ b/src/types.ts @@ -56,9 +56,9 @@ export interface LogitProcessor { /** - * Common interface of Engine that UI can interact with + * Common interface of MLCEngine that UI can interact with */ -export interface EngineInterface { +export interface MLCEngineInterface { /** * An object that exposes chat-related APIs. */ diff --git a/src/web_worker.ts b/src/web_worker.ts index 91fc6afa..44c4ac43 100644 --- a/src/web_worker.ts +++ b/src/web_worker.ts @@ -1,11 +1,11 @@ import { AppConfig, ChatOptions, - EngineConfig, + MLCEngineConfig, GenerationConfig, } from "./config"; import { - EngineInterface, + MLCEngineInterface, GenerateProgressCallback, InitProgressCallback, InitProgressReport, @@ -43,12 +43,12 @@ export interface PostMessageHandler { * * // setup a chat worker handler that routes * // requests to the chat - * const engine = new Engine(); - * cont handler = new EngineWorkerHandler(engine); + * const engine = new MLCEngine(); + * const handler = new MLCEngineWorkerHandler(engine); * onmessage = handler.onmessage; */ -export class EngineWorkerHandler { - protected engine: EngineInterface; +export class MLCEngineWorkerHandler { + protected engine: MLCEngineInterface; protected chatCompletionAsyncChunkGenerator?: AsyncGenerator< ChatCompletionChunk, void, @@ -57,13 +57,13 @@ export class EngineWorkerHandler { protected postMessageHandler?: PostMessageHandler; /** - * @param engine A concrete implementation of EngineInterface + * @param engine A concrete implementation of MLCEngineInterface * @param postMessageHandler Optionally, a handler to communicate with the content script. * This is only needed in ServiceWorker.
In web worker, we can use `postMessage` from * DOM API directly. */ constructor( - engine: EngineInterface, + engine: MLCEngineInterface, postMessageHandler?: PostMessageHandler, initProgressCallback?: (report: InitProgressReport) => void ) { @@ -298,44 +298,44 @@ export interface ChatWorker { } /** - * Creates `WebWorkerEngine`, a client that holds the same interface as `Engine`. + * Creates `WebWorkerMLCEngine`, a client that holds the same interface as `MLCEngine`. * - * Equivalent to `new webllm.WebWorkerEngine(worker).reload(...)`. + * Equivalent to `new webllm.WebWorkerMLCEngine(worker).reload(...)`. * - * @param worker The worker that holds the actual Engine, intialized with `new Worker()`. + * @param worker The worker that holds the actual MLCEngine, initialized with `new Worker()`. * @param modelId The model to load, needs to either be in `webllm.prebuiltAppConfig`, or in * `engineConfig.appConfig`. - * @param engineConfig Optionally configures the engine, see `webllm.EngineConfig` for more. - * @returns An initialized `WebLLM.WebWorkerEngine` with `modelId` loaded. + * @param engineConfig Optionally configures the engine, see `webllm.MLCEngineConfig` for more. + * @returns An initialized `WebLLM.WebWorkerMLCEngine` with `modelId` loaded. * - * @note engineConfig.logitProcessorRegistry is ignored for `CreateWebWorkEngine()`. + * @note engineConfig.logitProcessorRegistry is ignored for `CreateWebWorkerMLCEngine()`. */ -export async function CreateWebWorkerEngine( +export async function CreateWebWorkerMLCEngine( worker: any, modelId: string, - engineConfig?: EngineConfig -): Promise { - const webWorkerEngine = new WebWorkerEngine(worker); - webWorkerEngine.setInitProgressCallback(engineConfig?.initProgressCallback); - await webWorkerEngine.reload( + engineConfig?: MLCEngineConfig +): Promise { + const webWorkerMLCEngine = new WebWorkerMLCEngine(worker); + webWorkerMLCEngine.setInitProgressCallback(engineConfig?.initProgressCallback); + await webWorkerMLCEngine.reload( modelId, engineConfig?.chatOpts, engineConfig?.appConfig ); - return webWorkerEngine; + return webWorkerMLCEngine; } /** - * A client of Engine that exposes the same interface + * A client of MLCEngine that exposes the same interface * * @example * - * const chat = new webllm.WebWorkerEngine(new Worker( + * const chat = new webllm.WebWorkerMLCEngine(new Worker( * new URL('./worker.ts', import.meta.url), * {type: 'module'} * )); */ -export class WebWorkerEngine implements EngineInterface { +export class WebWorkerMLCEngine implements MLCEngineInterface { public worker: ChatWorker; public chat: API.Chat; diff --git a/tests/conv_template.test.ts b/tests/conv_template.test.ts index 44957e64..ec16e8e8 100644 --- a/tests/conv_template.test.ts +++ b/tests/conv_template.test.ts @@ -1,6 +1,6 @@ import { ChatConfig, ConvTemplateConfig, Role } from '../src/config' import { getConversation } from '../src/conversation' -import { Engine } from '../src/engine' +import { MLCEngine } from '../src/engine' import { ChatCompletionRequest } from "../src/openai_api_protocols/chat_completion" diff --git a/tests/function_calling.test.ts b/tests/function_calling.test.ts index 49cb9bde..6658b308 100644 --- a/tests/function_calling.test.ts +++ b/tests/function_calling.test.ts @@ -1,6 +1,6 @@ import { Role } from '../src/config' import { getConversation } from '../src/conversation' -import { Engine } from '../src/engine' +import { MLCEngine } from '../src/engine' import { ChatCompletionRequest } from "../src/openai_api_protocols/chat_completion" @@
-41,9 +41,9 @@ describe('Test conversation template', () => { }) }); -describe('Test Engine', () => { +describe('Test MLCEngine', () => { test('Test getFunctionCallUsage none', () => { - const engine = new Engine(); + const engine = new MLCEngine(); const request: ChatCompletionRequest = { model: "gorilla-openfunctions-v1-q4f16_1_MLC", @@ -63,7 +63,7 @@ describe('Test Engine', () => { }); test('Test getFunctionCallUsage auto', () => { - const engine = new Engine(); + const engine = new MLCEngine(); const request: ChatCompletionRequest = { model: "gorilla-openfunctions-v1-q4f16_1_MLC", @@ -82,7 +82,7 @@ describe('Test Engine', () => { }); test('Test getFunctionCallUsage function', () => { - const engine = new Engine(); + const engine = new MLCEngine(); const request: ChatCompletionRequest = { model: "gorilla-openfunctions-v1-q4f16_1_MLC", diff --git a/tests/multi_round_chat.test.ts b/tests/multi_round_chat.test.ts index bd884660..840e415c 100644 --- a/tests/multi_round_chat.test.ts +++ b/tests/multi_round_chat.test.ts @@ -6,7 +6,7 @@ import { ChatCompletionRequest, ChatCompletionUserMessageParam, } from "../src/openai_api_protocols/chat_completion"; -import { Engine } from '../src/engine'; +import { MLCEngine } from '../src/engine'; import { Conversation, compareConversationObject } from '../src/conversation'; import { ChatConfig, Role } from '../src/config'; @@ -51,7 +51,7 @@ describe('Test multi-round chatting', () => { // Setups const config_json = JSON.parse(configStr); const chatConfig = { ...config_json } as ChatConfig; - const engine = new Engine(); + const engine = new MLCEngine(); // Simulate request0 const messages: ChatCompletionMessageParam[] = [ @@ -91,7 +91,7 @@ describe('Test multi-round chatting', () => { // Setups const config_json = JSON.parse(configStr); const chatConfig = { ...config_json } as ChatConfig; - const engine = new Engine(); + const engine = new MLCEngine(); // Simulate request0 const messages: ChatCompletionMessageParam[] = [ @@ -136,7 +136,7 @@ describe('Test multi-round chatting', () => { // Setups const config_json = JSON.parse(configStr); const chatConfig = { ...config_json } as ChatConfig; - const engine = new Engine(); + const engine = new MLCEngine(); // Simulate request0 const messages: ChatCompletionMessageParam[] = [ @@ -180,7 +180,7 @@ describe('Test multi-round chatting', () => { // Setups const config_json = JSON.parse(configStr); const chatConfig = { ...config_json } as ChatConfig; - const engine = new Engine(); + const engine = new MLCEngine(); // Simulate request0 const messages: ChatCompletionMessageParam[] = [