A library for running OctoAI inferences. Please see the QuickStart Template Guides for more information.
Set the environment variable OCTOAI_TOKEN, or pass it to the Client class on construction. The TypeScript SDK can be installed using NPM.
npm i @octoai/client
In order to access endpoints from OctoAI, an API token needs to be set, for more information on how to generate an access token see How to Create an OctoAI Access Token.
Wherever you set your environment variables (for example, in your .bashrc
or .env
file), set OCTOAI_TOKEN
to the value you generated following the guide.
export OCTOAI_TOKEN=YOUR_TOKEN_HERE
Alternatively, on creation of the Client class, you can set your token variable. Please see the Client docs for more information.
If you need assistance with any specifics for using the OctoAI TypeScript SDK, please see the TypeScript SDK Reference.
The OctoAI TypeScript SDK is intended to help you use OctoAI templates, including our public QuickStart templates as well as templates you've cloned or built from scratch on your account. At its simplest, it allows you to run inferences against an endpoint by providing a dictionary with the necessary inputs.
import { Client } from "@octoai/client";
// Token can be provided to the client here. If one isn't set, you cannot use client.tune, client.asset, or client.chat.
const client = new Client(process.env.OCTOAI_TOKEN);
// The endpoint URL can be provided, as well as the inputs necessary to run an inference.
const result = await client.infer("yourEndpointUrl", { key: "value" });
// It also allows for inference streams for LLMs
const stream = await client.inferStream("yourEndpointUrl", { key: "value" });
// And for server-side asynchronous inferences
const future = await client.inferAsync("yourEndpointUrl", { key: "value" });
// Poll until the future is ready. Sleep BEFORE re-checking: the original
// re-checked immediately after the first check and always slept one extra
// second after the future had already become ready.
let ready = await client.isFutureReady(future);
while (!ready) {
  await new Promise((resolve) => setTimeout(resolve, 1000));
  ready = await client.isFutureReady(future);
}
const outputs = await client.getFutureResult(future);
// And includes healthChecks
if ((await client.healthCheck("healthCheckEndpointUrl")) === 200) {
  // Run some inferences
}
// Text-generation inferences run through the chat completions API; pass
// `stream: true` for streaming (it defaults to false).
const completion = await client.chat.completions.create({
  model: "llama-2-13b-chat",
  messages: [
    {
      role: "system",
      content:
        "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
    },
    { role: "user", content: "Write hello world in typescript" },
  ],
});
// It can be used to create and upload assets for ImageGen and other services.
import { LoraData } from "@octoai/client";
// Describe the LoRA asset being uploaded. Use a type annotation instead of
// an `as LoraData` assertion: an assertion would silence errors about
// missing or misspelled fields, while the annotation lets the compiler
// check the literal against the LoraData shape.
const loraData: LoraData = {
  asset_type: "lora",
  data_type: "fp16",
  engine: "image/stable-diffusion-v1-5",
  file_format: "safetensors",
  trigger_words: ["origami paper"],
};
// Upload the .safetensors file and register it as a private LoRA asset.
const createdAsset = await client.asset.create({
  file: "./test_assets/origami-paper.safetensors",
  asset_type: "lora",
  description: "origami paper lora",
  data: loraData,
  name: "origami-paper",
  is_public: false,
});
// Such as uploading files for finetuning
const NAME = "test-sks3-poodle-sd15";
const assets = [];
for (let i = 0; i < 5; i++) {
  const asset = await client.asset.create({
    name: `${NAME}-image-${i}`,
    file: `test_assets/mitchi${i}.jpg`, // Buffers and ArrayBuffers can also be used
    data: { asset_type: "file", file_format: "jpg" },
    asset_type: "file",
    // BUG FIX: the original interpolated `SEARCH_NAME`, which is never
    // declared anywhere in this example and would throw a ReferenceError.
    description: `${NAME}`,
  });
  assets.push(asset);
}
// Then finding a checkpoint to use for finetuning. Use plain await instead
// of mixing an await-style flow with a trailing .then() chain.
const checkpointList = await client.asset.list({
  is_public: true,
  owner: "octoai",
  name: "default-sd15",
});
const checkpoint = checkpointList.data[0];
// And finally creating a finetuning job after verifying the assets are ready
const createTuneRequest = {
  name: NAME,
  description: "sks3 poodle",
  details: {
    base_checkpoint: checkpoint,
    // This will set the captions to the trigger word, though you can also pass [{file_id: assets[0].id, caption: "your detailed caption with sks3 the trigger word in it here"}]
    files: assets,
    steps: 500,
    tune_type: "lora_tune",
    trigger_words: ["sks3"],
  },
};
// BUG FIX: the original assigned to `tune` without declaring it — that is a
// ReferenceError in ES modules / strict mode (and an implicit global otherwise).
const tune = await client.tune.create(createTuneRequest);
// And once the job is finished, using that tuned lora for an image generation request using infer.
Generated using TypeDoc