import { Agent, openai } from '@runflow-ai/sdk';
// Automatic media handling options, declared separately from the core agent
// settings. `as const` keeps the literal types so the values are passed through
// exactly as written.
const mediaSettings = {
  transcribeAudio: true, // audio files are transcribed automatically
  processImages: true, // images are handled as multimodal input (GPT-4o Vision)
  audioProvider: 'openai', // provider used for transcription
  audioLanguage: 'pt', // default transcription language
} as const;

// WhatsApp assistant backed by GPT-4o with automatic media processing enabled.
const agent = new Agent({
  name: 'WhatsApp Assistant',
  instructions: 'You are a helpful assistant.',
  model: openai('gpt-4o'),
  media: mediaSettings,
});
// Voice-message attachment. Audio files are transcribed automatically before
// the agent processes the request.
const voiceAttachment = {
  url: 'https://zenvia.com/storage/audio.ogg',
  contentType: 'audio/ogg',
  caption: 'Voice message', // optional
} as const;

// The text message may be empty when the attached file carries audio.
const result = await agent.process({ message: '', file: voiceAttachment });
// Image attachment. Images are automatically processed as multimodal input,
// so the text message can ask a question about the picture.
const imageAttachment = {
  url: 'https://example.com/image.jpg',
  contentType: 'image/jpeg',
} as const;

const result2 = await agent.process({
  message: 'What is in this image?',
  file: imageAttachment,
});