Skip to main content

Video

Analyze video content using multimodal models, enabling video understanding, scene analysis, and temporal reasoning.

Prerequisites

pip install apertis

Get your API Key from Apertis

Basic Video Analysis

from apertis import Apertis

def main():
    """Send a text prompt plus a video URL to the model and print the reply."""
    # No credentials are passed here; presumably the client picks up the API
    # key from the environment — confirm against the SDK documentation.
    client = Apertis()

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe what happens in this video."},
                    {
                        "type": "video_url",
                        "video_url": {
                            "url": "https://example.com/video.mp4"
                        }
                    }
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Local Video (Base64)

import base64
from apertis import Apertis

def encode_video(video_path: str) -> str:
    """Read the file at ``video_path`` and return its contents as base64 text.

    The whole file is read into memory before it is encoded.

    Args:
        video_path: Path of the video file to encode.

    Returns:
        The file's bytes encoded with the standard base64 alphabet and
        decoded to a ``str``.
    """
    with open(video_path, "rb") as video_file:
        return base64.standard_b64encode(video_file.read()).decode("utf-8")

def main():
    """Encode a local video as a base64 data URI, send it, and print the reply."""
    client = Apertis()

    video_path = "path/to/your/video.mp4"
    base64_video = encode_video(video_path)

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is happening in this video?"},
                    {
                        "type": "video_url",
                        "video_url": {
                            # The MIME type is hard-coded to MP4 to match
                            # video_path above; presumably it should be
                            # changed for other containers — confirm.
                            "url": f"data:video/mp4;base64,{base64_video}"
                        }
                    }
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Video Question Answering

from apertis import Apertis

def main():
    """Ask a list of questions about one video, one request per question."""
    client = Apertis()

    video_url = "https://example.com/cooking-video.mp4"

    questions = [
        "What dish is being prepared?",
        "What ingredients are used?",
        "How many steps are in this recipe?",
        "What cooking techniques are demonstrated?"
    ]

    # Each question is sent as its own independent request together with the
    # same video URL; no conversation history is carried between questions.
    for question in questions:
        response = client.chat.completions.create(
            model="gemini-3-pro-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {"type": "video_url", "video_url": {"url": video_url}}
                    ]
                }
            ]
        )
        print(f"Q: {question}")
        print(f"A: {response.choices[0].message.content}\n")


if __name__ == "__main__":
    main()

Temporal Analysis

from apertis import Apertis

def main():
    """Request a timeline-style analysis of a video and print the reply."""
    client = Apertis()

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # Continuation lines stay at column 0 so the prompt
                        # text sent to the model carries no leading spaces.
                        "text": """Analyze this video and provide:
1. A timeline of key events with approximate timestamps
2. Scene transitions
3. Any significant actions or changes"""
                    },
                    {
                        "type": "video_url",
                        "video_url": {"url": "https://example.com/event-video.mp4"}
                    }
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Video with Audio Analysis

from apertis import Apertis

def main():
    """Request a combined visual and audio analysis of a video and print it."""
    client = Apertis()

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # Continuation lines stay at column 0 so the prompt
                        # text sent to the model carries no leading spaces.
                        "text": """Analyze both the visual and audio content of this video:
1. What is being shown visually?
2. What is being said or what sounds are present?
3. How do the audio and video relate to each other?"""
                    },
                    {
                        "type": "video_url",
                        "video_url": {"url": "https://example.com/presentation.mp4"}
                    }
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Video Comparison

from apertis import Apertis

def main():
    """Send two video URLs in one message and print the model's comparison."""
    client = Apertis()

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                # Both videos travel in the same user message, after the prompt.
                "content": [
                    {"type": "text", "text": "Compare these two videos. What are the similarities and differences?"},
                    {"type": "video_url", "video_url": {"url": "https://example.com/video1.mp4"}},
                    {"type": "video_url", "video_url": {"url": "https://example.com/video2.mp4"}}
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Streaming Video Analysis

from apertis import Apertis

def main():
    """Stream a scene-by-scene breakdown of a video, printing text as it arrives."""
    client = Apertis()

    stream = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Provide a detailed scene-by-scene breakdown of this video."},
                    {"type": "video_url", "video_url": {"url": "https://example.com/movie-clip.mp4"}}
                ]
            }
        ],
        stream=True
    )

    for chunk in stream:
        # Skip chunks that carry no choices; indexing [0] on an empty list
        # would raise IndexError. NOTE(review): whether this API ever emits
        # such chunks is not shown here — confirm against the SDK docs.
        if not chunk.choices:
            continue
        # Skip chunks whose delta has no text content.
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

    # Finish the streamed output with a newline.
    print()


if __name__ == "__main__":
    main()

Action Recognition

from apertis import Apertis

def main():
    """Ask the model to list the actions performed in a video and print them."""
    client = Apertis()

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # Continuation lines stay at column 0 so the prompt
                        # text sent to the model carries no leading spaces.
                        "text": """Identify all actions performed in this video.
For each action, provide:
- Action name
- Who/what is performing it
- Approximate duration or timestamp"""
                    },
                    {"type": "video_url", "video_url": {"url": "https://example.com/sports.mp4"}}
                ]
            }
        ]
    )

    # Print the text of the first returned choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()

Supported Models

Video analysis is available on:

Provider | Models
Google | gemini-3-pro-preview, gemini-2.5-flash, gemini-2.0-flash
OpenAI | gpt-4.1 (limited video support)

View all models →

API Reference

Video URL Object

Field | Type | Description
url | str | Video URL or base64 data URI

Supported Formats

  • MP4
  • MOV
  • AVI
  • MKV
  • WebM

Limitations

  • Maximum video length varies by model
  • Large videos may be sampled or truncated
  • Processing time increases with video length
  • Some models may not support audio track analysis