Spaces:
Running
on
A10G
Running
on
A10G
[ | |
{ | |
"role": "user", | |
"content": "Give you some pictures e1.jpg, e2.png, e3.jpg, help me count the number of sheep?" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "image-to-text", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"image": "e1.jpg" | |
} | |
}, | |
{ | |
"task": "object-detection", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"image": "e1.jpg" | |
} | |
}, | |
{ | |
"task": "visual-question-answering", | |
"id": 2, | |
"dep": [1], | |
"args": { | |
"image": "<GENERATED>-1", | |
"text": "How many sheep in the picture" | |
} | |
}, | |
{ | |
"task": "image-to-text", | |
"id": 3, | |
"dep": [-1], | |
"args": { | |
"image": "e2.png" | |
} | |
}, | |
{ | |
"task": "object-detection", | |
"id": 4, | |
"dep": [-1], | |
"args": { | |
"image": "e2.png" | |
} | |
}, | |
{ | |
"task": "visual-question-answering", | |
"id": 5, | |
"dep": [4], | |
"args": { | |
"image": "<GENERATED>-4", | |
"text": "How many sheep in the picture" | |
} | |
}, | |
{ | |
"task": "image-to-text", | |
"id": 6, | |
"dep": [-1], | |
"args": { | |
"image": "e3.jpg" | |
} | |
}, | |
{ | |
"task": "object-detection", | |
"id": 7, | |
"dep": [-1], | |
"args": { | |
"image": "e3.jpg" | |
} | |
}, | |
{ | |
"task": "visual-question-answering", | |
"id": 8, | |
"dep": [7], | |
"args": { | |
"image": "<GENERATED>-7", | |
"text": "How many sheep in the picture" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "Look at /e.jpg, can you tell me how many objects in the picture? Give me a picture and video similar to this one." | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "image-to-text", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"image": "/e.jpg" | |
} | |
}, | |
{ | |
"task": "object-detection", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"image": "/e.jpg" | |
} | |
}, | |
{ | |
"task": "visual-question-answering", | |
"id": 2, | |
"dep": [1], | |
"args": { | |
"image": "<GENERATED>-1", | |
"text": "how many objects in the picture?" | |
} | |
}, | |
{ | |
"task": "text-to-image", | |
"id": 3, | |
"dep": [0], | |
"args": { | |
"text": "<GENERATED-0>" | |
} | |
}, | |
{ | |
"task": "image-to-image", | |
"id": 4, | |
"dep": [-1], | |
"args": { | |
"image": "/e.jpg" | |
} | |
}, | |
{ | |
"task": "text-to-video", | |
"id": 5, | |
"dep": [0], | |
"args": { | |
"text": "<GENERATED-0>" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "given a document /images/e.jpeg, answer me what is the student amount? And describe the image with your voice" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "document-question-answering", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"image": "/images/e.jpeg", | |
"text": "what is the student amount?" | |
} | |
}, | |
{ | |
"task": "visual-question-answering", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"image": "/images/e.jpeg", | |
"text": "what is the student amount?" | |
} | |
}, | |
{ | |
"task": "image-to-text", | |
"id": 2, | |
"dep": [-1], | |
"args": { | |
"image": "/images/e.jpeg" | |
} | |
}, | |
{ | |
"task": "text-to-speech", | |
"id": 3, | |
"dep": [2], | |
"args": { | |
"text": "<GENERATED>-2" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "Given an image /example.jpg, first generate a pose image, then based on the pose generate a new image where a girl is reading a book" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "openpose-control", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"image": "/example.jpg" | |
} | |
}, | |
{ | |
"task": "openpose-text-to-image", | |
"id": 1, | |
"dep": [0], | |
"args": { | |
"text": "a girl is reading a book", | |
"image": "<GENERATED>-0" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "please show me a video and an image of (based on the text) 'a boy is running' and dub it" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "text-to-video", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"text": "a boy is running" | |
} | |
}, | |
{ | |
"task": "text-to-speech", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"text": "a boy is running" | |
} | |
}, | |
{ | |
"task": "text-to-image", | |
"id": 2, | |
"dep": [-1], | |
"args": { | |
"text": "a boy is running" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "please show me a joke and an image of cat" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "conversational", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"text": "please show me a joke of cat" | |
} | |
}, | |
{ | |
"task": "text-to-image", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"text": "a photo of cat" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "give me a picture about a cut dog, then describe the image to me and tell a story about it" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "text-to-image", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"text": "a picture of a cute dog" | |
} | |
}, | |
{ | |
"task": "image-to-text", | |
"id": 1, | |
"dep": [0], | |
"args": { | |
"image": "<GENERATED>-0" | |
} | |
}, | |
{ | |
"task": "text-generation", | |
"id": 2, | |
"dep": [1], | |
"args": { | |
"text": "<GENERATED>-1" | |
} | |
}, | |
{ | |
"task": "text-to-speech", | |
"id": 3, | |
"dep": [2], | |
"args": { | |
"text": "<GENERATED>-2" | |
} | |
} | |
] | |
}, | |
{ | |
"role": "user", | |
"content": "give you a picture /example.jpg, what's in it and tell me a joke about it" | |
}, | |
{ | |
"role": "assistant", | |
"content": [ | |
{ | |
"task": "image-to-text", | |
"id": 0, | |
"dep": [-1], | |
"args": { | |
"image": "/example.jpg" | |
} | |
}, | |
{ | |
"task": "object-detection", | |
"id": 1, | |
"dep": [-1], | |
"args": { | |
"image": "/example.jpg" | |
} | |
}, | |
{ | |
"task": "conversational", | |
"id": 2, | |
"dep": [0], | |
"args": { | |
"text": "<GENERATED>-0" | |
} | |
}, | |
{ | |
"task": "text-to-speech", | |
"id": 3, | |
"dep": [2], | |
"args": { | |
"text": "<GENERATED>-1" | |
} | |
} | |
] | |
} | |
] |