HuggingGPT / demos /demo_parse_task.json
tricktreat's picture
update gpt4
c5ea4b9
[
{
"role": "user",
"content": "Give you some pictures e1.jpg, e2.png, e3.jpg, help me count the number of sheep?"
},
{
"role": "assistant",
"content": [
{
"task": "image-to-text",
"id": 0,
"dep": [-1],
"args": {
"image": "e1.jpg"
}
},
{
"task": "object-detection",
"id": 1,
"dep": [-1],
"args": {
"image": "e1.jpg"
}
},
{
"task": "visual-question-answering",
"id": 2,
"dep": [1],
"args": {
"image": "<GENERATED>-1",
"text": "How many sheep in the picture"
}
},
{
"task": "image-to-text",
"id": 3,
"dep": [-1],
"args": {
"image": "e2.png"
}
},
{
"task": "object-detection",
"id": 4,
"dep": [-1],
"args": {
"image": "e2.png"
}
},
{
"task": "visual-question-answering",
"id": 5,
"dep": [4],
"args": {
"image": "<GENERATED>-4",
"text": "How many sheep in the picture"
}
},
{
"task": "image-to-text",
"id": 6,
"dep": [-1],
"args": {
"image": "e3.jpg"
}
},
{
"task": "object-detection",
"id": 7,
"dep": [-1],
"args": {
"image": "e3.jpg"
}
},
{
"task": "visual-question-answering",
"id": 8,
"dep": [7],
"args": {
"image": "<GENERATED>-7",
"text": "How many sheep in the picture"
}
}
]
},
{
"role": "user",
"content": "Look at /e.jpg, can you tell me how many objects in the picture? Give me a picture and video similar to this one."
},
{
"role": "assistant",
"content": [
{
"task": "image-to-text",
"id": 0,
"dep": [-1],
"args": {
"image": "/e.jpg"
}
},
{
"task": "object-detection",
"id": 1,
"dep": [-1],
"args": {
"image": "/e.jpg"
}
},
{
"task": "visual-question-answering",
"id": 2,
"dep": [1],
"args": {
"image": "<GENERATED>-1",
"text": "how many objects in the picture?"
}
},
{
"task": "text-to-image",
"id": 3,
"dep": [0],
"args": {
"text": "<GENERATED-0>"
}
},
{
"task": "image-to-image",
"id": 4,
"dep": [-1],
"args": {
"image": "/e.jpg"
}
},
{
"task": "text-to-video",
"id": 5,
"dep": [0],
"args": {
"text": "<GENERATED-0>"
}
}
]
},
{
"role": "user",
"content": "given a document /images/e.jpeg, answer me what is the student amount? And describe the image with your voice"
},
{
"role": "assistant",
"content": [
{
"task": "document-question-answering",
"id": 0,
"dep": [-1],
"args": {
"image": "/images/e.jpeg",
"text": "what is the student amount?"
}
},
{
"task": "visual-question-answering",
"id": 1,
"dep": [-1],
"args": {
"image": "/images/e.jpeg",
"text": "what is the student amount?"
}
},
{
"task": "image-to-text",
"id": 2,
"dep": [-1],
"args": {
"image": "/images/e.jpeg"
}
},
{
"task": "text-to-speech",
"id": 3,
"dep": [2],
"args": {
"text": "<GENERATED>-2"
}
}
]
},
{
"role": "user",
"content": "Given an image /example.jpg, first generate a pose image, then based on the pose generate a new image where a girl is reading a book"
},
{
"role": "assistant",
"content": [
{
"task": "openpose-control",
"id": 0,
"dep": [-1],
"args": {
"image": "/example.jpg"
}
},
{
"task": "openpose-text-to-image",
"id": 1,
"dep": [0],
"args": {
"text": "a girl is reading a book",
"image": "<GENERATED>-0"
}
}
]
},
{
"role": "user",
"content": "please show me a video and an image of (based on the text) 'a boy is running' and dub it"
},
{
"role": "assistant",
"content": [
{
"task": "text-to-video",
"id": 0,
"dep": [-1],
"args": {
"text": "a boy is running"
}
},
{
"task": "text-to-speech",
"id": 1,
"dep": [-1],
"args": {
"text": "a boy is running"
}
},
{
"task": "text-to-image",
"id": 2,
"dep": [-1],
"args": {
"text": "a boy is running"
}
}
]
},
{
"role": "user",
"content": "please show me a joke and an image of cat"
},
{
"role": "assistant",
"content": [
{
"task": "conversational",
"id": 0,
"dep": [-1],
"args": {
"text": "please show me a joke of cat"
}
},
{
"task": "text-to-image",
"id": 1,
"dep": [-1],
"args": {
"text": "a photo of cat"
}
}
]
},
{
"role": "user",
"content": "give me a picture about a cut dog, then describe the image to me and tell a story about it"
},
{
"role": "assistant",
"content": [
{
"task": "text-to-image",
"id": 0,
"dep": [-1],
"args": {
"text": "a picture of a cute dog"
}
},
{
"task": "image-to-text",
"id": 1,
"dep": [0],
"args": {
"image": "<GENERATED>-0"
}
},
{
"task": "text-generation",
"id": 2,
"dep": [1],
"args": {
"text": "<GENERATED>-1"
}
},
{
"task": "text-to-speech",
"id": 3,
"dep": [2],
"args": {
"text": "<GENERATED>-2"
}
}
]
},
{
"role": "user",
"content": "give you a picture /example.jpg, what's in it and tell me a joke about it"
},
{
"role": "assistant",
"content": [
{
"task": "image-to-text",
"id": 0,
"dep": [-1],
"args": {
"image": "/example.jpg"
}
},
{
"task": "object-detection",
"id": 1,
"dep": [-1],
"args": {
"image": "/example.jpg"
}
},
{
"task": "conversational",
"id": 2,
"dep": [0],
"args": {
"text": "<GENERATED>-0"
}
},
{
"task": "text-to-speech",
"id": 3,
"dep": [2],
"args": {
"text": "<GENERATED>-1"
}
}
]
}
]