When a video is submitted for processing, Hive's backend splits it into frames, runs the model on each frame, and then recombines the per-frame results into a single output JSON.
For a classifier, the video output contains one object per frame with a time and a classes field; for a detector, each per-frame object instead contains a time and a bounding_poly field. A classifier response looks like the following:
{
  "output": [
    {
      "time": 0,
      "classes": [
        {
          "class": "general_not_nsfw_not_suggestive",
          "score": 0.7058501595243861
        },
        {
          "class": "general_nsfw",
          "score": 0.12669138033591393
        },
        {
          "class": "general_suggestive",
          "score": 0.16745846013969992
        }
      ],
      "media_link": "www.media_url.com/image1.jpg",
      "subtask_no": 366
    },
    {
      "time": 0.9676333333333333,
      "classes": [
        {
          "class": "general_not_nsfw_not_suggestive",
          "score": 0.9986390064645149
        },
        {
          "class": "general_nsfw",
          "score": 0.0008006852563126932
        },
        {
          "class": "general_suggestive",
          "score": 0.0005603082791726574
        }
      ],
      "media_link": "www.media_url.com/image2.jpg",
      "subtask_no": 367
    },
    {
      "time": 1.9686333333333335,
      "classes": [
        {
          "class": "general_not_nsfw_not_suggestive",
          "score": 0.9999421468597295
        },
        {
          "class": "general_nsfw",
          "score": 0.00003785532083901699
        },
        {
          "class": "general_suggestive",
          "score": 0.000019997819431634897
        }
      ],
      "subtask_no": 368
    }
  ]
}
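As a sketch of how this output might be consumed, the Python snippet below walks the per-frame objects and prints the highest-scoring class for each frame. It assumes the response above has been saved to a hypothetical file named video_result.json; the file name and the summarization step are illustrative, not part of Hive's API.

import json

# Hypothetical file holding the video classification response shown above.
with open("video_result.json") as f:
    result = json.load(f)

# Each entry in "output" corresponds to one sampled frame.
for frame in result["output"]:
    # Pick the class with the highest score for this frame.
    top = max(frame["classes"], key=lambda c: c["score"])
    print(f"t={frame['time']:.3f}s  {top['class']}  ({top['score']:.4f})")

A per-frame summary like this can then be aggregated over the whole video, for example by flagging any frame whose general_nsfw score exceeds a threshold you choose.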