Comments (6)
I wrote a function for calculating the metrics of table 8, and it looks good.
def evaluation_metrics(data_path):
    """Compute per-type and overall accuracy for GeoChat Table-8 VQA answers.

    Reads a JSONL file where each record has keys "gt" (ground-truth answer),
    "type" (one of "comp", "presence", "rural_urban"), and "answer" (the raw
    model output).  Prints correct/incorrect counts, totals, and accuracy for
    each question type that has at least one sample, followed by the overall
    totals.

    :param data_path: path to the JSONL answer file
    """
    with open(data_path, "r") as fp:
        base = [json.loads(line) for line in fp]

    correct = 0
    incorrect = 0
    # Per-type tallies: type -> [correct, incorrect].
    counts = {"comp": [0, 0], "presence": [0, 0], "rural_urban": [0, 0]}

    for answers in tqdm(base):
        gt = answers["gt"].lower()
        type_ = answers["type"]
        # Normalize the model answer: drop spaces and periods, lowercase.
        answer = answers["answer"].replace(" ", "").lower().replace(".", "")
        hit = gt == answer
        if hit:
            correct += 1
        else:
            incorrect += 1
        if type_ in counts:
            counts[type_][0 if hit else 1] += 1

    def _report(label, n_correct, n_incorrect):
        # Skip empty categories instead of dividing by zero (the original
        # guarded only rural_urban; presence/comparison could crash).
        total = n_correct + n_incorrect
        if total == 0:
            return
        print(label + "_correct:", n_correct)
        print(label + "_incorrect:", n_incorrect)
        print(label + "_Total:", total)
        print(label + "_Acc:", n_correct / total)
        print("-" * 100)

    _report("presence", *counts["presence"])
    _report("comparison", *counts["comp"])
    _report("rural_urban", *counts["rural_urban"])

    total = correct + incorrect
    print("total_correct:", correct)
    print("total_incorrect:", incorrect)
    print("total_Total:", total)
    # Guard against an empty answer file.
    print("total_Acc:", correct / total if total else 0.0)
I am also waiting for the metric calculation function of Table 7 and Table 9.
from geochat.
I wrote a function for calculating the metrics of table 8, and it looks good.
def evaluation_metrics(data_path): base = [] with open(data_path, "r") as fp: lines = fp.readlines() for line in lines: base.append(json.loads(line)) correct = 0 incorrect = 0 comp_correct = 0 comp_incorrect = 0 pre_correct = 0 pre_incorrect = 0 ru_correct = 0 ru_incorrect = 0 for answers in tqdm(base): gt = answers["gt"].lower() type_ = answers["type"] answer = answers["answer"].replace(" ", "").lower().replace(".", "") if gt == answer: correct = correct + 1 if type_ == "comp": comp_correct = comp_correct + 1 if type_ == "presence": pre_correct = pre_correct + 1 if type_ == "rural_urban": ru_correct = ru_correct + 1 else: incorrect = incorrect + 1 if type_ == "comp": comp_incorrect = comp_incorrect + 1 if type_ == "presence": pre_incorrect = pre_incorrect + 1 if type_ == "rural_urban": ru_incorrect = ru_incorrect + 1 print("presence_correct:", pre_correct) print("presence_incorrect:", pre_incorrect) print("presence_Total:", pre_correct + pre_incorrect) print("presence_Acc:", (pre_correct / (pre_correct + pre_incorrect))) print("-" * 100) print("comparison_correct:", comp_correct) print("comparison_incorrect:", comp_incorrect) print("comparison_Total:", comp_correct + comp_incorrect) print("comparison_Acc:", (comp_correct / (comp_correct + comp_incorrect))) print("-" * 100) if ru_correct + ru_incorrect != 0: print("rural_urban_correct:", ru_correct) print("rural_urban_incorrect:", ru_incorrect) print("rural_urban_Total:", ru_correct + ru_incorrect) print("rural_urban_Acc:", (ru_correct / (ru_correct + ru_incorrect))) print("-" * 100) print("total_correct:", correct) print("total_incorrect:", incorrect) print("total_Total:", correct + incorrect) print("total_Acc:", correct / (correct + incorrect))I am also waiting for the metric calculation function of Table 7 and Table 9.
I am currently facing this issue. Have you implemented the metric calculations in other tables?
from geochat.
Not yet
from geochat.
I wrote a script for visual grounding evaluation in Table 7. I used a bounding-box calculation package [BboxToolkit]. I think it's correct, but I can't get the same result as in the paper, and I don't know what's wrong. The bbox_and_angle_to_polygon function is copied from geochat_demo.py.
(https://github.com/jbwang1997/BboxToolkit/blob/master/USAGE.md).
def bbox_and_angle_to_polygon(x1, y1, x2, y2, a):
    """Rotate the axis-aligned box (x1, y1, x2, y2) by `a` degrees about its
    center and return the four corner points as a flat array
    (x1, y1, x2, y2, x3, y3, x4, y4) in top-left, top-right, bottom-right,
    bottom-left order."""
    # Box center, half-extents, and the rotation in radians.
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    half_w = abs(x2 - x1) / 2
    half_h = abs(y2 - y1) / 2
    theta = math.radians(a)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Corner offsets relative to the center, in drawing order.
    offsets = (
        (-half_w, -half_h),
        (half_w, -half_h),
        (half_w, half_h),
        (-half_w, half_h),
    )
    coords = []
    for dx, dy in offsets:
        # Standard 2D rotation, then translate back to the center.
        coords.append(cos_t * dx - sin_t * dy + cx)
        coords.append(sin_t * dx + cos_t * dy + cy)
    return np.array(coords)
# read the answer file output by `GeoChat/geochat/eval/batch_geochat_referring.py`, and save as a list `geochat_predict`.
# Evaluate GeoChat visual grounding (Table 7): read the answers produced by
# `GeoChat/geochat/eval/batch_geochat_referring.py` (already loaded into the
# list `geochat_predict`), convert each predicted rotated box to a polygon,
# and count predictions whose oriented-box IoU with the ground truth >= 0.5.
# NOTE(review): assumes `scale`, `correct`, and `total_cnt` are defined
# earlier in the script — confirm against the full file.
images_dir = '../Dataset/GeoChat/referring_images'  # hoisted: loop-invariant
for predict in tqdm(geochat_predict):
    # Strip tokenizer artifacts and whitespace from the raw model answer.
    answer = predict['answer'].replace("<unk>", "").replace(" ", "").strip()
    image_path = os.path.join(images_dir, predict['image_id'] + '.png')
    # Image size is needed to map model coordinates (normalized to `scale`)
    # back to pixel coordinates.
    image = Image.open(image_path)
    width, height = image.size
    size_type = predict['type']
    gt_bboxes = predict['ground_truth']     # list of 4-point polygons
    predict_boxes = extract_bboxes(answer)  # list of [x1, y1, x2, y2, angle]
    # Renamed loop variable: the original reused `i`, shadowing the outer
    # enumerate index.
    for box_idx, gt_poly in enumerate(gt_bboxes):
        # Ground truth: 4 corner points -> oriented box [cx, cy, w, h, theta].
        poly = np.array(gt_poly).astype(np.float32).reshape(-1)
        gt_obb = bt.poly2obb(poly).reshape(1, 5)
        if box_idx >= len(predict_boxes):
            # Model produced fewer boxes than the ground truth; this
            # prediction counts as incorrect.
            continue
        try:
            pred_bbox = predict_boxes[box_idx]
            # Rescale normalized coordinates to pixels.
            pred_bbox[0] = pred_bbox[0] / scale * width
            pred_bbox[1] = pred_bbox[1] / scale * height
            pred_bbox[2] = pred_bbox[2] / scale * width
            pred_bbox[3] = pred_bbox[3] / scale * height
            pred_poly = bbox_and_angle_to_polygon(*pred_bbox)
            pred_obb = bt.poly2obb(pred_poly).reshape(1, 5)
            # Oriented-box IoU computed by BboxToolkit.
            iou_score = bt.geometry.bbox_overlaps(pred_obb, gt_obb)[0][0]
        except (TypeError, ValueError, IndexError):
            # Narrowed from a bare `except:` so real bugs are not silently
            # swallowed; malformed predictions still count as incorrect.
            continue
        if iou_score >= 0.5:
            correct += 1

dataset = 'GeoChat Bench referring'
print(f"Evaluating {dataset} ...")
print(f'Precision @ 0.5: {correct / total_cnt} \n')
Finally, I got [email protected] = 0.22744 as a result. My test data came from GeoChat Bench referring.jsonl, with 7593 test samples. I was confused by the IoU result presented in the paper, and I don't know how to reproduce it.
from geochat.
extract_bboxes
I wrote a script for visual grounding evaluation in table 7. I used a bounding box calculate package [BboxToolkit] I think it's correct but I can't get the same result in paper. I don't know what's wrong. The
bbox_and_angle_to_polygon
function copy fromgeochat_demo.py
. (https://github.com/jbwang1997/BboxToolkit/blob/master/USAGE.md).def bbox_and_angle_to_polygon(x1, y1, x2, y2, a): # 计算中心点坐标 x_ctr = (x1 + x2) / 2 y_ctr = (y1 + y2) / 2 # 计算宽度和高度 w = abs(x2 - x1) h = abs(y2 - y1) # 计算角度(弧度) angle_rad = math.radians(a) # 计算旋转后的四个角点坐标 cos_a = math.cos(angle_rad) sin_a = math.sin(angle_rad) x1_rot = cos_a * (-w / 2) - sin_a * (-h / 2) + x_ctr y1_rot = sin_a * (-w / 2) + cos_a * (-h / 2) + y_ctr x2_rot = cos_a * (w / 2) - sin_a * (-h / 2) + x_ctr y2_rot = sin_a * (w / 2) + cos_a * (-h / 2) + y_ctr x3_rot = cos_a * (w / 2) - sin_a * (h / 2) + x_ctr y3_rot = sin_a * (w / 2) + cos_a * (h / 2) + y_ctr x4_rot = cos_a * (-w / 2) - sin_a * (h / 2) + x_ctr y4_rot = sin_a * (-w / 2) + cos_a * (h / 2) + y_ctr # 返回多边形坐标 polygon_coords = np.array((x1_rot, y1_rot, x2_rot, y2_rot, x3_rot, y3_rot, x4_rot, y4_rot)) return polygon_coords # read the answer file output by `GeoChat/geochat/eval/batch_geochat_referring.py`, and save as a list `geochat_predict`. for i, predict in tqdm(enumerate(geochat_predict)): answer = predict['answer'] answer = answer.replace("<unk>","").replace(" ","").strip() images_dir = '../Dataset/GeoChat/referring_images' image_path = os.path.join(images_dir, predict['image_id'] + '.png') image = Image.open(image_path) width, height = image.size size_type = predict['type'] gt_bboxes = predict['ground_truth'] # list predict_boxes = extract_bboxes(answer) # list for i in range(len(gt_bboxes)): # convert coordinates to float poly = np.array(gt_bboxes[i]).astype(np.float32).reshape(-1) # [4,2] gt_obb = bt.poly2obb(poly).reshape(1,5) # convert to [cx, cy, w, h, theta] try: pred_bbox = predict_boxes[i] pred_bbox[0] = pred_bbox[0] / scale * width pred_bbox[1] = pred_bbox[1] / scale * height pred_bbox[2] = pred_bbox[2] / scale * width pred_bbox[3] = pred_bbox[3] / scale * height pred_poly = bbox_and_angle_to_polygon(*pred_bbox) pred_obb = bt.poly2obb(pred_poly).reshape(1,5) # convert to [cx, cy, w, h, theta] iou_score = 
bt.geometry.bbox_overlaps(pred_obb, gt_obb)[0][0] # calcualte obb Iou by BboxToolkit. if iou_score >= 0.5: correct += 1 except: continue dataset = 'GeoChat Bench referring' print(f"Evaluating {dataset} ...") print(f'Precision @ 0.5: {correct / total_cnt} \n')Finally, I got a [email protected]=0.22744 as a result, my test data was come from GeoChat Bench referring.jsonl, with 7593 test samples. I was confused with the Iou result presented in the paper. I don't know how to get the same result.
How to implement the function of extract_bboxes?
thank you.
from geochat.
extract_bboxes
I wrote a script for visual grounding evaluation in table 7. I used a bounding box calculate package [BboxToolkit] I think it's correct but I can't get the same result in paper. I don't know what's wrong. The
bbox_and_angle_to_polygon
function copy fromgeochat_demo.py
. (https://github.com/jbwang1997/BboxToolkit/blob/master/USAGE.md).def bbox_and_angle_to_polygon(x1, y1, x2, y2, a): # 计算中心点坐标 x_ctr = (x1 + x2) / 2 y_ctr = (y1 + y2) / 2 # 计算宽度和高度 w = abs(x2 - x1) h = abs(y2 - y1) # 计算角度(弧度) angle_rad = math.radians(a) # 计算旋转后的四个角点坐标 cos_a = math.cos(angle_rad) sin_a = math.sin(angle_rad) x1_rot = cos_a * (-w / 2) - sin_a * (-h / 2) + x_ctr y1_rot = sin_a * (-w / 2) + cos_a * (-h / 2) + y_ctr x2_rot = cos_a * (w / 2) - sin_a * (-h / 2) + x_ctr y2_rot = sin_a * (w / 2) + cos_a * (-h / 2) + y_ctr x3_rot = cos_a * (w / 2) - sin_a * (h / 2) + x_ctr y3_rot = sin_a * (w / 2) + cos_a * (h / 2) + y_ctr x4_rot = cos_a * (-w / 2) - sin_a * (h / 2) + x_ctr y4_rot = sin_a * (-w / 2) + cos_a * (h / 2) + y_ctr # 返回多边形坐标 polygon_coords = np.array((x1_rot, y1_rot, x2_rot, y2_rot, x3_rot, y3_rot, x4_rot, y4_rot)) return polygon_coords # read the answer file output by `GeoChat/geochat/eval/batch_geochat_referring.py`, and save as a list `geochat_predict`. for i, predict in tqdm(enumerate(geochat_predict)): answer = predict['answer'] answer = answer.replace("<unk>","").replace(" ","").strip() images_dir = '../Dataset/GeoChat/referring_images' image_path = os.path.join(images_dir, predict['image_id'] + '.png') image = Image.open(image_path) width, height = image.size size_type = predict['type'] gt_bboxes = predict['ground_truth'] # list predict_boxes = extract_bboxes(answer) # list for i in range(len(gt_bboxes)): # convert coordinates to float poly = np.array(gt_bboxes[i]).astype(np.float32).reshape(-1) # [4,2] gt_obb = bt.poly2obb(poly).reshape(1,5) # convert to [cx, cy, w, h, theta] try: pred_bbox = predict_boxes[i] pred_bbox[0] = pred_bbox[0] / scale * width pred_bbox[1] = pred_bbox[1] / scale * height pred_bbox[2] = pred_bbox[2] / scale * width pred_bbox[3] = pred_bbox[3] / scale * height pred_poly = bbox_and_angle_to_polygon(*pred_bbox) pred_obb = bt.poly2obb(pred_poly).reshape(1,5) # convert to [cx, cy, w, h, theta] iou_score = 
bt.geometry.bbox_overlaps(pred_obb, gt_obb)[0][0] # calcualte obb Iou by BboxToolkit. if iou_score >= 0.5: correct += 1 except: continue dataset = 'GeoChat Bench referring' print(f"Evaluating {dataset} ...") print(f'Precision @ 0.5: {correct / total_cnt} \n')Finally, I got a [email protected]=0.22744 as a result, my test data was come from GeoChat Bench referring.jsonl, with 7593 test samples. I was confused with the Iou result presented in the paper. I don't know how to get the same result.
How to implement the function of extract_bboxes? thank you.
I implemented the extract_bboxes function as follows:
import re
def extract_bboxes(output):
    """
    Extract bounding box coordinates from the given string.

    Boxes appear in the GeoChat format
    {<bx_left><by_top><bx_right><by_bottom>|<θ>}, e.g. "{<10><20><30><40>|<5>}".

    :param output: string containing zero or more bounding boxes
    :return: list of bounding boxes, each [bx_left, by_top, bx_right,
             by_bottom, θ] as ints
    """
    # `{`, `}` and `|` must be escaped.  The original pattern left `|`
    # unescaped, turning it into a regex alternation: each box then produced
    # two partial matches with empty groups, and int('') raised ValueError.
    pattern = r'\{<(\d+)><(\d+)><(\d+)><(\d+)>\|<(\d+)>\}'
    matches = re.findall(pattern, output)
    # Coordinates are integers in this format, so convert with int().
    return [[int(coord) for coord in match] for match in matches]
from geochat.
Related Issues (20)
- get_chunk method in batch_geochat_scene.py seems to be undefined HOT 1
- Minimum memory for the training process
- how to run the lora finetuned model? HOT 5
- metrics about region captioning HOT 1
- training data corrupted HOT 1
- is training necessary ?
- Model for visual grounding
- Calculation of metrics
- Evaluation results about Grounding
- The results of MiniGPT in the paper HOT 2
- when training had an error!
- License for Commercial use
- merge lora
- how to finetune on my custom dataset
- training data corrupt
- Using transformers to use geochat directly
- The error encountered when using ZeRO-2 for training.
- Could you describe the procedure of reproduce the GeoChat?
- Multi images HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from geochat.