@@ -74,7 +74,8 @@ def load_tasks(args):
"hellaswag_10_shot",
"truthfulqa_mc2",
"winogrande_5_shot",
- "gsm8k"
+ "gsm8k",
+ "mmlu",
]
return args.tasks.split(",") if args.tasks else []
@@ -0,0 +1,9 @@
+include: {$EVAL_PATH}/lm_eval/tasks/mmlu/default/_mmlu.yaml  # base MMLU config to inherit from; NOTE(review): the path prefix looks like a placeholder filled in before YAML parsing - confirm
+task:  # the four MMLU category groups listed under this config
+ - mmlu_stem
+ - mmlu_other
+ - mmlu_social_sciences
+ - mmlu_humanities
+num_fewshot: 5  # presumably the number of in-context examples per prompt (5-shot) - verify against the harness docs
+metric_list:  # metrics requested in this file; the included base config may define more - not visible here
+ - metric: acc  # accuracy