Use FastChat's LLM-judge scripts (MT-Bench) to generate and display judgments:
click to view the code
# env mt_bench
# NOTE(review): "mt_bench" is presumably the name of the Python/conda
# environment these commands should run in; confirm and activate it first.

# Fetch FastChat and install it in editable mode, with the extras required
# by the model worker and the LLM-judge scripts.
git clone https://github.com/lm-sys/FastChat.git
cd FastChat
pip install -e ".[model_worker,llm_judge]"

# gen_judgment.py and show_result.py are not in the repository root; they
# live in fastchat/llm_judge and use paths relative to that directory, so
# they must be run from there.
cd fastchat/llm_judge

# Prerequisites not handled by this snippet:
#   - OPENAI_API_KEY must be available, since the judge calls the OpenAI API.
#   - Answers for every model in --model-list must already exist under
#     data/mt_bench/model_answer/ (generate them, or fetch the pre-generated
#     ones shipped by FastChat) before judgments can be produced.

# Grade the listed models' answers, issuing 2 judge requests in parallel.
python gen_judgment.py --model-list gpt-3.5-turbo gpt-4 --parallel 2

# Summarize the resulting scores for the same models.
python show_result.py --model-list gpt-3.5-turbo gpt-4