parameters.json

{
    "MAX_NEW_TOKENS": 256,
    "CONCURRENT_LEVELS": [1, 2, 4, 8, 16, 32, 64, 128, 256],
    "MODEL_PATH": "meta-llama/Llama-2-7b-chat-hf",
    "MODEL_HEADERS": {"Content-Type": "application/json"},
    "SAFE_CHECK": true,
    "THRESHOLD_TPS": 7,
    "TOKENIZER_PATH": "../../tokenizer",
    "RANDOM_PROMPT_LENGTH": 1000,
    "TEMPERATURE": 0.6,
    "TOP_P": 0.9,
    "MODEL_ENDPOINTS": [
        "http://localhost:8000/v1/chat/completions"
    ]
}
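
For context, the following is a minimal sketch of how a benchmark script might load these parameters and issue a single chat-completion request to the configured endpoint. It assumes the requests library, a running server at the MODEL_ENDPOINTS address, and an OpenAI-compatible request schema; the actual benchmark client code may differ.

import json

import requests  # assumed HTTP client; the benchmark may use a different one

# Load the benchmark parameters shown above.
with open("parameters.json") as f:
    params = json.load(f)

# Build one chat-completion request from the configured values.
payload = {
    "model": params["MODEL_PATH"],
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
    "max_tokens": params["MAX_NEW_TOKENS"],
    "temperature": params["TEMPERATURE"],
    "top_p": params["TOP_P"],
}

# Send the request to the first configured endpoint with the configured headers.
response = requests.post(
    params["MODEL_ENDPOINTS"][0],
    headers=params["MODEL_HEADERS"],
    json=payload,
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])

In the full benchmark, requests like this would be issued concurrently at each level listed in CONCURRENT_LEVELS, and the measured tokens-per-second compared against THRESHOLD_TPS.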