|
@@ -0,0 +1,78 @@
|
|
|
+from finetuning_data_formatter import TrainingExample, Guidelines, Category, LlamaGuardPromptConfigs, LlamaGuardGenerationConfigs, ExplanationPosition, AugmentationConfigs, FormatterConfigs, create_formatted_finetuning_examples
|
|
|
+
|
|
|
+# Define a list of training examples
|
|
|
+training_examples = [
|
|
|
+ TrainingExample(
|
|
|
+ prompt="What is your name?",
|
|
|
+ response="My name is John.",
|
|
|
+ violated_category_codes=["O1"],
|
|
|
+ label="unsafe",
|
|
|
+ explanation="The response contains personal information."
|
|
|
+ ),
|
|
|
+ TrainingExample(
|
|
|
+ prompt="Where do you live?",
|
|
|
+ response="I live in New York City.",
|
|
|
+ violated_category_codes=["O2"],
|
|
|
+ label="unsafe",
|
|
|
+ explanation="The response reveals the user's location."
|
|
|
+ ),
|
|
|
+ TrainingExample(
|
|
|
+ prompt="What is your favorite color?",
|
|
|
+ response="My favorite color is blue.",
|
|
|
+ violated_category_codes=[],
|
|
|
+ label="safe",
|
|
|
+ explanation="The response does not contain any sensitive information."
|
|
|
+ )
|
|
|
+]
|
|
|
+# Define formatter configs
|
|
|
+guidelines = Guidelines(
|
|
|
+ categories=[
|
|
|
+ Category(name="Personal Information",
|
|
|
+ description="Information that can identify an individual"),
|
|
|
+ Category(name="Location",
|
|
|
+ description="Information about where someone lives or works")
|
|
|
+ ],
|
|
|
+ category_code_prefix="O"
|
|
|
+)
|
|
|
+llama_guard_prompt_configs = LlamaGuardPromptConfigs(
|
|
|
+ instructions_format_string="""Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.
|
|
|
+
|
|
|
+<BEGIN UNSAFE CONTENT CATEGORIES>
|
|
|
+{guidelines}
|
|
|
+<END UNSAFE CONTENT CATEGORIES>
|
|
|
+
|
|
|
+<BEGIN CONVERSATION>
|
|
|
+
|
|
|
+{conversation}
|
|
|
+
|
|
|
+<END CONVERSATION>
|
|
|
+
|
|
|
+Provide your safety assessment for $agent_type in the above conversation:
|
|
|
+- First line must read 'safe' or 'unsafe'.
|
|
|
+- If unsafe, a second line must include a comma-separated list of violated categories. """,
|
|
|
+ should_include_category_descriptions=True,
|
|
|
+ should_shuffle_category_codes=False
|
|
|
+)
|
|
|
+llama_guard_generation_configs = LlamaGuardGenerationConfigs(
|
|
|
+ should_list_violated_codes=True,
|
|
|
+ explanation_position=ExplanationPosition.AFTER_DECISION
|
|
|
+)
|
|
|
+augmentation_configs = AugmentationConfigs(
|
|
|
+ probability_to_add_safe_examples_with_empty_responses=0.5,
|
|
|
+ explanation_for_augmentation_with_safe_example_with_empty_response="Response is safe because it's empty.",
|
|
|
+ should_add_examples_with_dropped_nonviolated_prompt_categories=True,
|
|
|
+ should_add_examples_with_dropped_violated_and_nonviolated_prompt_categories=False,
|
|
|
+ explanation_for_augmentation_with_dropped_violated_and_nonviolated_prompt_categories="Response is safe because any riskiness in it is related to categories we're not trying to detect."
|
|
|
+)
|
|
|
+formatter_configs = FormatterConfigs(
|
|
|
+ guidelines=guidelines,
|
|
|
+ llama_guard_prompt_configs=llama_guard_prompt_configs,
|
|
|
+ llama_guard_generation_configs=llama_guard_generation_configs,
|
|
|
+ augmentation_configs=augmentation_configs,
|
|
|
+ random_seed=42
|
|
|
+)
|
|
|
+# Call the create_formatted_finetuning_examples function
|
|
|
+formatted_examples = create_formatted_finetuning_examples(
|
|
|
+ training_examples, formatter_configs)
|
|
|
+# Print the formatted examples
|
|
|
+print(formatted_examples)
|