# Janus JSON Files
Janus JSON files are created as the output of many of the janus
CLI commands and can also be used as input to those same commands.
## Explanation
The Janus JSON file is a standardized input/output format for the janus
CLI tool. It contains the following fields:
{
"input": "string",
"metadata": {
"cost": float,
"processing_time": float,
"num_requests": int,
"input_tokens": int,
"output_tokens": int,
"converter_name": "string"
},
"outputs": [
"string"
]
}
## Keys
input
: The input to the command. This is usually the source code or whatever the command is processing.
metadata
: A dictionary containing metadata about the command. This includes:
cost
: The cost of the command in USD.
processing_time
: The time it took to process the command in seconds.
num_requests
: The number of requests made to the LLM. If there are multiple requests for a single pipeline stage, this likely means that there was a failure in the pipeline and the stage was retried.
input_tokens
: The number of input tokens used by the LLM.
output_tokens
: The number of output tokens generated by the LLM.
converter_name
: The name of the converter used to generate the output.
outputs
: A list of strings containing the output of the command. This can take different forms based on the command. For example, for the janus translate command, this will be the translated code.
## Example
An example Janus JSON file is shown below. The command used to create this JSON file is here:
janus translate -i janus/cli -s python -o test-out -t javascript -L my-gpt -S ast-flex
{
"input": "from pathlib import Path\nfrom typing import Optional\n\nimport click\nimport typer\nfrom typing_extensions import Annotated\n\nfrom janus.cli.constants import REFINERS\nfrom janus.language.naive.registry import CUSTOM_SPLITTERS\nfrom janus.utils.enums import LANGUAGES\nfrom janus.utils.logger import create_logger\n\nlog = create_logger(__name__)\n\n\ndef translate(\n input_dir: Annotated[\n Path,\n typer.Option(\n \"--input\",\n \"-i\",\n help=\"The directory containing the source code to be translated. \"\n \"The files should all be in one flat directory.\",\n ),\n ],\n source_lang: Annotated[\n str,\n typer.Option(\n \"--source-language\",\n \"-s\",\n help=\"The language of the source code.\",\n click_type=click.Choice(sorted(LANGUAGES)),\n ),\n ],\n output_dir: Annotated[\n Path,\n typer.Option(\n \"--output\", \"-o\", help=\"The directory to store the translated code in.\"\n ),\n ],\n target_lang: Annotated[\n str,\n typer.Option(\n \"--target-language\",\n \"-t\",\n help=\"The desired output language to translate the source code to. The \"\n \"format can follow a 'language-version' syntax. Use 'text' to get plaintext\"\n \"results as returned by the LLM. Examples: `python-3.10`, `mumps`, `java-10`,\"\n \"text.\",\n ),\n ],\n llm_name: Annotated[\n str,\n typer.Option(\n \"--llm\",\n \"-L\",\n help=\"The custom name of the model set with 'janus llm add'.\",\n ),\n ],\n failure_dir: Annotated[\n Optional[Path],\n typer.Option(\n \"--failure-directory\",\n \"-f\",\n help=\"The directory to store failure files during translation\",\n ),\n ] = None,\n max_prompts: Annotated[\n int,\n typer.Option(\n \"--max-prompts\",\n \"-m\",\n help=\"The maximum number of times to prompt a model on one functional block \"\n \"before exiting the application. 
This is to prevent wasting too much money.\",\n ),\n ] = 10,\n overwrite: Annotated[\n bool,\n typer.Option(\n \"--overwrite/--preserve\",\n help=\"Whether to overwrite existing files in the output directory\",\n ),\n ] = False,\n skip_context: Annotated[\n bool,\n typer.Option(\n \"--skip-context\",\n help=\"Prompts will include any context information associated with source\"\n \" code blocks, unless this option is specified\",\n ),\n ] = False,\n temp: Annotated[\n float,\n typer.Option(\"--temperature\", \"-T\", help=\"Sampling temperature.\", min=0, max=2),\n ] = 0.7,\n prompt_template: Annotated[\n str,\n typer.Option(\n \"--prompt-template\",\n \"-p\",\n help=\"Name of the Janus prompt template directory or \"\n \"path to a directory containing those template files.\",\n ),\n ] = \"simple\",\n collection: Annotated[\n str,\n typer.Option(\n \"--collection\",\n \"-c\",\n help=\"If set, will put the translated result into a Chroma DB \"\n \"collection with the name provided.\",\n ),\n ] = None,\n splitter_type: Annotated[\n str,\n typer.Option(\n \"-S\",\n \"--splitter\",\n help=\"Name of custom splitter to use\",\n click_type=click.Choice(list(CUSTOM_SPLITTERS.keys())),\n ),\n ] = \"file\",\n refiner_types: Annotated[\n list[str],\n typer.Option(\n \"-r\",\n \"--refiner\",\n help=\"\\\n refinement chain\",\n click_type=click.Choice(list(REFINERS.keys())),\n ),\n ] = [\"JanusRefiner\"],\n retriever_type: Annotated[\n str,\n typer.Option(\n \"-R\",\n \"--retriever\",\n help=\"Name of custom retriever to use\",\n click_type=click.Choice([\"active_usings\", \"language_docs\"]),\n ),\n ] = None,\n max_tokens: Annotated[\n int,\n typer.Option(\n \"--max-tokens\",\n \"-M\",\n help=\"The maximum number of tokens the model will take in. 
\"\n \"If unspecificed, model's default max will be used.\",\n ),\n ] = None,\n use_janus_inputs: Annotated[\n bool,\n typer.Option(\n \"-j\",\n \"--use-janus-inputs\",\n help=\"Prsent if translator should use janus files as inputs\",\n ),\n ] = False,\n):\n from janus.cli.constants import db_loc, get_collections_config\n from janus.converter.translate import Translator\n\n refiner_types = [REFINERS[r] for r in refiner_types]\n try:\n target_language, target_version = target_lang.split(\"-\")\n except ValueError:\n target_language = target_lang\n target_version = None\n # make sure not overwriting input\n if source_lang.lower() == target_language.lower() and input_dir == output_dir:\n log.error(\"Output files would overwrite input! Aborting...\")\n raise ValueError\n\n model_arguments = dict(temperature=temp)\n collections_config = get_collections_config()\n translator = Translator(\n model=llm_name,\n model_arguments=model_arguments,\n source_language=source_lang,\n target_language=target_language,\n target_version=target_version,\n max_prompts=max_prompts,\n max_tokens=max_tokens,\n prompt_templates=prompt_template,\n db_path=db_loc,\n db_config=collections_config,\n splitter_type=splitter_type,\n refiner_types=refiner_types,\n retriever_type=retriever_type,\n use_janus_inputs=use_janus_inputs,\n )\n translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)",
"metadata": {
"cost": 0.009809249999999999,
"processing_time": 5.246312856674194,
"num_requests": 1,
"input_tokens": 1431,
"output_tokens": 534,
"converter_name": "Translator"
},
"outputs": [
"// Import necessary modules\nimport { Path } from 'path';\nimport click from 'click';\nimport typer from 'typer';\nimport { REFINERS } from 'janus/cli/constants';\nimport { CUSTOM_SPLITTERS } from 'janus/language/naive/registry';\nimport { LANGUAGES } from 'janus/utils/enums';\nimport { createLogger } from 'janus/utils/logger';\n\n// Create a logger\nconst log = createLogger(__filename);\n\n// Define the translate function\nfunction translate({\n inputDir,\n sourceLang,\n outputDir,\n targetLang,\n llmName,\n failureDir = null,\n maxPrompts = 10,\n overwrite = false,\n skipContext = false,\n temp = 0.7,\n promptTemplate = 'simple',\n collection = null,\n splitterType = 'file',\n refinerTypes = ['JanusRefiner'],\n retrieverType = null,\n maxTokens = null,\n useJanusInputs = false,\n}) {\n const { dbLoc, getCollectionsConfig } = require('janus/cli/constants');\n const { Translator } = require('janus/converter/translate');\n\n // Configure refiner types\n refinerTypes = refinerTypes.map(r => REFINERS[r]);\n\n // Extract target language and version\n let targetLanguage, targetVersion;\n try {\n [targetLanguage, targetVersion] = targetLang.split('-');\n } catch (error) {\n targetLanguage = targetLang;\n targetVersion = null;\n }\n\n // Ensure not overwriting input\n if (sourceLang.toLowerCase() === targetLanguage.toLowerCase() && inputDir === outputDir) {\n log.error(\"Output files would overwrite input! Aborting...\");\n throw new Error(\"Output files would overwrite input! 
Aborting...\");\n }\n\n // Model arguments\n const modelArguments = { temperature: temp };\n const collectionsConfig = getCollectionsConfig();\n\n // Create a translator instance\n const translator = new Translator({\n model: llmName,\n modelArguments,\n sourceLanguage: sourceLang,\n targetLanguage: targetLanguage,\n targetVersion: targetVersion,\n maxPrompts,\n maxTokens,\n promptTemplates: promptTemplate,\n dbPath: dbLoc,\n dbConfig: collectionsConfig,\n splitterType,\n refinerTypes,\n retrieverType,\n useJanusInputs,\n });\n\n // Perform translation\n translator.translate(inputDir, outputDir, failureDir, overwrite, collection);\n}\n"
]
}
To use this same file as an input for a janus translate
command that translates the code from JavaScript back to Python, you would run the following command:
janus translate -i test-out/ -s javascript -o test-out-2 -t python -L my-gpt -S ast-flex -j
The key difference here is the -j
flag, which tells Janus to use the Janus JSON files as input instead of the source code files.
The resultant JSON file is shown below:
{
"input": "// Import necessary modules\nimport { Path } from 'path';\nimport click from 'click';\nimport typer from 'typer';\nimport { REFINERS } from 'janus/cli/constants';\nimport { CUSTOM_SPLITTERS } from 'janus/language/naive/registry';\nimport { LANGUAGES } from 'janus/utils/enums';\nimport { createLogger } from 'janus/utils/logger';\n\n// Create a logger\nconst log = createLogger(__filename);\n\n// Define the translate function\nfunction translate({\n inputDir,\n sourceLang,\n outputDir,\n targetLang,\n llmName,\n failureDir = null,\n maxPrompts = 10,\n overwrite = false,\n skipContext = false,\n temp = 0.7,\n promptTemplate = 'simple',\n collection = null,\n splitterType = 'file',\n refinerTypes = ['JanusRefiner'],\n retrieverType = null,\n maxTokens = null,\n useJanusInputs = false,\n}) {\n const { dbLoc, getCollectionsConfig } = require('janus/cli/constants');\n const { Translator } = require('janus/converter/translate');\n\n // Configure refiner types\n refinerTypes = refinerTypes.map(r => REFINERS[r]);\n\n // Extract target language and version\n let targetLanguage, targetVersion;\n try {\n [targetLanguage, targetVersion] = targetLang.split('-');\n } catch (error) {\n targetLanguage = targetLang;\n targetVersion = null;\n }\n\n // Ensure not overwriting input\n if (sourceLang.toLowerCase() === targetLanguage.toLowerCase() && inputDir === outputDir) {\n log.error(\"Output files would overwrite input! Aborting...\");\n throw new Error(\"Output files would overwrite input! 
Aborting...\");\n }\n\n // Model arguments\n const modelArguments = { temperature: temp };\n const collectionsConfig = getCollectionsConfig();\n\n // Create a translator instance\n const translator = new Translator({\n model: llmName,\n modelArguments,\n sourceLanguage: sourceLang,\n targetLanguage: targetLanguage,\n targetVersion: targetVersion,\n maxPrompts,\n maxTokens,\n promptTemplates: promptTemplate,\n dbPath: dbLoc,\n dbConfig: collectionsConfig,\n splitterType,\n refinerTypes,\n retrieverType,\n useJanusInputs,\n });\n\n // Perform translation\n translator.translate(inputDir, outputDir, failureDir, overwrite, collection);\n}",
"metadata": {
"cost": 0.0072435,
"processing_time": 5.323791742324829,
"num_requests": 1,
"input_tokens": 710,
"output_tokens": 481,
"converter_name": "Translator"
},
"outputs": [
"# Import necessary modules\nfrom pathlib import Path\nimport click\nimport typer\nfrom janus.cli.constants import REFINERS\nfrom janus.language.naive.registry import CUSTOM_SPLITTERS\nfrom janus.utils.enums import LANGUAGES\nfrom janus.utils.logger import createLogger\n\n# Create a logger\nlog = createLogger(__file__)\n\n# Define the translate function\ndef translate(\n input_dir,\n source_lang,\n output_dir,\n target_lang,\n llm_name,\n failure_dir=None,\n max_prompts=10,\n overwrite=False,\n skip_context=False,\n temp=0.7,\n prompt_template='simple',\n collection=None,\n splitter_type='file',\n refiner_types=['JanusRefiner'],\n retriever_type=None,\n max_tokens=None,\n use_janus_inputs=False,\n):\n from janus.cli.constants import dbLoc, getCollectionsConfig\n from janus.converter.translate import Translator\n\n # Configure refiner types\n refiner_types = [REFINERS[r] for r in refiner_types]\n\n # Extract target language and version\n try:\n target_language, target_version = target_lang.split('-')\n except ValueError:\n target_language = target_lang\n target_version = None\n\n # Ensure not overwriting input\n if source_lang.lower() == target_language.lower() and input_dir == output_dir:\n log.error(\"Output files would overwrite input! Aborting...\")\n raise Exception(\"Output files would overwrite input! 
Aborting...\")\n\n # Model arguments\n model_arguments = {'temperature': temp}\n collections_config = getCollectionsConfig()\n\n # Create a translator instance\n translator = Translator(\n model=llm_name,\n modelArguments=model_arguments,\n sourceLanguage=source_lang,\n targetLanguage=target_language,\n targetVersion=target_version,\n maxPrompts=max_prompts,\n maxTokens=max_tokens,\n promptTemplates=prompt_template,\n dbPath=dbLoc,\n dbConfig=collections_config,\n splitterType=splitter_type,\n refinerTypes=refiner_types,\n retrieverType=retriever_type,\n useJanusInputs=use_janus_inputs,\n )\n\n # Perform translation\n translator.translate(input_dir, output_dir, failure_dir, overwrite, collection)\n"
]
}