Skip to content
This repository was archived by the owner on May 1, 2025. It is now read-only.

ValueError: Config name is missing. #32

@Paul-B98

Description

@Paul-B98

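I tried to run the README's demo example for fine-tuning the CodeT5+ model, but changed the CodeXGLUE dataset subset from text-to-code to code-to-text. This fails because `load_codexglue_code_to_text_dataset` calls `load_dataset` without a config name, and the code-to-text dataset requires one (one of 'go', 'java', 'javascript', 'php', 'python', 'ruby'). It would be helpful to have an option to set this config name.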

def load_codexglue_code_to_text_dataset(self):
dataset = self.dataset_config["codexglue_code_to_text"]
dataset = load_dataset(dataset)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 11
      7 model_class = load_model_pipeline(model_name="codet5", task="pretrained",
      8             model_type="plus-220M", is_eval=True)
     10 dataset = CodeXGLUEDataset(tokenizer=model_class.get_tokenizer())
---> 11 train, test, validation = dataset.load(subset="code-to-text")
     13 train_dataset= CustomDataset(train[0], train[1])
     14 test_dataset= CustomDataset(test[0], test[1])

File [~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:19](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:19), in CodeXGLUEDataset.load(self, subset)
     17 def load(self, subset):
     18     if subset in self.load_funcs:
---> 19         return self.load_funcs[subset]()
     20     else:
     21         raise ValueError(f'Invalid subset {subset}. Available subsets are: {list(self.load_funcs.keys())}')

File [~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:43](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:43), in CodeXGLUEDataset.load_codexglue_code_to_text_dataset(self)
     41 def load_codexglue_code_to_text_dataset(self):
     42     dataset = self.dataset_config["codexglue_code_to_text"]
---> 43     dataset = load_dataset(dataset)
     45     train = dataset["train"]
     46     train_code_tensors, _ = self.process_data(train["code"])

File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1773](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1773), in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
   1768 verification_mode = VerificationMode(
   1769     (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
   1770 )
   1772 # Create a dataset builder
-> 1773 builder_instance = load_dataset_builder(
   1774     path=path,
   1775     name=name,
   1776     data_dir=data_dir,
   1777     data_files=data_files,
   1778     cache_dir=cache_dir,
   1779     features=features,
   1780     download_config=download_config,
   1781     download_mode=download_mode,
   1782     revision=revision,
   1783     use_auth_token=use_auth_token,
   1784     storage_options=storage_options,
   1785     **config_kwargs,
   1786 )
   1788 # Return iterable dataset in case of streaming
   1789 if streaming:

File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1528](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1528), in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, storage_options, **config_kwargs)
   1525     raise ValueError(error_msg)
   1527 # Instantiate the dataset builder
-> 1528 builder_instance: DatasetBuilder = builder_cls(
   1529     cache_dir=cache_dir,
   1530     config_name=config_name,
   1531     data_dir=data_dir,
   1532     data_files=data_files,
   1533     hash=hash,
   1534     features=features,
   1535     use_auth_token=use_auth_token,
   1536     storage_options=storage_options,
   1537     **builder_kwargs,
   1538     **config_kwargs,
   1539 )
   1541 return builder_instance

File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:340](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:340), in DatasetBuilder.__init__(self, cache_dir, config_name, hash, base_path, info, features, use_auth_token, repo_id, data_files, data_dir, storage_options, writer_batch_size, name, **config_kwargs)
    338 if data_dir is not None:
    339     config_kwargs["data_dir"] = data_dir
--> 340 self.config, self.config_id = self._create_builder_config(
    341     config_name=config_name,
    342     custom_features=features,
    343     **config_kwargs,
    344 )
    346 # prepare info: DatasetInfo are a standardized dataclass across all datasets
    347 # Prefill datasetinfo
    348 if info is None:

File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:469](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:469), in DatasetBuilder._create_builder_config(self, config_name, custom_features, **config_kwargs)
    467 if len(self.BUILDER_CONFIGS) > 1:
    468     example_of_usage = f"load_dataset('{self.name}', '{self.BUILDER_CONFIGS[0].name}')"
--> 469     raise ValueError(
    470         "Config name is missing."
    471         f"\nPlease pick one among the available configs: {list(self.builder_configs.keys())}"
    472         + f"\nExample of usage:\n\t`{example_of_usage}`"
    473     )
    474 builder_config = self.BUILDER_CONFIGS[0]
    475 logger.info(f"No config specified, defaulting to the single config: {self.name}/{builder_config.name}")

ValueError: Config name is missing.
Please pick one among the available configs: ['go', 'java', 'javascript', 'php', 'python', 'ruby']
Example of usage:
	`load_dataset('code_x_glue_ct_code_to_text', 'go')`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions