diff --git a/tensorflow_datasets/rl_unplugged/rlu_control_suite/rlu_control_suite.py b/tensorflow_datasets/rl_unplugged/rlu_control_suite/rlu_control_suite.py index c4a6dbeb12d..d65101058df 100644 --- a/tensorflow_datasets/rl_unplugged/rlu_control_suite/rlu_control_suite.py +++ b/tensorflow_datasets/rl_unplugged/rlu_control_suite/rlu_control_suite.py @@ -184,10 +184,10 @@ def get_features_dict(self): 'steps': tfds.features.Dataset({ 'observation': { k: rlu_common.float_tensor_feature(v) - for k, v in self.builder_config.observation_size.items() + for k, v in self.builder_config.observation_size.items() # pyrefly: ignore[missing-attribute] }, 'action': tfds.features.Tensor( - shape=(self.builder_config.action_size,), dtype=np.float32 + shape=(self.builder_config.action_size,), dtype=np.float32 # pyrefly: ignore[missing-attribute] ), 'reward': np.float32, 'is_terminal': np.bool_, @@ -206,7 +206,7 @@ def get_citation(self): return _CITATION def get_file_prefix(self): - task = self.builder_config.name + task = self.builder_config.name # pyrefly: ignore[missing-attribute] return f'{self._INPUT_FILE_PREFIX}/{task}/train' def num_shards(self): @@ -215,11 +215,11 @@ def num_shards(self): def _get_example_specs(self): obs_features = { f'observation/{k}': _sequence_feature(v) - for k, v in self.builder_config.observation_size.items() + for k, v in self.builder_config.observation_size.items() # pyrefly: ignore[missing-attribute] } return { **obs_features, - 'action': _sequence_feature(self.builder_config.action_size), + 'action': _sequence_feature(self.builder_config.action_size), # pyrefly: ignore[missing-attribute] 'discount': _sequence_feature(), 'reward': _sequence_feature(), 'step_type': _sequence_feature(), diff --git a/tensorflow_datasets/rl_unplugged/rlu_locomotion/rlu_locomotion.py b/tensorflow_datasets/rl_unplugged/rlu_locomotion/rlu_locomotion.py index 6a2277672cb..f0bc548952d 100644 --- a/tensorflow_datasets/rl_unplugged/rlu_locomotion/rlu_locomotion.py +++ b/tensorflow_datasets/rl_unplugged/rlu_locomotion/rlu_locomotion.py @@ -137,7 +137,7 @@ class RluLocomotion(rlu_common.RLUBuilder): _INPUT_FILE_PREFIX = 'gs://rl_unplugged/dm_locomotion_episodes/' def get_features_dict(self): - if 'humanoid' in self.builder_config.name: + if 'humanoid' in self.builder_config.name: # pyrefly: ignore[missing-attribute] walker_features = { 'joints_vel': rlu_common.float_tensor_feature(56), 'sensors_velocimeter': rlu_common.float_tensor_feature(3), @@ -171,7 +171,7 @@ def get_features_dict(self): action_features = tfds.features.Tensor(shape=(38,), dtype=np.float32) return tfds.features.FeaturesDict({ - 'steps': tfds.features.Dataset({ + 'steps': tfds.features.Dataset({ # pyrefly: ignore[bad-argument-type] 'observation': { 'walker': walker_features, }, @@ -193,7 +193,7 @@ def get_citation(self): return _CITATION def get_file_prefix(self): - task = self.builder_config.name + task = self.builder_config.name # pyrefly: ignore[missing-attribute] return f'{self._INPUT_FILE_PREFIX}/{task}/train' def num_shards(self): @@ -203,7 +203,7 @@ def tf_example_to_step_ds( self, tf_example: tf.train.Example ) -> Dict[str, Any]: """Create an episode from a TF example.""" - feature_description = _feature_description(self.builder_config.name) + feature_description = _feature_description(self.builder_config.name) # pyrefly: ignore[missing-attribute] data = tf.io.parse_single_example(tf_example, feature_description) episode_length = tf.size(data['discount']) diff --git a/tensorflow_datasets/rl_unplugged/rlu_rwrl/rlu_rwrl.py b/tensorflow_datasets/rl_unplugged/rlu_rwrl/rlu_rwrl.py index cb42e6aafb2..e0ffd87ecad 100644 --- a/tensorflow_datasets/rl_unplugged/rlu_rwrl/rlu_rwrl.py +++ b/tensorflow_datasets/rl_unplugged/rlu_rwrl/rlu_rwrl.py @@ -187,10 +187,10 @@ def tf_example_to_feature_description( 'tf_example_to_feature_description() only works under eager mode.' ) example = example.numpy() # pytype: disable=attribute-error - example = tf.train.Example.FromString(example) + example = tf.train.Example.FromString(example) # pyrefly: ignore[bad-argument-type, bad-assignment] ret = {} - for k, v in example.features.feature.items(): + for k, v in example.features.feature.items(): # pyrefly: ignore[missing-attribute] l = len(v.float_list.value) if l % num_timesteps: raise ValueError( @@ -336,10 +336,10 @@ def get_citation(self): return _CITATION def get_file_prefix(self): - domain = self.builder_config.domain - task = self.builder_config.task - combined_challenge = self.builder_config.combined_challenge - dataset_size = self.builder_config.dataset_size + domain = self.builder_config.domain # pyrefly: ignore[missing-attribute] + task = self.builder_config.task # pyrefly: ignore[missing-attribute] + combined_challenge = self.builder_config.combined_challenge # pyrefly: ignore[missing-attribute] + dataset_size = self.builder_config.dataset_size # pyrefly: ignore[missing-attribute] return ( f'{self._INPUT_FILE_PREFIX}/' f'combined_challenge_{str(combined_challenge).lower()}/' @@ -351,9 +351,9 @@ def num_shards(self): return self._SHARDS # For testing. # type: ignore except AttributeError: pass - domain = self.builder_config.domain - combined_challenge = self.builder_config.combined_challenge - dataset_size = self.builder_config.dataset_size + domain = self.builder_config.domain # pyrefly: ignore[missing-attribute] + combined_challenge = self.builder_config.combined_challenge # pyrefly: ignore[missing-attribute] + dataset_size = self.builder_config.dataset_size # pyrefly: ignore[missing-attribute] return SHARDS_MAPPING[(combined_challenge, domain, dataset_size)] def tf_example_to_step_ds( # pytype: disable=signature-mismatch # overriding-parameter-count-checks diff --git a/tensorflow_datasets/rlds/datasets/locomotion/locomotion.py b/tensorflow_datasets/rlds/datasets/locomotion/locomotion.py index 2c3ecd0a89c..ace700b4cf0 100644 --- a/tensorflow_datasets/rlds/datasets/locomotion/locomotion.py +++ b/tensorflow_datasets/rlds/datasets/locomotion/locomotion.py @@ -62,8 +62,8 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): name='ant_sac_1M_single_policy_stochastic', observation_info=tfds.features.Tensor(shape=(111,), dtype=np.float32), action_info=tfds.features.Tensor(shape=(8,), dtype=np.float32), - reward_info=np.float32, - discount_info=np.float32, + reward_info=np.float32, # pyrefly: ignore[bad-argument-type] + discount_info=np.float32, # pyrefly: ignore[bad-argument-type] citation=_CITATION, homepage=_HOMEPAGE, overall_description=_DESCRIPTION, @@ -76,8 +76,8 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): name='hopper_sac_1M_single_policy_stochastic', observation_info=tfds.features.Tensor(shape=(11,), dtype=np.float32), action_info=tfds.features.Tensor(shape=(3,), dtype=np.float32), - reward_info=np.float32, - discount_info=np.float32, + reward_info=np.float32, # pyrefly: ignore[bad-argument-type] + discount_info=np.float32, # pyrefly: ignore[bad-argument-type] citation=_CITATION, homepage=_HOMEPAGE, overall_description=_DESCRIPTION, @@ -91,8 +91,8 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): name='halfcheetah_sac_1M_single_policy_stochastic', observation_info=tfds.features.Tensor(shape=(17,), dtype=np.float32), action_info=tfds.features.Tensor(shape=(6,), dtype=np.float32), - reward_info=np.float32, - discount_info=np.float32, + reward_info=np.float32, # pyrefly: ignore[bad-argument-type] + discount_info=np.float32, # pyrefly: ignore[bad-argument-type] citation=_CITATION, homepage=_HOMEPAGE, overall_description=_DESCRIPTION, @@ -106,8 +106,8 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): name='walker2d_sac_1M_single_policy_stochastic', observation_info=tfds.features.Tensor(shape=(17,), dtype=np.float32), action_info=tfds.features.Tensor(shape=(6,), dtype=np.float32), - reward_info=np.float32, - discount_info=np.float32, + reward_info=np.float32, # pyrefly: ignore[bad-argument-type] + discount_info=np.float32, # pyrefly: ignore[bad-argument-type] citation=_CITATION, homepage=_HOMEPAGE, overall_description=_DESCRIPTION, @@ -121,8 +121,8 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): name='humanoid_sac_15M_single_policy_stochastic', observation_info=tfds.features.Tensor(shape=(376,), dtype=np.float32), action_info=tfds.features.Tensor(shape=(17,), dtype=np.float32), - reward_info=np.float32, - discount_info=np.float32, + reward_info=np.float32, # pyrefly: ignore[bad-argument-type] + discount_info=np.float32, # pyrefly: ignore[bad-argument-type] citation=_CITATION, homepage=_HOMEPAGE, overall_description=_DESCRIPTION, @@ -137,13 +137,13 @@ class Locomotion(tfds.core.GeneratorBasedBuilder): def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" - return rlds_base.build_info(self.builder_config, self) + return rlds_base.build_info(self.builder_config, self) # pyrefly: ignore[bad-argument-type] def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract( { - 'file_path': self._DATA_PATHS[self.builder_config.name], + 'file_path': self._DATA_PATHS[self.builder_config.name], # pyrefly: ignore[missing-attribute] } ) return { diff --git a/tensorflow_datasets/rlds/datasets/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can.py b/tensorflow_datasets/rlds/datasets/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can.py index 0225aaef31f..180ed45f138 100644 --- a/tensorflow_datasets/rlds/datasets/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can.py +++ b/tensorflow_datasets/rlds/datasets/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can.py @@ -270,13 +270,13 @@ class RobosuitePandaPickPlaceCan(tfds.core.GeneratorBasedBuilder): def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" - return rlds_base.build_info(self.builder_config, self) + return rlds_base.build_info(self.builder_config, self) # pyrefly: ignore[bad-argument-type] def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract( { - 'file_path': self._DATA_PATHS[self.builder_config.name], + 'file_path': self._DATA_PATHS[self.builder_config.name], # pyrefly: ignore[missing-attribute] } ) return { diff --git a/tensorflow_datasets/rlds/rlds_base.py b/tensorflow_datasets/rlds/rlds_base.py index d77ac4e7c9b..ba47875d163 100644 --- a/tensorflow_datasets/rlds/rlds_base.py +++ b/tensorflow_datasets/rlds/rlds_base.py @@ -82,7 +82,7 @@ def build_info( **step_metadata, } if ds_config.observation_info: - step_info['observation'] = ds_config.observation_info + step_info['observation'] = ds_config.observation_info # pyrefly: ignore[unsupported-operation] if ds_config.action_info: step_info['action'] = ds_config.action_info if ds_config.reward_info: @@ -95,13 +95,13 @@ def build_info( builder=builder, description=ds_config.overall_description, features=tfds.features.FeaturesDict({ - 'steps': tfds.features.Dataset(step_info), + 'steps': tfds.features.Dataset(step_info), # pyrefly: ignore[bad-argument-type] **episode_metadata, }), - supervised_keys=ds_config.supervised_keys, + supervised_keys=ds_config.supervised_keys, # pyrefly: ignore[bad-argument-type] homepage=ds_config.homepage, citation=ds_config.citation, - metadata=ds_metadata, + metadata=ds_metadata, # pyrefly: ignore[bad-argument-type] ) diff --git a/tensorflow_datasets/robomimic/dataset_utils.py b/tensorflow_datasets/robomimic/dataset_utils.py index 50a30db765f..e4086d3a0fb 100644 --- a/tensorflow_datasets/robomimic/dataset_utils.py +++ b/tensorflow_datasets/robomimic/dataset_utils.py @@ -110,7 +110,7 @@ def make_builder_configs(dataset: DataSource): """Creates the PH build configs.""" configs = [] for task, details in TASKS.items(): - if dataset in details['datasets']: + if dataset in details['datasets']: # pyrefly: ignore[not-iterable] for observation_type in [ObservationType.IMAGE, ObservationType.LOW_DIM]: # pytype: disable=wrong-keyword-args configs.append( @@ -216,7 +216,7 @@ class RobomimicBuilder(tfds.core.GeneratorBasedBuilder, skip_registration=True): """DatasetBuilder for robomimic datasets.""" VERSION: tfds.core.Version - RELEASE_NOTES: Dict[str, str] + RELEASE_NOTES: Dict[str, str] # pyrefly: ignore[bad-override] BUILDER_CONFIGS: List[tfds.core.BuilderConfig] DATASET_NAME: str DATASET_FILE_EXTENSION: str = '' @@ -231,13 +231,13 @@ def _info(self) -> tfds.core.DatasetInfo: ) def _get_features(self) -> tfds.features.FeaturesDict: - obs_dim = TASKS[self.builder_config.task]['object'] - states_dim = TASKS[self.builder_config.task]['states'] - action_size = TASKS[self.builder_config.task]['action_size'] + obs_dim = TASKS[self.builder_config.task]['object'] # pyrefly: ignore[missing-attribute] + states_dim = TASKS[self.builder_config.task]['states'] # pyrefly: ignore[missing-attribute] + action_size = TASKS[self.builder_config.task]['action_size'] # pyrefly: ignore[missing-attribute] observation = { 'object': tensor_feature( - obs_dim, + obs_dim, # pyrefly: ignore[bad-argument-type] ), 'robot0_eef_pos': tensor_feature(3, doc='End-effector position'), 'robot0_eef_quat': tensor_feature(4, doc='End-effector orientation'), @@ -254,7 +254,7 @@ def _get_features(self) -> tfds.features.FeaturesDict: 'robot0_joint_pos_sin': tensor_feature(7), 'robot0_joint_vel': tensor_feature(7, doc='7DOF joint velocities'), } - if self.builder_config.task == Task.TRANSPORT: + if self.builder_config.task == Task.TRANSPORT: # pyrefly: ignore[missing-attribute] observation['robot1_eef_pos'] = tensor_feature( 3, doc='End-effector position' ) @@ -282,18 +282,18 @@ def _get_features(self) -> tfds.features.FeaturesDict: 7, doc='7DOF joint velocities' ) - if self.builder_config.filename == ObservationType.IMAGE: + if self.builder_config.filename == ObservationType.IMAGE: # pyrefly: ignore[missing-attribute] if self.builder_config.task == Task.TOOL_HANG: - observation['robot0_eye_in_hand_image'] = image_feature(240) - observation['sideview_image'] = image_feature(240) + observation['robot0_eye_in_hand_image'] = image_feature(240) # pyrefly: ignore[bad-assignment] + observation['sideview_image'] = image_feature(240) # pyrefly: ignore[bad-assignment] elif self.builder_config.task == Task.TRANSPORT: - observation['robot0_eye_in_hand_image'] = image_feature(84) - observation['robot1_eye_in_hand_image'] = image_feature(84) - observation['shouldercamera0_image'] = image_feature(84) - observation['shouldercamera1_image'] = image_feature(84) + observation['robot0_eye_in_hand_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] + observation['robot1_eye_in_hand_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] + observation['shouldercamera0_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] + observation['shouldercamera1_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] else: - observation['agentview_image'] = image_feature(84) - observation['robot0_eye_in_hand_image'] = image_feature(84) + observation['agentview_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] + observation['robot0_eye_in_hand_image'] = image_feature(84) # pyrefly: ignore[bad-assignment] # metadata depends on the quality type metadata = self._get_metadata() @@ -301,15 +301,15 @@ def _get_features(self) -> tfds.features.FeaturesDict: features = tfds.features.FeaturesDict({ 'horizon': np.int32, 'episode_id': np.str_, - 'steps': tfds.features.Dataset({ - 'action': tensor_feature(action_size), + 'steps': tfds.features.Dataset({ # pyrefly: ignore[bad-argument-type] + 'action': tensor_feature(action_size), # pyrefly: ignore[bad-argument-type] 'observation': observation, 'reward': np.float64, 'is_first': np.bool_, 'is_last': np.bool_, 'is_terminal': np.bool_, 'discount': np.int32, - 'states': tensor_feature(states_dim), + 'states': tensor_feature(states_dim), # pyrefly: ignore[bad-argument-type] }), **metadata, }) @@ -325,6 +325,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): # in the sparse rewards, for consistency with the other datasets. ext = self.DATASET_FILE_EXTENSION filepath = ( + # pyrefly: ignore[missing-attribute] 'http://downloads.cs.stanford.edu/downloads/rt_benchmark/' f'{self.builder_config.task}/{self.builder_config.dataset}/' f'{self.builder_config.filename}{ext}.hdf5' @@ -351,7 +352,7 @@ def _generate_examples(self, path): for key in data: yield key, { 'steps': build_episode(data[key]), - 'horizon': self.builder_config.horizon, + 'horizon': self.builder_config.horizon, # pyrefly: ignore[missing-attribute] 'episode_id': key, **episode_metadata(mask, key), } @@ -359,6 +360,6 @@ def _generate_examples(self, path): for key in data: yield key, { 'steps': build_episode(data[key]), - 'horizon': self.builder_config.horizon, + 'horizon': self.builder_config.horizon, # pyrefly: ignore[missing-attribute] 'episode_id': key, } diff --git a/tensorflow_datasets/robotics/mt_opt/mt_opt.py b/tensorflow_datasets/robotics/mt_opt/mt_opt.py index 5ef7c7950af..809c99f7ddd 100644 --- a/tensorflow_datasets/robotics/mt_opt/mt_opt.py +++ b/tensorflow_datasets/robotics/mt_opt/mt_opt.py @@ -158,7 +158,7 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=_name_to_features(self.builder_config.name), + features=_name_to_features(self.builder_config.name), # pyrefly: ignore[missing-attribute] supervised_keys=None, homepage='https://karolhausman.github.io/mt-opt/', citation=_CITATION, @@ -166,7 +166,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - ds_name = self.builder_config.name + ds_name = self.builder_config.name # pyrefly: ignore[missing-attribute] splits = {} for split, shards in _NAME_TO_SPLITS[ds_name].items(): paths = { @@ -184,7 +184,7 @@ def _generate_examples_one_file( # Dataset of tf.Examples containing full episodes. example_ds = tf.data.TFRecordDataset(filenames=str(path)) - example_features = _name_to_features_encode(self.builder_config.name) + example_features = _name_to_features_encode(self.builder_config.name) # pyrefly: ignore[missing-attribute] example_specs = example_features.get_serialized_info() parser = tfds.core.example_parser.ExampleParser(example_specs) @@ -192,7 +192,7 @@ def _generate_examples_one_file( decoded_examples = parsed_examples.map(example_features.decode_example) for index, example in enumerate(tfds.as_numpy(decoded_examples)): - if self.builder_config.name == 'rlds': + if self.builder_config.name == 'rlds': # pyrefly: ignore[missing-attribute] id_key = 'episode_id' else: id_key = 'task_code' diff --git a/tensorflow_datasets/scripts/replace_fake_images.py b/tensorflow_datasets/scripts/replace_fake_images.py index fd87e5024c6..89062db4b4e 100644 --- a/tensorflow_datasets/scripts/replace_fake_images.py +++ b/tensorflow_datasets/scripts/replace_fake_images.py @@ -124,13 +124,13 @@ def rewrite_tar(root_dir, tar_filepath): extension = '' # Extraction of .tar file - with tarfile.open(tar_filepath, 'r' + extension) as tar: + with tarfile.open(tar_filepath, 'r' + extension) as tar: # pyrefly: ignore[no-matching-overload] tar.extractall(path=temp_dir) rewrite_dir(temp_dir) # Recursively compress the archive content # Convert back into tar file - with tarfile.open(tar_filepath, 'w' + extension) as tar: + with tarfile.open(tar_filepath, 'w' + extension) as tar: # pyrefly: ignore[no-matching-overload] tar.add(temp_dir, arcname='', recursive=True) diff --git a/tensorflow_datasets/scripts/tools/compute_split_info.py b/tensorflow_datasets/scripts/tools/compute_split_info.py index 6d5b49dfbaf..7bce2337ddf 100644 --- a/tensorflow_datasets/scripts/tools/compute_split_info.py +++ b/tensorflow_datasets/scripts/tools/compute_split_info.py @@ -75,7 +75,7 @@ def main(args: argparse.Namespace) -> None: data_dir=args.data_dir, filetype_suffix=args.filetype or None, dataset_name=args.dataset_name or None, - template=args.filename_template or None, + template=args.filename_template or None, # pyrefly: ignore[bad-argument-type] ) else: filename_template = None diff --git a/tensorflow_datasets/structured/cherry_blossoms/cherry_blossoms.py b/tensorflow_datasets/structured/cherry_blossoms/cherry_blossoms.py index 9c904882672..80f634cf60f 100644 --- a/tensorflow_datasets/structured/cherry_blossoms/cherry_blossoms.py +++ b/tensorflow_datasets/structured/cherry_blossoms/cherry_blossoms.py @@ -86,7 +86,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract(URL) - return {tfds.Split.TRAIN: self._generate_examples(path)} + return {tfds.Split.TRAIN: self._generate_examples(path)} # pyrefly: ignore[missing-attribute] def _generate_examples(self, path): """Yields examples.""" diff --git a/tensorflow_datasets/structured/covid19/covid19.py b/tensorflow_datasets/structured/covid19/covid19.py index 962c8046199..2150de33f5a 100644 --- a/tensorflow_datasets/structured/covid19/covid19.py +++ b/tensorflow_datasets/structured/covid19/covid19.py @@ -802,7 +802,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): archive_path = dl_manager.download( 'https://storage.googleapis.com/covid19-open-data/v3/aggregated.csv.gz?generation=1620814656792419' ) - return {tfds.Split.TRAIN: self._generate_examples(dl_manager, archive_path)} + return {tfds.Split.TRAIN: self._generate_examples(dl_manager, archive_path)} # pyrefly: ignore[missing-attribute] def _generate_examples( self, dl_manager: tfds.download.DownloadManager, archive_path diff --git a/tensorflow_datasets/structured/dart/dart.py b/tensorflow_datasets/structured/dart/dart.py index 54ab9956f07..658c8caae68 100644 --- a/tensorflow_datasets/structured/dart/dart.py +++ b/tensorflow_datasets/structured/dart/dart.py @@ -86,15 +86,15 @@ def _split_generators(self, dl_manager): dl_manager.download_and_extract(_URL), 'dart-master', 'data', 'v1.1.1' ) return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] json_file=os.path.join( extracted_path, 'dart-v1.1.1-full-train.json' ) ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] json_file=os.path.join(extracted_path, 'dart-v1.1.1-full-dev.json') ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] json_file=os.path.join(extracted_path, 'dart-v1.1.1-full-test.json') ), } diff --git a/tensorflow_datasets/structured/diamonds/diamonds.py b/tensorflow_datasets/structured/diamonds/diamonds.py index 3aa43cc2daf..f84405ee0c9 100644 --- a/tensorflow_datasets/structured/diamonds/diamonds.py +++ b/tensorflow_datasets/structured/diamonds/diamonds.py @@ -100,7 +100,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" data = dl_manager.download({'data': _URL}) # There is no predefined train/val/test split for this dataset. - return {tfds.Split.TRAIN: self._generate_examples(file_path=data['data'])} + return {tfds.Split.TRAIN: self._generate_examples(file_path=data['data'])} # pyrefly: ignore[missing-attribute] def _generate_examples(self, file_path): """Yields examples.""" diff --git a/tensorflow_datasets/structured/howell/howell.py b/tensorflow_datasets/structured/howell/howell.py index 3b67673fb62..09a63c1541c 100644 --- a/tensorflow_datasets/structured/howell/howell.py +++ b/tensorflow_datasets/structured/howell/howell.py @@ -85,7 +85,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" file_path = dl_manager.download_and_extract(URL) - return {tfds.Split.TRAIN: self._generate_examples(file_path)} + return {tfds.Split.TRAIN: self._generate_examples(file_path)} # pyrefly: ignore[missing-attribute] def _generate_examples(self, file_path): """Yields examples.""" diff --git a/tensorflow_datasets/structured/web_graph/web_graph.py b/tensorflow_datasets/structured/web_graph/web_graph.py index 1e632b19511..bfdfb7853cd 100644 --- a/tensorflow_datasets/structured/web_graph/web_graph.py +++ b/tensorflow_datasets/structured/web_graph/web_graph.py @@ -82,16 +82,16 @@ def _get_int_feature(example: tf.train.Example, feature_name: str) -> list[int]: - return example.features.feature[feature_name].int64_list.value + return example.features.feature[feature_name].int64_list.value # pyrefly: ignore[bad-return] def _process_example(example: bytes, is_test=False): """Process a single example.""" - example = tf.train.Example.FromString(example) - row_tag = _get_int_feature(example, 'row_tag')[0] - col_tag = np.array(_get_int_feature(example, 'col_tag'), dtype=np.int64) + example = tf.train.Example.FromString(example) # pyrefly: ignore[bad-assignment] + row_tag = _get_int_feature(example, 'row_tag')[0] # pyrefly: ignore[bad-argument-type] + col_tag = np.array(_get_int_feature(example, 'col_tag'), dtype=np.int64) # pyrefly: ignore[bad-argument-type] gt_tag = np.array( - _get_int_feature(example, 'gt_tag') if is_test else [], dtype=np.int64 + _get_int_feature(example, 'gt_tag') if is_test else [], dtype=np.int64 # pyrefly: ignore[bad-argument-type] ) return_dict = {'row_tag': row_tag, 'col_tag': col_tag, 'gt_tag': gt_tag} return row_tag, return_dict @@ -212,9 +212,9 @@ def _split_generators( del dl_manager subfolder = os.path.join( - self.WEB_GRAPH_HOMEPAGE, self.builder_config.subfolder + self.WEB_GRAPH_HOMEPAGE, self.builder_config.subfolder # pyrefly: ignore[missing-attribute] ) - shards = self.SHARDS or self.builder_config.num_shards + shards = self.SHARDS or self.builder_config.num_shards # pyrefly: ignore[missing-attribute] train_files = [ filename(subfolder, f'train.tfr-{i:05}-of-{int(shards):05}') diff --git a/tensorflow_datasets/structured/web_nlg/web_nlg.py b/tensorflow_datasets/structured/web_nlg/web_nlg.py index 9513a2bdbfe..2e569336078 100644 --- a/tensorflow_datasets/structured/web_nlg/web_nlg.py +++ b/tensorflow_datasets/structured/web_nlg/web_nlg.py @@ -106,7 +106,7 @@ def get_files_in_dir(directory): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'list_files': get_files_in_dir( os.path.join(extracted_path, 'train') @@ -115,7 +115,7 @@ def get_files_in_dir(directory): }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'list_files': get_files_in_dir( os.path.join(extracted_path, 'dev') diff --git a/tensorflow_datasets/structured/wiki_table_text/wiki_table_text.py b/tensorflow_datasets/structured/wiki_table_text/wiki_table_text.py index b8303af1c55..c1e67b70822 100644 --- a/tensorflow_datasets/structured/wiki_table_text/wiki_table_text.py +++ b/tensorflow_datasets/structured/wiki_table_text/wiki_table_text.py @@ -79,11 +79,11 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): {'train_path': _TRAIN_URL, 'dev_path': _DEV_URL, 'test_path': _TEST_URL} ) return { - tfds.Split.TRAIN: self._generate_examples(extracted_path['train_path']), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples(extracted_path['train_path']), # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] extracted_path['dev_path'] ), - tfds.Split.TEST: self._generate_examples(extracted_path['test_path']), + tfds.Split.TEST: self._generate_examples(extracted_path['test_path']), # pyrefly: ignore[missing-attribute] } def _generate_examples(self, path): diff --git a/tensorflow_datasets/structured/wine_quality/wine_quality.py b/tensorflow_datasets/structured/wine_quality/wine_quality.py index ed6f855865c..3caedc9d37d 100644 --- a/tensorflow_datasets/structured/wine_quality/wine_quality.py +++ b/tensorflow_datasets/structured/wine_quality/wine_quality.py @@ -119,7 +119,7 @@ def _info(self): return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict({ + features=tfds.features.FeaturesDict({ # pyrefly: ignore[bad-argument-type] "quality": np.int32, "features": features_dict, }), @@ -130,12 +130,12 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - file_path = dl_manager.download({"train": self.builder_config.dl_url}) + file_path = dl_manager.download({"train": self.builder_config.dl_url}) # pyrefly: ignore[missing-attribute] # There is no predefined train/val/test split for this dataset. return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"file_path": file_path["train"]}, ), ] diff --git a/tensorflow_datasets/summarization/gov_report/gov_report.py b/tensorflow_datasets/summarization/gov_report/gov_report.py index 3c2d81b79a2..dfeee9d33d4 100644 --- a/tensorflow_datasets/summarization/gov_report/gov_report.py +++ b/tensorflow_datasets/summarization/gov_report/gov_report.py @@ -132,9 +132,9 @@ def _info(self) -> tfds.core.DatasetInfo: builder=self, description=_DESCRIPTION, features=tfds.features.FeaturesDict( - {k: tfds.features.Text() for k in self.builder_config.all_keys} + {k: tfds.features.Text() for k in self.builder_config.all_keys} # pyrefly: ignore[missing-attribute] ), - supervised_keys=self.builder_config.supervised_keys, + supervised_keys=self.builder_config.supervised_keys, # pyrefly: ignore[missing-attribute] homepage=_HOMEPAGE, citation=_CITATION, ) @@ -143,19 +143,19 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract(_URL) split_map = { - tfds.Split.TRAIN: "train", - tfds.Split.VALIDATION: "valid", - tfds.Split.TEST: "test", + tfds.Split.TRAIN: "train", # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION: "valid", # pyrefly: ignore[missing-attribute] + tfds.Split.TEST: "test", # pyrefly: ignore[missing-attribute] } path = os.path.join(path, "gov-report") return {k: self._generate_examples(path, v) for k, v in split_map.items()} def _generate_examples(self, path: str, split: str): """Yields examples.""" - subset = self.builder_config.subset - style = self.builder_config.style - separator = self.builder_config.separator - report_key, summary_key = self.builder_config.supervised_keys + subset = self.builder_config.subset # pyrefly: ignore[missing-attribute] + style = self.builder_config.style # pyrefly: ignore[missing-attribute] + separator = self.builder_config.separator # pyrefly: ignore[missing-attribute] + report_key, summary_key = self.builder_config.supervised_keys # pyrefly: ignore[missing-attribute] split_filename = os.path.join(path, "split_ids", f"{subset}_{split}.ids") with epath.Path(split_filename).open() as f: for line in f: @@ -191,7 +191,7 @@ def _generate_examples(self, path: str, split: str): results.update( { k: separator.join(d[k]) if isinstance(d[k], list) else d[k] - for k in self.builder_config.other_keys + for k in self.builder_config.other_keys # pyrefly: ignore[missing-attribute] } ) yield d[_ID_KEY], results diff --git a/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_1/dummy_ds_1_dataset_builder.py b/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_1/dummy_ds_1_dataset_builder.py index 469a864afb5..0c7a4b39bfb 100644 --- a/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_1/dummy_ds_1_dataset_builder.py +++ b/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_1/dummy_ds_1_dataset_builder.py @@ -34,7 +34,7 @@ def _info(self): def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] ), ] diff --git a/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_2/dummy_builder.py b/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_2/dummy_builder.py index 1d3c667fb8e..53565af7588 100644 --- a/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_2/dummy_builder.py +++ b/tensorflow_datasets/testing/dummy_config_based_datasets/dummy_ds_2/dummy_builder.py @@ -39,7 +39,7 @@ def _info(self): def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] ), ] diff --git a/tensorflow_datasets/testing/dummy_dataset/dummy_dataset.py b/tensorflow_datasets/testing/dummy_dataset/dummy_dataset.py index 11c55bf4cee..26df1f50b45 100644 --- a/tensorflow_datasets/testing/dummy_dataset/dummy_dataset.py +++ b/tensorflow_datasets/testing/dummy_dataset/dummy_dataset.py @@ -40,7 +40,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={'path': os.path.join(path, 'train.txt')}, ), ] diff --git a/tensorflow_datasets/text/c4_wsrs/c4_wsrs.py b/tensorflow_datasets/text/c4_wsrs/c4_wsrs.py index c022e749e80..409e3f1f9de 100644 --- a/tensorflow_datasets/text/c4_wsrs/c4_wsrs.py +++ b/tensorflow_datasets/text/c4_wsrs/c4_wsrs.py @@ -166,25 +166,25 @@ def _process_example(element: tuple[str, WSRSFeatures]): | 'ExtractSnippets' >> beam.FlatMap( c4_wsrs_utils.extract_snippets, - self.builder_config.max_sentences_per_snippet, + self.builder_config.max_sentences_per_snippet, # pyrefly: ignore[missing-attribute] abbreviations_by_expansion, expansion_re, - self.builder_config.max_snippet_char_len, - self.builder_config.alpha_keep_no_rs, - self.builder_config.alpha_keep_rs, + self.builder_config.max_snippet_char_len, # pyrefly: ignore[missing-attribute] + self.builder_config.alpha_keep_no_rs, # pyrefly: ignore[missing-attribute] + self.builder_config.alpha_keep_rs, # pyrefly: ignore[missing-attribute] ) | 'ReshuffleSnippets1' >> beam.Reshuffle() | 'ReverseSubstitution' >> beam.FlatMap( c4_wsrs_utils.reverse_substitution, - self.builder_config.subsitution_rate, - self.builder_config.min_snippet_token_len, + self.builder_config.subsitution_rate, # pyrefly: ignore[missing-attribute] + self.builder_config.min_snippet_token_len, # pyrefly: ignore[missing-attribute] ) | 'GroupByRarestSubstitution' >> beam.GroupByKey() | 'SampleSnippetsByRarestSubstitution' >> beam.FlatMap( c4_wsrs_utils.sample_snippets_by_substitution, - self.builder_config.num_snippets_per_substitution, + self.builder_config.num_snippets_per_substitution, # pyrefly: ignore[missing-attribute] ) | 'ReshuffleSnippets2' >> beam.Reshuffle() | 'ProcessExamples' >> beam.Map(_process_example) diff --git a/tensorflow_datasets/text/conll2002/conll2002.py b/tensorflow_datasets/text/conll2002/conll2002.py index 798a3d4579c..6701f09a3e2 100644 --- a/tensorflow_datasets/text/conll2002/conll2002.py +++ b/tensorflow_datasets/text/conll2002/conll2002.py @@ -172,7 +172,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): dl_paths = dl_manager.download(urls_to_download) return { - "train": self._generate_examples(dl_paths["train"]), - "dev": self._generate_examples(dl_paths["dev"]), - "test": self._generate_examples(dl_paths["test"]), + "train": self._generate_examples(dl_paths["train"]), # pyrefly: ignore[bad-index] + "dev": self._generate_examples(dl_paths["dev"]), # pyrefly: ignore[bad-index] + "test": self._generate_examples(dl_paths["test"]), # pyrefly: ignore[bad-index] } diff --git a/tensorflow_datasets/text/docnli/docnli.py b/tensorflow_datasets/text/docnli/docnli.py index 733ccb06965..e90de470713 100644 --- a/tensorflow_datasets/text/docnli/docnli.py +++ b/tensorflow_datasets/text/docnli/docnli.py @@ -89,13 +89,13 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): # Returns the Dict[split names, Iterator[Key, Example]] return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _EXTRACT_PATH_TOKEN / 'train.json' ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _EXTRACT_PATH_TOKEN / 'dev.json' ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _EXTRACT_PATH_TOKEN / 'test.json' ), } diff --git a/tensorflow_datasets/text/gem/gem.py b/tensorflow_datasets/text/gem/gem.py index 35f4a7c9738..2a5098873ae 100644 --- a/tensorflow_datasets/text/gem/gem.py +++ b/tensorflow_datasets/text/gem/gem.py @@ -758,15 +758,15 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(self.builder_config.features), + features=tfds.features.FeaturesDict(self.builder_config.features), # pyrefly: ignore[missing-attribute] homepage=_URL, - citation=self.builder_config.citation + "\n" + _CITATION, + citation=self.builder_config.citation + "\n" + _CITATION, # pyrefly: ignore[missing-attribute] ) def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - files = dl_manager.download_and_extract(self.builder_config.data_urls) - if self.builder_config.name == "common_gen": + files = dl_manager.download_and_extract(self.builder_config.data_urls) # pyrefly: ignore[missing-attribute] + if self.builder_config.name == "common_gen": # pyrefly: ignore[missing-attribute] challenge_sets = [ ("challenge_train_sample", "train_common_gen_RandomSample500.json"), ( @@ -786,7 +786,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -797,28 +797,28 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], "commongen.train.jsonl" + files["data"], "commongen.train.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], "commongen.dev.jsonl" + files["data"], "commongen.dev.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], "commongen.test_noref.jsonl" + files["data"], "commongen.test_noref.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "test", }, @@ -847,7 +847,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -858,23 +858,23 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["train"], + "filepath": files["train"], # pyrefly: ignore[bad-index] "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["validation"], + "filepath": files["validation"], # pyrefly: ignore[bad-index] "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["test"], + "filepath": files["test"], # pyrefly: ignore[bad-index] "set_name": "test", }, ), @@ -882,23 +882,23 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): elif self.builder_config.name == "dart": return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["train"], + "filepath": files["train"], # pyrefly: ignore[bad-index] "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["validation"], + "filepath": files["validation"], # pyrefly: ignore[bad-index] "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["test"], + "filepath": files["test"], # pyrefly: ignore[bad-index] "set_name": "test", }, ), @@ -922,7 +922,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -932,30 +932,30 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["train"], + "filepath": files["train"], # pyrefly: ignore[bad-index] "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["validation"], + "filepath": files["validation"], # pyrefly: ignore[bad-index] "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["test"], + "filepath": files["test"], # pyrefly: ignore[bad-index] "set_name": "test", }, ), ] + challenge_splits - elif self.builder_config.name.startswith("mlsum"): + elif self.builder_config.name.startswith("mlsum"): # pyrefly: ignore[missing-attribute] # Can be either _de or _es. - lang = self.builder_config.name.split("_")[1] + lang = self.builder_config.name.split("_")[1] # pyrefly: ignore[missing-attribute] challenge_sets = [ ( "challenge_train_sample", @@ -973,8 +973,8 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): tfds.core.SplitGenerator( name=challenge_split, gen_kwargs={ - "filepath": os.path.join( - files["challenge_set"], + "filepath": os.path.join( # pyrefly: ignore[no-matching-overload] + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -984,34 +984,34 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["train"], lang + "_train.jsonl" + files["train"], lang + "_train.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "train", "lang": lang, - "filepaths": files["bad_ids"], + "filepaths": files["bad_ids"], # pyrefly: ignore[bad-index] }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["validation"], lang + "_val.jsonl" + files["validation"], lang + "_val.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "validation", "lang": lang, - "filepaths": files["bad_ids"], + "filepaths": files["bad_ids"], # pyrefly: ignore[bad-index] }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": os.path.join(files["test"], lang + "_test.jsonl"), + "filepath": os.path.join(files["test"], lang + "_test.jsonl"), # pyrefly: ignore[bad-index] "set_name": "test", "lang": lang, - "filepaths": files["bad_ids"], + "filepaths": files["bad_ids"], # pyrefly: ignore[bad-index] }, ), ] + challenge_splits @@ -1053,7 +1053,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -1064,14 +1064,14 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): generators = [] for tfds_spl, spl in zip( - [tfds.Split.TRAIN, tfds.Split.VALIDATION, tfds.Split.TEST], + [tfds.Split.TRAIN, tfds.Split.VALIDATION, tfds.Split.TEST], # pyrefly: ignore[missing-attribute] ["train", "validation", "test"], ): generators.append( tfds.core.SplitGenerator( name=tfds_spl, gen_kwargs={ - "filepath": os.path.join(files["data"], "gem_sgd.json"), + "filepath": os.path.join(files["data"], "gem_sgd.json"), # pyrefly: ignore[bad-index] "set_name": spl, }, ) @@ -1096,7 +1096,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -1107,37 +1107,37 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], "totto_data/totto_train_data.jsonl" + files["data"], "totto_data/totto_train_data.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], "totto_data/totto_dev_data.jsonl" + files["data"], "totto_data/totto_dev_data.jsonl" # pyrefly: ignore[bad-index] ), "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": os.path.join( - files["data"], + files["data"], # pyrefly: ignore[bad-index] "totto_data/unlabeled_totto_test_data.jsonl", ), "set_name": "test", }, ), ] + challenge_splits - elif self.builder_config.name.startswith("web_nlg"): + elif self.builder_config.name.startswith("web_nlg"): # pyrefly: ignore[missing-attribute] # Can be either _en or _ru. - ln = self.builder_config.name.split("_")[2] + ln = self.builder_config.name.split("_")[2] # pyrefly: ignore[missing-attribute] challenge_sets = [ ( "challenge_train_sample", @@ -1163,8 +1163,8 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): tfds.core.SplitGenerator( name=challenge_split, gen_kwargs={ - "filepath": os.path.join( - files["challenge_set"], + "filepath": os.path.join( # pyrefly: ignore[no-matching-overload] + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -1174,19 +1174,19 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, - gen_kwargs={"filepath": files["train"], "set_name": "train"}, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + gen_kwargs={"filepath": files["train"], "set_name": "train"}, # pyrefly: ignore[bad-index] ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["validation"], + "filepath": files["validation"], # pyrefly: ignore[bad-index] "set_name": "validation", }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, - gen_kwargs={"filepath": files["test"], "set_name": "test"}, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] + gen_kwargs={"filepath": files["test"], "set_name": "test"}, # pyrefly: ignore[bad-index] ), ] + challenge_splits elif self.builder_config.name == "wiki_auto_asset_turk": @@ -1239,7 +1239,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -1249,16 +1249,16 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["train"], + "filepath": files["train"], # pyrefly: ignore[bad-index] "set_name": "train", }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["validation"], + "filepath": files["validation"], # pyrefly: ignore[bad-index] "set_name": "validation", }, ), @@ -1268,25 +1268,25 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): "filepath": "", "set_name": "test_asset", "filepaths": [ - files["test_asset_" + str(i)] for i in range(10) + files["test_asset_" + str(i)] for i in range(10) # pyrefly: ignore[bad-index] ], }, ), tfds.core.SplitGenerator( name="test_turk", gen_kwargs={ - "filepath": files["test_turk"], + "filepath": files["test_turk"], # pyrefly: ignore[bad-index] "set_name": "test_turk", }, ), ] + challenge_splits - elif self.builder_config.name.startswith("wiki_lingua"): - lang_name = self.builder_config.name.split("_")[-2] - lang = self.builder_config.name.split("_")[-1] - base_dir = os.path.join(files["data"], lang_name) + elif self.builder_config.name.startswith("wiki_lingua"): # pyrefly: ignore[missing-attribute] + lang_name = self.builder_config.name.split("_")[-2] # pyrefly: ignore[missing-attribute] + lang = self.builder_config.name.split("_")[-1] # pyrefly: ignore[missing-attribute] + base_dir = os.path.join(files["data"], lang_name) # pyrefly: ignore[bad-index] return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": base_dir, "set_name": "train", @@ -1294,7 +1294,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": base_dir, "set_name": "val", @@ -1302,7 +1302,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "filepath": base_dir, "set_name": "test", @@ -1339,7 +1339,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): name=challenge_split, gen_kwargs={ "filepath": os.path.join( - files["challenge_set"], + files["challenge_set"], # pyrefly: ignore[bad-index] self.builder_config.name, filename, ), @@ -1349,34 +1349,34 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["splits"], + "filepath": files["splits"], # pyrefly: ignore[bad-index] "set_name": "train", - "filepaths": os.path.join(files["data"], "bbc-summary-data"), + "filepaths": os.path.join(files["data"], "bbc-summary-data"), # pyrefly: ignore[bad-index] }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["splits"], + "filepath": files["splits"], # pyrefly: ignore[bad-index] "set_name": "validation", - "filepaths": os.path.join(files["data"], "bbc-summary-data"), + "filepaths": os.path.join(files["data"], "bbc-summary-data"), # pyrefly: ignore[bad-index] }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "filepath": files["splits"], + "filepath": files["splits"], # pyrefly: ignore[bad-index] "set_name": "test", - "filepaths": os.path.join(files["data"], "bbc-summary-data"), + "filepaths": os.path.join(files["data"], "bbc-summary-data"), # pyrefly: ignore[bad-index] }, ), ] + challenge_splits def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): """Yields examples.""" - if self.builder_config.name == "common_gen": + if self.builder_config.name == "common_gen": # pyrefly: ignore[missing-attribute] with epath.Path(filepath).open() as f: if set_name.startswith("challenge"): exples = json.load(f) @@ -1521,7 +1521,7 @@ def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): "target": example["ref"], "references": [] if set_name == "train" else [example["ref"]], } - elif self.builder_config.name.startswith("mlsum"): + elif self.builder_config.name.startswith("mlsum"): # pyrefly: ignore[missing-attribute] if set_name in ["train", "validation", "test", "challenge_test_covid"]: if set_name == "challenge_test_covid": bad_ids = {} @@ -1668,7 +1668,7 @@ def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): else: response["target"] = "" yield id_, response - elif self.builder_config.name.startswith("web_nlg"): + elif self.builder_config.name.startswith("web_nlg"): # pyrefly: ignore[missing-attribute] if "challenge" in set_name: exples = json.load(tf.io.gfile.GFile(filepath)) if isinstance(exples, dict): @@ -1744,7 +1744,7 @@ def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): del example[k] yield id_, example elif set_name == "test_asset": - files = [tf.io.gfile.GFile(f_name) for f_name in filepaths] + files = [tf.io.gfile.GFile(f_name) for f_name in filepaths] # pyrefly: ignore[not-iterable] for id_, lines in enumerate(zip(*files)): yield id_, { "gem_id": f"{self.builder_config.name}-{set_name}-{id_}", @@ -1765,7 +1765,7 @@ def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): if k in exple: del exple[k] yield id_, exple - elif self.builder_config.name.startswith("wiki_lingua"): + elif self.builder_config.name.startswith("wiki_lingua"): # pyrefly: ignore[missing-attribute] with tf.io.gfile.GFile( os.path.join(filepath, f"{set_name}.src.{lang}") ) as f_in_ln: @@ -1833,7 +1833,7 @@ def _generate_examples(self, filepath, set_name, filepaths=None, lang=None): with epath.Path(filepath).open() as f: split_ids = json.load(f) for id_, i in enumerate(split_ids[set_name]): - with tf.io.gfile.GFile(os.path.join(filepaths, i + ".summary")) as f: + with tf.io.gfile.GFile(os.path.join(filepaths, i + ".summary")) as f: # pyrefly: ignore[no-matching-overload] text = "".join( [ line diff --git a/tensorflow_datasets/text/gsm8k/gsm8k.py b/tensorflow_datasets/text/gsm8k/gsm8k.py index fcdb253e860..60fd24b0c98 100644 --- a/tensorflow_datasets/text/gsm8k/gsm8k.py +++ b/tensorflow_datasets/text/gsm8k/gsm8k.py @@ -68,7 +68,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" extracted = dl_manager.download_and_extract(_URLS) - return {k: self._generate_examples(v) for k, v in extracted.items()} + return {k: self._generate_examples(v) for k, v in extracted.items()} # pyrefly: ignore[missing-attribute] def _generate_examples(self, path: epath.PathLike): """Yields examples.""" diff --git a/tensorflow_datasets/text/mrqa/mrqa.py b/tensorflow_datasets/text/mrqa/mrqa.py index 9052f5aff57..624dd275b4c 100644 --- a/tensorflow_datasets/text/mrqa/mrqa.py +++ b/tensorflow_datasets/text/mrqa/mrqa.py @@ -594,40 +594,40 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(self.builder_config.features), + features=tfds.features.FeaturesDict(self.builder_config.features), # pyrefly: ignore[missing-attribute] supervised_keys=None, homepage=_HOMEPAGE, - citation=self.builder_config.citation + '\n' + _CITATION, + citation=self.builder_config.citation + '\n' + _CITATION, # pyrefly: ignore[missing-attribute] ) def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - data_dir = dl_manager.download_and_extract(self.builder_config.data_urls) + data_dir = dl_manager.download_and_extract(self.builder_config.data_urls) # pyrefly: ignore[missing-attribute] split_generators = {} - if 'train' in self.builder_config.data_urls: + if 'train' in self.builder_config.data_urls: # pyrefly: ignore[missing-attribute] split_generators.update( { - tfds.Split.TRAIN: self._generate_examples( - path=data_dir['train'], split='train' + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] + path=data_dir['train'], split='train' # pyrefly: ignore[bad-index] ), } ) - if 'validation' in self.builder_config.data_urls: + if 'validation' in self.builder_config.data_urls: # pyrefly: ignore[not-iterable] split_generators.update( { - tfds.Split.VALIDATION: self._generate_examples( - path=data_dir['validation'], split='validation' + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] + path=data_dir['validation'], split='validation' # pyrefly: ignore[bad-index] ), } ) - if 'test' in self.builder_config.data_urls: + if 'test' in self.builder_config.data_urls: # pyrefly: ignore[not-iterable] split_generators.update( { - tfds.Split.TEST: self._generate_examples( - path=data_dir['test'], split='test' + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] + path=data_dir['test'], split='test' # pyrefly: ignore[bad-index] ), } ) diff --git a/tensorflow_datasets/text/qrecc/qrecc.py b/tensorflow_datasets/text/qrecc/qrecc.py index f06b8516f28..ba0d901f3ae 100644 --- a/tensorflow_datasets/text/qrecc/qrecc.py +++ b/tensorflow_datasets/text/qrecc/qrecc.py @@ -81,10 +81,10 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): # Returns the Dict[split names, Iterator[Key, Example]] return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / "qrecc_train.json" ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / "qrecc_test.json" ), } diff --git a/tensorflow_datasets/text/scrolls/scrolls.py b/tensorflow_datasets/text/scrolls/scrolls.py index 69fcc48ebe2..b24e7cb1825 100644 --- a/tensorflow_datasets/text/scrolls/scrolls.py +++ b/tensorflow_datasets/text/scrolls/scrolls.py @@ -229,19 +229,19 @@ class Scrolls(tfds.core.GeneratorBasedBuilder): def _info(self): return tfds.core.DatasetInfo( builder=self, - description=_SCROLLS_DESCRIPTION + self.builder_config.description, + description=_SCROLLS_DESCRIPTION + self.builder_config.description, # pyrefly: ignore[missing-attribute] features=tfds.features.FeaturesDict( {feature: tfds.features.Text() for feature in _FEATURES} ), supervised_keys=(_INPUT_KEY, _OUTPUT_KEY), - homepage=self.builder_config.url, - citation=self.builder_config.citation + "\n" + _SCROLLS_CITATION, + homepage=self.builder_config.url, # pyrefly: ignore[missing-attribute] + citation=self.builder_config.citation + "\n" + _SCROLLS_CITATION, # pyrefly: ignore[missing-attribute] ) def _split_generators(self, dl_manager): - dl_dir = dl_manager.download_and_extract(self.builder_config.data_url) + dl_dir = dl_manager.download_and_extract(self.builder_config.data_url) # pyrefly: ignore[missing-attribute] task_name = task_name = _get_task_name_from_data_url( - self.builder_config.data_url + self.builder_config.data_url # pyrefly: ignore[missing-attribute] ) return { diff --git a/tensorflow_datasets/text/squad_question_generation/squad_question_generation.py b/tensorflow_datasets/text/squad_question_generation/squad_question_generation.py index bff4bf77264..51c603af46e 100644 --- a/tensorflow_datasets/text/squad_question_generation/squad_question_generation.py +++ b/tensorflow_datasets/text/squad_question_generation/squad_question_generation.py @@ -187,21 +187,21 @@ def _info(self): return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(self.builder_config.features), + features=tfds.features.FeaturesDict(self.builder_config.features), # pyrefly: ignore[missing-attribute] homepage=_HOMEPAGE_URL_DU_ET_AL + "\n" + _CITATION_ZHOU_ET_AL, - citation=self.builder_config.citation + "\n" + _CITATION_SQUAD, + citation=self.builder_config.citation + "\n" + _CITATION_SQUAD, # pyrefly: ignore[missing-attribute] supervised_keys=(_CONTEXT_PASSAGE, _QUESTION), ) def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - dl_paths = dl_manager.download_and_extract(self.builder_config.data_urls) + dl_paths = dl_manager.download_and_extract(self.builder_config.data_urls) # pyrefly: ignore[missing-attribute] - if self.builder_config.name == "split_du": + if self.builder_config.name == "split_du": # pyrefly: ignore[missing-attribute] return { - tfds.Split.TRAIN: self._generate_examples(dl_paths["train"]), - tfds.Split.VALIDATION: self._generate_examples(dl_paths["dev"]), - tfds.Split.TEST: self._generate_examples(dl_paths["test"]), + tfds.Split.TRAIN: self._generate_examples(dl_paths["train"]), # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION: self._generate_examples(dl_paths["dev"]), # pyrefly: ignore[missing-attribute] + tfds.Split.TEST: self._generate_examples(dl_paths["test"]), # pyrefly: ignore[missing-attribute] } elif self.builder_config.name == "split_zhou": mapping_dir = os.path.join(dl_paths["mapping"], "qas_id_in_squad") @@ -209,17 +209,17 @@ def _split_generators(self, dl_manager): dl_paths["redistribute"], "redistribute/raw" ) return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] dl_paths["train"], os.path.join(mapping_dir, "train.txt.id"), os.path.join(redistribute_raw_dir, "train.txt"), ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] dl_paths["dev"], os.path.join(mapping_dir, "dev.txt.shuffle.dev.id"), os.path.join(redistribute_raw_dir, "dev.txt.shuffle.dev"), ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] dl_paths["dev"], os.path.join(mapping_dir, "dev.txt.shuffle.test.id"), os.path.join(redistribute_raw_dir, "dev.txt.shuffle.test"), @@ -244,7 +244,7 @@ def _generate_examples( Yields: key and example dict. """ - if self.builder_config.name == "split_du": + if self.builder_config.name == "split_du": # pyrefly: ignore[missing-attribute] # The file format slightly differs from the original SQuAD JSON format. with epath.Path(squad_data_path).open() as f: squad_data = json.load(f) diff --git a/tensorflow_datasets/text/unifiedqa/unifiedqa.py b/tensorflow_datasets/text/unifiedqa/unifiedqa.py index 32ff29fff14..b4e02035bbe 100644 --- a/tensorflow_datasets/text/unifiedqa/unifiedqa.py +++ b/tensorflow_datasets/text/unifiedqa/unifiedqa.py @@ -1958,37 +1958,37 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(self.builder_config.features), + features=tfds.features.FeaturesDict(self.builder_config.features), # pyrefly: ignore[missing-attribute] supervised_keys=None, homepage=_HOMEPAGE, - citation=self.builder_config.citation + '\n' + _CITATION, + citation=self.builder_config.citation + '\n' + _CITATION, # pyrefly: ignore[missing-attribute] ) def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - data_dir = dl_manager.download_and_extract(self.builder_config.data_urls) + data_dir = dl_manager.download_and_extract(self.builder_config.data_urls) # pyrefly: ignore[missing-attribute] split_generators = {} - if 'train' in self.builder_config.data_urls: + if 'train' in self.builder_config.data_urls: # pyrefly: ignore[missing-attribute] split_generators.update( { - tfds.Split.TRAIN: self._generate_examples(path=data_dir['train']), + tfds.Split.TRAIN: self._generate_examples(path=data_dir['train']), # pyrefly: ignore[bad-index, missing-attribute] } ) - if 'validation' in self.builder_config.data_urls: + if 'validation' in self.builder_config.data_urls: # pyrefly: ignore[not-iterable] split_generators.update( { - tfds.Split.VALIDATION: self._generate_examples( - path=data_dir['validation'] + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] + path=data_dir['validation'] # pyrefly: ignore[bad-index] ), } ) - if 'test' in self.builder_config.data_urls: + if 'test' in self.builder_config.data_urls: # pyrefly: ignore[not-iterable] split_generators.update( { - tfds.Split.TEST: self._generate_examples(path=data_dir['test']), + tfds.Split.TEST: self._generate_examples(path=data_dir['test']), # pyrefly: ignore[bad-index, missing-attribute] } ) return split_generators @@ -1998,7 +1998,7 @@ def _generate_examples(self, path): with epath.Path(path).open() as f: data = csv.reader(f, delimiter='\t') # Skip the header row - if self.builder_config.header: + if self.builder_config.header: # pyrefly: ignore[missing-attribute] next(data) for id_, row in enumerate(data): yield id_, {'input': row[0].strip(), 'output': row[1].strip()} diff --git a/tensorflow_datasets/text/wiki_dialog/wiki_dialog.py b/tensorflow_datasets/text/wiki_dialog/wiki_dialog.py index ea09a8c5c23..785835b405b 100644 --- a/tensorflow_datasets/text/wiki_dialog/wiki_dialog.py +++ b/tensorflow_datasets/text/wiki_dialog/wiki_dialog.py @@ -118,7 +118,7 @@ def _generate_examples(self, filepaths: Sequence[str]): def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - base_download_url = self.builder_config.base_download_url + base_download_url = self.builder_config.base_download_url # pyrefly: ignore[missing-attribute] download_urls = { 'train': [ os.path.join( @@ -133,7 +133,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): filepaths = dl_manager.download(download_urls) splits = { - 'train': self._generate_examples(filepaths['train']), - 'validation': self._generate_examples(filepaths['validation']), + 'train': self._generate_examples(filepaths['train']), # pyrefly: ignore[bad-index] + 'validation': self._generate_examples(filepaths['validation']), # pyrefly: ignore[bad-index] } return splits diff --git a/tensorflow_datasets/text/wikiann/wikiann.py b/tensorflow_datasets/text/wikiann/wikiann.py index 87071498c32..f9cb7d02f8f 100644 --- a/tensorflow_datasets/text/wikiann/wikiann.py +++ b/tensorflow_datasets/text/wikiann/wikiann.py @@ -350,20 +350,20 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract(URL) subpath = dl_manager.extract( - os.path.join(path, self.builder_config.language + ".tar.gz") + os.path.join(path, self.builder_config.language + ".tar.gz") # pyrefly: ignore[missing-attribute] ) return [ tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(subpath, "dev")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(subpath, "test")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(subpath, "train")}, ), ] diff --git a/tensorflow_datasets/text/wsc273/wsc273.py b/tensorflow_datasets/text/wsc273/wsc273.py index 1e7e27edc95..2af29e723f5 100644 --- a/tensorflow_datasets/text/wsc273/wsc273.py +++ b/tensorflow_datasets/text/wsc273/wsc273.py @@ -75,7 +75,7 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" file_path = dl_manager.download(_DOWNLOAD_URL) - return {tfds.Split.TEST: self._generate_examples(file_path)} + return {tfds.Split.TEST: self._generate_examples(file_path)} # pyrefly: ignore[missing-attribute] def _generate_examples(self, file_path): """Yields Examples. diff --git a/tensorflow_datasets/text/xtreme_pawsx/xtreme_pawsx.py b/tensorflow_datasets/text/xtreme_pawsx/xtreme_pawsx.py index 3582e0c49b7..67d7c6db74a 100644 --- a/tensorflow_datasets/text/xtreme_pawsx/xtreme_pawsx.py +++ b/tensorflow_datasets/text/xtreme_pawsx/xtreme_pawsx.py @@ -105,7 +105,7 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" dl_path = dl_manager.download_and_extract( - _XTREME_TRANSLATIONS_FORMAT.format(self.builder_config.language) + _XTREME_TRANSLATIONS_FORMAT.format(self.builder_config.language) # pyrefly: ignore[missing-attribute] ) return {"train": self._generate_examples(dl_path)} diff --git a/tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py b/tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py index 359cb9204bf..40249695006 100644 --- a/tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py +++ b/tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py @@ -120,7 +120,7 @@ class WikiAuto(tfds.core.GeneratorBasedBuilder): def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" - if self.builder_config.name == 'manual': + if self.builder_config.name == 'manual': # pyrefly: ignore[missing-attribute] features = tfds.features.FeaturesDict({ 'alignment_label': tfds.features.ClassLabel( names=['notAligned', 'aligned', 'partialAligned'] @@ -182,9 +182,9 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - urls_to_download = _URLs[self.builder_config.name] + urls_to_download = _URLs[self.builder_config.name] # pyrefly: ignore[missing-attribute] data_dir = dl_manager.download_and_extract(urls_to_download) - if self.builder_config.name in ['manual', 'auto']: + if self.builder_config.name in ['manual', 'auto']: # pyrefly: ignore[missing-attribute] return { spl: self._generate_examples(filepaths=data_dir, split=spl) for spl in data_dir @@ -195,7 +195,7 @@ def _split_generators(self, dl_manager): def _generate_examples(self, filepaths, split): """Yields examples.""" - if self.builder_config.name == 'manual': + if self.builder_config.name == 'manual': # pyrefly: ignore[missing-attribute] keys = [ 'alignment_label', 'simple_sentence_id', diff --git a/tensorflow_datasets/translate/mtnt/mtnt.py b/tensorflow_datasets/translate/mtnt/mtnt.py index b026d7f9100..45dab7bc069 100644 --- a/tensorflow_datasets/translate/mtnt/mtnt.py +++ b/tensorflow_datasets/translate/mtnt/mtnt.py @@ -80,7 +80,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract(URL_1_1) - pair = f'{self.builder_config.src_lang}-{self.builder_config.dst_lang}' + pair = f'{self.builder_config.src_lang}-{self.builder_config.dst_lang}' # pyrefly: ignore[missing-attribute] return { 'train': self._generate_examples(path / f'MTNT/train/train.{pair}.tsv'), 'test': self._generate_examples(path / f'MTNT/test/test.{pair}.tsv'), diff --git a/tensorflow_datasets/video/davis/davis.py b/tensorflow_datasets/video/davis/davis.py index 5e850ef728c..7f0f0473fac 100644 --- a/tensorflow_datasets/video/davis/davis.py +++ b/tensorflow_datasets/video/davis/davis.py @@ -126,7 +126,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - if self.builder_config.full_resolution: + if self.builder_config.full_resolution: # pyrefly: ignore[missing-attribute] trainval_data = dl_manager.download_and_extract( _URL + 'DAVIS-2017-trainval-Full-Resolution.zip' ) @@ -139,8 +139,8 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): val_files = trainval_data / 'DAVIS/ImageSets/2017/val.txt' return { - tfds.Split.TRAIN: self._generate_examples(train_files), - tfds.Split.VALIDATION: self._generate_examples(val_files), + tfds.Split.TRAIN: self._generate_examples(train_files), # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION: self._generate_examples(val_files), # pyrefly: ignore[missing-attribute] } def _generate_examples(self, path): @@ -149,7 +149,7 @@ def _generate_examples(self, path): videos_to_include = path.read_text().splitlines() root_path = path.parent.parent.parent # Move up three directories. resolution = ( - 'Full-Resolution' if self.builder_config.full_resolution else '480p' + 'Full-Resolution' if self.builder_config.full_resolution else '480p' # pyrefly: ignore[missing-attribute] ) for video in videos_to_include: images_path = root_path / 'JPEGImages' / resolution / video diff --git a/tensorflow_datasets/video/tao/tao.py b/tensorflow_datasets/video/tao/tao.py index 1f5f80d9764..a56e5aeb235 100644 --- a/tensorflow_datasets/video/tao/tao.py +++ b/tensorflow_datasets/video/tao/tao.py @@ -116,7 +116,7 @@ def _maybe_prepare_manual_data( f'{file} inside {os.fspath(dl_manager.manual_dir)}. ' 'There should only be one file matching this pattern.' ) - return dl_manager.extract(files) + return dl_manager.extract(files) # pyrefly: ignore[bad-return] def _get_category_id_map(annotations_root) -> dict[str, int]: @@ -291,8 +291,8 @@ def _info(self) -> tfds.core.DatasetInfo: names_file = tfds.core.tfds_path('video/tao/labels.txt') video_shape = ( None, - self.builder_config.height, - self.builder_config.width, + self.builder_config.height, # pyrefly: ignore[missing-attribute] + self.builder_config.width, # pyrefly: ignore[missing-attribute] 3, ) all_features = { @@ -325,7 +325,7 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(all_features), + features=tfds.features.FeaturesDict(all_features), # pyrefly: ignore[bad-argument-type] supervised_keys=None, homepage='https://taodataset.org/', citation=_CITATION, @@ -347,7 +347,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): id_map = _get_category_id_map(data['annotations'] / 'annotations-1.2') return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] data_path=data['train'], manual_path=manual_train, annotations_path=data['annotations'] @@ -355,7 +355,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): / 'train.json', id_map=id_map, ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] data_path=data['val'], manual_path=manual_val, annotations_path=data['annotations'] @@ -363,7 +363,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): / 'validation.json', id_map=id_map, ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] data_path=data['test'], manual_path=manual_test, annotations_path=data['annotations'] @@ -375,7 +375,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): def _maybe_resize_video(self, frames_list): """Resizes the video depending on the build_config.""" - if self.builder_config.height is None: + if self.builder_config.height is None: # pyrefly: ignore[missing-attribute] return frames_list # Don't waste compute loading and resizing. resized_images = [] cv2 = tfds.core.lazy_imports.cv2 @@ -384,7 +384,7 @@ def _maybe_resize_video(self, frames_list): image = tfds.core.lazy_imports.PIL_Image.open(f).convert('RGB') image = np.asarray(image) image = cv2.resize( - image, (self.builder_config.width, self.builder_config.height) + image, (self.builder_config.width, self.builder_config.height) # pyrefly: ignore[missing-attribute] ) resized_images.append(image) return resized_images @@ -395,8 +395,8 @@ def _create_metadata( """Creates the metadata object for each video data example.""" metadata = {} metadata['num_frames'] = num_frames - metadata['height'] = self.builder_config.height or video_ann['height'] - metadata['width'] = self.builder_config.width or video_ann['width'] + metadata['height'] = self.builder_config.height or video_ann['height'] # pyrefly: ignore[missing-attribute] + metadata['width'] = self.builder_config.width or video_ann['width'] # pyrefly: ignore[missing-attribute] metadata['neg_category_ids'] = video_ann['neg_category_ids'] metadata['not_exhaustive_category_ids'] = video_ann[ 'not_exhaustive_category_ids' @@ -452,7 +452,7 @@ def _process_example(video_id_and_path): if is_manual and manual_path is None: continue path = ( - (manual_path if is_manual else data_path) + (manual_path if is_manual else data_path) # pyrefly: ignore[unsupported-operation] / 'frames' / vids[video_id]['name'] ) diff --git a/tensorflow_datasets/video/youtube_vis/youtube_vis.py b/tensorflow_datasets/video/youtube_vis/youtube_vis.py index d4696388776..9d7a1082e32 100644 --- a/tensorflow_datasets/video/youtube_vis/youtube_vis.py +++ b/tensorflow_datasets/video/youtube_vis/youtube_vis.py @@ -95,7 +95,7 @@ def _decode_segmentation( (desired_width, desired_height), interpolation=cv2.INTER_NEAREST, ) - segmentation = np.expand_dims(segmentation, axis=-1) + segmentation = np.expand_dims(segmentation, axis=-1) # pyrefly: ignore[bad-assignment] assert len(segmentation.shape) == 3 return segmentation @@ -426,11 +426,11 @@ def _info(self) -> tfds.core.DatasetInfo: names_file = tfds.core.tfds_path('video/youtube_vis/labels.txt') video_shape = ( None, - self.builder_config.height, - self.builder_config.width, + self.builder_config.height, # pyrefly: ignore[missing-attribute] + self.builder_config.width, # pyrefly: ignore[missing-attribute] 3, ) - seg_shape = (None, self.builder_config.height, self.builder_config.width, 1) + seg_shape = (None, self.builder_config.height, self.builder_config.width, 1) # pyrefly: ignore[missing-attribute] all_features = { 'video': tfds.features.Video(video_shape), # pytype: disable=wrong-arg-types # gen-stub-imports 'metadata': { @@ -453,7 +453,7 @@ def _info(self) -> tfds.core.DatasetInfo: return tfds.core.DatasetInfo( builder=self, description=_DESCRIPTION, - features=tfds.features.FeaturesDict(all_features), + features=tfds.features.FeaturesDict(all_features), # pyrefly: ignore[bad-argument-type] supervised_keys=None, homepage='https://youtube-vos.org/dataset/vis/', citation=_CITATION, @@ -466,13 +466,13 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): 'train_all_frames': dl_manager.manual_dir / 'train_all_frames.zip', 'train_annotations': dl_manager.manual_dir / 'train.json', } - if self.builder_config.split_train_data_range is not None: + if self.builder_config.split_train_data_range is not None: # pyrefly: ignore[missing-attribute] # Create a custom training split by subsampling the training data. train_data_range = self.builder_config.split_train_data_range else: # Use the provided training split. train_data_range = None - if self.builder_config.split_val_data_range is not None: + if self.builder_config.split_val_data_range is not None: # pyrefly: ignore[missing-attribute] # Create a custom validation split by subsampling the training data. val_data_range = self.builder_config.split_val_data_range manually_downloaded_files['valid_all_frames'] = manually_downloaded_files[ @@ -490,7 +490,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): dl_manager.manual_dir / 'valid.json' ) - if self.builder_config.split_test_data_range is not None: + if self.builder_config.split_test_data_range is not None: # pyrefly: ignore[missing-attribute] # Create a custom test split by subsampling the training data. test_data_range = self.builder_config.split_test_data_range manually_downloaded_files['test_all_frames'] = manually_downloaded_files[ @@ -513,32 +513,32 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): test_dir = 'train_all_frames' if test_data_range else 'test_all_frames' return { - tfds.Split.TRAIN: self._generate_examples( - annotations=extracted_files['train_annotations'], - all_frames=extracted_files['train_all_frames'] + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] + annotations=extracted_files['train_annotations'], # pyrefly: ignore[bad-index] + all_frames=extracted_files['train_all_frames'] # pyrefly: ignore[bad-index] / 'train_all_frames' / 'JPEGImages', - video_range_to_use=train_data_range, + video_range_to_use=train_data_range, # pyrefly: ignore[bad-argument-type] ), - tfds.Split.VALIDATION: self._generate_examples( - annotations=extracted_files['valid_annotations'], - all_frames=extracted_files['valid_all_frames'] + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] + annotations=extracted_files['valid_annotations'], # pyrefly: ignore[bad-index] + all_frames=extracted_files['valid_all_frames'] # pyrefly: ignore[bad-index] / val_dir / 'JPEGImages', - video_range_to_use=val_data_range, + video_range_to_use=val_data_range, # pyrefly: ignore[bad-argument-type] ), - tfds.Split.TEST: self._generate_examples( - annotations=extracted_files['test_annotations'], - all_frames=extracted_files['test_all_frames'] + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] + annotations=extracted_files['test_annotations'], # pyrefly: ignore[bad-index] + all_frames=extracted_files['test_all_frames'] # pyrefly: ignore[bad-index] / test_dir / 'JPEGImages', - video_range_to_use=test_data_range, + video_range_to_use=test_data_range, # pyrefly: ignore[bad-argument-type] ), } def _maybe_resize_video(self, frames_list): """Resizes the video depending on the build_config.""" - if self.builder_config.height is None: + if self.builder_config.height is None: # pyrefly: ignore[missing-attribute] return frames_list # Don't waste compute loading and resizing. resized_images = [] cv2 = tfds.core.lazy_imports.cv2 @@ -547,7 +547,7 @@ def _maybe_resize_video(self, frames_list): image = tfds.core.lazy_imports.PIL_Image.open(f).convert('RGB') image = np.asarray(image) image = cv2.resize( - image, (self.builder_config.width, self.builder_config.height) + image, (self.builder_config.width, self.builder_config.height) # pyrefly: ignore[missing-attribute] ) resized_images.append(image) return resized_images diff --git a/tensorflow_datasets/vision_language/gref/gref.py b/tensorflow_datasets/vision_language/gref/gref.py index 9750042c798..2c1280c2097 100644 --- a/tensorflow_datasets/vision_language/gref/gref.py +++ b/tensorflow_datasets/vision_language/gref/gref.py @@ -156,11 +156,11 @@ def _split_generators(self, dl_manager): ) # Specify the splits return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] coco_image_dir, gref_aligned_json_train, ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] coco_image_dir, gref_aligned_json_val, ), diff --git a/tensorflow_datasets/vision_language/grounded_scan/grounded_scan.py b/tensorflow_datasets/vision_language/grounded_scan/grounded_scan.py index 23cc90b277b..68c8e40f19e 100644 --- a/tensorflow_datasets/vision_language/grounded_scan/grounded_scan.py +++ b/tensorflow_datasets/vision_language/grounded_scan/grounded_scan.py @@ -240,12 +240,12 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - path = dl_manager.download_and_extract(self.builder_config.data_path) + path = dl_manager.download_and_extract(self.builder_config.data_path) # pyrefly: ignore[missing-attribute] return { split_name: self._generate_examples( - path / self.builder_config.name, split_name=split_name + path / self.builder_config.name, split_name=split_name # pyrefly: ignore[missing-attribute] ) - for split_name in self.builder_config.splits_names + for split_name in self.builder_config.splits_names # pyrefly: ignore[missing-attribute] } def _generate_examples(self, path, split_name): diff --git a/tensorflow_datasets/vision_language/laion400m/laion400m.py b/tensorflow_datasets/vision_language/laion400m/laion400m.py index b1676350399..28d1cb6d27f 100644 --- a/tensorflow_datasets/vision_language/laion400m/laion400m.py +++ b/tensorflow_datasets/vision_language/laion400m/laion400m.py @@ -179,12 +179,12 @@ def _info(self) -> tfds.core.DatasetInfo: ), } - if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: - features.update({ + if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: # pyrefly: ignore[missing-attribute] + features.update({ # pyrefly: ignore[no-matching-overload] 'image': tfds.features.Image(doc='image'), }) else: - features.update({ + features.update({ # pyrefly: ignore[no-matching-overload] 'image_embedding': tfds.features.Tensor( shape=_CLIP_EMBEDDING_SHAPE, dtype=tf.float16, @@ -210,7 +210,7 @@ def _download_data( self, dl_manager: tfds.download.DownloadManager ) -> Dict[str, epath.Path]: """Downloads data.""" - if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: + if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: # pyrefly: ignore[missing-attribute] if not dl_manager.manual_dir.exists(): raise AssertionError( 'LAION-400M requires manual download of the images. Please download' @@ -220,7 +220,7 @@ def _download_data( return {} else: file_name_to_url = {} - for shard_idx in range(self.builder_config.num_shards): + for shard_idx in range(self.builder_config.num_shards): # pyrefly: ignore[missing-attribute] img_emb_file_name, text_emb_file_name, metadata_file_name = ( _get_embeddings_file_names(shard_idx) ) @@ -236,7 +236,7 @@ def _download_data( file_name_to_dl_path = dl_manager.download(file_name_to_url) - return file_name_to_dl_path + return file_name_to_dl_path # pyrefly: ignore[bad-return] def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" @@ -255,7 +255,7 @@ def _generate_examples( return ( 'Generate shard indices' - >> beam.Create(list(range(self.builder_config.num_shards))) + >> beam.Create(list(range(self.builder_config.num_shards))) # pyrefly: ignore[missing-attribute] | 'Generate examples from a single shard' >> beam.FlatMap( functools.partial( @@ -279,7 +279,7 @@ def _generate_examples_one_shard( """Yields examples from a single shard.""" pd = tfds.core.lazy_imports.pandas - if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: + if self.builder_config.name == LAION400M_IMAGES_CONFIG.name: # pyrefly: ignore[missing-attribute] img_archive_path = dl_manager.manual_dir / f'{shard_idx:05d}.tar' metadata_path = dl_manager.manual_dir / f'{shard_idx:05d}.parquet'