diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 592f482141a45..cdaf16cb972c2 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -408,7 +408,7 @@ def append_library_dir(library_dir): if not library_dir.startswith("-L"): raise ValueError( "pkg-config --libs-only-L returned unexpected " - "value {!r}".format(library_dir)) + f"value {library_dir!r}") append_library_dir(library_dir[2:]) if _sys.platform == 'win32': diff --git a/python/pyarrow/_acero.pyx b/python/pyarrow/_acero.pyx index d49945ed70009..d0c63ab530b49 100644 --- a/python/pyarrow/_acero.pyx +++ b/python/pyarrow/_acero.pyx @@ -553,7 +553,7 @@ cdef class Declaration(_Weakrefable): return frombytes(GetResultValue(DeclarationToString(self.decl))) def __repr__(self): - return "<pyarrow.acero.Declaration>\n{0}".format(str(self)) + return f"<pyarrow.acero.Declaration>\n{str(self)}" def to_table(self, bint use_threads=True): """ diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index d23286dcdd02e..de2a06d8dc3d6 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -63,14 +63,10 @@ def _pas(): def _forbid_instantiation(klass, subclasses_instead=True): - msg = '{} is an abstract class thus cannot be initialized.'.format( - klass.__name__ - ) + msg = f'{klass.__name__} is an abstract class thus cannot be initialized.' if subclasses_instead: subclasses = [cls.__name__ for cls in klass.__subclasses__()] - msg += ' Use one of the subclasses instead: {}'.format( - ', '.join(subclasses) - ) + msg += f' Use one of the subclasses instead: {", ".join(subclasses)}' raise TypeError(msg) @@ -201,8 +197,7 @@ cdef class Kernel(_Weakrefable): """ def __init__(self): - raise TypeError("Do not call {}'s constructor directly" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly") cdef class ScalarKernel(Kernel): @@ -212,8 +207,7 @@ cdef class ScalarKernel(Kernel): self.kernel = kernel def __repr__(self): - return ("ScalarKernel<{}>" - .format(frombytes(self.kernel.signature.get().ToString()))) + return f"ScalarKernel<{frombytes(self.kernel.signature.get().ToString())}>" cdef class VectorKernel(Kernel): @@ -223,8 +217,7 @@ cdef class VectorKernel(Kernel): self.kernel = kernel def __repr__(self): - return ("VectorKernel<{}>" - .format(frombytes(self.kernel.signature.get().ToString()))) + return f"VectorKernel<{frombytes(self.kernel.signature.get().ToString())}>" cdef class ScalarAggregateKernel(Kernel): @@ -234,8 +227,7 @@ cdef class ScalarAggregateKernel(Kernel): self.kernel = kernel def __repr__(self): - return ("ScalarAggregateKernel<{}>" - .format(frombytes(self.kernel.signature.get().ToString()))) + return f"ScalarAggregateKernel<{frombytes(self.kernel.signature.get().ToString())}>" cdef class HashAggregateKernel(Kernel): @@ -245,8 +237,7 @@ cdef class HashAggregateKernel(Kernel): self.kernel = kernel def __repr__(self): - return ("HashAggregateKernel<{}>" - .format(frombytes(self.kernel.signature.get().ToString()))) + return f"HashAggregateKernel<{frombytes(self.kernel.signature.get().ToString())}>" FunctionDoc = namedtuple( @@ -298,17 +289,14 @@ cdef class Function(_Weakrefable): } def __init__(self): - raise TypeError("Do not call {}'s constructor directly" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly") cdef void init(self, const shared_ptr[CFunction]& sp_func) except *: self.sp_func = sp_func self.base_func = sp_func.get() def __repr__(self): - return ("arrow.compute.Function<name={}, kind={}, "
- "arity={}, num_kernels={}>" - .format(self.name, self.kind, self.arity, self.num_kernels)) + return f"arrow.compute.Function<name={self.name}, kind={self.kind}, arity={self.arity}, num_kernels={self.num_kernels}>" def __reduce__(self): # Reduction uses the global registry @@ -2462,9 +2450,7 @@ cdef class Expression(_Weakrefable): return frombytes(self.expr.ToString()) def __repr__(self): - return "<pyarrow.compute.{0} {1}>".format( - self.__class__.__name__, str(self) - ) + return f"<pyarrow.compute.{self.__class__.__name__} {str(self)}>" @staticmethod def from_substrait(object message not None): @@ -2768,8 +2754,7 @@ cdef class UdfContext: """ def __init__(self): - raise TypeError("Do not call {}'s constructor directly" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly") cdef void init(self, const CUdfContext &c_context): self.c_context = c_context diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index e53c6d1847566..62cb75fa6eab6 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -1150,9 +1150,8 @@ cdef class CSVStreamingReader(RecordBatchReader): Schema schema def __init__(self): - raise TypeError("Do not call {}'s constructor directly, " - "use pyarrow.csv.open_csv() instead." - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + "use pyarrow.csv.open_csv() instead.") # Note about cancellation: we cannot create a SignalStopHandler # by default here, as several CSVStreamingReader instances may be diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx index 5aed9f8a28518..340f7e35c73f8 100644 --- a/python/pyarrow/_cuda.pyx +++ b/python/pyarrow/_cuda.pyx @@ -841,8 +841,7 @@ cdef class BufferWriter(NativeFile): offset = offset + position else: with gil: - raise ValueError("Invalid value of whence: {0}" - .format(whence)) + raise ValueError(f"Invalid value of whence: {whence}") check_status(self.writer.Seek(offset)) return self.tell() diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index ca66c2a7a0ae5..553321498e87d 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -1127,8 +1127,8 @@ cdef class FileSystemDataset(Dataset): root_partition = _true elif not isinstance(root_partition, Expression): raise TypeError( - "Argument 'root_partition' has incorrect type (expected " - "Expression, got {0})".format(type(root_partition)) + f"Argument 'root_partition' has incorrect type (expected " + f"Expression, got {type(root_partition)})" ) for fragment in fragments: @@ -1220,8 +1220,8 @@ cdef class FileSystemDataset(Dataset): ]: if not isinstance(arg, class_): raise TypeError( - "Argument '{0}' has incorrect type (expected {1}, " - "got {2})".format(name, class_.__name__, type(arg)) + f"Argument '{name}' has incorrect type (expected {class_.__name__}, " + f"got {type(arg)})" ) partitions = partitions or [_true] * len(paths) @@ -1988,9 +1988,7 @@ cdef class FileFragment(Fragment): ) if partition: partition = f" partition=[{partition}]" - return "".format( - self.__class__.__name__, typ, self.path, partition - ) + return f"" def __reduce__(self): buffer = self.buffer @@ -3384,8 +3382,8 @@ cdef class FileSystemDatasetFactory(DatasetFactory): c_options ) else: - raise TypeError('Must pass either paths or a FileSelector, but ' - 'passed {}'.format(type(paths_or_selector))) + raise TypeError(f'Must pass either paths or a FileSelector, but ' + f'passed {type(paths_or_selector)}') self.init(GetResultValue(result)) @@ -3527,8 +3525,8 @@ cdef void _populate_builder(const shared_ptr[CScannerBuilder]& ptr, for expr in columns.values(): if not isinstance(expr, Expression): raise 
TypeError( - "Expected an Expression for a 'column' dictionary " - "value, got {} instead".format(type(expr)) + f"Expected an Expression for a 'column' dictionary " + f"value, got {type(expr)} instead" ) c_exprs.push_back(( expr).unwrap()) @@ -3539,8 +3537,8 @@ cdef void _populate_builder(const shared_ptr[CScannerBuilder]& ptr, check_status(builder.ProjectColumns([tobytes(c) for c in columns])) else: raise ValueError( - "Expected a list or a dict for 'columns', " - "got {} instead.".format(type(columns)) + f"Expected a list or a dict for 'columns', " + f"got {type(columns)} instead." ) check_status(builder.BatchSize(batch_size)) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 863c928591937..9bd53a889aff3 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -336,7 +336,7 @@ class RowGroupInfo: } def __repr__(self): - return "RowGroupInfo({})".format(self.id) + return f"RowGroupInfo({self.id})" def __eq__(self, other): if isinstance(other, int): @@ -671,9 +671,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): self._set_arrow_properties() def __repr__(self): - return "".format( - " ".join([f"{key}={value}" for key, value in self._properties.items()]) - ) + return f"" cdef set _PARQUET_READ_OPTIONS = { diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index ed7740440db56..3f9c6c0819c24 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -88,8 +88,7 @@ _FLIGHT_SERVER_ERROR_REGEX = re.compile( def _munge_grpc_python_error(message): m = _FLIGHT_SERVER_ERROR_REGEX.match(message) if m: - return ('Flight RPC failed with Python exception \"{}: {}\"' - .format(m.group(2), m.group(1))) + return f'Flight RPC failed with Python exception "{m.group(2)}: {m.group(1)}"' else: return message @@ -131,8 +130,7 @@ cdef class FlightCallOptions(_Weakrefable): self.options.write_options = c_write_options.c_options if read_options is not None: if not isinstance(read_options, IpcReadOptions): - raise TypeError("expected IpcReadOptions, got {}" - .format(type(read_options))) + raise TypeError(f"expected IpcReadOptions, got {type(read_options)}") self.options.read_options = read_options.c_options if headers is not None: self.options.headers = headers @@ -143,8 +141,7 @@ cdef class FlightCallOptions(_Weakrefable): return &DEFAULT_CALL_OPTIONS elif isinstance(obj, FlightCallOptions): return &(( obj).options) - raise TypeError("Expected a FlightCallOptions object, not " - "'{}'".format(type(obj))) + raise TypeError(f"Expected a FlightCallOptions object, not '{type(obj)}'") _CertKeyPair = collections.namedtuple('_CertKeyPair', ['cert', 'key']) @@ -183,7 +180,7 @@ cdef class FlightError(Exception): self.extra_info = tobytes(extra_info) cdef CStatus to_status(self): - message = tobytes("Flight error: {}".format(str(self))) + message = tobytes(f"Flight error: {str(self)}") return CStatus_UnknownError(message) @@ -281,8 +278,7 @@ cdef class Action(_Weakrefable): @staticmethod cdef CAction unwrap(action) except *: if not isinstance(action, Action): - raise TypeError("Must provide Action, not '{}'".format( - type(action))) + raise TypeError(f"Must provide Action, not '{type(action)}'") return ( action).action def serialize(self): @@ -494,10 +490,9 @@ cdef class FlightDescriptor(_Weakrefable): CFlightDescriptor descriptor def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use " 
"`pyarrow.flight.FlightDescriptor.for_{path,command}` " - "function instead." - .format(self.__class__.__name__)) + "function instead.") @staticmethod def for_path(*path): @@ -553,8 +548,8 @@ cdef class FlightDescriptor(_Weakrefable): @staticmethod cdef CFlightDescriptor unwrap(descriptor) except *: if not isinstance(descriptor, FlightDescriptor): - raise TypeError("Must provide a FlightDescriptor, not '{}'".format( - type(descriptor))) + raise TypeError( + f"Must provide a FlightDescriptor, not '{type(descriptor)}'") return ( descriptor).descriptor def serialize(self): @@ -694,8 +689,7 @@ cdef class Location(_Weakrefable): CLocation.Parse(tobytes(location)).Value(&c_location)) return c_location elif not isinstance(location, Location): - raise TypeError("Must provide a Location, not '{}'".format( - type(location))) + raise TypeError(f"Must provide a Location, not '{type(location)}'") return ( location).location @@ -733,8 +727,8 @@ cdef class FlightEndpoint(_Weakrefable): elif isinstance(ticket, (str, bytes)): self.endpoint.ticket.ticket = tobytes(ticket) else: - raise TypeError("Argument ticket must be a Ticket instance, string or bytes, " - "not '{}'".format(type(ticket))) + raise TypeError(f"Argument ticket must be a Ticket instance, string or bytes, " + f"not '{type(ticket)}'") for location in locations: if isinstance(location, Location): @@ -744,8 +738,8 @@ cdef class FlightEndpoint(_Weakrefable): check_flight_status( CLocation.Parse(tobytes(location)).Value(&c_location)) else: - raise TypeError("Argument locations must contain Location instances, strings or bytes, " - "not '{}'".format(type(location))) + raise TypeError(f"Argument locations must contain Location instances, strings or bytes, " + f"not '{type(location)}'") self.endpoint.locations.push_back(c_location) if expiration_time is not None: @@ -753,12 +747,12 @@ cdef class FlightEndpoint(_Weakrefable): self.endpoint.expiration_time = TimePoint_from_ns( expiration_time.cast(timestamp("ns")).value) else: - raise TypeError("Argument expiration_time must be a TimestampScalar, " - "not '{}'".format(type(expiration_time))) + raise TypeError(f"Argument expiration_time must be a TimestampScalar, " + f"not '{type(expiration_time)}'") if not isinstance(app_metadata, (str, bytes)): - raise TypeError("Argument app_metadata must be a string or bytes, " - "not '{}'".format(type(app_metadata))) + raise TypeError(f"Argument app_metadata must be a string or bytes, " + f"not '{type(app_metadata)}'") self.endpoint.app_metadata = tobytes(app_metadata) @property @@ -920,8 +914,8 @@ cdef class FlightInfo(_Weakrefable): if isinstance(endpoint, FlightEndpoint): c_endpoints.push_back(( endpoint).endpoint) else: - raise TypeError('Endpoint {} is not instance of' - ' FlightEndpoint'.format(endpoint)) + raise TypeError( + f'Endpoint {endpoint} is not instance of FlightEndpoint') check_flight_status(CreateFlightInfo(c_schema, descriptor.descriptor, @@ -1049,8 +1043,7 @@ cdef class FlightStreamChunk(_Weakrefable): return iter((self.data, self.app_metadata)) def __repr__(self): - return "".format( - self.chunk.data != NULL, self.chunk.app_metadata != NULL) + return f"" cdef class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): @@ -1528,8 +1521,7 @@ cdef class FlightClient(_Weakrefable): if not isinstance(auth_handler, ClientAuthHandler): raise TypeError( - "FlightClient.authenticate takes a ClientAuthHandler, " - "not '{}'".format(type(auth_handler))) + f"FlightClient.authenticate takes a ClientAuthHandler, not '{type(auth_handler)}'") 
handler.reset(( auth_handler).to_handler()) with nogil: check_flight_status( @@ -1831,8 +1823,8 @@ cdef class RecordBatchStream(FlightDataStream): """ if (not isinstance(data_source, RecordBatchReader) and not isinstance(data_source, lib.Table)): - raise TypeError("Expected RecordBatchReader or Table, " - "but got: {}".format(type(data_source))) + raise TypeError(f"Expected RecordBatchReader or Table, " + f"but got: {type(data_source)}") self.data_source = data_source self.write_options = _get_options(options).c_options @@ -1845,8 +1837,8 @@ cdef class RecordBatchStream(FlightDataStream): table = ( self.data_source).table reader.reset(new TableBatchReader(deref(table))) else: - raise RuntimeError("Can't construct RecordBatchStream " - "from type {}".format(type(self.data_source))) + raise RuntimeError( + f"Can't construct RecordBatchStream from type {type(self.data_source)}") return new CRecordBatchStream(reader, self.write_options) @@ -2125,11 +2117,10 @@ cdef CStatus _data_stream_next(void* self, CFlightPayload* payload) except *: ( result).to_stream()) substream_schema = pyarrow_wrap_schema(data_stream.get().schema()) if substream_schema != stream_schema: - raise ValueError("Got a FlightDataStream whose schema " - "does not match the declared schema of this " - "GeneratorStream. " - "Got: {}\nExpected: {}".format( - substream_schema, stream_schema)) + raise ValueError(f"Got a FlightDataStream whose schema " + f"does not match the declared schema of this " + f"GeneratorStream. " + f"Got: {substream_schema}\nExpected: {stream_schema}") stream.current_stream.reset( new CPyFlightDataStream(result, move(data_stream))) # Loop around and try again @@ -2137,11 +2128,10 @@ cdef CStatus _data_stream_next(void* self, CFlightPayload* payload) except *: elif isinstance(result, RecordBatch): batch = result if batch.schema != stream_schema: - raise ValueError("Got a RecordBatch whose schema does not " - "match the declared schema of this " - "GeneratorStream. " - "Got: {}\nExpected: {}".format(batch.schema, - stream_schema)) + raise ValueError(f"Got a RecordBatch whose schema does not " + f"match the declared schema of this " + f"GeneratorStream. 
" + f"Got: {batch.schema}\nExpected: {stream_schema}") check_flight_status(GetRecordBatchPayload( deref(batch.batch), stream.c_options, @@ -2150,10 +2140,10 @@ cdef CStatus _data_stream_next(void* self, CFlightPayload* payload) except *: payload.app_metadata = pyarrow_unwrap_buffer( as_buffer(metadata)) else: - raise TypeError("GeneratorStream must be initialized with " - "an iterator of FlightDataStream, Table, " - "RecordBatch, or RecordBatchStreamReader objects, " - "not {}.".format(type(result))) + raise TypeError(f"GeneratorStream must be initialized with " + f"an iterator of FlightDataStream, Table, " + f"RecordBatch, or RecordBatchStreamReader objects, " + f"not {type(result)}.") # Don't loop around return CStatus_OK() # Ran out of attempts (the RPC handler kept yielding empty tables/readers) @@ -2174,9 +2164,8 @@ cdef CStatus _list_flights(void* self, const CServerCallContext& context, c_criteria.expression) for info in result: if not isinstance(info, FlightInfo): - raise TypeError("FlightServerBase.list_flights must return " - "FlightInfo instances, but got {}".format( - type(info))) + raise TypeError(f"FlightServerBase.list_flights must return " + f"FlightInfo instances, but got {type(info)}") flights.push_back(deref(( info).info.get())) listing.reset(new CSimpleFlightListing(flights)) except FlightError as flight_error: @@ -2199,9 +2188,8 @@ cdef CStatus _get_flight_info(void* self, const CServerCallContext& context, except FlightError as flight_error: return ( flight_error).to_status() if not isinstance(result, FlightInfo): - raise TypeError("FlightServerBase.get_flight_info must return " - "a FlightInfo instance, but got {}".format( - type(result))) + raise TypeError(f"FlightServerBase.get_flight_info must return " + f"a FlightInfo instance, but got {type(result)}") info.reset(new CFlightInfo(deref(( result).info.get()))) return CStatus_OK() @@ -2216,9 +2204,8 @@ cdef CStatus _get_schema(void* self, const CServerCallContext& context, result = ( self).get_schema(ServerCallContext.wrap(context), py_descriptor) if not isinstance(result, SchemaResult): - raise TypeError("FlightServerBase.get_schema_info must return " - "a SchemaResult instance, but got {}".format( - type(result))) + raise TypeError(f"FlightServerBase.get_schema_info must return " + f"a SchemaResult instance, but got {type(result)}") info.reset(new CSchemaResult(deref(( result).result.get()))) return CStatus_OK() @@ -2943,8 +2930,8 @@ cdef class FlightServerBase(_Weakrefable): if auth_handler: if not isinstance(auth_handler, ServerAuthHandler): - raise TypeError("auth_handler must be a ServerAuthHandler, " - "not a '{}'".format(type(auth_handler))) + raise TypeError(f"auth_handler must be a ServerAuthHandler, " + f"not a '{type(auth_handler)}'") c_options.get().auth_handler.reset( ( auth_handler).to_handler()) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 2b1f0e7eb3189..660d4e56a6864 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -180,8 +180,7 @@ cdef class FileInfo(_Weakrefable): @staticmethod cdef CFileInfo unwrap_safe(obj): if not isinstance(obj, FileInfo): - raise TypeError("Expected FileInfo instance, got {0}" - .format(type(obj))) + raise TypeError(f"Expected FileInfo instance, got {type(obj)}") return ( obj).unwrap() def __repr__(self): @@ -406,8 +405,7 @@ cdef class FileSelector(_Weakrefable): self.selector.recursive = recursive def __repr__(self): - return ("".format(self)) + return f"" cdef class FileSystem(_Weakrefable): @@ -449,7 +447,7 @@ cdef class 
FileSystem(_Weakrefable): -------- Create a new FileSystem subclass from a URI: - >>> uri = 'file:///{}/pyarrow-fs-example.dat'.format(local_path) + >>> uri = f'file:///{local_path}/pyarrow-fs-example.dat' >>> local_new, path_new = fs.FileSystem.from_uri(uri) >>> local_new >> local - >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) + >>> local.get_file_info(f"/{local_path}/pyarrow-fs-example.dat") """ cdef: @@ -1192,8 +1190,7 @@ cdef class SubTreeFileSystem(FileSystem): self.subtreefs = wrapped.get() def __repr__(self): - return ("SubTreeFileSystem(base_path={}, base_fs={}" - .format(self.base_path, self.base_fs)) + return f"SubTreeFileSystem(base_path={self.base_path}, base_fs={self.base_fs})" def __reduce__(self): return SubTreeFileSystem, ( @@ -1260,8 +1257,8 @@ cdef class PyFileSystem(FileSystem): shared_ptr[CPyFileSystem] wrapped if not isinstance(handler, FileSystemHandler): - raise TypeError("Expected a FileSystemHandler instance, got {0}" - .format(type(handler))) + raise TypeError( + f"Expected a FileSystemHandler instance, got {type(handler)}") vtable.get_type_name = _cb_get_type_name vtable.equals = _cb_equals diff --git a/python/pyarrow/_hdfs.pyx b/python/pyarrow/_hdfs.pyx index 0a1a5483bda32..8a9fddee3dd5b 100644 --- a/python/pyarrow/_hdfs.pyx +++ b/python/pyarrow/_hdfs.pyx @@ -74,7 +74,7 @@ cdef class HadoopFileSystem(FileSystem): if not host.startswith(('hdfs://', 'viewfs://')) and host != "default": # TODO(kszucs): do more sanitization - host = 'hdfs://{}'.format(host) + host = f'hdfs://{host}' options.ConfigureEndPoint(tobytes(host), int(port)) options.ConfigureReplication(replication) diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx index c023baeec1c82..07e615dd5e482 100644 --- a/python/pyarrow/_json.pyx +++ b/python/pyarrow/_json.pyx @@ -216,8 +216,8 @@ cdef class ParseOptions(_Weakrefable): v = CUnexpectedFieldBehavior_InferType else: raise ValueError( - "Unexpected value `{}` for `unexpected_field_behavior`, pass " - "either `ignore`, `error` or `infer`.".format(value) + f"Unexpected value `{value}` for `unexpected_field_behavior`, pass " + f"either `ignore`, `error` or `infer`." 
) self.options.unexpected_field_behavior = v diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 2fb1e41641f8e..6d5e113236c44 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -227,7 +227,7 @@ cdef class ParquetLogicalType(_Weakrefable): self.type = type def __repr__(self): - return "{}\n {}".format(object.__repr__(self), str(self)) + return f"{object.__repr__(self)}\n {str(self)}" def __str__(self): return frombytes(self.type.get().ToString(), safe=True) @@ -632,16 +632,15 @@ cdef class SortingColumn: elif descending == "ascending": descending = False else: - raise ValueError("Invalid sort key direction: {0}" - .format(descending)) + raise ValueError(f"Invalid sort key direction: {descending}") else: - raise ValueError("Invalid sort key: {0}".format(sort_key)) + raise ValueError(f"Invalid sort key: {sort_key}") try: column_index = col_map[name] except KeyError: - raise ValueError("Sort key name '{0}' not found in schema:\n{1}" - .format(name, schema)) + raise ValueError( + f"Sort key name '{name}' not found in schema:\n{schema}") sorting_columns.append( cls(column_index, descending=descending, nulls_first=nulls_first) @@ -741,7 +740,7 @@ cdef class RowGroupMetaData(_Weakrefable): def __cinit__(self, FileMetaData parent, int index): if index < 0 or index >= parent.num_row_groups: - raise IndexError('{0} out of bounds'.format(index)) + raise IndexError(f'{index} out of bounds') self.up_metadata = parent._metadata.RowGroup(index) self.metadata = self.up_metadata.get() self.parent = parent @@ -786,7 +785,7 @@ cdef class RowGroupMetaData(_Weakrefable): Metadata for column within this chunk. """ if i < 0 or i >= self.num_columns: - raise IndexError('{0} out of bounds'.format(i)) + raise IndexError(f'{i} out of bounds') chunk = ColumnChunkMetaData() chunk.init(self, i) return chunk @@ -987,8 +986,7 @@ cdef class FileMetaData(_Weakrefable): elif version == ParquetVersion_V2_6: return '2.6' else: - warnings.warn('Unrecognized file version, assuming 2.6: {}' - .format(version)) + warnings.warn(f'Unrecognized file version, assuming 2.6: {version}') return '2.6' @property @@ -1095,9 +1093,7 @@ cdef class ParquetSchema(_Weakrefable): self.schema = container._metadata.schema() def __repr__(self): - return "{0}\n{1}".format( - object.__repr__(self), - frombytes(self.schema.ToString(), safe=True)) + return f"{object.__repr__(self)}\n{frombytes(self.schema.ToString(), safe=True)}" def __reduce__(self): return ParquetSchema, (self.parent,) @@ -1169,7 +1165,7 @@ cdef class ParquetSchema(_Weakrefable): column_schema : ColumnSchema """ if i < 0 or i >= len(self): - raise IndexError('{0} out of bounds'.format(i)) + raise IndexError(f'{i} out of bounds') return ColumnSchema(self, i) @@ -1214,11 +1210,9 @@ cdef class ColumnSchema(_Weakrefable): physical_type = self.physical_type converted_type = self.converted_type if converted_type == 'DECIMAL': - converted_type = 'DECIMAL({0}, {1})'.format(self.precision, - self.scale) + converted_type = f'DECIMAL({self.precision}, {self.scale})' elif physical_type == 'FIXED_LEN_BYTE_ARRAY': - converted_type = ('FIXED_LEN_BYTE_ARRAY(length={0})' - .format(self.length)) + converted_type = f'FIXED_LEN_BYTE_ARRAY(length={self.length})' return """ name: {0} @@ -1866,8 +1860,8 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( elif data_page_version == "2.0": props.data_page_version(ParquetDataPageVersion_V2) else: - raise ValueError("Unsupported Parquet data page version: {0}" - .format(data_page_version)) + raise 
ValueError( + f"Unsupported Parquet data page version: {data_page_version}") # version @@ -1885,8 +1879,7 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( elif version == "2.6": props.version(ParquetVersion_V2_6) else: - raise ValueError("Unsupported Parquet format version: {0}" - .format(version)) + raise ValueError(f"Unsupported Parquet format version: {version}") # compression @@ -2061,8 +2054,7 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( elif coerce_timestamps == 'us': arrow_props.coerce_timestamps(TimeUnit_MICRO) elif coerce_timestamps is not None: - raise ValueError('Invalid value for coerce_timestamps: {0}' - .format(coerce_timestamps)) + raise ValueError(f'Invalid value for coerce_timestamps: {coerce_timestamps}') # allow_truncated_timestamps @@ -2084,8 +2076,7 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( warnings.warn("V1 parquet writer engine is a no-op. Use V2.") arrow_props.set_engine_version(ArrowWriterEngineVersion.V1) elif writer_engine_version != "V2": - raise ValueError("Unsupported Writer Engine Version: {0}" - .format(writer_engine_version)) + raise ValueError(f"Unsupported Writer Engine Version: {writer_engine_version}") arrow_properties = arrow_props.build() diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index 81bd421dcbce3..8e79a56cb460d 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -44,7 +44,7 @@ cdef cipher_to_name(ParquetCipher cipher): elif ParquetCipher_AES_GCM_CTR_V1 == cipher: return 'AES_GCM_CTR_V1' else: - raise ValueError('Invalid cipher value: {0}'.format(cipher)) + raise ValueError(f'Invalid cipher value: {cipher}') cdef class EncryptionConfiguration(_Weakrefable): """Configuration of the encryption, such as which columns to encrypt""" @@ -100,7 +100,7 @@ cdef class EncryptionConfiguration(_Weakrefable): # to the string defined by the spec # 'key1: col1 , col2; key2: col3 , col4' column_keys = "; ".join( - ["{}: {}".format(k, ", ".join(v)) for k, v in value.items()]) + [f"{k}: {', '.join(v)}" for k, v in value.items()]) self.configuration.get().column_keys = tobytes(column_keys) @property @@ -340,8 +340,7 @@ cdef void _cb_create_kms_client( result = handler(connection_config) if not isinstance(result, KmsClient): raise TypeError( - "callable must return KmsClient instances, but got {}".format( - type(result))) + f"callable must return KmsClient instances, but got {type(result)}") out[0] = ( result).unwrap() diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py index 86bf7cbf4d29d..f539f559eb2ab 100644 --- a/python/pyarrow/acero.py +++ b/python/pyarrow/acero.py @@ -307,8 +307,8 @@ def _perform_join_asof(left_operand, left_on, left_by, columns_collisions = set(left_operand.schema.names) & set(right_columns) if columns_collisions: raise ValueError( - "Columns {} present in both tables. AsofJoin does not support " - "column collisions.".format(columns_collisions), + f"Columns {columns_collisions} present in both tables. " + f"AsofJoin does not support column collisions." ) # Add the join node to the execplan diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index b738dc04b0c81..0cb151d4b168c 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -338,12 +338,11 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, # the first array(..) 
call) if value_type is not None: warnings.warn( - "The dtype of the 'categories' of the passed " - "categorical values ({0}) does not match the " - "specified type ({1}). For now ignoring the specified " - "type, but in the future this mismatch will raise a " - "TypeError".format( - values.categories.dtype, value_type), + f"The dtype of the 'categories' of the passed " + f"categorical values ({values.categories.dtype}) does not match the " + f"specified type ({value_type}). For now ignoring the specified " + f"type, but in the future this mismatch will raise a " + f"TypeError", FutureWarning, stacklevel=2) dictionary = array( values.categories.values, memory_pool=memory_pool) @@ -1027,9 +1026,9 @@ cdef class Array(_PandasConvertible): """ def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use one of " - "the `pyarrow.Array.from_*` functions instead." - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor " + "directly, use one of the `pyarrow.Array.from_*` " + "functions instead.") cdef void init(self, const shared_ptr[CArray]& sp_array) except *: self.sp_array = sp_array @@ -1270,19 +1269,19 @@ cdef class Array(_PandasConvertible): children = children or [] if type.num_fields != len(children): - raise ValueError("Type's expected number of children " - "({0}) did not match the passed number " - "({1}).".format(type.num_fields, len(children))) + raise ValueError(f"Type's expected number of children " + f"({type.num_fields}) did not match the passed number " + f"({len(children)})") if type.has_variadic_buffers: if type.num_buffers > len(buffers): - raise ValueError("Type's expected number of buffers is at least " - "{0}, but the passed number is " - "{1}.".format(type.num_buffers, len(buffers))) + raise ValueError(f"Type's expected number of buffers is at least " + f"{type.num_buffers}, but the passed number is " + f"{len(buffers)}.") elif type.num_buffers != len(buffers): - raise ValueError("Type's expected number of buffers " - "({0}) did not match the passed number " - "({1}).".format(type.num_buffers, len(buffers))) + raise ValueError(f"Type's expected number of buffers " + f"({type.num_buffers}) did not match the passed number " + f"({len(buffers)}).") for buf in buffers: # None will produce a null buffer pointer @@ -1354,7 +1353,7 @@ cdef class Array(_PandasConvertible): def __repr__(self): type_format = object.__repr__(self) - return '{0}\n{1}'.format(type_format, str(self)) + return f'{type_format}\n{str(self)}' def to_string(self, *, int indent=2, int top_level_indent=0, int window=10, int container_window=2, c_bool skip_new_lines=False): @@ -3712,7 +3711,7 @@ cdef class UnionArray(Array): result = ( self.ap).field(pos) if result != NULL: return pyarrow_wrap_array(result) - raise KeyError("UnionArray does not have child {}".format(pos)) + raise KeyError(f"UnionArray does not have child {pos}") @property def type_codes(self): @@ -4380,9 +4379,9 @@ cdef class RunEndEncodedArray(Array): children = children or [] if type.num_fields != len(children): - raise ValueError("RunEndEncodedType's expected number of children " - "({0}) did not match the passed number " - "({1}).".format(type.num_fields, len(children))) + raise ValueError(f"RunEndEncodedType's expected number of children " + f"({type.num_fields}) did not match the passed number " + f"({len(children)})") # buffers are validated as if we needed to pass them to C++, but # _make_from_arrays will take care of filling in the expected @@ -4393,14 +4392,14 @@ 
cdef class RunEndEncodedArray(Array): raise ValueError("RunEndEncodedType expects None as validity " "bitmap, buffers[0] is not None") if type.num_buffers != len(buffers): - raise ValueError("RunEndEncodedType's expected number of buffers " - "({0}) did not match the passed number " - "({1}).".format(type.num_buffers, len(buffers))) + raise ValueError(f"RunEndEncodedType's expected number of buffers " + f"({type.num_buffers}) did not match the passed number " + f"({len(buffers)}).") # null_count is also validated as if we needed it if null_count != -1 and null_count != 0: - raise ValueError("RunEndEncodedType's expected null_count (0) " - "did not match passed number ({0})".format(null_count)) + raise ValueError(f"RunEndEncodedType's expected null_count (0) " + f"did not match passed number ({null_count})") return RunEndEncodedArray._from_arrays(type, False, length, children[0], children[1], offset) @@ -4483,8 +4482,8 @@ cdef class ExtensionArray(Array): shared_ptr[CExtensionArray] ext_array if storage.type != typ.storage_type: - raise TypeError("Incompatible storage type {0} " - "for extension type {1}".format(storage.type, typ)) + raise TypeError(f"Incompatible storage type {storage.type} " + f"for extension type {typ}") ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array) cdef Array result = pyarrow_wrap_array( ext_array) @@ -4946,8 +4945,8 @@ def concat_arrays(arrays, MemoryPool memory_pool=None): for array in arrays: if not isinstance(array, Array): - raise TypeError("Iterable should contain Array objects, " - "got {0} instead".format(type(array))) + raise TypeError(f"Iterable should contain Array objects, " + f"got {type(array)} instead") c_arrays.push_back(pyarrow_unwrap_array(array)) with nogil: diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 5348336235118..86ca0ed9d7389 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -138,8 +138,7 @@ def _decorate_compute_function(wrapper, exposed_name, func, options_class): summary = cpp_doc.summary if not summary: arg_str = "arguments" if func.arity > 1 else "argument" - summary = ("Call compute function {!r} with the given {}" - .format(func.name, arg_str)) + summary = f"Call compute function {func.name!r} with the given {arg_str}" doc_pieces.append(f"{summary}.\n\n") @@ -179,11 +178,11 @@ def _decorate_compute_function(wrapper, exposed_name, func, options_class): f"does not have a docstring", RuntimeWarning) options_sig = inspect.signature(options_class) for p in options_sig.parameters.values(): - doc_pieces.append(dedent("""\ - {0} : optional - Parameter for {1} constructor. Either `options` - or `{0}` can be passed, but not both at the same time. - """.format(p.name, options_class.__name__))) + doc_pieces.append(dedent(f"""\ + {p.name} : optional + Parameter for {options_class.__name__} constructor. Either `options` + or `{p.name}` can be passed, but not both at the same time. + """)) doc_pieces.append(dedent(f"""\ options : pyarrow.compute.{options_class.__name__}, optional Alternative way of passing options. 
@@ -209,8 +208,8 @@ def _get_options_class(func): try: return globals()[class_name] except KeyError: - warnings.warn("Python binding for {} not exposed" - .format(class_name), RuntimeWarning) + warnings.warn(f"Python binding for {class_name} not exposed", + RuntimeWarning) return None @@ -218,9 +217,8 @@ def _handle_options(name, options_class, options, args, kwargs): if args or kwargs: if options is not None: raise TypeError( - "Function {!r} called with both an 'options' argument " - "and additional arguments" - .format(name)) + f"Function {name!r} called with both an 'options' argument " + f"and additional arguments") return options_class(*args, **kwargs) if options is not None: @@ -229,8 +227,8 @@ def _handle_options(name, options_class, options, args, kwargs): elif isinstance(options, options_class): return options raise TypeError( - "Function {!r} expected a {} parameter, got {}" - .format(name, options_class, type(options))) + f"Function {name!r} expected a {options_class} parameter, " + f"got {type(options)}") return None diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index c037126945071..41beaa1404195 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -233,7 +233,7 @@ def pytest_ignore_collect(collection_path, config): # handle cuda, flight, etc for group in doctest_groups: - if 'pyarrow/{}'.format(group) in str(collection_path): + if f'pyarrow/{group}' in str(collection_path): if not defaults[group]: return True diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index c61e13ee75801..03459a6c491db 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -114,7 +114,7 @@ def __getattr__(name): raise ImportError(_parquet_msg) raise AttributeError( - "module 'pyarrow.dataset' has no attribute '{0}'".format(name) + f"module 'pyarrow.dataset' has no attribute '{name}'" ) @@ -234,8 +234,7 @@ def partitioning(schema=None, field_names=None, flavor=None, return DirectoryPartitioning.discover(field_names) else: raise ValueError( - "Expected list of field names, got {}".format( - type(field_names))) + f"Expected list of field names, got {type(field_names)}") else: raise ValueError( "For the default directory flavor, need to specify " @@ -253,8 +252,7 @@ def partitioning(schema=None, field_names=None, flavor=None, return FilenamePartitioning.discover(field_names) else: raise ValueError( - "Expected list of field names, got {}".format( - type(field_names))) + f"Expected list of field names, got {type(field_names)}") else: raise ValueError( "For the filename flavor, need to specify " @@ -269,8 +267,7 @@ def partitioning(schema=None, field_names=None, flavor=None, return HivePartitioning(schema, dictionaries) else: raise ValueError( - "Expected Schema for 'schema', got {}".format( - type(schema))) + f"Expected Schema for 'schema', got {type(schema)}") else: return HivePartitioning.discover() else: @@ -292,8 +289,8 @@ def _ensure_partitioning(scheme): elif isinstance(scheme, (Partitioning, PartitioningFactory)): pass else: - raise ValueError("Expected Partitioning or PartitioningFactory, got {}" - .format(type(scheme))) + raise ValueError( + f"Expected Partitioning or PartitioningFactory, got {type(scheme)}") return scheme @@ -317,7 +314,7 @@ def _ensure_format(obj): elif obj == "json": return JsonFileFormat() else: - raise ValueError("format '{}' is not supported".format(obj)) + raise ValueError(f"format '{obj}' is not supported") def _ensure_multiple_sources(paths, filesystem=None): @@ -382,16 +379,15 @@ def 
_ensure_multiple_sources(paths, filesystem=None): raise FileNotFoundError(info.path) elif file_type == FileType.Directory: raise IsADirectoryError( - 'Path {} points to a directory, but only file paths are ' + f'Path {info.path} points to a directory, but only file paths are ' 'supported. To construct a nested or union dataset pass ' - 'a list of dataset objects instead.'.format(info.path) + 'a list of dataset objects instead.' ) else: raise IOError( - 'Path {} exists but its type is unknown (could be a ' + f'Path {info.path} exists but its type is unknown (could be a ' 'special file such as a Unix socket or character device, ' - 'or Windows NUL / CON / ...)'.format(info.path) - ) + 'or Windows NUL / CON / ...)') return filesystem, paths @@ -802,18 +798,18 @@ def dataset(source, schema=None, format=None, filesystem=None, return _in_memory_dataset(source, **kwargs) else: unique_types = set(type(elem).__name__ for elem in source) - type_names = ', '.join('{}'.format(t) for t in unique_types) + type_names = ', '.join(f'{t}' for t in unique_types) raise TypeError( 'Expected a list of path-like or dataset objects, or a list ' 'of batches or tables. The given list contains the following ' - 'types: {}'.format(type_names) + f'types: {type_names}' ) elif isinstance(source, (pa.RecordBatch, pa.Table)): return _in_memory_dataset(source, **kwargs) else: raise TypeError( 'Expected a path-like, list of path-likes or a list of Datasets ' - 'instead of the given type: {}'.format(type(source).__name__) + f'instead of the given type: {type(source).__name__}' ) @@ -987,9 +983,8 @@ def file_visitor(written_file): file_options = format.make_write_options() if format != file_options.format: - raise TypeError("Supplied FileWriteOptions have format {}, " - "which doesn't match supplied FileFormat {}".format( - format, file_options)) + raise TypeError(f"Supplied FileWriteOptions have format {format}, " + f"which doesn't match supplied FileFormat {file_options}") if basename_template is None: basename_template = "part-{i}." + format.default_extname diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi index 26256de62093e..ec7bef68d9a36 100644 --- a/python/pyarrow/device.pxi +++ b/python/pyarrow/device.pxi @@ -73,7 +73,7 @@ cdef class Device(_Weakrefable): return self.device.get().Equals(deref((<Device>other).device.get())) def __repr__(self): - return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString())) + return f"<pyarrow.Device: {frombytes(self.device.get().ToString())}>" @property def type_name(self): @@ -137,9 +137,7 @@ cdef class MemoryManager(_Weakrefable): return self.memory_manager def __repr__(self): - return "<pyarrow.MemoryManager device: {}>".format( - frombytes(self.memory_manager.get().device().get().ToString()) - ) + return f"<pyarrow.MemoryManager device: {frombytes(self.memory_manager.get().device().get().ToString())}>" @property def device(self): diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py index fbd0602597006..16a52bed8879f 100644 --- a/python/pyarrow/feather.py +++ b/python/pyarrow/feather.py @@ -69,10 +69,8 @@ def read_table(self, columns=None): def validate_schemas(self, piece, table): if not self.schema.equals(table.schema): - raise ValueError('Schema in {!s} was different. \n' - '{!s}\n\nvs\n\n{!s}' - .format(piece, self.schema, - table.schema)) + raise ValueError(f'Schema in {piece!s} was different. 
\n' + f'{self.schema!s}\n\nvs\n\n{table.schema!s}') def read_pandas(self, columns=None, use_threads=True): """ @@ -99,14 +97,15 @@ def check_chunked_overflow(name, col): return if col.type in (ext.binary(), ext.string()): - raise ValueError("Column '{}' exceeds 2GB maximum capacity of " + raise ValueError(f"Column '{name}' exceeds 2GB maximum capacity of " "a Feather binary column. This restriction may be " - "lifted in the future".format(name)) + "lifted in the future") else: # TODO(wesm): Not sure when else this might be reached - raise ValueError("Column '{}' of type {} was chunked on conversion " - "to Arrow and cannot be currently written to " - "Feather format".format(name, str(col.type))) + raise ValueError( + f"Column '{name}' of type {col.type} was chunked on conversion to Arrow " + f"and cannot be currently written to Feather format" + ) _FEATHER_SUPPORTED_CODECS = {'lz4', 'zstd', 'uncompressed'} @@ -179,9 +178,8 @@ def write_feather(df, dest, compression=None, compression_level=None, compression = 'lz4' elif (compression is not None and compression not in _FEATHER_SUPPORTED_CODECS): - raise ValueError('compression="{}" not supported, must be ' - 'one of {}'.format(compression, - _FEATHER_SUPPORTED_CODECS)) + raise ValueError(f'compression="{compression}" not supported, must be ' + f'one of {_FEATHER_SUPPORTED_CODECS}') try: _feather.write_feather(table, dest, compression=compression, @@ -262,9 +260,8 @@ def read_table(source, columns=None, memory_map=False, use_threads=True): table = reader.read_names(columns) else: column_type_names = [t.__name__ for t in column_types] - raise TypeError("Columns must be indices or names. " - "Got columns {} of types {}" - .format(columns, column_type_names)) + raise TypeError(f"Columns must be indices or names. " + f"Got columns {columns} of types {column_type_names}") # Feather v1 already respects the column selection if reader.version < 3: diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index abdd1a995751a..fc2556ae27553 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -73,12 +73,12 @@ def __getattr__(name): if name in _not_imported: raise ImportError( - "The pyarrow installation is not built with support for " - "'{0}'".format(name) + f"The pyarrow installation is not built with support for " + f"'{name}'" ) raise AttributeError( - "module 'pyarrow.fs' has no attribute '{0}'".format(name) + f"module 'pyarrow.fs' has no attribute '{name}'" ) @@ -92,11 +92,10 @@ def _filesystem_from_str(uri): prefix_info = filesystem.get_file_info([prefix])[0] if prefix_info.type != FileType.Directory: raise ValueError( - "The path component of the filesystem URI must point to a " - "directory but it has a type: `{}`. The path component " - "is `{}` and the given filesystem URI is `{}`".format( - prefix_info.type.name, prefix_info.path, uri - ) + f"The path component of the filesystem URI must point to a " + f"directory but it has a type: `{prefix_info.type.name}`. The path " + f"component is `{prefix_info.path}` and the given filesystem URI is " + f"`{uri}`" ) filesystem = SubTreeFileSystem(prefix, filesystem) return filesystem @@ -126,9 +125,8 @@ def _ensure_filesystem(filesystem, *, use_mmap=False): return PyFileSystem(FSSpecHandler(filesystem)) raise TypeError( - "Unrecognized filesystem: {}. `filesystem` argument must be a " - "FileSystem instance or a valid file system URI'".format( - type(filesystem)) + f"Unrecognized filesystem: {type(filesystem)}. 
`filesystem` argument must be a " + "FileSystem instance or a valid file system URI" ) @@ -234,7 +232,7 @@ def copy_files(source, destination, Copy one file from S3 bucket to a local directory: >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson", - ... "file:///{}/index_copy.ndjson".format(local_path)) + ... f"file:///{local_path}/index_copy.ndjson") >>> fs.LocalFileSystem().get_file_info(str(local_path)+ ... '/index_copy.ndjson') @@ -243,7 +241,7 @@ def copy_files(source, destination, Copy file using a FileSystem object: >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson", - ... "file:///{}/index_copy.ndjson".format(local_path), + ... f"file:///{local_path}/index_copy.ndjson", ... source_filesystem=fs.S3FileSystem()) """ source_fs, source_path = _resolve_filesystem_and_path( @@ -297,7 +295,7 @@ def get_type_name(self): protocol = self.fs.protocol if isinstance(protocol, list): protocol = protocol[0] - return "fsspec+{0}".format(protocol) + return f"fsspec+{protocol}" def normalize_path(self, path): return path diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 2202ec64f2962..896d33faed121 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -80,9 +80,8 @@ cdef class Node(_Weakrefable): shared_ptr[CNode] node def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use the " - "TreeExprBuilder API directly" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use the " + f"TreeExprBuilder API directly") @staticmethod cdef create(shared_ptr[CNode] node): @@ -95,7 +94,7 @@ cdef class Node(_Weakrefable): def __repr__(self): type_format = object.__repr__(self) - return '{0}\n{1}'.format(type_format, str(self)) + return f"{type_format}\n{str(self)}" def return_type(self): return pyarrow_wrap_data_type(self.node.get().return_type()) @@ -113,7 +112,7 @@ cdef class Expression(_Weakrefable): def __repr__(self): type_format = object.__repr__(self) - return '{0}\n{1}'.format(type_format, str(self)) + return f"{type_format}\n{str(self)}" def root(self): return Node.create(self.expression.get().root()) @@ -127,9 +126,8 @@ cdef class Condition(_Weakrefable): shared_ptr[CCondition] condition def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use the " - "TreeExprBuilder API instead" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use the " + f"TreeExprBuilder API instead") @staticmethod cdef create(shared_ptr[CCondition] condition): @@ -142,7 +140,7 @@ cdef class Condition(_Weakrefable): def __repr__(self): type_format = object.__repr__(self) - return '{0}\n{1}'.format(type_format, str(self)) + return f"{type_format}\n{str(self)}" def root(self): return Node.create(self.condition.get().root()) @@ -156,8 +154,8 @@ cdef class SelectionVector(_Weakrefable): shared_ptr[CSelectionVector] selection_vector def __init__(self): - raise TypeError("Do not call {}'s constructor directly." 
- .format(self.__class__.__name__)) + raise TypeError( + f"Do not call {self.__class__.__name__}'s constructor directly.") @staticmethod cdef create(shared_ptr[CSelectionVector] selection_vector): @@ -176,9 +174,8 @@ cdef class Projector(_Weakrefable): MemoryPool pool def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " - "make_projector instead" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use " + "make_projector instead") @staticmethod cdef create(shared_ptr[CProjector] projector, MemoryPool pool): @@ -226,9 +223,8 @@ cdef class Filter(_Weakrefable): shared_ptr[CFilter] filter def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " - "make_filter instead" - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use " + "make_filter instead") @staticmethod cdef create(shared_ptr[CFilter] filter): @@ -712,8 +708,8 @@ cdef class FunctionSignature(_Weakrefable): shared_ptr[CFunctionSignature] signature def __init__(self): - raise TypeError("Do not call {}'s constructor directly." - .format(self.__class__.__name__)) + raise TypeError( + f"Do not call {self.__class__.__name__}'s constructor directly.") @staticmethod cdef create(shared_ptr[CFunctionSignature] signature): diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd index 7d76576bef2b9..782d83423e827 100644 --- a/python/pyarrow/includes/libgandiva.pxd +++ b/python/pyarrow/includes/libgandiva.pxd @@ -81,7 +81,7 @@ cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *: elif uppercase == 'UINT64': return CSelectionVector_Mode_UINT64 else: - raise ValueError('Invalid value for Selection Mode: {!r}'.format(name)) + raise ValueError(f'Invalid value for Selection Mode: {name!r}') cdef inline str _selection_mode_name(CSelectionVector_Mode ctype): if ctype == CSelectionVector_Mode_NONE: diff --git a/python/pyarrow/interchange/column.py b/python/pyarrow/interchange/column.py index e609e469b0ffa..ddbceabcb00ad 100644 --- a/python/pyarrow/interchange/column.py +++ b/python/pyarrow/interchange/column.py @@ -308,7 +308,7 @@ def _dtype_from_arrowdtype( kind = DtypeKind.DATETIME ts = dtype.unit[0] tz = dtype.tz if dtype.tz else "" - f_string = "ts{ts}:{tz}".format(ts=ts, tz=tz) + f_string = f"ts{ts}:{tz}" return kind, bit_width, f_string, Endianness.NATIVE elif pa.types.is_dictionary(dtype): kind = DtypeKind.CATEGORICAL diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 3ab2e49a15753..fd2d4df42ccde 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -335,8 +335,7 @@ cdef class NativeFile(_Weakrefable): offset = offset + position else: with gil: - raise ValueError("Invalid value of whence: {0}" - .format(whence)) + raise ValueError(f"Invalid value of whence: {whence}") check_status(handle.get().Seek(offset)) return self.tell() @@ -702,8 +701,7 @@ cdef class NativeFile(_Weakrefable): # the passed buffer, so it's hard for us to avoid doubling the memory buf = malloc(buffer_size) if buf == NULL: - raise MemoryError("Failed to allocate {0} bytes" - .format(buffer_size)) + raise MemoryError(f"Failed to allocate {buffer_size} bytes") writer_thread.start() @@ -770,8 +768,7 @@ cdef class NativeFile(_Weakrefable): # the passed buffer, so it's hard for us to avoid doubling the memory buf = malloc(buffer_size) if buf == NULL: - raise MemoryError("Failed to allocate {0} bytes" - 
.format(buffer_size)) + raise MemoryError(f"Failed to allocate {buffer_size} bytes") cdef int64_t total_bytes = 0 cdef int32_t c_buffer_size = buffer_size @@ -951,7 +948,7 @@ cdef class PythonFile(NativeFile): elif inferred_mode.startswith('r'): kind = 'r' else: - raise ValueError('Invalid file mode: {0}'.format(mode)) + raise ValueError(f'Invalid file mode: {mode}') # If mode was given, check it matches the given file if mode is not None: @@ -1087,7 +1084,7 @@ cdef class MemoryMappedFile(NativeFile): self.is_readable = True self.is_writable = True else: - raise ValueError('Invalid file mode: {0}'.format(mode)) + raise ValueError(f'Invalid file mode: {mode}') with nogil: handle = GetResultValue(CMemoryMappedFile.Open(c_path, c_mode)) @@ -1149,8 +1146,7 @@ def memory_map(path, mode='r'): cdef _check_is_file(path): if os.path.isdir(path): - raise IOError("Expected file path, but {0} is a directory" - .format(path)) + raise IOError(f"Expected file path, but {path} is a directory") def create_memory_map(path, size): @@ -1247,7 +1243,7 @@ cdef class OSFile(NativeFile): elif mode in ('a', 'ab'): self._open_writable(c_path, append=True) else: - raise ValueError('Invalid file mode: {0}'.format(mode)) + raise ValueError(f'Invalid file mode: {mode}') cdef _open_readable(self, c_string path, CMemoryPool* pool): cdef shared_ptr[ReadableFile] handle @@ -2212,8 +2208,7 @@ cdef get_writer(object source, shared_ptr[COutputStream]* writer): nf = source writer[0] = nf.get_output_stream() else: - raise TypeError('Unable to write to object of type: {0}' - .format(type(source))) + raise TypeError(f'Unable to write to object of type: {type(source)}') # --------------------------------------------------------------------- @@ -2248,7 +2243,7 @@ cdef CCompressionType _ensure_compression(str name) except *: elif uppercase == 'ZSTD': return CCompressionType_ZSTD else: - raise ValueError('Invalid value for compression: {!r}'.format(name)) + raise ValueError(f'Invalid value for compression: {name!r}') cdef class CacheOptions(_Weakrefable): @@ -2810,8 +2805,8 @@ def input_stream(source, compression='detect', buffer_size=None): hasattr(source, 'closed')): stream = PythonFile(source, 'r') else: - raise TypeError("pa.input_stream() called with instance of '{}'" - .format(source.__class__)) + raise TypeError( + f"pa.input_stream() called with instance of '{source.__class__}'") if compression == 'detect': # detect for OSFile too @@ -2902,8 +2897,8 @@ def output_stream(source, compression='detect', buffer_size=None): hasattr(source, 'closed')): stream = PythonFile(source, 'w') else: - raise TypeError("pa.output_stream() called with instance of '{}'" - .format(source.__class__)) + raise TypeError( + f"pa.output_stream() called with instance of '{source.__class__}'") if compression == 'detect': compression = _detect_compression(source_path) diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index e15b0ea40ed2e..a69fdc4223a45 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -301,9 +301,8 @@ cdef class Message(_Weakrefable): pass def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " - "`pyarrow.ipc.read_message` function instead." 
- .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use " + "`pyarrow.ipc.read_message` function instead.") @property def type(self): @@ -393,9 +392,9 @@ cdef class Message(_Weakrefable): body_len = 0 if body is None else body.size return """pyarrow.Message -type: {0} -metadata length: {1} -body length: {2}""".format(self.type, metadata_len, body_len) +type: {self.type} +metadata length: {metadata_len} +body length: {body_len}""" cdef class MessageReader(_Weakrefable): @@ -410,9 +409,9 @@ cdef class MessageReader(_Weakrefable): pass def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use " "`pyarrow.ipc.MessageReader.open_stream` function " - "instead.".format(self.__class__.__name__)) + "instead.") @staticmethod def open_stream(source): @@ -660,9 +659,8 @@ cdef class RecordBatchReader(_Weakrefable): # cdef block is in lib.pxd def __init__(self): - raise TypeError("Do not call {}'s constructor directly, " - "use one of the RecordBatchReader.from_* functions instead." - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + "use one of the RecordBatchReader.from_* functions instead.") def __iter__(self): return self @@ -1116,7 +1114,7 @@ cdef class _RecordBatchFileReader(_Weakrefable): cdef shared_ptr[CRecordBatch] batch if i < 0 or i >= self.num_record_batches: - raise ValueError('Batch number {0} out of range'.format(i)) + raise ValueError(f'Batch number {i} out of range') with nogil: batch = GetResultValue(self.reader.get().ReadRecordBatch(i)) @@ -1146,7 +1144,7 @@ cdef class _RecordBatchFileReader(_Weakrefable): CRecordBatchWithMetadata batch_with_metadata if i < 0 or i >= self.num_record_batches: - raise ValueError('Batch number {0} out of range'.format(i)) + raise ValueError(f'Batch number {i} out of range') with nogil: batch_with_metadata = GetResultValue( @@ -1259,8 +1257,8 @@ cdef NativeFile as_native_file(source): source = BufferReader(source) if not isinstance(source, NativeFile): - raise ValueError('Unable to read message from object with type: {0}' - .format(type(source))) + raise ValueError( + f'Unable to read message from object with type: {type(source)}') return source diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py index 523196e1e3389..aaf031e2b9cd9 100644 --- a/python/pyarrow/ipc.py +++ b/python/pyarrow/ipc.py @@ -76,9 +76,9 @@ def __init__(self, source, *, options=None, memory_pool=None): class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): - __doc__ = """Writer for the Arrow streaming binary format + __doc__ = f"""Writer for the Arrow streaming binary format -{}""".format(_ipc_writer_class_doc) +{_ipc_writer_class_doc}""" def __init__(self, sink, schema, *, use_legacy_format=None, options=None): options = _get_legacy_format_default(use_legacy_format, options) @@ -113,9 +113,9 @@ def __init__(self, source, footer_offset=None, *, options=None, class RecordBatchFileWriter(lib._RecordBatchFileWriter): - __doc__ = """Writer to create the Arrow binary file format + __doc__ = f"""Writer to create the Arrow binary file format -{}""".format(_ipc_writer_class_doc) +{_ipc_writer_class_doc}""" def __init__(self, sink, schema, *, use_legacy_format=None, options=None): options = _get_legacy_format_default(use_legacy_format, options) @@ -128,8 +128,7 @@ def _get_legacy_format_default(use_legacy_format, options): "Can provide 
at most one of options and use_legacy_format") elif options: if not isinstance(options, IpcWriteOptions): - raise TypeError("expected IpcWriteOptions, got {}" - .format(type(options))) + raise TypeError(f"expected IpcWriteOptions, got {type(options)}") return options metadata_version = MetadataVersion.V5 @@ -144,9 +143,7 @@ def _get_legacy_format_default(use_legacy_format, options): def _ensure_default_ipc_read_options(options): if options and not isinstance(options, IpcReadOptions): - raise TypeError( - "expected IpcReadOptions, got {}".format(type(options)) - ) + raise TypeError(f"expected IpcReadOptions, got {type(options)}") return options or IpcReadOptions() @@ -156,16 +153,16 @@ def new_stream(sink, schema, *, use_legacy_format=None, options=None): options=options) -new_stream.__doc__ = """\ +new_stream.__doc__ = f"""\ Create an Arrow columnar IPC stream writer instance -{} +{_ipc_writer_class_doc} Returns ------- writer : RecordBatchStreamWriter A writer for the given sink -""".format(_ipc_writer_class_doc) +""" def open_stream(source, *, options=None, memory_pool=None): @@ -197,16 +194,16 @@ def new_file(sink, schema, *, use_legacy_format=None, options=None): options=options) -new_file.__doc__ = """\ +new_file.__doc__ = f"""\ Create an Arrow columnar IPC file writer instance -{} +{_ipc_writer_class_doc} Returns ------- writer : RecordBatchFileWriter A writer for the given sink -""".format(_ipc_writer_class_doc) +""" def open_file(source, footer_offset=None, *, options=None, memory_pool=None): diff --git a/python/pyarrow/jvm.py b/python/pyarrow/jvm.py index 161c5ff4d6d74..fcac7c78ba168 100644 --- a/python/pyarrow/jvm.py +++ b/python/pyarrow/jvm.py @@ -239,7 +239,7 @@ def field(jvm_field): typ = pa.decimal128(jvm_type.getPrecision(), jvm_type.getScale()) else: raise NotImplementedError( - "Unsupported JVM type: {}".format(type_str)) + f"Unsupported JVM type: {type_str}") else: # TODO: The following JVM types are not implemented: # Struct, List, FixedSizeList, Union, Dictionary @@ -295,8 +295,8 @@ def array(jvm_array): if jvm_array.getField().getType().isComplex(): minor_type_str = jvm_array.getMinorType().toString() raise NotImplementedError( - "Cannot convert JVM Arrow array of type {}," - " complex types not yet implemented.".format(minor_type_str)) + f"Cannot convert JVM Arrow array of type {minor_type_str}, " + "complex types not yet implemented.") dtype = field(jvm_array.getField()).type buffers = [jvm_buffer(buf) for buf in list(jvm_array.getBuffers(False))] diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index fdd5b991a8859..63d6e04e3eb3a 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -29,9 +29,8 @@ cdef class MemoryPool(_Weakrefable): """ def __init__(self): - raise TypeError("Do not call {}'s constructor directly, " - "use pyarrow.*_memory_pool instead." - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + f"use pyarrow.*_memory_pool instead.") cdef void init(self, CMemoryPool* pool): self.pool = pool @@ -126,9 +125,8 @@ cdef class LoggingMemoryPool(MemoryPool): unique_ptr[CLoggingMemoryPool] logging_pool def __init__(self): - raise TypeError("Do not call {}'s constructor directly, " - "use pyarrow.logging_memory_pool instead." 
- .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + f"use pyarrow.logging_memory_pool instead.") cdef class ProxyMemoryPool(MemoryPool): @@ -141,9 +139,8 @@ cdef class ProxyMemoryPool(MemoryPool): unique_ptr[CProxyMemoryPool] proxy_pool def __init__(self): - raise TypeError("Do not call {}'s constructor directly, " - "use pyarrow.proxy_memory_pool instead." - .format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + f"use pyarrow.proxy_memory_pool instead.") def default_memory_pool(): diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py index 6f5feafb3363e..4e0d66ec66599 100644 --- a/python/pyarrow/orc.py +++ b/python/pyarrow/orc.py @@ -222,7 +222,7 @@ def read(self, columns=None): class ORCWriter: - __doc__ = """ + __doc__ = f""" Writer interface for a single ORC file Parameters @@ -231,8 +231,8 @@ class ORCWriter: Writable target. For passing Python file objects or byte buffers, see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream or pyarrow.io.FixedSizeBufferWriter. -{} -""".format(_orc_writer_args_docs) +{_orc_writer_args_docs} +""" is_open = False @@ -369,7 +369,7 @@ def write_table(table, where, *, writer.write(table) -write_table.__doc__ = """ +write_table.__doc__ = f""" Write a table into an ORC file. Parameters @@ -380,5 +380,5 @@ def write_table(table, where, *, Writable target. For passing Python file objects or byte buffers, see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream or pyarrow.io.FixedSizeBufferWriter. -{} -""".format(_orc_writer_args_docs) +{_orc_writer_args_docs} +""" diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi index 18de584bff835..efbbac6da6e6e 100644 --- a/python/pyarrow/pandas-shim.pxi +++ b/python/pyarrow/pandas-shim.pxi @@ -67,14 +67,16 @@ cdef class _PandasAPIShim(object): self._have_pandas = False if raise_: raise ImportError( - "pyarrow requires pandas 1.0.0 or above, pandas {} is " - "installed".format(self._version) + f"pyarrow requires pandas 1.0.0 or above, pandas {self._version} is " + f"installed" ) else: warnings.warn( - "pyarrow requires pandas 1.0.0 or above, pandas {} is " + f"pyarrow requires pandas 1.0.0 or above, pandas {self._version} is " "installed. Therefore, pandas-specific integration is not " - "used.".format(self._version), stacklevel=2) + "used.", + stacklevel=2 + ) return self._is_v1 = self._loose_version < Version('2.0.0') diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 4164ad2106668..a9f096bef5150 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -81,7 +81,7 @@ def get_logical_type(arrow_type): if isinstance(arrow_type, pa.lib.DictionaryType): return 'categorical' elif isinstance(arrow_type, pa.lib.ListType): - return 'list[{}]'.format(get_logical_type(arrow_type.value_type)) + return f'list[{get_logical_type(arrow_type.value_type)}]' elif isinstance(arrow_type, pa.lib.TimestampType): return 'datetimetz' if arrow_type.tz is not None else 'datetime' elif pa.types.is_decimal(arrow_type): @@ -180,9 +180,8 @@ def get_column_metadata(column, name, arrow_type, field_name): and not isinstance(name, str) ): raise TypeError( - 'Column name must be a string. Got column {} of type {}'.format( - name, type(name).__name__ - ) + f"Column name must be a string. 
Got column {name} of type " + f"{type(name).__name__}" ) assert isinstance(field_name, str), str(type(field_name)) @@ -365,7 +364,7 @@ def _index_level_name(index, i, column_names): if index.name is not None and index.name not in column_names: return _column_name_to_strings(index.name) else: - return '__index_level_{:d}__'.format(i) + return f'__index_level_{i:d}__' def _get_columns_to_convert(df, schema, preserve_index, columns): @@ -373,7 +372,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): if not df.columns.is_unique: raise ValueError( - 'Duplicate column names found: {}'.format(list(df.columns)) + f'Duplicate column names found: {list(df.columns)}' ) if schema is not None: @@ -396,7 +395,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): if _pandas_api.is_sparse(col): raise TypeError( - "Sparse pandas data (column {}) not supported.".format(name)) + f"Sparse pandas data (column {name}) not supported.") columns_to_convert.append(col) convert_fields.append(None) @@ -458,27 +457,27 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index): except (KeyError, IndexError): # name not found as index level raise KeyError( - "name '{}' present in the specified schema is not found " - "in the columns or index".format(name)) + f"name '{name}' present in the specified schema is not found " + "in the columns or index") if preserve_index is False: raise ValueError( - "name '{}' present in the specified schema corresponds " + f"name '{name}' present in the specified schema corresponds " "to the index, but 'preserve_index=False' was " - "specified".format(name)) + "specified") elif (preserve_index is None and isinstance(col, _pandas_api.pd.RangeIndex)): raise ValueError( - "name '{}' is present in the schema, but it is a " + f"name '{name}' is present in the schema, but it is a " "RangeIndex which will not be converted as a column " "in the Table, but saved as metadata-only not in " "columns. 
Specify 'preserve_index=True' to force it " "being added as a column, or remove it from the " - "specified schema".format(name)) + "specified schema") is_index = True if _pandas_api.is_sparse(col): raise TypeError( - "Sparse pandas data (column {}) not supported.".format(name)) + f"Sparse pandas data (column {name}) not supported.") field = schema.field(name) columns_to_convert.append(col) @@ -621,13 +620,12 @@ def convert_column(col, field): except (pa.ArrowInvalid, pa.ArrowNotImplementedError, pa.ArrowTypeError) as e: - e.args += ("Conversion failed for column {!s} with type {!s}" - .format(col.name, col.dtype),) + e.args += ( + f"Conversion failed for column {col.name!s} with type {col.dtype!s}",) raise e if not field_nullable and result.null_count > 0: - raise ValueError("Field {} was non-nullable but pandas column " - "had {} null values".format(str(field), - result.null_count)) + raise ValueError(f"Field {field} was non-nullable but pandas column " + f"had {result.null_count} null values") return result def _can_definitely_zero_copy(arr): @@ -1004,8 +1002,7 @@ def _reconstruct_index(table, index_descriptors, all_columns, types_mapper=None) # Possibly the result of munged metadata continue else: - raise ValueError("Unrecognized index kind: {}" - .format(descr['kind'])) + raise ValueError(f"Unrecognized index kind: {descr['kind']}") index_arrays.append(index_level) index_names.append(index_name) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 6ca6f7089e75c..541715b6edcf3 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -182,9 +182,7 @@ def convert_single_predicate(col, op, val): elif op == 'not in': return ~field.isin(val) else: - raise ValueError( - '"{0}" is not a valid operator in predicates.'.format( - (col, op, val))) + raise ValueError(f'"{(col, op, val)}" is not a valid operator in predicates.') disjunction_members = [] @@ -945,14 +943,14 @@ def _sanitize_table(table, new_schema, flavor): class ParquetWriter: - __doc__ = """ + __doc__ = f""" Class for incrementally building a Parquet file for Arrow tables. Parameters ---------- where : path or file-like object schema : pyarrow.Schema -{} +{_parquet_writer_arg_docs} writer_engine_version : unused **options : dict If options contains a key `metadata_collector` then the @@ -962,8 +960,8 @@ class ParquetWriter: Examples -------- -{} -""".format(_parquet_writer_arg_docs, _parquet_writer_example_doc) +{_parquet_writer_example_doc} +""" def __init__(self, where, schema, filesystem=None, flavor=None, @@ -1108,9 +1106,10 @@ def write_table(self, table, row_group_size=None): assert self.is_open if not table.schema.equals(self.schema, check_metadata=False): - msg = ('Table schema does not match schema used to create file: ' - '\ntable:\n{!s} vs. \nfile:\n{!s}' - .format(table.schema, self.schema)) + msg = ( + f"Table schema does not match schema used to create file: \n" + f"table:\n{table.schema!s} vs. \nfile:\n{self.schema!s}" + ) raise ValueError(msg) self.writer.write_table(table, row_group_size=row_group_size) @@ -1211,7 +1210,7 @@ def _get_pandas_index_columns(keyvalues): class ParquetDataset: - __doc__ = """ + __doc__ = f""" Encapsulates details of reading a complete Parquet dataset possibly consisting of multiple files and partitions in subdirectories. @@ -1232,8 +1231,8 @@ class ParquetDataset: exploited to avoid loading files at all if they contain no matching rows. Within-file level filtering and different partitioning schemes are supported. 
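The `__doc__ = f"""..."""` rewrites above (RecordBatchStreamWriter, RecordBatchFileWriter, ORCWriter, ParquetWriter, ParquetDataset, write_table) all follow the same pattern: a shared documentation fragment is interpolated into the docstring once, at import time. A minimal sketch of that pattern, using a made-up `_shared_arg_docs` constant rather than pyarrow's real doc fragments:

```python
# Illustrative only: _shared_arg_docs stands in for constants such as
# _parquet_writer_arg_docs; it is not part of pyarrow.
_shared_arg_docs = """\
where : str or file-like object
    Destination for the output."""


class Writer:
    # The f-string is evaluated when the class body runs (i.e. at import
    # time), so any interpolated name must already be defined above.
    __doc__ = f"""Writer for the example format.

Parameters
----------
{_shared_arg_docs}
"""


print(Writer.__doc__)
```

One thing to watch when converting such templates: literal `{` or `}` characters in the f-string literal itself must be doubled (`{{`, `}}`), while braces inside the interpolated values need no escaping.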
- {1} -{0} + {_DNF_filter_doc} +{_read_docstring_common} ignore_prefixes : list, optional Files matching any of these prefixes will be ignored by the discovery process. @@ -1270,8 +1269,8 @@ class ParquetDataset: Examples -------- -{2} -""".format(_read_docstring_common, _DNF_filter_doc, _parquet_dataset_example) +{_parquet_dataset_example} +""" def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, read_dictionary=None, memory_map=False, buffer_size=None, @@ -1969,7 +1968,7 @@ def write_table(table, where, row_group_size=None, version='2.6', ... use_dictionary=False) """ -write_table.__doc__ = """ +write_table.__doc__ = f""" Write a Table to Parquet format. Parameters @@ -1980,14 +1979,14 @@ def write_table(table, where, row_group_size=None, version='2.6', Maximum number of rows in each written row group. If None, the row group size will be the minimum of the Table size and 1024 * 1024. -{} +{_parquet_writer_arg_docs} **kwargs : optional Additional options for ParquetWriter Examples -------- -{} -""".format(_parquet_writer_arg_docs, _write_table_example) +{_write_table_example} +""" def write_to_dataset(table, root_path, partition_cols=None, diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 04442c1f5d2b9..4f13df2efdbfe 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -26,8 +26,8 @@ cdef class Scalar(_Weakrefable): """ def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use " - "pa.scalar() instead.".format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, " + "use pa.scalar() instead.") cdef void init(self, const shared_ptr[CScalar]& wrapped): self.wrapped = wrapped @@ -117,9 +117,7 @@ cdef class Scalar(_Weakrefable): check_status(self.wrapped.get().Validate()) def __repr__(self): - return ''.format( - self.__class__.__name__, self.as_py() - ) + return f'' def __str__(self): return str(self.as_py()) @@ -611,10 +609,10 @@ def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None): # otherwise safely truncate to microsecond resolution datetime if value % 1000 != 0: raise ValueError( - "Nanosecond resolution temporal type {} is not safely " + f"Nanosecond resolution temporal type {value} is not safely " "convertible to microseconds to convert to datetime.datetime. " "Install pandas to return as Timestamp with nanosecond " - "support or access the .value attribute.".format(value) + "support or access the .value attribute." ) delta = datetime.timedelta(microseconds=value // 1000) @@ -735,9 +733,7 @@ cdef class TimestampScalar(Scalar): type_format = str(_pc().strftime(self, format="%Y-%m-%dT%H:%M:%S%z")) else: type_format = str(_pc().strftime(self)) - return ''.format( - self.__class__.__name__, type_format - ) + return f'' cdef class DurationScalar(Scalar): @@ -783,10 +779,10 @@ cdef class DurationScalar(Scalar): # otherwise safely truncate to microsecond resolution timedelta if sp.value % 1000 != 0: raise ValueError( - "Nanosecond duration {} is not safely convertible to " + f"Nanosecond duration {sp.value} is not safely convertible to " "microseconds to convert to datetime.timedelta. Install " "pandas to return as Timedelta with nanosecond support or " - "access the .value attribute.".format(sp.value) + "access the .value attribute." 
) return datetime.timedelta(microseconds=sp.value // 1000) @@ -1049,9 +1045,7 @@ cdef class StructScalar(Scalar, collections.abc.Mapping): return None def __repr__(self): - return ''.format( - self.__class__.__name__, self._as_py_tuple() - ) + return f'' def __str__(self): return str(self._as_py_tuple()) @@ -1356,9 +1350,8 @@ cdef class ExtensionScalar(Scalar): storage = None elif isinstance(value, Scalar): if value.type != typ.storage_type: - raise TypeError("Incompatible storage type {0} " - "for extension type {1}" - .format(value.type, typ)) + raise TypeError(f"Incompatible storage type {value.type} " + f"for extension type {typ}") storage = value else: storage = scalar(value, typ.storage_type) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 5a6cd390489bf..71f1e037e2db1 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -113,7 +113,7 @@ cdef class ChunkedArray(_PandasConvertible): def __repr__(self): type_format = object.__repr__(self) - return '{0}\n{1}'.format(type_format, str(self)) + return f"{type_format}\n{str(self)}" def to_string(self, *, int indent=0, int window=5, int container_window=2, c_bool skip_new_lines=False): @@ -1568,8 +1568,8 @@ cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema): schema.reset(new CSchema(c_fields, c_meta)) return arrays else: - raise ValueError('Length of names ({}) does not match ' - 'length of arrays ({})'.format(len(names), K)) + raise ValueError(f'Length of names ({len(names)}) does not match ' + f'length of arrays ({K})') c_fields.resize(K) @@ -1578,8 +1578,8 @@ cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema): 'Table or RecordBatch.') if len(names) != K: - raise ValueError('Length of names ({}) does not match ' - 'length of arrays ({})'.format(len(names), K)) + raise ValueError(f'Length of names ({len(names)}) does not match ' + f'length of arrays ({K})') converted_arrays = [] for i in range(K): @@ -1726,11 +1726,10 @@ cdef class _Tabular(_PandasConvertible): field_indices = self.schema.get_all_field_indices(i) if len(field_indices) == 0: - raise KeyError("Field \"{}\" does not exist in schema" - .format(i)) + raise KeyError(f'Field "{i}" does not exist in schema') elif len(field_indices) > 1: - raise KeyError("Field \"{}\" exists {} times in schema" - .format(i, len(field_indices))) + raise KeyError( + f'Field "{i}" exists {len(field_indices)} times in schema') else: return field_indices[0] elif isinstance(i, int): @@ -2368,15 +2367,13 @@ cdef class _Tabular(_PandasConvertible): show_field_metadata=show_metadata, show_schema_metadata=show_metadata ) - title = 'pyarrow.{}\n{}'.format(type(self).__name__, schema_as_string) + title = f'pyarrow.{type(self).__name__}\n{schema_as_string}' pieces = [title] if preview_cols: pieces.append('----') for i in range(min(self.num_columns, preview_cols)): - pieces.append('{}: {}'.format( - self.field(i).name, - self.column(i).to_string(indent=0, skip_new_lines=True) - )) + pieces.append( + f'{self.field(i).name}: {self.column(i).to_string(indent=0, skip_new_lines=True)}') if preview_cols < self.num_columns: pieces.append('...') return '\n'.join(pieces) @@ -2436,7 +2433,7 @@ cdef class _Tabular(_PandasConvertible): for col in columns: idx = self.schema.get_field_index(col) if idx == -1: - raise KeyError("Column {!r} not found".format(col)) + raise KeyError(f"Column {col!r} not found") indices.append(idx) indices.sort() @@ -3072,7 +3069,7 @@ cdef class RecordBatch(_Tabular): indices = 
self.schema.get_all_field_indices(name) if not indices: - raise KeyError("Column {!r} not found".format(name)) + raise KeyError(f"Column {name!r} not found") for index in indices: idx_to_new_name[index] = new_name @@ -3347,14 +3344,13 @@ cdef class RecordBatch(_Tabular): list newcols = [] if self.schema.names != target_schema.names: - raise ValueError("Target schema's field names are not matching " - "the record batch's field names: {!r}, {!r}" - .format(self.schema.names, target_schema.names)) + raise ValueError(f"Target schema's field names are not matching " + f"the record batch's field names: {self.schema.names!r}, {target_schema.names!r}") for column, field in zip(self.itercolumns(), target_schema): if not field.nullable and column.null_count > 0: - raise ValueError("Casting field {!r} with null values to non-nullable" - .format(field.name)) + raise ValueError( + f"Casting field {field.name!r} with null values to non-nullable") casted = column.cast(field.type, safe=safe, options=options) newcols.append(casted) @@ -3549,8 +3545,8 @@ cdef class RecordBatch(_Tabular): c_arrays.reserve(len(arrays)) for arr in converted_arrays: if len(arr) != num_rows: - raise ValueError('Arrays were not all the same length: ' - '{0} vs {1}'.format(len(arr), num_rows)) + raise ValueError(f'Arrays were not all the same length: ' + f'{len(arr)} vs {num_rows}') c_arrays.push_back(arr.sp_array) result = pyarrow_wrap_batch(CRecordBatch.Make(c_schema, num_rows, @@ -4714,14 +4710,13 @@ cdef class Table(_Tabular): list newcols = [] if self.schema.names != target_schema.names: - raise ValueError("Target schema's field names are not matching " - "the table's field names: {!r}, {!r}" - .format(self.schema.names, target_schema.names)) + raise ValueError(f"Target schema's field names are not matching " + f"the table's field names: {self.schema.names!r}, {target_schema.names!r}") for column, field in zip(self.itercolumns(), target_schema): if not field.nullable and column.null_count > 0: - raise ValueError("Casting field {!r} with null values to non-nullable" - .format(field.name)) + raise ValueError( + f"Casting field {field.name!r} with null values to non-nullable") casted = column.cast(field.type, safe=safe, options=options) newcols.append(casted) @@ -5557,7 +5552,7 @@ cdef class Table(_Tabular): indices = self.schema.get_all_field_indices(name) if not indices: - raise KeyError("Column {!r} not found".format(name)) + raise KeyError(f"Column {name!r} not found") for index in indices: idx_to_new_name[index] = new_name diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index 3e0c63c18fc98..0e1454f230420 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -56,7 +56,7 @@ cdef class Tensor(_Weakrefable): - return """ -type: {0.type} -shape: {0.shape} -strides: {0.strides}""".format(self) + return f""" +type: {self.type} +shape: {self.shape} +strides: {self.strides}""" @staticmethod def from_numpy(obj, dim_names=None): @@ -321,8 +321,8 @@ cdef class SparseCOOTensor(_Weakrefable): def __repr__(self): - return """ -type: {0.type} -shape: {0.shape}""".format(self) + return f""" +type: {self.type} +shape: {self.shape}""" @classmethod def from_dense_numpy(cls, obj, dim_names=None): @@ -394,7 +394,7 @@ shape: {0.shape}""".format(self) import scipy.sparse if not isinstance(obj, scipy.sparse.coo_matrix): raise TypeError( - "Expected scipy.sparse.coo_matrix, got {}".format(type(obj))) + f"Expected scipy.sparse.coo_matrix, got {type(obj)}") cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape @@ -440,7 +440,7 @@ shape: {0.shape}""".format(self) 
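The tensor `__repr__` hunks just above illustrate the easiest mistake in this kind of conversion: when a `.format()` call spans a multi-line triple-quoted string, dropping `.format(...)` is not enough; the literal holding the placeholders must itself gain the `f` prefix, otherwise the braces are returned verbatim. A small, self-contained illustration (the `Point` class is hypothetical, not pyarrow code):

```python
class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def broken_repr(self):
        # Missing f prefix: the placeholders come back literally.
        return """point
x: {self.x}
y: {self.y}"""

    def fixed_repr(self):
        # With the prefix, the expressions are interpolated as intended.
        return f"""point
x: {self.x}
y: {self.y}"""


p = Point(1, 2)
assert "{self.x}" in p.broken_repr()
assert "x: 1" in p.fixed_repr()
```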
import sparse if not isinstance(obj, sparse.COO): raise TypeError( - "Expected sparse.COO, got {}".format(type(obj))) + f"Expected sparse.COO, got {type(obj)}") cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape @@ -624,9 +624,9 @@ cdef class SparseCSRMatrix(_Weakrefable): self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): - return """ -type: {0.type} -shape: {0.shape}""".format(self) + return f""" +type: {self.type} +shape: {self.shape}""" @classmethod def from_dense_numpy(cls, obj, dim_names=None): @@ -705,7 +705,7 @@ shape: {0.shape}""".format(self) import scipy.sparse if not isinstance(obj, scipy.sparse.csr_matrix): raise TypeError( - "Expected scipy.sparse.csr_matrix, got {}".format(type(obj))) + f"Expected scipy.sparse.csr_matrix, got {type(obj)}") cdef shared_ptr[CSparseCSRMatrix] csparse_tensor cdef vector[int64_t] c_shape @@ -865,9 +865,9 @@ cdef class SparseCSCMatrix(_Weakrefable): self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): - return """ -type: {0.type} -shape: {0.shape}""".format(self) + return f""" +type: {self.type} +shape: {self.shape}""" @classmethod def from_dense_numpy(cls, obj, dim_names=None): @@ -946,7 +946,7 @@ shape: {0.shape}""".format(self) import scipy.sparse if not isinstance(obj, scipy.sparse.csc_matrix): raise TypeError( - "Expected scipy.sparse.csc_matrix, got {}".format(type(obj))) + f"Expected scipy.sparse.csc_matrix, got {type(obj)}") cdef shared_ptr[CSparseCSCMatrix] csparse_tensor cdef vector[int64_t] c_shape @@ -1115,9 +1115,9 @@ cdef class SparseCSFTensor(_Weakrefable): self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): - return """ -type: {0.type} -shape: {0.shape}""".format(self) + return f""" +type: {self.type} +shape: {self.shape}""" @classmethod def from_dense_numpy(cls, obj, dim_names=None): @@ -1183,14 +1183,14 @@ shape: {0.shape}""".format(self) # Enforce preconditions for SparseCSFTensor indices if not (isinstance(indptr, (list, tuple)) and isinstance(indices, (list, tuple))): - raise TypeError("Expected list or tuple, got {}, {}" - .format(type(indptr), type(indices))) + raise TypeError( + f"Expected list or tuple, got {type(indptr)}, {type(indices)}") if len(indptr) != len(shape) - 1: - raise ValueError("Expected list of {ndim} np.arrays for " - "SparseCSFTensor.indptr".format(ndim=len(shape))) + raise ValueError(f"Expected list of {len(shape)} np.arrays for " + "SparseCSFTensor.indptr") if len(indices) != len(shape): - raise ValueError("Expected list of {ndim} np.arrays for " - "SparseCSFTensor.indices".format(ndim=len(shape))) + raise ValueError(f"Expected list of {len(shape)} np.arrays for " + "SparseCSFTensor.indices") if any([x.ndim != 1 for x in indptr]): raise ValueError("Expected a list of 1-dimensional arrays for " "SparseCSFTensor.indptr") diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 53b54bb494da6..575444c1cfc25 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -83,17 +83,16 @@ def bool_env(name, default=None): elif value in {'0', 'false', 'off', 'no', 'n'}: return False else: - raise ValueError('{}={} is not parsable as boolean' - .format(name.upper(), value)) + raise ValueError(f'{name.upper()}={value} is not parsable as boolean') for group in groups: - default = bool_env('PYARROW_TEST_{}'.format(group), defaults[group]) - parser.addoption('--enable-{}'.format(group), + default = bool_env(f'PYARROW_TEST_{group}', defaults[group]) + 
parser.addoption(f'--enable-{group}', action='store_true', default=default, - help=('Enable the {} test group'.format(group))) - parser.addoption('--disable-{}'.format(group), + help=(f'Enable the {group} test group')) + parser.addoption(f'--disable-{group}', action='store_true', default=False, - help=('Disable the {} test group'.format(group))) + help=(f'Disable the {group} test group')) class PyArrowConfig: @@ -107,7 +106,7 @@ def apply_mark(self, mark): def requires(self, group): if not self.is_enabled[group]: - pytest.skip('{} NOT enabled'.format(group)) + pytest.skip(f'{group} NOT enabled') def pytest_configure(config): @@ -119,8 +118,8 @@ def pytest_configure(config): "markers", mark, ) - enable_flag = '--enable-{}'.format(mark) - disable_flag = '--disable-{}'.format(mark) + enable_flag = f'--enable-{mark}' + disable_flag = f'--disable-{mark}' is_enabled = (config.getoption(enable_flag) and not config.getoption(disable_flag)) @@ -217,7 +216,7 @@ def minio_server_health_check(address): tmpdir = tmpdir_factory.getbasetemp() host, port, access_key, secret_key = s3_connection - address = '{}:{}'.format(host, port) + address = f'{host}:{port}' env = os.environ.copy() env.update({ 'MINIO_ACCESS_KEY': access_key, diff --git a/python/pyarrow/tests/pandas_examples.py b/python/pyarrow/tests/pandas_examples.py index 466c14eeb6f5f..048840fa32b4b 100644 --- a/python/pyarrow/tests/pandas_examples.py +++ b/python/pyarrow/tests/pandas_examples.py @@ -157,7 +157,7 @@ def dataframe_with_lists(include_index=False, parquet_compatible=False): ] for value_type, data in temporal_pairs: - field_name = '{}_list'.format(value_type) + field_name = f'{value_type}_list' field_type = pa.list_(value_type) field = pa.field(field_name, field_type) fields.append(field) diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py index 94b3058fa02c8..b5d2216d70ecb 100644 --- a/python/pyarrow/tests/parquet/conftest.py +++ b/python/pyarrow/tests/parquet/conftest.py @@ -49,7 +49,7 @@ def s3_bucket(s3_server): host, port, access_key, secret_key = s3_server['connection'] s3_client = boto3.client( 's3', - endpoint_url='http://{}:{}'.format(host, port), + endpoint_url=f'http://{host}:{port}', aws_access_key_id=access_key, aws_secret_access_key=secret_key, config=botocore.client.Config(signature_version='s3v4'), @@ -75,11 +75,11 @@ def s3_example_s3fs(s3_server, s3_bucket): key=access_key, secret=secret_key, client_kwargs={ - 'endpoint_url': 'http://{}:{}'.format(host, port) + 'endpoint_url': f'http://{host}:{port}' } ) - test_path = '{}/{}'.format(s3_bucket, guid()) + test_path = f'{s3_bucket}/{guid()}' fs.mkdir(test_path) yield fs, test_path @@ -95,9 +95,8 @@ def s3_example_fs(s3_server): host, port, access_key, secret_key = s3_server['connection'] uri = ( - "s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}" - "&allow_bucket_creation=True" - .format(access_key, secret_key, host, port) + f"s3://{access_key}:{secret_key}@mybucket/data.parquet?scheme=http" + f"&endpoint_override={host}:{port}&allow_bucket_creation=True" ) fs, path = FileSystem.from_uri(uri) diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 1428f80239771..e8e3da1d84177 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -183,7 +183,7 @@ def test_dictionary_array_automatically_read(): # Make a large dictionary, a little over 4MB of data dict_length = 4000 - dict_values = 
pa.array([('x' * 1000 + '_{}'.format(i)) + dict_values = pa.array([('x' * 1000 + f'_{i}') for i in range(dict_length)]) num_chunks = 10 @@ -220,8 +220,7 @@ def test_decimal_roundtrip(tempdir): util.randdecimal(precision, scale) for _ in range(num_values) ] - column_name = ('dec_precision_{:d}_scale_{:d}' - .format(precision, scale)) + column_name = f'dec_precision_{precision}_scale_{scale}' columns[column_name] = random_decimal_values expected = pd.DataFrame(columns) @@ -255,7 +254,7 @@ def test_decimal_roundtrip_negative_scale(tempdir): @pytest.mark.parametrize('dtype', [int, float]) def test_single_pylist_column_roundtrip(tempdir, dtype,): - filename = tempdir / 'single_{}_column.parquet'.format(dtype.__name__) + filename = tempdir / f'single_{dtype.__name__}_column.parquet' data = [pa.array(list(map(dtype, range(5))))] table = pa.Table.from_arrays(data, names=['a']) _write_table(table, filename) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index 5cab902fda601..d909df7339c18 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -566,7 +566,7 @@ def _visit_level(base_dir, level, part_keys): level_dir = pathsep.join([ str(base_dir), - '{}={}'.format(name, value) + f'{name}={value}' ]) fs.create_dir(level_dir) @@ -648,7 +648,7 @@ def test_read_multiple_files(tempdir): # Hack so that we don't have a dtype cast in v1 files df['uint32'] = df['uint32'].astype(np.int64) - path = dirpath / '{}.parquet'.format(i) + path = dirpath / f'{i}.parquet' table = pa.Table.from_pandas(df) _write_table(table, path) @@ -683,7 +683,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): # Test failure modes with non-uniform metadata bad_apple = _test_dataframe(size, seed=i).iloc[:, :4] - bad_apple_path = tempdir / '{}.parquet'.format(guid()) + bad_apple_path = tempdir / f'{guid()}.parquet' t = pa.Table.from_pandas(bad_apple) _write_table(t, bad_apple_path) @@ -720,7 +720,7 @@ def test_dataset_read_pandas(tempdir): df.index = np.arange(i * size, (i + 1) * size) df.index.name = 'index' - path = dirpath / '{}.parquet'.format(i) + path = dirpath / f'{i}.parquet' table = pa.Table.from_pandas(df) _write_table(table, path) @@ -749,7 +749,7 @@ def test_dataset_memory_map(tempdir): dirpath.mkdir() df = _test_dataframe(10, seed=0) - path = dirpath / '{}.parquet'.format(0) + path = dirpath / f'{0}.parquet' table = pa.Table.from_pandas(df) _write_table(table, path, version='2.6') @@ -764,7 +764,7 @@ def test_dataset_enable_buffered_stream(tempdir): dirpath.mkdir() df = _test_dataframe(10, seed=0) - path = dirpath / '{}.parquet'.format(0) + path = dirpath / f'{0}.parquet' table = pa.Table.from_pandas(df) _write_table(table, path, version='2.6') @@ -784,7 +784,7 @@ def test_dataset_enable_pre_buffer(tempdir): dirpath.mkdir() df = _test_dataframe(10, seed=0) - path = dirpath / '{}.parquet'.format(0) + path = dirpath / f'{0}.parquet' table = pa.Table.from_pandas(df) _write_table(table, path, version='2.6') @@ -801,7 +801,7 @@ def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5): paths = [] for i in range(nfiles): df = _test_dataframe(file_nrows, seed=i) - path = base_path / '{}.parquet'.format(i) + path = base_path / f'{i}.parquet' test_data.append(_write_table(df, path)) paths.append(path) @@ -823,7 +823,7 @@ def test_ignore_private_directories(tempdir, dir_prefix): file_nrows=5) # private directory - (dirpath / '{}staging'.format(dir_prefix)).mkdir() + (dirpath / 
f'{dir_prefix}staging').mkdir() dataset = pq.ParquetDataset(dirpath) @@ -873,7 +873,7 @@ def test_ignore_hidden_files_underscore(tempdir): def test_ignore_no_private_directories_in_base_path(tempdir, dir_prefix): # ARROW-8427 - don't ignore explicitly listed files if parent directory # is a private directory - dirpath = tempdir / "{0}data".format(dir_prefix) / guid() + dirpath = tempdir / f'{dir_prefix}data' / guid() dirpath.mkdir(parents=True) paths = _make_example_multifile_dataset(dirpath, nfiles=10, diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 14ce9bbfcdd58..3f83a0868292b 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -267,7 +267,7 @@ def test_statistics_convert_logical_types(tempdir): for i, (min_val, max_val, typ) in enumerate(cases): t = pa.Table.from_arrays([pa.array([min_val, max_val], type=typ)], ['col']) - path = str(tempdir / ('example{}.parquet'.format(i))) + path = str(tempdir / f'example{i}.parquet') pq.write_table(t, path, version='2.6') pf = pq.ParquetFile(path) stats = pf.metadata.row_group(0).column(0).statistics diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 2ea2f46873aef..1fe323da2a1cc 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -590,7 +590,7 @@ def test_dataset_read_pandas_common_metadata( np.arange(i * size, (i + 1) * size, dtype="int64"), name='index' ) - path = dirpath / '{}.parquet'.format(i) + path = dirpath / f'{i}.parquet' table = pa.Table.from_pandas(df, preserve_index=preserve_index) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 885442b079c5b..8c119855fc3b8 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -246,7 +246,7 @@ def test_to_numpy_writable(): @pytest.mark.parametrize('tz', [None, "UTC"]) def test_to_numpy_datetime64(unit, tz): arr = pa.array([1, 2, 3], pa.timestamp(unit, tz=tz)) - expected = np.array([1, 2, 3], dtype="datetime64[{}]".format(unit)) + expected = np.array([1, 2, 3], dtype=f"datetime64[{unit}]") np_arr = arr.to_numpy() np.testing.assert_array_equal(np_arr, expected) @@ -255,7 +255,7 @@ def test_to_numpy_datetime64(unit, tz): @pytest.mark.parametrize('unit', ['s', 'ms', 'us', 'ns']) def test_to_numpy_timedelta64(unit): arr = pa.array([1, 2, 3], pa.duration(unit)) - expected = np.array([1, 2, 3], dtype="timedelta64[{}]".format(unit)) + expected = np.array([1, 2, 3], dtype=f"timedelta64[{unit}]") np_arr = arr.to_numpy() np.testing.assert_array_equal(np_arr, expected) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 239ae55f2f760..5bcd188d458b8 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1423,7 +1423,7 @@ def test_stress_convert_options_blowup(self): except AttributeError: clock = time.time num_columns = 10000 - col_names = ["K{}".format(i) for i in range(num_columns)] + col_names = [f"K{i}" for i in range(num_columns)] csv = make_empty_csv(col_names) t1 = clock() convert_options = ConvertOptions( diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index a71fa036503d7..e06f479987cb7 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -747,8 +747,10 @@ def make_table(): dictionary=a2) arrays = [a0, a1, a2, a3, a4, a5] - schema = 
pa.schema([('f{}'.format(i), arr.type) - for i, arr in enumerate(arrays)]) + schema = pa.schema([ + (f'f{i}', arr.type) + for i, arr in enumerate(arrays) + ]) batch = pa.record_batch(arrays, schema=schema) table = pa.Table.from_batches([batch]) return table diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 937d927f831b0..e0116a4bb7626 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -123,7 +123,7 @@ def test_cython_api(tmpdir): # Check the extension module is loadable from a subprocess without # pyarrow imported first. - code = """if 1: + code = f"""if 1: import sys import os @@ -131,17 +131,16 @@ def test_cython_api(tmpdir): # Add dll directory was added on python 3.8 # and is required in order to find extra DLLs # only for win32 - for dir in {library_dirs}: + for dir in {pa.get_library_dirs()}: os.add_dll_directory(dir) except AttributeError: pass - mod = __import__({mod_name!r}) + mod = __import__('pyarrow_cython_example') arr = mod.make_null_array(5) assert mod.get_array_length(arr) == 5 assert arr.null_count == 5 - """.format(mod_name='pyarrow_cython_example', - library_dirs=pa.get_library_dirs()) + """ path_var = None if sys.platform == 'win32': diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index b6aaa2840d83c..e9fc0fc2387c8 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -122,7 +122,7 @@ def mockfs(): ] for i, directory in enumerate(directories): - path = '{}/file{}.parquet'.format(directory, i) + path = f'{directory}/file{i}.parquet' mockfs.create_dir(directory) with mockfs.open_output_stream(path) as out: data = [ @@ -198,15 +198,15 @@ def multisourcefs(request): mockfs.create_dir('plain') n = len(df_a) for i, chunk in enumerate([df_a.iloc[i:i+n//10] for i in range(0, n, n//10)]): - path = 'plain/chunk-{}.parquet'.format(i) + path = f'plain/chunk-{i}.parquet' with mockfs.open_output_stream(path) as out: pq.write_table(_table_from_pandas(chunk), out) # create one with schema partitioning by weekday and color mockfs.create_dir('schema') for part, chunk in df_b.groupby([df_b.date.dt.dayofweek, df_b.color]): - folder = 'schema/{}/{}'.format(*part) - path = '{}/chunk.parquet'.format(folder) + folder = f'schema/{part[0]}/{part[1]}' + path = f'{folder}/chunk.parquet' mockfs.create_dir(folder) with mockfs.open_output_stream(path) as out: pq.write_table(_table_from_pandas(chunk), out) @@ -214,8 +214,8 @@ def multisourcefs(request): # create one with hive partitioning by year and month mockfs.create_dir('hive') for part, chunk in df_c.groupby([df_c.date.dt.year, df_c.date.dt.month]): - folder = 'hive/year={}/month={}'.format(*part) - path = '{}/chunk.parquet'.format(folder) + folder = f'hive/year={part[0]}/month={part[1]}' + path = f'{folder}/chunk.parquet' mockfs.create_dir(folder) with mockfs.open_output_stream(path) as out: pq.write_table(_table_from_pandas(chunk), out) @@ -223,8 +223,8 @@ def multisourcefs(request): # create one with hive partitioning by color mockfs.create_dir('hive_color') for part, chunk in df_d.groupby("color"): - folder = 'hive_color/color={}'.format(part) - path = '{}/chunk.parquet'.format(folder) + folder = f'hive_color/color={part}' + path = f'{folder}/chunk.parquet' mockfs.create_dir(folder) with mockfs.open_output_stream(path) as out: pq.write_table(_table_from_pandas(chunk), out) @@ -1801,8 +1801,8 @@ def test_fragments_repr(tempdir, dataset): fragment = 
list(dataset.get_fragments())[0] assert ( repr(fragment) == - "".format( - dataset.filesystem.normalize_path(str(path))) + f"" ) # non-parquet format @@ -1812,8 +1812,8 @@ def test_fragments_repr(tempdir, dataset): fragment = list(dataset.get_fragments())[0] assert ( repr(fragment) == - "".format( - dataset.filesystem.normalize_path(str(path))) + f"" ) @@ -2569,7 +2569,7 @@ def _create_partitioned_dataset(basedir): path.mkdir() for i in range(3): - part = path / "part={}".format(i) + part = path / f"part={i}" part.mkdir() pq.write_table(table.slice(3*i, 3), part / "test.parquet") @@ -2781,9 +2781,8 @@ def s3_example_simple(s3_server): host, port, access_key, secret_key = s3_server['connection'] uri = ( - "s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}" - "&allow_bucket_creation=True" - .format(access_key, secret_key, host, port) + f"s3://{access_key}:{secret_key}@mybucket/data.parquet?scheme=http" + f"&endpoint_override={host}:{port}&allow_bucket_creation=True" ) fs, path = FileSystem.from_uri(uri) @@ -2833,7 +2832,7 @@ def test_open_dataset_from_uri_s3_fsspec(s3_example_simple): key=access_key, secret=secret_key, client_kwargs={ - 'endpoint_url': 'http://{}:{}'.format(host, port) + 'endpoint_url': f'http://{host}:{port}' } ) @@ -2855,10 +2854,10 @@ def test_open_dataset_from_s3_with_filesystem_uri(s3_server): host, port, access_key, secret_key = s3_server['connection'] bucket = 'theirbucket' path = 'nested/folder/data.parquet' - uri = "s3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}"\ - "&allow_bucket_creation=true".format( - access_key, secret_key, bucket, path, host, port - ) + uri = ( + f"s3://{access_key}:{secret_key}@{bucket}/{path}?scheme=http" + f"&endpoint_override={host}:{port}&allow_bucket_creation=true" + ) fs, path = FileSystem.from_uri(uri) assert path == 'theirbucket/nested/folder/data.parquet' @@ -4778,7 +4777,7 @@ def test_write_dataset_parquet(tempdir): format = ds.ParquetFileFormat() opts = format.make_write_options(version=version) assert " not selecting the nested file_c @@ -1494,7 +1494,7 @@ def test_hdfs_options(hdfs_connection, pickle_module): host, port, 'me', replication + 1, buffer_size, default_block_size )) hdfs5 = HadoopFileSystem(host, port) - hdfs6 = HadoopFileSystem.from_uri('hdfs://{}:{}'.format(host, port)) + hdfs6 = HadoopFileSystem.from_uri(f'hdfs://{host}:{port}') hdfs7 = HadoopFileSystem(host, port, user='localuser') hdfs8 = HadoopFileSystem(host, port, user='localuser', kerb_ticket="cache_path") diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index e2df1b1c46835..a6d3546e57c68 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -822,7 +822,7 @@ def test_cache_options_pickling(pickle_module): ]) def test_compress_decompress(compression): if not Codec.is_available(compression): - pytest.skip("{} support is not built".format(compression)) + pytest.skip(f"{compression} support is not built") INPUT_SIZE = 10000 test_data = (np.random.randint(0, 255, size=INPUT_SIZE) @@ -863,7 +863,7 @@ def test_compress_decompress(compression): ]) def test_compression_level(compression): if not Codec.is_available(compression): - pytest.skip("{} support is not built".format(compression)) + pytest.skip(f"{compression} support is not built") codec = Codec(compression) if codec.name == "snappy": @@ -1755,7 +1755,7 @@ def test_unknown_compression_raises(): ]) def test_compressed_roundtrip(compression): if not Codec.is_available(compression): - pytest.skip("{} support is not 
built".format(compression)) + pytest.skip(f"{compression} support is not built") data = b"some test data\n" * 10 + b"eof\n" raw = pa.BufferOutputStream() @@ -1776,7 +1776,7 @@ def test_compressed_roundtrip(compression): ) def test_compressed_recordbatch_stream(compression): if not Codec.is_available(compression): - pytest.skip("{} support is not built".format(compression)) + pytest.skip(f"{compression} support is not built") # ARROW-4836: roundtrip a RecordBatch through a compressed stream table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a']) diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index e1bd0d82d9f16..d2ba780efc7fb 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -45,7 +45,7 @@ def root_allocator(): }).text jar_path = os.path.join( arrow_dir, 'java', 'tools', 'target', - 'arrow-tools-{}-jar-with-dependencies.jar'.format(version)) + f'arrow-tools-{version}-jar-with-dependencies.jar') jar_path = os.getenv("ARROW_TOOLS_JAR", jar_path) kwargs = {} # This will be the default behaviour in jpype 0.8+ @@ -224,7 +224,7 @@ def test_jvm_types(root_allocator, pa_type, jvm_spec, nullable): ]) def test_jvm_array(root_allocator, pa_type, py_data, jvm_type): # Create vector - cls = "org.apache.arrow.vector.{}".format(jvm_type) + cls = f"org.apache.arrow.vector.{jvm_type}" jvm_vector = jpype.JClass(cls)("vector", root_allocator) jvm_vector.allocateNew(len(py_data)) for i, val in enumerate(py_data): @@ -241,7 +241,7 @@ def test_jvm_array(root_allocator, pa_type, py_data, jvm_type): def test_jvm_array_empty(root_allocator): - cls = "org.apache.arrow.vector.{}".format('IntVector') + cls = f"org.apache.arrow.vector.{'IntVector'}" jvm_vector = jpype.JClass(cls)("vector", root_allocator) jvm_vector.allocateNew() jvm_array = pa_jvm.array(jvm_vector) @@ -362,7 +362,7 @@ def test_jvm_array_empty(root_allocator): def test_jvm_record_batch(root_allocator, pa_type, py_data, jvm_type, jvm_spec): # Create vector - cls = "org.apache.arrow.vector.{}".format(jvm_type) + cls = f"org.apache.arrow.vector.{jvm_type}" jvm_vector = jpype.JClass(cls)("vector", root_allocator) jvm_vector.allocateNew(len(py_data)) for i, val in enumerate(py_data): diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index dbba7852190f4..15e423948638b 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -261,6 +261,6 @@ def test_set_timezone_db_path_non_windows(): ]) def test_extension_type_constructor_errors(klass): # ARROW-2638: prevent calling extension class constructors directly - msg = "Do not call {cls}'s constructor directly, use .* instead." - with pytest.raises(TypeError, match=msg.format(cls=klass.__name__)): + msg = f"Do not call {klass.__name__}'s constructor directly, use .* instead." 
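In the test_misc hunk above, the interpolated message becomes the `match=` argument of `pytest.raises`, which is treated as a regular expression (hence the deliberate `.*`). That is harmless for class names, but when the interpolated piece can contain regex metacharacters it should be escaped. A hedged sketch of that precaution, independent of pyarrow's actual tests:

```python
import re

import pytest


def make_widget(name):
    # Hypothetical helper used only for this illustration.
    raise TypeError(f"Do not construct widget ({name}) directly")


def test_make_widget_error():
    name = "gadget (v2)"  # parentheses are regex metacharacters
    # Without re.escape, "(v2)" would be parsed as a capturing group
    # rather than matched literally.
    with pytest.raises(TypeError, match=re.escape(f"({name})")):
        make_widget(name)


test_make_widget_error()
```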
+ with pytest.raises(TypeError, match=msg): klass() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 4ad04c9ad1ecb..80eb3173f74d7 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -3876,7 +3876,7 @@ def test_to_pandas_split_blocks(): pa.array([1, 2, 3, 4, 5]*100, type='f8'), pa.array([1, 2, 3, 4, 5]*100, type='f8'), pa.array([1, 2, 3, 4, 5]*100, type='f8'), - ], ['f{}'.format(i) for i in range(8)]) + ], [f'f{i}' for i in range(8)]) _check_blocks_created(t, 8) _check_to_pandas_memory_unchanged(t, split_blocks=True) @@ -3902,7 +3902,7 @@ def _make_table(): # Slice to force a copy pa.array(np.random.randn(10000)[::2]) for i in range(K) - ], ['f{}'.format(i) for i in range(K)]) + ], [f'f{i}' for i in range(K)]) t = _make_table() _check_to_pandas_memory_unchanged(t, split_blocks=True, self_destruct=True) diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 29db36eddc715..6ef25b82f8e33 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -404,7 +404,7 @@ def test_timestamp(): units = ['ns', 'us', 'ms', 's'] for i, unit in enumerate(units): - dtype = 'datetime64[{}]'.format(unit) + dtype = f'datetime64[{unit}]' arrow_arr = pa.Array.from_pandas(arr.astype(dtype)) expected = pd.Timestamp('2000-01-01 12:34:56') @@ -414,7 +414,7 @@ def test_timestamp(): tz = 'America/New_York' arrow_type = pa.timestamp(unit, tz=tz) - dtype = 'datetime64[{}]'.format(unit) + dtype = f'datetime64[{unit}]' arrow_arr = pa.Array.from_pandas(arr.astype(dtype), type=arrow_type) expected = (pd.Timestamp('2000-01-01 12:34:56') .tz_localize('utc') @@ -478,7 +478,7 @@ def test_duration(): units = ['us', 'ms', 's'] for i, unit in enumerate(units): - dtype = 'timedelta64[{}]'.format(unit) + dtype = f'timedelta64[{unit}]' arrow_arr = pa.array(arr.astype(dtype)) expected = datetime.timedelta(seconds=60*60) assert isinstance(arrow_arr[1].as_py(), datetime.timedelta) diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index b6d36787fbd37..d3f3a94ecfc13 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -290,7 +290,7 @@ def test_schema_to_string_with_metadata(): metadata={"key3": "value3"})], metadata={"lorem": lorem}) - assert my_schema.to_string() == """\ + assert my_schema.to_string() == f"""\ foo: int32 not null -- field metadata -- key1: 'value1' @@ -298,7 +298,7 @@ def test_schema_to_string_with_metadata(): -- field metadata -- key3: 'value3' -- schema metadata -- -lorem: '""" + lorem[:65] + "' + " + str(len(lorem) - 65) +lorem: '{lorem[:65]}' + {len(lorem) - 65}""" # Metadata that exactly fits result = pa.schema([('f0', 'int32')], @@ -309,7 +309,7 @@ def test_schema_to_string_with_metadata(): key: 'valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx\ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'""" - assert my_schema.to_string(truncate_metadata=False) == """\ + assert my_schema.to_string(truncate_metadata=False) == f"""\ foo: int32 not null -- field metadata -- key1: 'value1' @@ -317,14 +317,14 @@ def test_schema_to_string_with_metadata(): -- field metadata -- key3: 'value3' -- schema metadata -- -lorem: '{}'""".format(lorem) +lorem: '{lorem}'""" assert my_schema.to_string(truncate_metadata=False, - show_field_metadata=False) == """\ + show_field_metadata=False) == f"""\ foo: int32 not null bar: string -- schema metadata -- -lorem: '{}'""".format(lorem) +lorem: '{lorem}'""" assert 
my_schema.to_string(truncate_metadata=False, show_schema_metadata=False) == """\ @@ -635,7 +635,7 @@ def test_type_schema_pickling(pickle_module): if isinstance(f, pa.Field): fields.append(f) else: - fields.append(pa.field('_f{}'.format(i), f)) + fields.append(pa.field(f'_f{i}', f)) schema = pa.schema(fields, metadata={b'foo': b'bar'}) roundtripped = pickle_module.loads(pickle_module.dumps(schema)) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 926de46318036..60a713eb717aa 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -558,7 +558,7 @@ def test_time32_units(): assert ty.unit == valid_unit for invalid_unit in ('m', 'us', 'ns'): - error_msg = 'Invalid time unit for time32: {!r}'.format(invalid_unit) + error_msg = f'Invalid time unit for time32: {invalid_unit!r}' with pytest.raises(ValueError, match=error_msg): pa.time32(invalid_unit) @@ -569,7 +569,7 @@ def test_time64_units(): assert ty.unit == valid_unit for invalid_unit in ('m', 's', 'ms'): - error_msg = 'Invalid time unit for time64: {!r}'.format(invalid_unit) + error_msg = f'Invalid time unit for time64: {invalid_unit!r}' with pytest.raises(ValueError, match=error_msg): pa.time64(invalid_unit) diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index 84215d30ef064..0f8de4e0fb072 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -104,7 +104,7 @@ def randdecimal(precision, scale): fractional = random.randint(0, max_fractional_value) return decimal.Decimal( - '{}.{}'.format(whole, str(fractional).rjust(scale, '0')) + f'{whole}.{str(fractional).rjust(scale, "0")}' ) @@ -153,9 +153,10 @@ def _get_use(): def _leak_check(): current_use = _get_use() if current_use - baseline_use > threshold: - raise Exception("Memory leak detected. " - "Departure from baseline {} after {} iterations" - .format(current_use - baseline_use, i)) + raise Exception( + f"Memory leak detected. Departure from baseline " + f"{current_use - baseline_use} after {i} iterations" + ) for i in range(iterations): f() @@ -229,9 +230,9 @@ def disabled_gc(): def _filesystem_uri(path): # URIs on Windows must follow 'file:///C:...' or 'file:/C:...' patterns. if os.name == 'nt': - uri = 'file:///{}'.format(path) + uri = f'file:///{path}' else: - uri = 'file://{}'.format(path) + uri = f'file://{path}' return uri @@ -395,7 +396,7 @@ def _configure_s3_limited_user(s3_server, policy, username, password): tempdir = s3_server['tempdir'] host, port, access_key, secret_key = s3_server['connection'] - address = '{}:{}'.format(host, port) + address = f'{host}:{port}' mcdir = os.path.join(tempdir, 'mc') if os.path.exists(mcdir): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 3caf068a4c9b1..89950e3b9b782 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -221,9 +221,9 @@ cdef class DataType(_Weakrefable): pass def __init__(self): - raise TypeError("Do not call {}'s constructor directly, use public " - "functions like pyarrow.int64, pyarrow.list_, etc. " - "instead.".format(self.__class__.__name__)) + raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use public " + f"functions like pyarrow.int64, pyarrow.list_, etc. 
" + f"instead.") cdef void init(self, const shared_ptr[CDataType]& type) except *: assert type != nullptr @@ -355,7 +355,7 @@ cdef class DataType(_Weakrefable): return type_for_alias, (str(self),) def __repr__(self): - return '{0.__class__.__name__}({0})'.format(self) + return f'{self.__class__.__name__}({self})' def __eq__(self, other): try: @@ -1878,8 +1878,7 @@ cdef class ExtensionType(BaseExtensionType): return NotImplemented def __repr__(self): - fmt = '{0.__class__.__name__}({1})' - return fmt.format(self, repr(self.storage_type)) + return f'{self.__class__.__name__}({repr(self.storage_type)})' def __arrow_ext_serialize__(self): """ @@ -2165,8 +2164,7 @@ cdef class PyExtensionType(ExtensionType): ExtensionType.__init__(self, storage_type, "arrow.py_extension_type") def __reduce__(self): - raise NotImplementedError("Please implement {0}.__reduce__" - .format(type(self).__name__)) + raise NotImplementedError(f"Please implement {type(self).__name__}.__reduce__") def __arrow_ext_serialize__(self): return pickle.dumps(self) @@ -2195,8 +2193,8 @@ cdef class PyExtensionType(ExtensionType): return UnknownExtensionType(storage_type, serialized) if ty.storage_type != storage_type: - raise TypeError("Expected storage type {0} but got {1}" - .format(ty.storage_type, storage_type)) + raise TypeError( + f"Expected storage type {ty.storage_type} but got {storage_type}") return ty # XXX Cython marks extension types as immutable, so cannot expose this @@ -2607,8 +2605,7 @@ cdef class Field(_Weakrefable): return field, (self.name, self.type, self.nullable, self.metadata) def __str__(self): - return 'pyarrow.Field<{0}>'.format( - frombytes(self.field.ToString(), safe=True)) + return f'pyarrow.Field<{frombytes(self.field.ToString(), safe=True)}>' def __repr__(self): return self.__str__() @@ -3263,7 +3260,7 @@ cdef class Schema(_Weakrefable): if isinstance(i, (bytes, str)): field_index = self.get_field_index(i) if field_index < 0: - raise KeyError("Column {} does not exist in schema".format(i)) + raise KeyError(f"Column {i} does not exist in schema") else: return self._field(field_index) elif isinstance(i, int): @@ -3792,7 +3789,7 @@ def unify_schemas(schemas, *, promote_options="default"): vector[shared_ptr[CSchema]] c_schemas for schema in schemas: if not isinstance(schema, Schema): - raise TypeError("Expected Schema, got {}".format(type(schema))) + raise TypeError(f"Expected Schema, got {type(schema)}") c_schemas.push_back(pyarrow_unwrap_schema(schema)) if promote_options == "default": @@ -5528,14 +5525,14 @@ def union(child_fields, mode, type_codes=None): """ if isinstance(mode, int): if mode not in (_UnionMode_SPARSE, _UnionMode_DENSE): - raise ValueError("Invalid union mode {0!r}".format(mode)) + raise ValueError(f"Invalid union mode {mode!r}") else: if mode == 'sparse': mode = _UnionMode_SPARSE elif mode == 'dense': mode = _UnionMode_DENSE else: - raise ValueError("Invalid union mode {0!r}".format(mode)) + raise ValueError(f"Invalid union mode {mode!r}") if mode == _UnionMode_SPARSE: return sparse_union(child_fields, type_codes) @@ -5900,7 +5897,7 @@ def type_for_alias(name): try: alias = _type_aliases[name] except KeyError: - raise ValueError('No type alias for {0}'.format(name)) + raise ValueError(f'No type alias for {name}') if isinstance(alias, DataType): return alias @@ -5915,7 +5912,7 @@ cpdef DataType ensure_type(object ty, bint allow_none=False): elif isinstance(ty, str): return type_for_alias(ty) else: - raise TypeError('DataType expected, got {!r}'.format(type(ty))) + raise 
TypeError(f'DataType expected, got {type(ty)!r}') def schema(fields, metadata=None):
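The `!r`/`!s` conversions and `:`-format specs used throughout these hunks carry over into f-strings unchanged, which is why substitutions such as `{invalid_unit!r}`, `{type(ty)!r}` and `{i:d}` are drop-in. A short equivalence check with throwaway values rather than pyarrow objects:

```python
ty = list
invalid_unit = "m"
i = 3

# Conversion flags and format specs behave identically in both syntaxes.
assert f"{invalid_unit!r}" == "{!r}".format(invalid_unit) == "'m'"
assert f"{type(ty)!r}" == "{!r}".format(type(ty))
assert f"__index_level_{i:d}__" == "__index_level_{:d}__".format(i)
```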