diff --git a/wild_lib/src/elf_writer.rs b/wild_lib/src/elf_writer.rs index 0dff06b7..ab831ffd 100644 --- a/wild_lib/src/elf_writer.rs +++ b/wild_lib/src/elf_writer.rs @@ -679,9 +679,7 @@ impl<'data> ObjectLayout<'data> { for rel in &self.plt_relocations { plt_got_writer.write_ifunc_relocation(rel, &mut relocation_writer)?; } - for (symbol_id, resolution) in - layout.resolutions_in_range(self.start_symbol_id, self.num_symbols) - { + for (symbol_id, resolution) in layout.resolutions_in_range(self.symbol_id_range) { if let Some(res) = resolution { plt_got_writer .process_resolution(res, &mut relocation_writer) @@ -750,8 +748,10 @@ impl<'data> ObjectLayout<'data> { ) -> Result { let mut symbol_writer = SymbolTableWriter::new(start_str_offset, &mut buffers, &self.mem_sizes, sections); - for (sym, sym_state) in self.object.symbols().zip(&self.symbol_states) { - let symbol_id = self.start_symbol_id.add_usize(sym.index().0); + for (local_index, (sym, sym_state)) in + self.object.symbols().zip(&self.symbol_states).enumerate() + { + let symbol_id = self.symbol_id_range.offset_to_id(local_index); if let Some(info) = SymbolCopyInfo::new( &sym, symbol_id, @@ -764,7 +764,7 @@ impl<'data> ObjectLayout<'data> { match &self.sections[section_index.0] { SectionSlot::Loaded(section) => section.output_section_id.unwrap(), SectionSlot::MergeStrings(_) => { - let merged_string_res = &self.merged_string_resolutions[sym.index().0].context( + let merged_string_res = &self.merged_string_resolutions[local_index].context( "Tried to write symbol for merged string without a resolution", )?; merged_string_res.output_section_id @@ -802,10 +802,7 @@ impl<'data> ObjectLayout<'data> { symbol_writer .copy_symbol(&sym, info.name, output_section_id, symbol_value) .with_context(|| { - format!( - "Failed to copy {}", - layout.symbol_debug(self.start_symbol_id.add_usize(sym.index().0)) - ) + format!("Failed to copy {}", layout.symbol_debug(symbol_id)) })?; } } @@ -1027,7 +1024,7 @@ impl<'data> ObjectLayout<'data> { let mut new_resolution = None; match rel.target() { object::RelocationTarget::Symbol(symbol_index) => { - let local_symbol_id = self.start_symbol_id.add_usize(symbol_index.0); + let local_symbol_id = self.symbol_id_range.input_to_id(symbol_index); let symbol_id = layout.symbol_db.definition(local_symbol_id); let file_id = layout.symbol_db.file_id_for_symbol(symbol_id); if symbol_id == SymbolId::undefined() || !layout.is_file_loaded(file_id) { @@ -1079,7 +1076,7 @@ impl<'a> Display for DisplayRelocation<'a> { write!(f, " to ")?; match self.rel.target() { object::RelocationTarget::Symbol(local_symbol_index) => { - let symbol_id = self.object.start_symbol_id.add_usize(local_symbol_index.0); + let symbol_id = self.object.symbol_id_range.input_to_id(local_symbol_index); write!(f, " {}", self.symbol_db.symbol_debug(symbol_id))?; } object::RelocationTarget::Section(section_index) => write!( @@ -1516,9 +1513,9 @@ fn write_dynamic_symbol_definitions( let FileLayout::Object(object) = file_layout else { bail!("Internal error: only objects should define dynamic symbols"); }; - let sym = object.object.symbol_by_index(object::SymbolIndex( - sym_def.symbol_id.offset_from(object.start_symbol_id), - ))?; + let sym = object + .object + .symbol_by_index(sym_def.symbol_id.to_input(object.symbol_id_range))?; let section_index = sym .section_index() .context("Internal error: Symbols should only be defined if they have a section")?; @@ -1915,7 +1912,7 @@ impl<'data> DynamicLayout<'data> { let mut dynsym: &mut [SymtabEntry] = slice_from_all_bytes_mut(buffers.dynsym); for ((symbol_id, resolution), symbol) in layout - .resolutions_in_range(self.start_symbol_id, self.num_symbols) + .resolutions_in_range(self.symbol_id_range) .zip(self.object.dynamic_symbols()) { if let Some(res) = resolution { diff --git a/wild_lib/src/layout.rs b/wild_lib/src/layout.rs index dcb503ea..1c1a0bc5 100644 --- a/wild_lib/src/layout.rs +++ b/wild_lib/src/layout.rs @@ -41,6 +41,7 @@ use crate::symbol::SymbolName; use crate::symbol_db::SymbolDb; use crate::symbol_db::SymbolDebug; use crate::symbol_db::SymbolId; +use crate::symbol_db::SymbolIdRange; use ahash::AHashMap; use anyhow::anyhow; use anyhow::bail; @@ -292,8 +293,7 @@ pub(crate) struct ObjectLayout<'data> { pub(crate) plt_relocations: Vec, /// The memory address of the start of this object's allocation within .eh_frame. pub(crate) eh_frame_start_address: u64, - pub(crate) start_symbol_id: SymbolId, - pub(crate) num_symbols: usize, + pub(crate) symbol_id_range: SymbolIdRange, pub(crate) symbol_states: Vec, pub(crate) merged_string_resolutions: Vec>, } @@ -336,8 +336,7 @@ pub(crate) struct DynamicLayout<'data> { /// The offset in .dynstr at which we'll start writing. pub(crate) dynstr_start_offset: u64, - pub(crate) start_symbol_id: SymbolId, - pub(crate) num_symbols: usize, + pub(crate) symbol_id_range: SymbolIdRange, pub(crate) object: &'data crate::elf::File<'data>, } @@ -367,7 +366,7 @@ trait SymbolRequestHandler<'data>: std::fmt::Display { queue: &mut LocalWorkQueue, ) -> Result { let symbol_id = symbol_request.symbol_id; - let local_index = symbol_request.symbol_id.offset_from(self.start_symbol_id()); + let local_index = symbol_request.symbol_id.to_offset(self.symbol_id_range()); let mut common = self.common_mut(); if local_index >= common.symbol_states.len() { bail!( @@ -456,7 +455,7 @@ trait SymbolRequestHandler<'data>: std::fmt::Display { Ok(()) } - fn start_symbol_id(&self) -> SymbolId; + fn symbol_id_range(&self) -> SymbolIdRange; fn common_mut(&mut self) -> &mut CommonLayoutState; @@ -491,7 +490,11 @@ impl<'data> SymbolRequestHandler<'data> for ObjectLayoutState<'data> { "Tried to load symbol in a file that doesn't hold the definition: {}", resources.symbol_db.symbol_debug(symbol_id) ); - let object_symbol_index = object::SymbolIndex(local_index); + let object_symbol_index = self + .state + .common + .symbol_id_range + .offset_to_input(local_index); let local_symbol = self.object.symbol_by_index(object_symbol_index)?; let symbol_kind = match local_symbol.flags() { object::SymbolFlags::Elf { st_info, .. } => { @@ -524,7 +527,11 @@ impl<'data> SymbolRequestHandler<'data> for ObjectLayoutState<'data> { } fn is_weak(&self, local_index: usize) -> bool { - let object_symbol_index = object::SymbolIndex(local_index); + let object_symbol_index = self + .state + .common + .symbol_id_range + .offset_to_input(local_index); self.object .symbol_by_index(object_symbol_index) .map(|local_symbol| local_symbol.is_weak()) @@ -535,14 +542,14 @@ impl<'data> SymbolRequestHandler<'data> for ObjectLayoutState<'data> { self.state.common.file_id } - fn start_symbol_id(&self) -> SymbolId { - self.state.common.start_symbol_id + fn symbol_id_range(&self) -> SymbolIdRange { + self.state.common.symbol_id_range } } impl<'data> SymbolRequestHandler<'data> for DynamicLayoutState<'data> { - fn start_symbol_id(&self) -> SymbolId { - self.common.start_symbol_id + fn symbol_id_range(&self) -> SymbolIdRange { + self.common.symbol_id_range } fn common_mut(&mut self) -> &mut CommonLayoutState { @@ -563,11 +570,12 @@ impl<'data> SymbolRequestHandler<'data> for DynamicLayoutState<'data> { // TODO: Reading symbol names involves finding the null terminator, which is slightly // expensive. We do it up to three times. Once when we build the symbol DB, now, then when // we write out the dynamic symbol table. Look into just storing the names the first time. + let object_symbol_index = self.common.symbol_id_range.offset_to_input(local_index); let symbol = self .object .dynamic_symbol_table() .context("Missing dynamic symbol table")? - .symbol_by_index(object::SymbolIndex(local_index))?; + .symbol_by_index(object_symbol_index)?; let name = symbol.name_bytes()?; self.common.mem_sizes.dynstr += name.len() as u64 + 1; self.common.mem_sizes.dynsym += crate::elf::SYMTAB_ENTRY_SIZE; @@ -575,8 +583,9 @@ impl<'data> SymbolRequestHandler<'data> for DynamicLayoutState<'data> { } fn is_weak(&self, local_index: usize) -> bool { + let object_symbol_index = self.common.symbol_id_range.offset_to_input(local_index); self.object - .symbol_by_index(object::SymbolIndex(local_index)) + .symbol_by_index(object_symbol_index) .map(|sym| sym.is_local()) .unwrap_or(false) } @@ -606,9 +615,8 @@ impl<'data> SymbolRequestHandler<'data> for InternalLayoutState<'data> { false } - fn start_symbol_id(&self) -> SymbolId { - // Internal-layout always starts from the undefined symbol. - SymbolId::undefined() + fn symbol_id_range(&self) -> SymbolIdRange { + self.common.symbol_id_range } } @@ -636,8 +644,8 @@ impl<'data> SymbolRequestHandler<'data> for EpilogueLayoutState<'data> { false } - fn start_symbol_id(&self) -> SymbolId { - self.common.start_symbol_id + fn symbol_id_range(&self) -> SymbolIdRange { + self.common.symbol_id_range } } @@ -656,22 +664,21 @@ struct CommonLayoutState { /// tracks whether we've asked the other file to allocate a GOT entry. symbol_states: Vec, - start_symbol_id: SymbolId, + symbol_id_range: SymbolIdRange, } impl CommonLayoutState { fn new( file_id: FileId, - num_symbols: usize, output_sections: &OutputSections, - start_symbol_id: SymbolId, + symbol_id_range: SymbolIdRange, ) -> Self { Self { file_id, mem_sizes: OutputSectionPartMap::with_size(output_sections.len()), sections_with_content: OutputSectionMap::with_size(output_sections.len()), - symbol_states: vec![TargetResolutionKind::None; num_symbols], - start_symbol_id, + symbol_states: vec![TargetResolutionKind::None; symbol_id_range.len()], + symbol_id_range, } } @@ -709,7 +716,7 @@ impl CommonLayoutState { symbol_states: &self.symbol_states, symbol_db, plt_relocations: Default::default(), - start_symbol_id: self.start_symbol_id, + symbol_id_range: self.symbol_id_range, } } } @@ -898,13 +905,12 @@ impl<'data> Layout<'data> { pub(crate) fn resolutions_in_range( &self, - start: SymbolId, - num_symbols: usize, + range: SymbolIdRange, ) -> impl Iterator)> { - self.symbol_resolutions.resolutions[start.as_usize()..start.as_usize() + num_symbols] + self.symbol_resolutions.resolutions[range.as_usize()] .iter() .enumerate() - .map(move |(i, res)| (start.add_usize(i), res.as_ref())) + .map(move |(i, res)| (range.offset_to_id(i), res.as_ref())) } pub(crate) fn entry_symbol_address(&self) -> Result { @@ -1780,7 +1786,7 @@ impl RelocationLayoutAction { let args = symbol_db.args; match rel.target() { object::RelocationTarget::Symbol(local_sym_index) => { - let symbol_id = state.common.start_symbol_id.add_usize(local_sym_index.0); + let symbol_id = state.common.symbol_id_range.input_to_id(local_sym_index); return Ok(Some(Self::for_symbol( rel, rel_offset, section, symbol_db, symbol_id, args, )?)); @@ -1880,7 +1886,7 @@ impl RelocationLayoutAction { ) { match self.kind { RelocationLayoutActionKind::LoadSymbol(symbol_id, resolution_kind) => { - let local_sym_index = symbol_id.offset_from(state.common.start_symbol_id); + let local_sym_index = symbol_id.to_offset(state.common.symbol_id_range); if state.common.symbol_states[local_sym_index] < resolution_kind { let destination = queue.send_symbol_request(symbol_id, resolution_kind, resources); @@ -1938,9 +1944,8 @@ impl<'data> InternalLayoutState<'data> { let mut layout = Self { common: CommonLayoutState::new( INTERNAL_FILE_ID, - input_state.symbol_definitions.len(), output_sections, - SymbolId::undefined(), + SymbolIdRange::internal(input_state.symbol_definitions.len()), ), internal_symbols: InternalSymbols { symbol_definitions: input_state.symbol_definitions.to_owned(), @@ -2279,9 +2284,11 @@ impl<'data> EpilogueLayoutState<'data> { EpilogueLayoutState { common: CommonLayoutState::new( input_state.file_id, - input_state.symbol_definitions.len(), output_sections, - input_state.start_symbol_id, + SymbolIdRange::epilogue( + input_state.start_symbol_id, + input_state.symbol_definitions.len(), + ), ), internal_symbols: InternalSymbols { symbol_definitions: input_state.symbol_definitions, @@ -2398,9 +2405,8 @@ fn new_object_layout_state<'data>( ) -> FileLayoutState<'data> { let common = CommonLayoutState::new( input_state.file_id, - input_state.num_symbols, output_sections, - input_state.start_symbol_id, + input_state.symbol_id_range, ); if let Some(non_dynamic) = input_state.non_dynamic { FileLayoutState::Object(Box::new(ObjectLayoutState { @@ -2452,7 +2458,7 @@ impl<'data> ObjectLayoutState<'data> { if let Some(eh_frame_section) = eh_frame_section { process_eh_frame_data( self.object, - self.start_symbol_id(), + self.symbol_id_range(), &mut self.section_frame_data, &mut self.state, eh_frame_section, @@ -2555,7 +2561,7 @@ impl<'data> ObjectLayoutState<'data> { .symbols() .zip(&mut self.state.common.symbol_states) { - let symbol_id = self.state.common.start_symbol_id.add_usize(sym.index().0); + let symbol_id = self.state.common.symbol_id_range.input_to_id(sym.index()); if let Some(info) = SymbolCopyInfo::new(&sym, symbol_id, symbol_db, *sym_state, &self.state.sections) { @@ -2588,7 +2594,7 @@ impl<'data> ObjectLayoutState<'data> { output_sections: &OutputSections, merged_string_start_addresses: &MergedStringStartAddresses, ) -> Result> { - let start_symbol_id = self.start_symbol_id(); + let symbol_id_range = self.symbol_id_range(); let mut sections = self.state.sections; let mut emitter = self @@ -2639,7 +2645,7 @@ impl<'data> ObjectLayoutState<'data> { if *symbol_state == TargetResolutionKind::None { continue; } - let symbol_id = start_symbol_id.add_usize(local_symbol.index().0); + let symbol_id = symbol_id_range.input_to_id(local_symbol.index()); if !symbol_db.is_definition(symbol_id) { continue; } @@ -2655,7 +2661,7 @@ impl<'data> ObjectLayoutState<'data> { ResolutionValue::Address(merged_string_start_addresses .try_resolve_local( &self.state.merged_string_resolutions, - local_symbol.index(), + symbol_id_range.input_to_offset(local_symbol.index()), ) .ok_or_else(|| { anyhow!( @@ -2699,8 +2705,7 @@ impl<'data> ObjectLayoutState<'data> { strtab_offset_start, plt_relocations, eh_frame_start_address: memory_offsets.eh_frame, - start_symbol_id, - num_symbols: self.state.common.symbol_states.len(), + symbol_id_range, symbol_states: self.state.common.symbol_states, merged_string_resolutions: self.state.merged_string_resolutions, }) @@ -2720,7 +2725,7 @@ impl<'data> ObjectLayoutState<'data> { { continue; } - let symbol_id = self.start_symbol_id().add_usize(sym.index().0); + let symbol_id = self.symbol_id_range().input_to_id(sym.index()); self.handle_symbol_request( SymbolRequest { symbol_id, @@ -2811,9 +2816,9 @@ impl MergedStringStartAddresses { fn try_resolve_local( &self, merged_string_resolutions: &[Option], - local_symbol_index: object::SymbolIndex, + local_symbol_index: usize, ) -> Option { - merged_string_resolutions[local_symbol_index.0].map(|res| self.resolve(res)) + merged_string_resolutions[local_symbol_index].map(|res| self.resolve(res)) } pub(crate) fn resolve(&self, res: resolution::MergedStringResolution) -> u64 { @@ -2829,7 +2834,7 @@ fn should_copy_symbol_named(name: &[u8]) -> bool { fn process_eh_frame_data<'data>( object: &crate::elf::File<'data>, - file_start_symbol: SymbolId, + file_symbol_id_range: SymbolIdRange, section_frame_data: &mut Vec, state: &mut ObjectLayoutMutableState<'data>, eh_frame_section: elf::Section<'data, '_>, @@ -2877,7 +2882,7 @@ fn process_eh_frame_data<'data>( action.apply(resources, state, queue); } if let object::RelocationTarget::Symbol(local_sym_index) = rel.target() { - let local_symbol_id = file_start_symbol.add_usize(local_sym_index.0); + let local_symbol_id = file_symbol_id_range.input_to_id(local_sym_index); let definition = resources.symbol_db.definition(local_symbol_id); referenced_symbols.push(definition); } else { @@ -2983,7 +2988,7 @@ struct GlobalAddressEmitter<'state> { symbol_states: &'state [TargetResolutionKind], symbol_db: &'state SymbolDb<'state>, plt_relocations: Vec, - start_symbol_id: SymbolId, + symbol_id_range: SymbolIdRange, } impl<'state> GlobalAddressEmitter<'state> { @@ -2994,14 +2999,16 @@ impl<'state> GlobalAddressEmitter<'state> { resolutions_out: &mut [Option], ) -> Result { debug_assert_bail!( - symbol_id >= self.start_symbol_id - && symbol_id.offset_from(self.start_symbol_id) < resolutions_out.len(), + symbol_id >= self.symbol_id_range.start() + && symbol_id.to_offset(self.symbol_id_range) < resolutions_out.len(), "Tried to emit resolution for {} which is outside {}..{}", self.symbol_db.symbol_debug(symbol_id), - self.start_symbol_id, - self.start_symbol_id.add_usize(resolutions_out.len()) + self.symbol_id_range.start(), + self.symbol_id_range + .start() + .add_usize(resolutions_out.len()) ); - let local_symbol_index = symbol_id.offset_from(self.start_symbol_id); + let local_symbol_index = symbol_id.to_offset(self.symbol_id_range); let resolution = self.create_resolution(self.symbol_states[local_symbol_index], value)?; resolutions_out[local_symbol_index] = Some(resolution); Ok(()) @@ -3196,8 +3203,7 @@ impl<'data> DynamicLayoutState<'data> { lib_name: self.lib_name, dynstr_start_offset, object: self.object, - start_symbol_id: self.common.start_symbol_id, - num_symbols: self.common.symbol_states.len(), + symbol_id_range: self.common.symbol_id_range, }) } } @@ -3243,7 +3249,7 @@ fn print_symbol_info(symbol_db: &SymbolDb, name: &str) { match &symbol_db.inputs[file_id.as_usize()] { crate::parsing::InputObject::Internal(_) => println!(" "), crate::parsing::InputObject::Object(o) => { - let local_index = symbol_id.offset_from(o.start_symbol_id); + let local_index = symbol_id.to_offset(o.symbol_id_range); match o.object.symbol_by_index(object::SymbolIndex(local_index)) { Ok(sym) => { println!( diff --git a/wild_lib/src/parsing.rs b/wild_lib/src/parsing.rs index c05a44f7..d686baa6 100644 --- a/wild_lib/src/parsing.rs +++ b/wild_lib/src/parsing.rs @@ -12,6 +12,7 @@ use crate::output_section_id::OutputSectionId; use crate::sharding::ShardKey; use crate::symbol::SymbolName; use crate::symbol_db::SymbolId; +use crate::symbol_db::SymbolIdRange; use anyhow::Context; use object::Object as _; use object::ObjectSymbol; @@ -44,7 +45,7 @@ pub(crate) fn parse_input_files<'data>( assert_eq!(next_symbol_id, SymbolId::undefined()); } InputObject::Object(o) => { - o.start_symbol_id = next_symbol_id; + o.symbol_id_range.set_start(next_symbol_id); } InputObject::Epilogue(o) => { o.start_symbol_id = next_symbol_id; @@ -68,8 +69,7 @@ pub(crate) struct InternalInputObject { pub(crate) struct RegularInputObject<'data> { pub(crate) input: InputRef<'data>, pub(crate) object: Box>, - pub(crate) num_symbols: usize, - pub(crate) start_symbol_id: SymbolId, + pub(crate) symbol_id_range: SymbolIdRange, pub(crate) file_id: FileId, pub(crate) is_dynamic: bool, modifiers: Modifiers, @@ -108,12 +108,29 @@ impl<'data> RegularInputObject<'data> { } else { object.symbols().count() }; + // object.symbols() may not return the null symbol. + let start_symbol_index = if is_dynamic { + object + .dynamic_symbols() + .next() + .map(|s| s.index()) + .unwrap_or(object::SymbolIndex(0)) + } else { + object + .symbols() + .next() + .map(|s| s.index()) + .unwrap_or(object::SymbolIndex(0)) + }; Ok(Self { input: input.input, object, - num_symbols, - // Filled in once we've parsed all objects. - start_symbol_id: SymbolId::undefined(), + symbol_id_range: SymbolIdRange::input( + // Filled in once we've parsed all objects. + SymbolId::undefined(), + start_symbol_index, + num_symbols, + ), file_id, is_dynamic, modifiers: input.modifiers, @@ -135,17 +152,14 @@ impl<'data> RegularInputObject<'data> { &self, symbol_id: crate::symbol_db::SymbolId, ) -> Result> { + let index = symbol_id.to_input(self.symbol_id_range); let symbol = if self.is_dynamic { self.object .dynamic_symbol_table() .context("Missing dynamic symbol table")? - .symbol_by_index(object::SymbolIndex( - symbol_id.offset_from(self.start_symbol_id), - ))? + .symbol_by_index(index)? } else { - self.object.symbol_by_index(object::SymbolIndex( - symbol_id.offset_from(self.start_symbol_id), - ))? + self.object.symbol_by_index(index)? }; Ok(SymbolName::new(symbol.name_bytes()?)) } @@ -166,7 +180,7 @@ impl<'data> InputObject<'data> { pub(crate) fn num_symbols(&self) -> usize { match self { InputObject::Internal(o) => o.symbol_definitions.len(), - InputObject::Object(o) => o.num_symbols, + InputObject::Object(o) => o.symbol_id_range.len(), InputObject::Epilogue(_) => { // Initially, we report 0 symbols because we don't know what symbols we'll define // until after archives have been processed. We're the last input file, so we can @@ -184,11 +198,11 @@ impl<'data> InputObject<'data> { } } - pub(crate) fn start_symbol_id(&self) -> SymbolId { + pub(crate) fn symbol_id_range(&self) -> SymbolIdRange { match self { - InputObject::Internal(_) => SymbolId::undefined(), - InputObject::Object(o) => o.start_symbol_id, - InputObject::Epilogue(o) => o.start_symbol_id, + InputObject::Internal(o) => SymbolIdRange::internal(o.symbol_definitions.len()), + InputObject::Object(o) => o.symbol_id_range, + InputObject::Epilogue(o) => SymbolIdRange::epilogue(o.start_symbol_id, 0), } } } @@ -216,7 +230,7 @@ impl InternalInputObject { } pub(crate) fn symbol_name(&self, symbol_id: SymbolId) -> SymbolName<'static> { - let def = &self.symbol_definitions[symbol_id.offset_from(SymbolId::undefined())]; + let def = &self.symbol_definitions[symbol_id.as_usize()]; let name = match def { InternalSymDefInfo::Undefined => Some(""), InternalSymDefInfo::SectionStart(section_id) => { diff --git a/wild_lib/src/resolution.rs b/wild_lib/src/resolution.rs index 48a4d5ea..1dd071b4 100644 --- a/wild_lib/src/resolution.rs +++ b/wild_lib/src/resolution.rs @@ -24,10 +24,10 @@ use crate::parsing::InternalInputObject; use crate::parsing::InternalSymDefInfo; use crate::parsing::RegularInputObject; use crate::sharding::split_slice; -use crate::sharding::ShardKey; use crate::symbol::SymbolName; use crate::symbol_db::SymbolDb; use crate::symbol_db::SymbolId; +use crate::symbol_db::SymbolIdRange; use ahash::AHashMap; use anyhow::bail; use anyhow::Context; @@ -276,8 +276,7 @@ pub(crate) struct ResolvedObject<'data> { pub(crate) input: InputRef<'data>, pub(crate) object: &'data File<'data>, pub(crate) file_id: FileId, - pub(crate) num_symbols: usize, - pub(crate) start_symbol_id: SymbolId, + pub(crate) symbol_id_range: SymbolIdRange, pub(crate) non_dynamic: Option>, } @@ -382,7 +381,8 @@ fn merge_strings<'data>( // This reference belongs to a subsequent string. break; } - non_dynamic.merged_string_resolutions[merge_ref.symbol_index.0] = + let local_index = obj.symbol_id_range.input_to_offset(merge_ref.symbol_index); + non_dynamic.merged_string_resolutions[local_index] = Some(MergedStringResolution { output_section_id, offset: output_offset + offset_into_string, @@ -528,7 +528,7 @@ fn allocate_start_stop_symbol_ids<'data>( epilogue.symbol_definitions.push(def_info); for (file_id, sym_index) in refs { if let ResolvedFile::Object(obj) = &mut objects[file_id.as_usize()] { - let local_symbol_id = obj.start_symbol_id.add_usize(sym_index.0); + let local_symbol_id = obj.symbol_id_range.input_to_id(sym_index); symbol_db.replace_definition(local_symbol_id, symbol_id); } } @@ -595,7 +595,7 @@ impl<'data> ResolvedObject<'data> { } } non_dynamic = Some(NonDynamicResolved { - merged_string_resolutions: vec![None; obj.num_symbols], + merged_string_resolutions: vec![None; obj.symbol_id_range.len()], sections, merge_strings_sections, custom_sections, @@ -606,8 +606,7 @@ impl<'data> ResolvedObject<'data> { input: obj.input, object: &obj.object, file_id: obj.file_id, - num_symbols: obj.num_symbols, - start_symbol_id: obj.start_symbol_id, + symbol_id_range: obj.symbol_id_range, non_dynamic, }) } @@ -618,10 +617,18 @@ fn resolve_sections<'data>( custom_sections: &mut Vec<(object::SectionIndex, SectionDetails<'data>)>, args: &Args, ) -> Result>> { - let sections = obj - .object - .sections() - .map(|input_section| { + // object.sections() may not return the null section, but we require + // a slot for it so that we can use ELF section indexes to access slots. + let null = if obj.object.sections().next().map(|section| section.index()) + != Some(object::SectionIndex(0)) + { + Some(Ok(SectionSlot::Discard)) + } else { + None + }; + let sections = null + .into_iter() + .chain(obj.object.sections().map(|input_section| { if let Some(unloaded) = UnloadedSection::from_section(&input_section, args)? { if unloaded.is_string_merge { if let TemporaryOutputSectionId::Custom(_custom_section_id) = @@ -648,7 +655,7 @@ fn resolve_sections<'data>( } else { Ok(SectionSlot::Discard) } - }) + })) .collect::>>()?; Ok(sections) } @@ -850,7 +857,7 @@ impl<'data> SymbolDb<'data> { fn symbol_strength(&self, symbol_id: SymbolId, resolved: &[ResolvedFile]) -> SymbolStrength { let file_id = self.file_id_for_symbol(symbol_id); if let ResolvedFile::Object(obj) = &resolved[file_id.as_usize()] { - let local_index = object::SymbolIndex(symbol_id.offset_from(obj.start_symbol_id)); + let local_index = symbol_id.to_input(obj.symbol_id_range); let Ok(obj_symbol) = obj.object.symbol_by_index(local_index) else { // Errors from this function should have been reported elsewhere. return SymbolStrength::Undefined; diff --git a/wild_lib/src/symbol.rs b/wild_lib/src/symbol.rs index 3e93bb2d..24fd2ea0 100644 --- a/wild_lib/src/symbol.rs +++ b/wild_lib/src/symbol.rs @@ -45,7 +45,6 @@ impl<'data, 'file> Display for SymDebug<'data, 'file> { }; let kind = if sym.is_definition() { match sym.kind() { - object::SymbolKind::Null => "Null", object::SymbolKind::Text => "Text", object::SymbolKind::Data => "Data", object::SymbolKind::Section => "Section", diff --git a/wild_lib/src/symbol_db.rs b/wild_lib/src/symbol_db.rs index 13d03c52..bf5e8c84 100644 --- a/wild_lib/src/symbol_db.rs +++ b/wild_lib/src/symbol_db.rs @@ -69,6 +69,95 @@ pub(crate) struct PendingSymbol<'data> { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct SymbolId(u32); +/// A range of symbol IDs that are defined by the same input file. +/// +/// This exists to translate between 3 different ways of identifying a symbol: +/// - A `SymbolId` is a globally unique identifier for a symbol. +/// - An `object::SymbolIndex` is an index into the ELF symbol table of the input file. +/// - A `usize` offset is an index into our own data structures for the file. +#[derive(Clone, Copy, Debug)] +pub(crate) struct SymbolIdRange { + start_symbol_id: SymbolId, + start_symbol_index: object::SymbolIndex, + num_symbols: usize, +} + +impl SymbolIdRange { + pub(crate) fn internal(num_symbols: usize) -> SymbolIdRange { + SymbolIdRange { + start_symbol_id: SymbolId::undefined(), + start_symbol_index: object::SymbolIndex(0), + num_symbols, + } + } + + pub(crate) fn epilogue(start_symbol_id: SymbolId, num_symbols: usize) -> SymbolIdRange { + SymbolIdRange { + start_symbol_id, + start_symbol_index: object::SymbolIndex(0), + num_symbols, + } + } + + pub(crate) fn input( + start_symbol_id: SymbolId, + start_symbol_index: object::SymbolIndex, + num_symbols: usize, + ) -> SymbolIdRange { + SymbolIdRange { + start_symbol_id, + start_symbol_index, + num_symbols, + } + } + + pub(crate) fn len(&self) -> usize { + self.num_symbols + } + + pub(crate) fn start(&self) -> SymbolId { + self.start_symbol_id + } + + pub(crate) fn set_start(&mut self, start: SymbolId) { + self.start_symbol_id = start; + } + + pub(crate) fn as_usize(&self) -> std::ops::Range { + self.start_symbol_id.as_usize()..self.start_symbol_id.as_usize() + self.num_symbols + } + + pub(crate) fn offset_to_id(&self, offset: usize) -> SymbolId { + debug_assert!(offset < self.num_symbols); + self.start_symbol_id.add_usize(offset) + } + + pub(crate) fn id_to_offset(&self, symbol_id: SymbolId) -> usize { + let offset = (symbol_id.0 - self.start_symbol_id.0) as usize; + debug_assert!(offset < self.num_symbols); + offset + } + + pub(crate) fn offset_to_input(&self, offset: usize) -> object::SymbolIndex { + debug_assert!(offset < self.num_symbols); + object::SymbolIndex(self.start_symbol_index.0 + offset) + } + + pub(crate) fn input_to_offset(&self, symbol_index: object::SymbolIndex) -> usize { + let offset = symbol_index.0 - self.start_symbol_index.0; + debug_assert!(offset < self.num_symbols); + offset + } + + pub(crate) fn input_to_id(&self, symbol_index: object::SymbolIndex) -> SymbolId { + self.offset_to_id(self.input_to_offset(symbol_index)) + } + + pub(crate) fn id_to_input(&self, symbol_id: SymbolId) -> object::SymbolIndex { + self.offset_to_input(self.id_to_offset(symbol_id)) + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub(crate) struct ObjectSymDefInfo { /// The index of the symbol within the symbol table of the object that defined it. @@ -346,7 +435,7 @@ impl<'db, 'data> std::fmt::Display for SymbolDebug<'db, 'data> { let definition = self.db.definition(symbol_id); let file_id = self.db.file_id_for_symbol(symbol_id); let file = &self.db.inputs[file_id.as_usize()]; - let local_index = symbol_id.offset_from(file.start_symbol_id()); + let local_index = symbol_id.to_offset(file.symbol_id_range()); if definition.is_undefined() { write!(f, "undefined ")?; } @@ -356,7 +445,7 @@ impl<'db, 'data> std::fmt::Display for SymbolDebug<'db, 'data> { InputObject::Object(o) => { if let Some(section_name) = o .object - .symbol_by_index(object::SymbolIndex(local_index)) + .symbol_by_index(symbol_id.to_input(file.symbol_id_range())) .ok() .and_then(|symbol| symbol.section_index()) .and_then(|section_index| o.object.section_by_index(section_index).ok()) @@ -404,10 +493,18 @@ impl SymbolId { SymbolId(value) } - pub(crate) fn offset_from(&self, base: SymbolId) -> usize { + pub(crate) fn offset_from(self, base: SymbolId) -> usize { (self.0 - base.0) as usize } + pub(crate) fn to_offset(self, range: SymbolIdRange) -> usize { + range.id_to_offset(self) + } + + pub(crate) fn to_input(self, range: SymbolIdRange) -> object::SymbolIndex { + range.id_to_input(self) + } + pub(crate) fn is_undefined(&self) -> bool { self.0 == 0 }