Skip to content

Commit

Permalink
Merge pull request #53 from psobot/psobot/13.1
Browse files Browse the repository at this point in the history
Add Keynote 13.1 support.
  • Loading branch information
psobot authored Jun 21, 2023
2 parents 7114e3b + d46e710 commit 4b45abe
Show file tree
Hide file tree
Showing 37 changed files with 701 additions and 1,035 deletions.
4 changes: 2 additions & 2 deletions dumper/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

.PHONY=clean all

LLDB_PYTHON_PATH := ${shell lldb --python-path}
LLDB_PYTHON := ${shell lldb --python-path | cut -f 8 -d /}
LLDB_PYTHON_PATH := /opt/homebrew/opt/llvm//libexec/python3.11/site-packages/
LLDB_PYTHON := python3.11
IDENTITY := $(shell security find-identity -v -p codesigning | head -n 1 | python -c 'import sys; print(sys.stdin.read().split("\"")[1])')

all: mapping.py proto
Expand Down
7 changes: 5 additions & 2 deletions dumper/extract_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
# let's break in the CloudKit code and early exit the function before it can raise an exception:
target.BreakpointCreateByName("[CKContainer containerWithIdentifier:]")
# In later Keynote versions, 'containerWithIdentifier' isn't called directly, but we can break on similar methods:
# Note: this __lldb_unnamed_symbol index was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByName("___lldb_unnamed_symbol2482", "CloudKit")
# Note: this __lldb_unnamed_symbol hack was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByRegex("___lldb_unnamed_symbol[0-9]+", "CloudKit")

process = target.LaunchSimple(None, None, os.getcwd())

Expand All @@ -40,6 +40,9 @@
process.Continue()
else:
break
elif thread.GetStopReason() == lldb.eStopReasonException:
sys.stderr.write(repr(thread) + "\n")
raise NotImplementedError(f"LLDB caught exception, {__file__} needs to be updated to handle.")
if process.GetState() == lldb.eStateStopped:
if thread:
frame = thread.GetFrameAtIndex(0)
Expand Down
167 changes: 153 additions & 14 deletions dumper/protodump.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,150 @@
Inspired by Sean Patrick O'Brien (@obriensp)'s 2013 "proto-dump": https://github.com/obriensp/proto-dump
"""

import sys
from pathlib import Path
from tqdm import tqdm
from typing import List
from collections import defaultdict

from google.protobuf.internal.decoder import _DecodeVarint, SkipField
from google.protobuf import descriptor_pb2
from google.protobuf.descriptor_pool import DescriptorPool
from google.protobuf.message import DecodeError
from google.protobuf.internal import api_implementation


# Maps FieldDescriptorProto.type numbers for scalar fields to their .proto
# keyword. Message (11) and enum (14) fields are rendered via `type_name`.
PROTO_TYPES = {
    1: 'double',
    2: 'float',
    3: 'int64',
    4: 'uint64',
    5: 'int32',
    6: 'fixed64',
    7: 'fixed32',
    8: 'bool',
    9: 'string',
    12: 'bytes',
    13: 'uint32',
    15: 'sfixed32',
    16: 'sfixed64',
    17: 'sint32',
    18: 'sint64',
}


def to_proto_file(fds: "descriptor_pb2.FileDescriptorSet") -> str:
    """Render a single-file descriptor set as proto2 source text.

    Args:
        fds: A ``FileDescriptorSet`` (or any object exposing the same
            attributes) that contains exactly one file.

    Returns:
        The ``.proto`` source: syntax line, imports, package, then top-level
        enums, messages (with nested enums/messages, fields, oneofs and
        extension ranges) and top-level ``extend`` blocks, joined by newlines.

    Raises:
        NotImplementedError: If ``fds`` does not hold exactly one file, or a
            field uses a label or type this function does not know how to
            render.
    """
    if len(fds.file) != 1:
        raise NotImplementedError("Only one file per fds.")
    f = fds.file[0]
    lines = [
        "syntax = \"proto2\";",
        "",
    ]

    for dependency in f.dependency:
        lines.append(f'import "{dependency}";')

    # An empty package would otherwise be written as the invalid "package ;".
    if f.package:
        lines.append(f'package {f.package};')
    lines.append("")

    # Largest field number the .proto syntax accepts (2**29 - 1).
    max_field_number = 536870911

    def generate_enum_lines(f, lines: List[str], indent: int = 0):
        """Append every enum declared directly on `f` to `lines`."""
        prefix = " " * indent
        for enum in f.enum_type:
            lines.append(prefix + f"enum {enum.name} " + '{')
            for value in enum.value:
                lines.append(prefix + f" {value.name} = {value.number};")
            lines.append(prefix + '}')

    def format_default(field) -> str:
        """Return `field.default_value` as a literal valid in .proto source."""
        if field.type == 9:  # string: must be a quoted, escaped literal
            escaped = (
                field.default_value
                .replace("\\", "\\\\")
                .replace('"', '\\"')
                .replace("\n", "\\n")
            )
            return f'"{escaped}"'
        if field.type == 12:  # bytes: descriptor already stores it C-escaped
            return f'"{field.default_value}"'
        return field.default_value

    def generate_field_line(field, in_oneof: bool = False) -> str:
        """Return the one-line declaration for `field`, indented one level."""
        line = []
        if field.label == 1:
            # oneof members may not carry a label.
            if not in_oneof:
                line.append("optional")
        elif field.label == 2:
            line.append("required")
        elif field.label == 3:
            line.append("repeated")
        else:
            raise NotImplementedError("Unknown field label type!")

        if field.type in PROTO_TYPES:
            line.append(PROTO_TYPES[field.type])
        elif field.type in (11, 14):  # message or enum: referenced by name
            line.append(field.type_name)
        else:
            raise NotImplementedError(f"Unknown field type {field.type}!")

        line.append(field.name)
        line.append("=")
        line.append(str(field.number))
        options = []
        if field.default_value:
            options.append(f"default = {format_default(field)}")
        if field.options.deprecated:
            options.append("deprecated = true")
        if field.options.packed:
            options.append("packed = true")
        # TODO: Protobuf supports other options in square brackets!
        # Add support for them here to make this feature-complete.
        if options:
            line.append(f"[{', '.join(options)}]")
        return f" {' '.join(line)};"

    def generate_extension_lines(message, lines: List[str], indent: int = 0):
        """Append one `extend` block per extendee declared on `message`."""
        prefix = " " * indent
        extensions_grouped_by_extendee = defaultdict(list)
        for extension in message.extension:
            extensions_grouped_by_extendee[extension.extendee].append(extension)
        for extendee, extensions in extensions_grouped_by_extendee.items():
            lines.append(prefix + f"extend {extendee} {{")
            for extension in extensions:
                lines.append(prefix + generate_field_line(extension))
            lines.append(prefix + "}")

    def generate_message_lines(f, lines: List[str], indent: int = 0):
        """Append every message declared directly on `f`, recursing into nesting."""
        prefix = " " * indent
        next_prefix = " " * (indent + 1)

        # A file exposes `message_type`; a message exposes `nested_type`.
        submessages = f.message_type if hasattr(f, 'message_type') else f.nested_type

        for message in submessages:
            lines.append(prefix + f"message {message.name} " + '{')

            generate_enum_lines(message, lines, indent + 1)
            generate_message_lines(message, lines, indent + 1)

            # Plain fields first...
            for field in message.field:
                if not field.HasField("oneof_index"):
                    lines.append(prefix + generate_field_line(field))

            # ...then the oneofs:
            for oneof_index, oneof in enumerate(message.oneof_decl):
                lines.append(next_prefix + f"oneof {oneof.name} {{")
                for field in message.field:
                    if field.HasField("oneof_index") and field.oneof_index == oneof_index:
                        lines.append(next_prefix + generate_field_line(field, in_oneof=True))
                lines.append(next_prefix + "}")

            if message.extension_range:
                # The descriptor's `end` is exclusive while the .proto syntax
                # is inclusive, hence `end - 1`; the clamp keeps "to max"
                # style ranges within the largest legal field number.
                ranges = ", ".join(
                    f"{extension_range.start} to "
                    f"{min(extension_range.end - 1, max_field_number)}"
                    for extension_range in message.extension_range
                )
                lines.append(next_prefix + f"extensions {ranges};")

            generate_extension_lines(message, lines, indent + 1)
            lines.append(prefix + '}')
            lines.append('')

    generate_enum_lines(f, lines)
    generate_message_lines(f, lines)
    generate_extension_lines(f, lines)

    return "\n".join(lines)


class ProtoFile(object):
Expand All @@ -39,19 +175,24 @@ def attempt_to_load(self):
try:
return self.pool.Add(self.file_descriptor_proto)
except Exception as e:
if "duplicate file name" in str(e):
return self.pool.FindFileByName(e.args[0].split("duplicate file name")[1].strip())
return None

@property
def descriptor(self):
return self.attempt_to_load()

def __repr__(self):
return "<%s: path=\"%s\">" % (self.__class__.__name__, self.path)
return '<%s: path="%s">' % (self.__class__.__name__, self.path)

@property
def source(self):
    """Return this file's definition as .proto source text, or None.

    Returns None when `descriptor` is falsy (the file could not be loaded).
    Otherwise the descriptor's serialized form is re-parsed into a
    one-file `FileDescriptorSet` and rendered with `to_proto_file`.
    """
    # Read the property once: each access re-runs `attempt_to_load`.
    descriptor = self.descriptor
    if not descriptor:
        return None
    fds = descriptor_pb2.FileDescriptorSet()
    fds.file.append(descriptor_pb2.FileDescriptorProto())
    fds.file[0].ParseFromString(descriptor.serialized_pb)
    return to_proto_file(fds)


Expand All @@ -77,19 +218,19 @@ def read_until_null_tag(data):


def extract_proto_from_file(filename, descriptor_pool):
with open(filename, 'rb') as f:
with open(filename, "rb") as f:
data = f.read()
offset = 0

PROTO_MARKER = b'.proto'
PROTO_MARKER = b".proto"

while True:
# Look for ".proto"
suffix_position = data.find(PROTO_MARKER, offset)
if suffix_position == -1:
break

marker_start = data.rfind(b'\x0A', offset, suffix_position)
marker_start = data.rfind(b"\x0A", offset, suffix_position)
if marker_start == -1:
# Doesn't look like a proto descriptor
offset = suffix_position + len(PROTO_MARKER)
Expand Down Expand Up @@ -163,16 +304,9 @@ def main():
parser.add_argument("output_path", help="Output directory to dump .protoc files to.")

args = parser.parse_args()

if api_implementation.Type() != "cpp":
raise NotImplementedError(
"This script requires the Protobuf installation to use the C++ implementation. Please"
" reinstall Protobuf with C++ support."
)

GLOBAL_DESCRIPTOR_POOL = DescriptorPool()

all_filenames = [str(path) for path in Path(args.input_path).rglob('*') if not path.is_dir()]
all_filenames = [str(path) for path in Path(args.input_path).rglob("*") if not path.is_dir()]

print(
f"Scanning {len(all_filenames):,} files under {args.input_path} for protobuf definitions..."
Expand All @@ -190,11 +324,16 @@ def main():
if not found.attempt_to_load():
missing_deps.update(find_missing_dependencies(proto_files_found, found.path))

for found in proto_files_found:
if not found.attempt_to_load():
missing_deps.add(found)

if missing_deps:
print(
f"Unable to print out all Protobuf definitions; {len(missing_deps):,} proto files could"
f" not be found:\n{missing_deps}"
)
sys.exit(1)
else:
for proto_file in tqdm(proto_files_found):
Path(args.output_path).mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion keynote_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__major_version__ = 1
__patch_version__ = 0
__supported_keynote_version__ = keynote_parser.macos_app_version.MacOSAppVersion(
"12.2.1", "7035.0.161", "1A165"
"13.1", "7037.0.101", "1A98"
)
__version_tuple__ = (
__major_version__,
Expand Down
Loading

0 comments on commit 4b45abe

Please sign in to comment.