Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing of OPAMFETCH (support quotes / proper POSIX shell syntax) #5492

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions master_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ users)
## Clean

## Env
* Fix parsing of `OPAMFETCH` (support quotes / proper POSIX shell syntax) [#5492 @kit-ty-kate - fix #5490]

## Opamfile

Expand Down Expand Up @@ -203,7 +204,9 @@ users)
* `OpamFile.Repos_config.t`: change the type to not allow repositories without an URL [#6249 @kit-ty-kate]

## opam-core
* `OpamCmd`: Create the module and add `of_string` [#5492 @kit-ty-kate]
* `OpamStd.List.split`: Improve performance [#6210 @kit-ty-kate]
* `OpamStd.Char`: Create the module and export `is_whitespace` [#5492 @kit-ty-kate]
* `OpamStd.Sys.{get_terminal_columns,uname,getconf,guess_shell_compat}`: Harden the process calls to account for failures [#6230 @kit-ty-kate - fix #6215]
* `OpamStd.Sys.getconf`: was removed, replaced by `get_long_bit` [#6217 @kit-ty-kate]
* `OpamStd.Sys.get_long_bit`: was added, which returns the output of the `getconf LONG_BIT` command [#6217 @kit-ty-kate]
Expand Down
2 changes: 1 addition & 1 deletion src/client/opamArg.ml
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ let environment_variables =
"CURL", cli_original, (fun v -> CURL (env_string v)),
"can be used to select a given 'curl' program. See $(i,OPAMFETCH) for \
more options.";
"FETCH", cli_original, (fun v -> FETCH (env_string v)),
"FETCH", cli_original, (fun v -> FETCH (Option.map OpamCmd.of_string (env_string v))),
"specifies how to download files: either `wget', `curl' or a custom \
command where variables $(b,%{url}%), $(b,%{out}%), $(b,%{retry}%), \
$(b,%{compress}%) and $(b,%{checksum}%) will be replaced. Overrides the \
Expand Down
7 changes: 3 additions & 4 deletions src/client/opamInitDefaults.ml
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ let req_dl_tools () =
let open OpamStd.Option.Op in
let cmd =
(OpamRepositoryConfig.E.fetch_t ()
>>= fun s ->
match OpamStd.String.split s ' ' with
>>= function
| c::_ -> Some c
| _ -> None)
>>+ fun () -> OpamRepositoryConfig.E.curl_t ()
Expand All @@ -122,8 +121,8 @@ let req_dl_tools () =
let dl_tool () =
let open OpamStd.Option.Op in
(OpamRepositoryConfig.E.fetch_t ()
>>+ fun () -> OpamRepositoryConfig.E.curl_t ())
>>| fun cmd -> [(CString cmd), None]
>>+ fun () -> Option.map (fun x -> [x]) (OpamRepositoryConfig.E.curl_t ()))
>>| fun cmd -> List.map (fun x -> (CString x, None)) cmd

let recommended_tools () =
let make = OpamStateConfig.(Lazy.force !r.makecmd) in
Expand Down
187 changes: 187 additions & 0 deletions src/core/opamCmd.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
(**************************************************************************)
(* *)
(* Copyright 2025 Kate Deplaix *)
(* Copyright 2015 The bos programmers *)
(* *)
(* All rights reserved. This file is distributed under the terms of the *)
(* GNU Lesser General Public License version 2.1, with the special *)
(* exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)

(* NOTE: Inspired from @dbuenzli's astring library *)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it worth adding an url ?

module String = struct
module Sub = struct
type t = {
str : string;
start_pos : int;
end_pos : int;
}

let start_pos {start_pos; _} = start_pos

let is_empty {start_pos; end_pos; _} = end_pos - start_pos = 0

let tail ({start_pos; end_pos; _} as sub) =
if (start_pos : int) = (end_pos : int) then
sub
else
{sub with start_pos = start_pos + 1}

let head {str; start_pos; end_pos} =
if (start_pos : int) = (end_pos : int) then
None
else
Some str.[start_pos]

let span ~sat {str; start_pos; end_pos} =
let rec loop i str start_pos end_pos =
if i < end_pos && sat str.[i] then
loop (i + 1) str start_pos end_pos
else
({str; start_pos; end_pos = i},
{str; start_pos = i; end_pos})
in
loop start_pos str start_pos end_pos

let concat l =
let to_string {str; start_pos; end_pos} =
String.sub str start_pos (end_pos - start_pos)
in
List.fold_left (fun acc x -> acc ^ to_string x) "" l

let extend ~max {str; start_pos; end_pos} =
let rec loop i str start_pos str_len =
if i < str_len then
loop (i + 1) str start_pos str_len
else
{str; start_pos; end_pos = i}
in
loop start_pos str start_pos
(Int.min (end_pos + max) (String.length str))
end

let sub str = {Sub.str; start_pos = 0; end_pos = String.length str}

let of_char c = String.make 1 c

let dump fmt s =
let escape_digit = function
| 0 -> '0'
| 1 -> '1'
| 2 -> '2'
| 3 -> '3'
| 4 -> '4'
| 5 -> '5'
| 6 -> '6'
| 7 -> '7'
| 8 -> '8'
| 9 -> '9'
| 10 -> 'A'
| 11 -> 'B'
| 12 -> 'C'
| 13 -> 'D'
| 14 -> 'E'
| 15 -> 'F'
| _ -> assert false
in
let dump_escaped_str fmt s i len =
if i < len then begin
match s.[i] with
| '\b' -> Format.pp_print_string fmt "\\b"
| '\t' -> Format.pp_print_string fmt "\\t"
| '\n' -> Format.pp_print_string fmt "\\n"
| '\r' -> Format.pp_print_string fmt "\\r"
| '\"' -> Format.pp_print_string fmt "\\\""
| '\\' -> Format.pp_print_string fmt "\\\\"
| ' '..'~' as c -> Format.pp_print_char fmt c
| c ->
let code = Char.code c in
Format.fprintf fmt "\\x%c%c"
(escape_digit (code / 16)) (escape_digit (code mod 16))
end
in
Format.pp_print_char fmt '"';
dump_escaped_str fmt s 0 (String.length s);
Format.pp_print_char fmt '"';
end

(* NOTE: Modified version from @dbuenzli's bos library (module Bos_cmd) *)
let parse_cmdline s =
try
let err_unclosed kind s =
failwith
(Format.sprintf "%d: unclosed %s quote delimited string"
(String.Sub.start_pos s) kind)
in
let parse_squoted s =
let sat = function '\'' -> false | _ -> true in
let tok, rem = String.Sub.span ~sat (String.Sub.tail s) in
if not (String.Sub.is_empty rem) then
(tok, String.Sub.tail rem)
else
err_unclosed "single" s
in
let parse_dquoted acc s =
let is_data = function '\\' | '"' -> false | _ -> true in
let rec loop acc s =
let data, rem = String.Sub.span ~sat:is_data s in
match String.Sub.head rem with
| Some '"' -> (data :: acc, String.Sub.tail rem)
| Some '\\' ->
let rem = String.Sub.tail rem in
begin match String.Sub.head rem with
| Some ('"' | '\\' | '$' | '`' as c) ->
let acc = String.sub (String.of_char c) :: data :: acc in
loop acc (String.Sub.tail rem)
| Some '\n' -> loop (data :: acc) (String.Sub.tail rem)
| Some _c ->
let acc = String.Sub.extend ~max:2 data :: acc in
loop acc (String.Sub.tail rem)
| None ->
err_unclosed "double" s
end
| None -> err_unclosed "double" s
| Some _ -> assert false
in
loop acc (String.Sub.tail s)
in
let parse_token s =
let rec loop acc s =
match String.Sub.head s with
| None -> (acc, s)
| Some c when OpamStd.Char.is_whitespace c -> (acc, s)
| Some '\'' ->
let tok, rem = parse_squoted s in
loop (tok :: acc) rem
| Some '\"' ->
let acc, rem = parse_dquoted acc s in
loop acc rem
| Some _c ->
let sat = function
| '\'' | '\"' -> false
| c -> not (OpamStd.Char.is_whitespace c)
in
let tok, rem = String.Sub.span ~sat s in
loop (tok :: acc) rem
in
loop [] s
in
let rec loop acc s =
match String.Sub.head s with
| None when acc = [] -> failwith "empty command"
| None -> acc
| Some c when OpamStd.Char.is_whitespace c ->
loop acc (String.Sub.tail s)
| Some _ ->
let token, s = parse_token s in
loop (String.Sub.concat (List.rev token) :: acc) s
in
Ok (loop [] (String.sub s))
with Failure err ->
Error (Format.asprintf "command line %a:%s" String.dump s err)

let of_string s =
match parse_cmdline s with
| Ok x -> List.rev x
| Error msg -> OpamConsole.error_and_exit `Bad_arguments "%s" msg
13 changes: 13 additions & 0 deletions src/core/opamCmd.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(**************************************************************************)
(* *)
(* Copyright 2025 Kate Deplaix *)
(* *)
(* All rights reserved. This file is distributed under the terms of the *)
(* GNU Lesser General Public License version 2.1, with the special *)
(* exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)

val of_string : string -> string list
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new module should be in its own commit.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to add a new module instead of adding it in OpamStd?

(** [of_string s] parses [s] and returns the list of [command :: arguments]
according to the POSIX shell syntax. *)
19 changes: 12 additions & 7 deletions src/core/opamStd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,15 @@ module Option = struct
end
end

module OpamChar = struct

(* TODO: Replace by Stdlib.Char.Ascii.is_space
(to be introduced in OCaml 5.4) *)
let is_whitespace = function
| ' ' | '\t' | '\r' | '\n' -> true
| _ -> false

end

module OpamString = struct

Expand Down Expand Up @@ -630,26 +638,22 @@ module OpamString = struct
for i = 0 to len - 1 do Bytes.set b i (f s.[i]) done;
Bytes.to_string b

let is_whitespace = function
| ' ' | '\t' | '\r' | '\n' -> true
| _ -> false

let strip str =
let p = ref 0 in
let l = String.length str in
while !p < l && is_whitespace (String.unsafe_get str !p) do
while !p < l && OpamChar.is_whitespace (String.unsafe_get str !p) do
incr p;
done;
let p = !p in
let l = ref (l - 1) in
while !l >= p && is_whitespace (String.unsafe_get str !l) do
while !l >= p && OpamChar.is_whitespace (String.unsafe_get str !l) do
decr l;
done;
String.sub str p (!l - p + 1)

let strip_right str =
let rec aux i =
if i < 0 || not (is_whitespace str.[i]) then i else aux (i-1)
if i < 0 || not (OpamChar.is_whitespace str.[i]) then i else aux (i-1)
in
let l = String.length str in
let i = aux (l-1) in
Expand Down Expand Up @@ -1898,6 +1902,7 @@ module Config = struct
end

module List = OpamList
module Char = OpamChar
module String = OpamString
module Sys = OpamSys
module Format = OpamFormat
Expand Down
6 changes: 6 additions & 0 deletions src/core/opamStd.mli
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,12 @@ module List : sig
val fold_left_map: ('s -> 'a -> ('s * 'b)) -> 's -> 'a list -> 's * 'b list
end

module Char : sig

val is_whitespace : char -> bool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about rather following what is being proposed upstream. That way, in a few years, you can likely simply delete the code. In other words:

module Char : sig 
  include Stdlib.Char
  module Ascii : sig 
    val is_white : char -> bool
  end
end

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I knew about the PR but I didn't use the same interface at the time for several reasons:

  • the two definitions are different (e.g. \v and \f are not handled by the opam definition but are with the stdlib one)
  • it wasn't merged at the time and we have a dedicated module for compatibility functions (https://github.com/ocaml/opam/blob/master/src/core/opamCompat.ml) so it would've felt a bit weird to have a yet-to-be-part-of-the-stdlib function in it
  • OpamStd isn't meant to be a mirror of the stdlib in terms of naming, OpamCompat is

Now that it has been merged i can think about it again although i personally feel like it would be better to wait for the release of 5.4 to change the function. In the meantime i'm happy to add a TODO comment to remind ourselves to change it when 5.4 is released.


end

module String : sig

(** {3 Collections} *)
Expand Down
6 changes: 2 additions & 4 deletions src/repository/opamRepositoryConfig.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module E = struct

type OpamStd.Config.E.t +=
| CURL of string option
| FETCH of string option
| FETCH of string list option
| NOCHECKSUMS of bool option
| REPOSITORYTARRING of bool option
| REQUIRECHECKSUMS of bool option
Expand Down Expand Up @@ -110,9 +110,7 @@ let update ?noop:_ = setk (fun cfg () -> r := cfg) !r
let initk k =
let open OpamStd.Option.Op in
let download_tool =
E.fetch () >>= (fun s ->
let args = OpamStd.String.split s ' ' in
match args with
E.fetch () >>= (function
| cmd::a ->
let cmd, kind =
if OpamStd.String.ends_with ~suffix:"curl" cmd then
Expand Down
6 changes: 3 additions & 3 deletions src/repository/opamRepositoryConfig.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@
module E : sig
type OpamStd.Config.E.t +=
| CURL of string option
| FETCH of string option
| FETCH of string list option
| NOCHECKSUMS of bool option
| REPOSITORYTARRING of bool option
| REQUIRECHECKSUMS of bool option
| RETRIES of int option
| VALIDATIONHOOK of string option

val curl: unit -> string option
val fetch: unit -> string option
val fetch: unit -> string list option

(* Non lazy access *)
val curl_t: unit -> string option
val fetch_t: unit -> string option
val fetch_t: unit -> string list option
end

(** Toggles parsing of the tool's output to detect errors
Expand Down
Loading