shell/
parse.rs

1//! Shell reader/parser.
2//! Parses a string into a shell command.
3//! Roughly a subset of <https://www.gnu.org/software/bash/manual/html_node/Shell-Expansions.html>
4
5use crate::builtins::expand_tilde;
6use crate::command_data::{Arg, CommandWithArgs, Redirects, Run};
7use crate::glob::{GlobOutput, expand_glob};
8use crate::jobs::Jobs;
9use crate::platform::{FileDesc, STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
10use std::fmt::{Display, Formatter};
11use std::io;
12use std::iter::Peekable;
13use std::str::{Chars, FromStr};
14
/// Type of the current sequence being parsed.
///
/// Selects which `ParsedJob::push_*` method attaches the next finished command to the job.
#[derive(Copy, Clone, Debug)]
enum SeqType {
    /// Initial state of a parse; the command is attached with `push_command`.
    Command,
    /// Set after a lone `|`; the next command is attached with `push_pipe`.
    Pipe,
    /// Set after `;`; the next command is attached with `push_sequence`.
    Sequence,
    /// Set after `&&`; the next command is attached with `push_and`.
    And,
    /// Set after `||`; the next command is attached with `push_or`.
    Or,
}
24
/// A list of commands to run in a pipe as a job.
///
/// The parser builds this up through the `push_*` methods, each of which folds a new
/// [`Run`] into the one stored here.
#[derive(Clone, Debug)]
pub struct ParsedJob {
    // Only wrapped in an optional to use take() in push methods, must always be Some.
    commands: Option<Run>,
}
31
32impl Display for ParsedJob {
33    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
34        if let Some(run) = &self.commands {
35            write!(f, "{run}")?;
36        }
37        Ok(())
38    }
39}
40
41impl ParsedJob {
42    fn new() -> Self {
43        Self {
44            commands: Some(Run::Empty),
45        }
46    }
47
48    fn push_command(&mut self, new_run: Run) {
49        if let Some(run) = self.commands.take() {
50            self.commands = Some(run.push_run(new_run));
51        }
52    }
53
54    fn push_pipe(&mut self, new_run: Run) {
55        if let Some(run) = self.commands.take() {
56            self.commands = Some(run.push_pipe(new_run));
57        }
58    }
59
60    fn push_sequence(&mut self, new_run: Run) {
61        if let Some(run) = self.commands.take() {
62            self.commands = Some(run.push_sequence(new_run));
63        }
64    }
65
66    fn push_and(&mut self, new_run: Run) {
67        if let Some(run) = self.commands.take() {
68            self.commands = Some(run.push_and(new_run));
69        }
70    }
71
72    fn push_or(&mut self, new_run: Run) {
73        if let Some(run) = self.commands.take() {
74            self.commands = Some(run.push_or(new_run));
75        }
76    }
77
78    /// Slice of the individual commands in pipe order.
79    pub fn commands(&self) -> &Run {
80        self.commands.as_ref().expect("missing command")
81    }
82
83    /// Consume and produce the inner ['Run'].
84    pub fn into_run(mut self) -> Run {
85        self.commands.take().expect("invalid empty command")
86    }
87}
88
89fn push_next_seq_run(job: &mut ParsedJob, new_run: Run, seq_type: SeqType) {
90    match seq_type {
91        SeqType::Command => job.push_command(new_run),
92        SeqType::Pipe => job.push_pipe(new_run),
93        SeqType::Sequence => job.push_sequence(new_run),
94        SeqType::And => job.push_and(new_run),
95        SeqType::Or => job.push_or(new_run),
96    }
97}
98
99fn push_next_seq_item(
100    job: &mut ParsedJob,
101    command: CommandWithArgs,
102    seq_type: SeqType,
103    background: bool,
104) {
105    let new_run = if background {
106        Run::BackgroundCommand(command)
107    } else {
108        Run::Command(command)
109    };
110    match seq_type {
111        SeqType::Command => job.push_command(new_run),
112        SeqType::Pipe => job.push_pipe(new_run),
113        SeqType::Sequence => job.push_sequence(new_run),
114        SeqType::And => job.push_and(new_run),
115        SeqType::Or => job.push_or(new_run),
116    }
117}
118
/// Mutable state carried while parsing one line (or one nested sub-expression).
#[derive(Debug)]
struct ParseState {
    /// The job built so far; finished commands are pushed into it.
    ret: ParsedJob,
    // Should not be None, is Option to allow ownership to change.
    /// The command currently collecting arguments.
    command: Option<CommandWithArgs>,
    /// Redirects collected for the current command; handed over and cleared in `end_command`.
    stdio: Redirects,
    // Should not be None, is Option to allow ownership to change.
    /// Text of the token currently being read.
    token: Option<String>,
    /// Last significant character seen; used to detect `\` escapes and two-character
    /// operators such as `||` and `&&`.
    last_ch: char,
    /// How the next finished command will be attached to `ret`.
    current_seq: SeqType,
}
130
131impl ParseState {
132    fn new() -> Self {
133        let ret = ParsedJob::new();
134        let command = Some(CommandWithArgs::new());
135        let token = Some(String::new());
136        let last_ch = ' ';
137        let current_seq = SeqType::Command;
138        Self {
139            ret,
140            command,
141            stdio: Redirects::default(),
142            token,
143            last_ch,
144            current_seq,
145        }
146    }
147
148    fn command(&mut self) -> &mut CommandWithArgs {
149        self.command.as_mut().expect("invalid empty command")
150    }
151
152    fn token(&mut self) -> &mut String {
153        self.token.as_mut().expect("invalid empty token")
154    }
155
156    fn take_token(&mut self) -> String {
157        let result = self.token.take().expect("invalid empty token");
158        self.token = Some(String::new());
159        result
160    }
161
162    /// Strip single quotes and Escapes from a string and then save it as an arg.
163    /// Note double quoted strings will have already been "resolved" and made single quoted by now
164    /// so no need to strip double quotes.
165    fn strip_quotes(&mut self, token: &str) {
166        if token.contains('\'') || token.contains('\\') {
167            let mut new_token = String::new();
168            let mut quoted = false;
169            let mut last_ch = ' ';
170            for ch in token.chars() {
171                match ch {
172                    '\\' if !quoted && last_ch != '\\' => {}
173                    '\'' if last_ch != '\\' => quoted = !quoted,
174                    _ => new_token.push(ch),
175                }
176                last_ch = ch;
177            }
178            self.command().push_arg(Arg::Str(new_token.into()));
179        } else {
180            self.command().push_arg(Arg::Str(token.into()));
181        }
182    }
183
184    /// Expand tokens that are file expansions (globs) into multiple arguments.
185    fn expand_globs(&mut self, token: &str) {
186        match expand_glob(token) {
187            GlobOutput::Arg(arg) => self.strip_quotes(&arg.to_string_lossy()),
188            GlobOutput::Args(args) => {
189                for arg in args {
190                    self.strip_quotes(&arg.to_string_lossy());
191                }
192            }
193        }
194    }
195
196    /// Expand tokens that contain unquoted $ chars.
197    fn expand_params_comms(&mut self, jobs: &mut Jobs, token: &str) -> Result<(), io::Error> {
198        if token.contains('$') {
199            let mut chars = token.chars().peekable();
200            let mut token = String::new();
201            let mut last_ch = ' ';
202            let mut quoted = false;
203            while let Some(ch) = chars.next() {
204                match ch {
205                    '$' if last_ch != '\\' && !quoted => {
206                        let expansion = self.expand_var_or_command(jobs, &mut chars, None)?;
207                        token.push_str(&expansion);
208                    }
209                    '\'' if last_ch != '\\' => {
210                        quoted = !quoted;
211                    }
212                    _ => token.push(ch),
213                }
214                last_ch = ch;
215            }
216            self.expand_globs(&token);
217        } else {
218            self.expand_globs(token);
219        }
220        Ok(())
221    }
222
223    /// Expand ~ into home directory.
224    /// TODO support other tilde expansions from <https://www.gnu.org/software/bash/manual/html_node/Tilde-Expansion.html>
225    fn expand_tildes(&mut self, jobs: &mut Jobs, token: &str) -> Result<(), io::Error> {
226        let ptok = expand_tilde(token.into());
227        let token = ptok.to_string_lossy();
228        self.expand_params_comms(jobs, &token)
229    }
230
231    /// Expand {..} expressions in arguments into multiple arguments.
232    /// TODO Add range expressions, see <https://www.gnu.org/software/bash/manual/html_node/Brace-Expansion.html>
233    fn expand_braces(&mut self, jobs: &mut Jobs, token: &str) -> Result<(), io::Error> {
234        let mut well_formed = false;
235        let mut open = 0;
236        let mut close = 0;
237        let mut last_idx = 0;
238        let mut options = vec![];
239        let mut open_braces = 0;
240        let mut last_ch = ' ';
241        let mut quoted = false;
242        for (i, ch) in token.char_indices() {
243            if ch == '{' && last_ch != '\\' && !quoted {
244                if open_braces == 0 {
245                    open = i;
246                    last_idx = i;
247                }
248                open_braces += 1;
249            }
250            if open_braces == 1 && ch == ',' && last_ch != '\\' && !quoted {
251                options.push(&token[last_idx + 1..i]);
252                last_idx = i;
253            }
254            if ch == '}' && last_ch != '\\' && !quoted {
255                if open_braces == 1 {
256                    close = i;
257                    if !options.is_empty() {
258                        options.push(&token[last_idx + 1..i]);
259                        well_formed = true;
260                        break;
261                    }
262                }
263                open_braces -= 1;
264            }
265            if ch == '\'' && last_ch != '\\' {
266                quoted = !quoted;
267            }
268
269            last_ch = ch;
270        }
271        if well_formed {
272            let prefix = &token[0..open];
273            let suffix = &token[close + 1..];
274            for middle in options {
275                let new_token = format!("{prefix}{middle}{suffix}");
276                self.expand_braces(jobs, &new_token)?;
277            }
278        } else {
279            self.expand_tildes(jobs, token)?;
280        }
281        Ok(())
282    }
283
284    /// Process a token by applying expansions and saving to argument list.
285    /// TODO add process substitution and word splitting.
286    fn proc_token(&mut self, jobs: &mut Jobs) -> Result<(), io::Error> {
287        let token = self.take_token();
288        if !token.is_empty() {
289            self.expand_braces(jobs, &token)?;
290        }
291        Ok(())
292    }
293
294    fn end_command(&mut self, background: bool) {
295        if !self.command().is_empty() {
296            if let Some(mut command) = self.command.take() {
297                command.set_stdios(self.stdio.clone());
298                self.stdio.clear();
299                push_next_seq_item(&mut self.ret, command, self.current_seq, background);
300            }
301            self.command = Some(CommandWithArgs::new());
302        }
303    }
304
305    fn pipe_or(&mut self, jobs: &mut Jobs, ch: char, next_char: char) -> Result<(), io::Error> {
306        if self.last_ch == '\\' {
307            self.token().push('|');
308            self.last_ch = ' ';
309        } else if self.last_ch == '|' {
310            self.proc_token(jobs)?;
311            self.end_command(false);
312            self.current_seq = SeqType::Or;
313            self.last_ch = ' ';
314        } else if next_char == '|' {
315            // If the next char is not a '|' then we have a pipe, else will loop and become an OR.
316            self.last_ch = ch;
317        } else {
318            self.proc_token(jobs)?;
319            self.end_command(false);
320            self.current_seq = SeqType::Pipe;
321            self.last_ch = ' ';
322        }
323        Ok(())
324    }
325
326    fn seq(&mut self, jobs: &mut Jobs) -> Result<(), io::Error> {
327        self.proc_token(jobs)?;
328        self.end_command(false);
329        self.current_seq = SeqType::Sequence;
330        Ok(())
331    }
332
333    fn and(&mut self, jobs: &mut Jobs) -> Result<(), io::Error> {
334        self.proc_token(jobs)?;
335        self.end_command(false);
336        self.current_seq = SeqType::And;
337        self.last_ch = ' ';
338        Ok(())
339    }
340
341    fn redir_out(
342        &mut self,
343        jobs: &mut Jobs,
344        chars: &mut Peekable<Chars>,
345        end_char: Option<char>,
346    ) -> Result<(), io::Error> {
347        if self.last_ch == '\\' {
348            self.token().push('>');
349            self.last_ch = ' ';
350            return Ok(());
351        }
352        let amp = self.last_ch == '&';
353        let out_fd = if let Some(token) = &self.token {
354            if let Ok(fd) = FileDesc::from_str(token) {
355                if fd >= STDIN_FILENO {
356                    self.take_token();
357                    fd
358                } else {
359                    self.proc_token(jobs)?;
360                    STDOUT_FILENO
361                }
362            } else {
363                self.proc_token(jobs)?;
364                STDOUT_FILENO
365            }
366        } else {
367            self.proc_token(jobs)?;
368            STDOUT_FILENO
369        };
370        let next_char = *chars.peek().unwrap_or(&' ');
371        if next_char == '>' {
372            chars.next();
373            consume_whitespace(chars);
374            let fd_arg = read_arg(jobs, chars, end_char)?;
375            self.stdio.set_out_path(out_fd, fd_arg, false);
376            self.last_ch = ' ';
377        } else {
378            if next_char == '&' {
379                chars.next(); // Consume the &
380            }
381            consume_whitespace(chars);
382            let fd_arg = read_arg(jobs, chars, end_char)?;
383            if next_char == '&' {
384                self.stdio.set_out_fd(out_fd, fd_arg, true);
385            } else {
386                self.stdio.set_out_path(out_fd, fd_arg, true);
387            }
388            self.last_ch = ' ';
389        }
390        if amp {
391            self.stdio.set_out_internal_fd(STDERR_FILENO, out_fd, true);
392        }
393        Ok(())
394    }
395
396    fn redir_in(
397        &mut self,
398        jobs: &mut Jobs,
399        chars: &mut Peekable<Chars>,
400        end_char: Option<char>,
401    ) -> Result<(), io::Error> {
402        if self.last_ch == '\\' {
403            self.token().push('<');
404            self.last_ch = ' ';
405            return Ok(());
406        }
407        let in_fd = if let Some(token) = &self.token {
408            if let Ok(fd) = FileDesc::from_str(token) {
409                if fd >= STDIN_FILENO {
410                    self.take_token();
411                    fd
412                } else {
413                    self.proc_token(jobs)?;
414                    STDIN_FILENO
415                }
416            } else {
417                self.proc_token(jobs)?;
418                STDIN_FILENO
419            }
420        } else {
421            self.proc_token(jobs)?;
422            STDIN_FILENO
423        };
424        let next_char = *chars.peek().unwrap_or(&' ');
425        if next_char == '<' {
426            chars.next();
427            consume_whitespace(chars);
428            let fd_arg = read_arg(jobs, chars, end_char)?;
429            self.last_ch = ' ';
430            self.stdio.set_in_direct(in_fd, fd_arg);
431        } else if next_char == '>' {
432            // <> bidirectional fd.
433            chars.next();
434            let next_char = *chars.peek().unwrap_or(&' ');
435            if next_char == '&' {
436                chars.next(); // Consume the &
437            }
438            consume_whitespace(chars);
439            let fd_arg = read_arg(jobs, chars, end_char)?;
440            self.last_ch = ' ';
441            if next_char == '&' {
442                self.stdio.set_in_out_fd(in_fd, fd_arg);
443            } else {
444                self.stdio.set_in_out_path(in_fd, fd_arg);
445            }
446        } else {
447            if next_char == '&' {
448                chars.next(); // Consume the &
449            }
450            consume_whitespace(chars);
451            let fd_arg = read_arg(jobs, chars, end_char)?;
452            self.last_ch = ' ';
453            if next_char == '&' {
454                self.stdio.set_in_fd(in_fd, fd_arg, true);
455            } else {
456                self.stdio.set_in_path(in_fd, fd_arg);
457            }
458        }
459        Ok(())
460    }
461
462    fn expand_var_or_command(
463        &mut self,
464        jobs: &mut Jobs,
465        chars: &mut Peekable<Chars>,
466        end_char: Option<char>,
467    ) -> Result<String, io::Error> {
468        if let Some('(') = chars.peek() {
469            // Subshell to capture
470            chars.next();
471            let mut sub = parse_line_inner(jobs, chars, Some(')'))?;
472            if let Some(sub) = sub.commands.take() {
473                return Ok(Arg::Command(sub)
474                    .resolve_arg(jobs)?
475                    .to_string_lossy()
476                    .to_string());
477            }
478        } else {
479            // Env var
480            let name = if let Some('{') = chars.peek() {
481                chars.next();
482                let r = read_token(chars, Some('}'));
483                if let Some('}') = chars.peek() {
484                    chars.next();
485                    r
486                } else {
487                    return Err(io::Error::other("bad substitution"));
488                }
489            } else {
490                read_token(chars, end_char)
491            };
492            if !name.is_empty() {
493                return Ok(Arg::Var(name.into())
494                    .resolve_arg(jobs)?
495                    .to_string_lossy()
496                    .to_string());
497            }
498        }
499        Ok("".to_string())
500    }
501}
502
503impl From<ParseState> for ParsedJob {
504    fn from(value: ParseState) -> Self {
505        value.ret
506    }
507}
508
/// Advance `chars` past any leading whitespace, stopping at the first other character.
fn consume_whitespace(chars: &mut Peekable<Chars>) {
    while chars.next_if(|ch| ch.is_whitespace()).is_some() {}
}
518
519/// Read string surrounded by single quote (').  Assumes chars is on the open quote and
520/// consumes the end quote.
521/// This simply reads the chars until the next ' and puts them in a String Arg.
522/// Note, can not produce a string containing a ' character.
523fn read_simple_string(chars: &mut Peekable<Chars>) -> Result<Arg, io::Error> {
524    let mut res = String::new();
525    let mut next_ch = chars.peek().copied();
526    while let Some(ch) = next_ch {
527        if ch == '\'' {
528            chars.next();
529            return Ok(Arg::Str(res.into()));
530        }
531        chars.next();
532        res.push(ch);
533        next_ch = chars.peek().copied();
534    }
535    Err(io::Error::other("unclosed string"))
536}
537
/// Append characters from `chars` to `token` up to and including the first `end_ch`.
///
/// # Errors
/// Returns "unclosed expression" if the input ends before `end_ch` is seen; everything
/// read up to that point has still been appended to `token`.
fn read_chars_until(
    chars: &mut Peekable<Chars>,
    token: &mut String,
    end_ch: char,
) -> Result<(), io::Error> {
    for ch in chars.by_ref() {
        token.push(ch);
        if ch == end_ch {
            return Ok(());
        }
    }
    Err(io::Error::other("unclosed expression"))
}
553
/// Convert one hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric value.
///
/// # Errors
/// Returns an error naming the offending character when it is not a hex digit.
fn char_to_hex_num(ch: char) -> Result<u8, io::Error> {
    match ch.to_digit(16) {
        // A base 16 digit is at most 15, so narrowing to u8 is lossless.
        Some(value) => Ok(value as u8),
        None => Err(io::Error::other(format!(
            "Invalid hex digit {ch}, expected 0-9 or A-F."
        ))),
    }
}
571
572/// Read an ascii char from 0x00-0x7F endowed in a string as '\xXX' where X is single hex digit.
573fn escape_to_char(chars: &mut Peekable<Chars>) -> Result<char, io::Error> {
574    if let (Some(ch1), Some(ch2)) = (chars.next(), chars.peek()) {
575        let ch_n: u8 = (char_to_hex_num(ch1)? * 16) + (char_to_hex_num(*ch2)?);
576        if ch_n > 0x7f {
577            Err(io::Error::other(
578                "Invalid hex ascii code, must be less then \\x7f.".to_string(),
579            ))
580        } else {
581            Ok(ch_n as char)
582        }
583    } else {
584        Err(io::Error::other(
585            "Invalid hex ascii code, expected two digits.".to_string(),
586        ))
587    }
588}
589
590/// Read a UTF8 codepoint encoded in a string with '\uXXXXXXXX' or \u{XXXXXXXX}' where X is a
591/// single hex digit.  There can be 1-8 hex values (X- a nibble) in the encoding (Up to 4 bytes).
592fn read_utf_scalar(chars: &mut Peekable<Chars>) -> Result<char, io::Error> {
593    fn finish(char_u32: u32) -> Result<char, io::Error> {
594        if let Some(val) = std::char::from_u32(char_u32) {
595            Ok(val)
596        } else {
597            Err(io::Error::other(format!(
598                "Invalid unicode scalar, {char_u32:x} not a valid utf scalar."
599            )))
600        }
601    }
602    let mut first = true;
603    let mut has_bracket = false;
604    let mut char_u32 = 0;
605    let mut nibbles = 0;
606    let mut next_ch = chars.peek().copied();
607    while let Some(ch) = next_ch {
608        if ch == '\n' {
609            break;
610        }
611        if ch == '"' {
612            break;
613        }
614        if !has_bracket && ch.is_whitespace() {
615            return finish(char_u32);
616        }
617        if first && ch == '{' {
618            has_bracket = true;
619            first = false;
620            chars.next();
621            next_ch = chars.peek().copied();
622            continue;
623        }
624        first = false;
625        if has_bracket && ch == '}' {
626            return finish(char_u32);
627        }
628        if nibbles >= 8 {
629            return Err(io::Error::other(
630                "Invalid unicode scalar, too many bytes (4 max).".to_string(),
631            ));
632        }
633        nibbles += 1;
634        let nib = char_to_hex_num(ch)?;
635        char_u32 = (char_u32 << 4) | nib as u32;
636        chars.next();
637        next_ch = chars.peek().copied();
638    }
639    if has_bracket {
640        Err(io::Error::other(
641            "Invalid unicode scalar, failed to parse.".to_string(),
642        ))
643    } else {
644        finish(char_u32)
645    }
646}
647
648/// Read string surrounded by quote (").  Assumes chars is on the open quote and
649/// consumes the end quote.
650/// This version will handle interpolation and escape chars.
651fn read_string(jobs: &mut Jobs, chars: &mut Peekable<Chars>) -> Result<Arg, io::Error> {
652    let mut res = String::new();
653    let mut arg = None;
654    let mut next_ch = chars.peek().copied();
655    while let Some(ch) = next_ch {
656        if ch == '"' {
657            chars.next();
658            if let Some(Arg::Compound(mut args)) = arg {
659                if !res.is_empty() {
660                    args.push(Arg::Str(res.into()));
661                }
662                return Ok(Arg::Compound(args));
663            } else {
664                return Ok(Arg::Str(res.into()));
665            }
666        } else if ch == '$' {
667            chars.next();
668            let spec_arg = read_special_arg(jobs, chars, Some('"'))?;
669            if let Some(Arg::Compound(mut args)) = arg {
670                if !res.is_empty() {
671                    args.push(Arg::Str(res.into()));
672                }
673                args.push(spec_arg);
674                res = String::new();
675                arg = Some(Arg::Compound(args));
676            } else {
677                let args = if !res.is_empty() {
678                    vec![Arg::Str(res.into()), spec_arg]
679                } else {
680                    vec![spec_arg]
681                };
682                res = String::new();
683                arg = Some(Arg::Compound(args));
684            }
685            next_ch = chars.peek().copied();
686        } else if ch == '\\' {
687            chars.next();
688            next_ch = chars.peek().copied();
689            match next_ch {
690                Some('\n') => {
691                    // Consume \newline.
692                }
693                Some('e') | Some('E') => {
694                    // Escape
695                    res.push('\x1B');
696                }
697                Some('n') => {
698                    // Linefeed
699                    res.push('\n');
700                }
701                Some('r') => {
702                    // Carriage Return
703                    res.push('\r');
704                }
705                Some('t') => {
706                    // Tab
707                    res.push('\t');
708                }
709                Some('a') => {
710                    // Bell
711                    res.push('\x07');
712                }
713                Some('b') => {
714                    // Backspace
715                    res.push('\x08');
716                }
717                Some('f') => {
718                    // Formfeed
719                    res.push('\x0C');
720                }
721                Some('v') => {
722                    // Vertical tab
723                    res.push('\x0B');
724                }
725                Some('\"') => {
726                    res.push('"');
727                }
728                Some('x') => {
729                    chars.next();
730                    let xch = escape_to_char(chars)?;
731                    res.push(xch);
732                }
733                Some('\\') => {
734                    res.push('\\');
735                }
736                Some('u') => {
737                    chars.next();
738                    let uch = read_utf_scalar(chars)?;
739                    res.push(uch);
740                    if let Some(ch) = chars.peek().copied() {
741                        // If a \u ends in whitespace need to keep it vs swallow it.
742                        if ch.is_whitespace() {
743                            res.push(ch);
744                        }
745                        // Need to see this quote to finish...
746                        if ch == '"' {
747                            next_ch = chars.peek().copied();
748                            continue;
749                        }
750                    }
751                }
752                Some(nch) => {
753                    res.push(ch);
754                    res.push(nch);
755                }
756                _ => {
757                    res.push('\\');
758                }
759            }
760            chars.next();
761            next_ch = chars.peek().copied();
762        } else {
763            chars.next();
764            res.push(ch);
765            next_ch = chars.peek().copied();
766        }
767    }
768    Err(io::Error::other("unclosed string"))
769}
770
/// Read a bare word from `chars`, stopping (without consuming) at whitespace, at any of
/// `"` `'` `$` `|` `;` `&` `<` `>` `(` `:`, or at `end_char` (a space when `None`).
fn read_token(chars: &mut Peekable<Chars>, end_char: Option<char>) -> String {
    let terminator = end_char.unwrap_or(' ');
    let stops_word = |c: char| {
        c.is_whitespace()
            || c == terminator
            || matches!(c, '"' | '\'' | '$' | '|' | ';' | '&' | '<' | '>' | '(' | ':')
    };
    let mut word = String::new();
    while let Some(c) = chars.next_if(|c| !stops_word(*c)) {
        word.push(c);
    }
    word
}
788
789fn read_arg(
790    jobs: &mut Jobs,
791    chars: &mut Peekable<Chars>,
792    end_char_in: Option<char>,
793) -> Result<Arg, io::Error> {
794    let mut args = vec![];
795    let end_char = end_char_in.unwrap_or(' ');
796    let end_set = ['$', '|', ';', '&', '<', '>', '(', end_char];
797    let mut res = String::new();
798    let mut next_ch = chars.peek().copied();
799    while let Some(ch) = next_ch {
800        if ch == '$' {
801            args.push(Arg::Str(res.clone().into()));
802            res.clear();
803            chars.next();
804            let next_arg = read_special_arg(jobs, chars, end_char_in)?;
805            if let Arg::Compound(mut nargs) = next_arg {
806                args.append(&mut nargs);
807            } else {
808                args.push(next_arg);
809            }
810            next_ch = chars.peek().copied();
811        } else if ch == '\'' && ch != end_char {
812            chars.next(); // Advance to opening quote.
813            args.push(read_simple_string(chars)?);
814            next_ch = chars.peek().copied();
815        } else if ch == '"' && ch != end_char {
816            chars.next(); // Advance to opening quote.
817            args.push(read_string(jobs, chars)?);
818            next_ch = chars.peek().copied();
819        } else if !ch.is_whitespace() && !end_set.contains(&ch) {
820            chars.next();
821            res.push(ch);
822            next_ch = chars.peek().copied();
823        } else {
824            next_ch = None;
825        }
826    }
827    args.push(Arg::Str(res.into()));
828    Ok(if args.len() == 1 {
829        args.pop().expect("we had one element...")
830    } else {
831        Arg::Compound(args)
832    })
833}
834
835fn read_special_arg(
836    jobs: &mut Jobs,
837    chars: &mut Peekable<Chars>,
838    end_char: Option<char>,
839) -> Result<Arg, io::Error> {
840    let mut args = vec![];
841    if let Some('(') = chars.peek() {
842        // Subshell to capture
843        chars.next();
844        let mut sub = parse_line_inner(jobs, chars, Some(')'))?;
845        if let Some(sub) = sub.commands.take() {
846            args.push(Arg::Command(sub));
847        }
848    } else {
849        // Env var
850        let name = if let Some('{') = chars.peek() {
851            chars.next();
852            let r = read_token(chars, Some('}'));
853            if let Some('}') = chars.peek() {
854                chars.next();
855                r
856            } else {
857                return Err(io::Error::other("bad substitution"));
858            }
859        } else {
860            read_token(chars, end_char)
861        };
862        if !name.is_empty() {
863            args.push(Arg::Var(name.into()));
864        }
865    }
866    Ok(if args.len() == 1 {
867        args.pop().expect("we had one element...")
868    } else {
869        Arg::Compound(args)
870    })
871}
872
873fn parse_line_inner(
874    jobs: &mut Jobs,
875    chars: &mut Peekable<Chars>,
876    end_char: Option<char>,
877) -> Result<ParsedJob, io::Error> {
878    let mut state = ParseState::new();
879    while let Some(ch) = chars.next() {
880        if let Some(end_ch) = end_char {
881            if ch == end_ch {
882                break;
883            }
884        }
885        let next_char = *chars.peek().unwrap_or(&' ');
886        if ch.is_whitespace() {
887            if state.last_ch == '\\' {
888                state.token().push(ch);
889                state.last_ch = ch;
890            } else {
891                state.proc_token(jobs)?;
892                consume_whitespace(chars);
893            }
894        } else {
895            match ch {
896                '\'' if state.last_ch != '\\' => {
897                    let arg_str = read_simple_string(chars)?;
898                    state.token().push('\'');
899                    state
900                        .token()
901                        .push_str(&arg_str.resolve_arg(jobs)?.to_string_lossy());
902                    state.token().push('\'');
903                    state.last_ch = ch;
904                }
905                '"' if state.last_ch != '\\' => {
906                    let arg_str = read_string(jobs, chars)?;
907                    // Single quote in token to avoid future expansions in string.
908                    state.token().push('\'');
909                    state
910                        .token()
911                        .push_str(&arg_str.resolve_arg(jobs)?.to_string_lossy());
912                    state.token().push('\'');
913                    state.last_ch = ch;
914                }
915
916                '|' => {
917                    state.pipe_or(jobs, ch, next_char)?;
918                }
919                ';' if state.last_ch != '\\' => {
920                    state.seq(jobs)?;
921                }
922                '>' => {
923                    state.redir_out(jobs, chars, end_char)?;
924                }
925                '<' => {
926                    state.redir_in(jobs, chars, end_char)?;
927                }
928                '&' if state.last_ch == '\\' => {
929                    state.token().push('&');
930                    state.last_ch = ' ';
931                }
932                '&' if next_char == '>' || next_char == '&' => {
933                    state.last_ch = '&';
934                }
935                '&' if state.last_ch == '&' => {
936                    state.and(jobs)?;
937                }
938                '&' => {
939                    state.proc_token(jobs)?;
940                    state.end_command(true);
941                    state.last_ch = ' ';
942                }
943                '\\' => {
944                    if state.last_ch == '\\' {
945                        state.token().push('\\');
946                        state.last_ch = ' ';
947                    } else {
948                        state.last_ch = ch;
949                    }
950                }
951                '(' if state.last_ch != '$' && state.last_ch != '\\' => {
952                    state.proc_token(jobs)?;
953                    state.end_command(false);
954                    let mut sub = parse_line_inner(jobs, chars, Some(')'))?;
955                    if let Some(sub) = sub.commands.take() {
956                        push_next_seq_run(
957                            &mut state.ret,
958                            Run::Subshell(Box::new(sub)),
959                            state.current_seq,
960                        );
961                    }
962                }
963                '\n' if state.last_ch == '\\' => {
964                    state.last_ch = ' ';
965                }
966                '$' if state.last_ch != '\\' => {
967                    state.token().push(ch);
968                    let next_ch = chars.peek().unwrap_or(&' ');
969                    match next_ch {
970                        '{' => read_chars_until(chars, state.token(), '}')?,
971                        '(' => read_chars_until(chars, state.token(), ')')?,
972                        _ => {}
973                    }
974                    state.last_ch = ch;
975                }
976                _ => {
977                    if state.last_ch == '\\' {
978                        // Last char was a backslash and seems unremarkable so put it in the token.
979                        state.token().push('\\');
980                    }
981                    state.token().push(ch);
982                    state.last_ch = ch;
983                }
984            }
985        }
986    }
987    state.proc_token(jobs)?;
988    state.end_command(false);
989    Ok(state.into())
990}
991
992pub fn parse_line(jobs: &mut Jobs, input: &str) -> Result<ParsedJob, io::Error> {
993    let mut chars = input.chars().peekable();
994    parse_line_inner(jobs, &mut chars, None)
995}
996
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and returns the job rendered through `Display`.
    /// Panics if the parse fails.
    fn render(jobs: &mut Jobs, input: &str) -> String {
        parse_line(jobs, input).unwrap().to_string()
    }

    /// Checks that `input` renders as `expected` and that the rendered form
    /// parses back to the same text.
    fn test_parse(input: &str, expected: &str) {
        let mut jobs = Jobs::new(false);
        let first_pass = render(&mut jobs, input);
        assert_eq!(first_pass.as_str(), expected);
        let second_pass = render(&mut jobs, &first_pass);
        assert_eq!(second_pass.as_str(), expected);
    }

    /// Checks that `input` renders as `expected`, without the round trip.
    fn test_parse_once(input: &str, expected: &str) {
        let mut jobs = Jobs::new(false);
        let rendered = render(&mut jobs, input);
        assert_eq!(rendered.as_str(), expected);
    }

    #[test]
    fn test_basic_parse() {
        // (input, expected rendering)
        const CASES: &[(&str, &str)] = &[
            ("ls", "ls"),
            ("ls -al", "ls -al"),
            ("ls -al|grep lisp", "ls -al | grep lisp"),
            ("<in_file ls -al|grep lisp", "ls -al 0<in_file | grep lisp"),
            ("ls -al;grep lisp", "ls -al ; grep lisp"),
            ("ls -al&&grep lisp", "ls -al && grep lisp"),
            ("ls -al||grep lisp", "ls -al || grep lisp"),
            (
                "</some/file grep test|grep lisp>/out_file",
                "grep test 0</some/file | grep lisp 1>/out_file",
            ),
            (
                "</some/file > out_file grep test;<in_file grep lisp>/out_file;ls",
                "grep test 0</some/file 1>out_file ; grep lisp 0<in_file 1>/out_file ; ls",
            ),
            (
                "</some/file 2>&1 > out_file grep test;<in_file grep lisp 1>&2 >/out_file;ls",
                "grep test 0</some/file 2>&1 1>out_file ; grep lisp 0<in_file 1>&2 1>/out_file ; ls",
            ),
            (
                "</some/file 2>&1 > out_file grep test;ls|<in_file grep lisp 1>&2 >/out_file;ls",
                "grep test 0</some/file 2>&1 1>out_file ; ls | grep lisp 0<in_file 1>&2 1>/out_file ; ls",
            ),
        ];
        for &(input, expected) in CASES {
            test_parse(input, expected);
        }
    }

    #[test]
    fn test_subshell_parse() {
        // (input, expected rendering)
        const CASES: &[(&str, &str)] = &[
            ("(ls -al)", "(ls -al)"),
            ("(ls -al)|grep May", "(ls -al) | grep May"),
            (
                "(ls -al)|(grep May|grep 7)|grep 00",
                "(ls -al) | (grep May | grep 7) | grep 00",
            ),
        ];
        for &(input, expected) in CASES {
            test_parse(input, expected);
        }
    }

    #[test]
    fn test_strings() {
        // (input, expected rendering)
        const CASES: &[(&str, &str)] = &[
            ("\"one\\ntwo\"", "one\x0Atwo"),
            ("'one\\ntwo'", "one\\ntwo"),
            ("'one\ntwo'", "one\x0Atwo"),
            ("\"one\\x0atwo\"", "one\x0Atwo"),
            ("\"one\\x0Atwo\"", "one\x0Atwo"),
            ("\"one\\u0a two\"", "one\x0A two"),
            ("\"one\\u0A two\"", "one\x0A two"),
            ("\"one\\u{0a}two\"", "one\x0Atwo"),
            ("\"one\\u{0A}two\"", "one\x0Atwo"),
            ("\"one\\vtwo\"", "one\x0Btwo"),
            ("\"one\\ftwo\"", "one\x0Ctwo"),
            ("\"one\\etwo\"", "one\x1Btwo"),
            ("\"one\\Etwo\"", "one\x1Btwo"),
            ("\"one\\atwo\"", "one\x07two"),
            ("\"one\\btwo\"", "one\x08two"),
            ("\"one\\ttwo\"", "one\x09two"),
            ("\"one\\rtwo\"", "one\x0Dtwo"),
            ("\"one\\\\rtwo\"", "one\\rtwo"),
            ("\"one\\\"two\"", "one\"two"),
            ("\"one\\x0a\"", "one\x0A"),
            ("\"one\\u0a\"", "one\x0A"),
            ("\"one\\u0a \"", "one\x0A "),
            ("\"one\\u{0a}\"", "one\x0A"),
            ("\"one\\u{0a}\ntwo\"", "one\x0A\ntwo"),
            ("\"one\\u0a\ntwo\"", "one\x0A\ntwo"),
            ("\"one\\u0a\n\"", "one\x0A\n"),
        ];
        for &(input, expected) in CASES {
            test_parse_once(input, expected);
        }
    }
}