Refactoring the line parsing

This commit is contained in:
Julian Kornberger 2016-12-31 17:32:31 +01:00
parent 95f4fe9d35
commit 8082b59766
4 changed files with 299 additions and 216 deletions

216
ftp.go
View File

@ -4,7 +4,6 @@ package ftp
import (
"bufio"
"errors"
"fmt"
"io"
"net"
"net/textproto"
@ -292,221 +291,6 @@ func (c *ServerConn) cmdDataConnFrom(offset uint64, format string, args ...inter
return conn, nil
}
// errUnsupportedListLine is returned by a LIST line parser when the line is
// not in the format that parser handles. parseListLine reacts to it by trying
// the next parser.
var errUnsupportedListLine = errors.New("Unsupported LIST line")
// parseRFC3659ListLine parses the style of directory line defined in RFC 3659:
// a list of "name=value;" facts, one space, and then the entry name.
//
// It returns errUnsupportedListLine when the line does not start with such a
// fact list, and the underlying parse error when a "modify" or "size" fact
// carries a malformed value. Facts other than "modify", "type" and "size" are
// ignored, as are unrecognised "type" values.
func parseRFC3659ListLine(line string) (*Entry, error) {
	iSemicolon := strings.Index(line, ";")
	iWhitespace := strings.Index(line, " ")

	// A fact list has at least one ';' and it must appear before the space
	// that separates the facts from the name.
	if iSemicolon < 0 || iSemicolon > iWhitespace {
		return nil, errUnsupportedListLine
	}

	e := &Entry{
		Name: line[iWhitespace+1:],
	}

	// Remove only the ';' that terminates the last fact, so a fact list
	// without the terminator does not lose the last byte of its value.
	facts := strings.TrimSuffix(line[:iWhitespace], ";")
	for _, field := range strings.Split(facts, ";") {
		i := strings.Index(field, "=")
		if i < 1 {
			return nil, errUnsupportedListLine
		}

		// RFC 3659 fact names are case-insensitive.
		key := strings.ToLower(field[:i])
		value := field[i+1:]

		switch key {
		case "modify":
			t, err := time.Parse("20060102150405", value)
			if err != nil {
				return nil, err
			}
			e.Time = t
		case "type":
			// The type values are case-insensitive as well.
			switch strings.ToLower(value) {
			case "dir", "cdir", "pdir":
				e.Type = EntryTypeFolder
			case "file":
				e.Type = EntryTypeFile
			}
		case "size":
			// Report a malformed size instead of silently leaving it at 0.
			if err := e.setSize(value); err != nil {
				return nil, err
			}
		}
	}
	return e, nil
}
// parseLsListLineName extracts a file or folder name, keeping runs of spaces
// inside it, from an ls-style line.
//
// fields must be the whitespace-separated fields of line in order, and offset
// is the index of the first field that belongs to the name. The name is
// everything after the field at offset-1, with surrounding whitespace trimmed.
//
// It returns "" when offset is out of range or when the field at offset-1 is
// not surrounded by single spaces in line (for example when the columns are
// separated by tabs); callers are expected to fall back to joining the
// remaining fields in that case.
func parseLsListLineName(line string, fields []string, offset int) string {
	if offset < 1 || offset > len(fields) {
		return ""
	}

	// Step over the columns that precede the delimiting field. Searching the
	// whole line for the delimiter would hit an earlier column that happens
	// to hold the same text (e.g. a group named like the year).
	rest := line
	for _, field := range fields[:offset-1] {
		i := strings.Index(rest, field)
		if i == -1 {
			return ""
		}
		rest = rest[i+len(field):]
	}

	match := fmt.Sprintf(" %s ", fields[offset-1])
	index := strings.Index(rest, match)
	if index == -1 {
		return ""
	}
	return strings.TrimSpace(rest[index+len(match):])
}
// parseLsListLine handles a LIST line whose layout follows the output of the
// UNIX ls command. Three column layouts are recognised, checked in this order:
//
//   - field 1 is "folder" and field 2 is "0": a folder, date in fields 3-5,
//     name from field 6 on;
//   - field 1 is "0": a file, size in field 2, date in fields 4-6, name from
//     field 7 on;
//   - otherwise: the entry type comes from the first byte of field 0, the
//     size (files only) from field 4, the date from fields 5-7 and the name
//     from field 8 on.
//
// Lines with too few fields yield errUnsupportedListLine.
func parseLsListLine(line string) (*Entry, error) {
	cols := strings.Fields(line)

	switch {
	case len(cols) >= 7 && cols[1] == "folder" && cols[2] == "0":
		folder := &Entry{
			Type: EntryTypeFolder,
			Name: strings.Join(cols[6:], " "),
		}
		if err := folder.setTime(cols[3:6]); err != nil {
			return nil, err
		}
		return folder, nil

	case len(cols) < 8:
		return nil, errUnsupportedListLine

	case cols[1] == "0":
		file := &Entry{
			Type: EntryTypeFile,
			Name: strings.Join(cols[7:], " "),
		}
		if err := file.setSize(cols[2]); err != nil {
			return nil, err
		}
		if err := file.setTime(cols[4:7]); err != nil {
			return nil, err
		}
		return file, nil

	case len(cols) < 9:
		return nil, errUnsupportedListLine
	}

	entry := &Entry{}
	switch cols[0][0] {
	case 'd':
		entry.Type = EntryTypeFolder
	case 'l':
		entry.Type = EntryTypeLink
	case '-':
		entry.Type = EntryTypeFile
		if err := entry.setSize(cols[4]); err != nil {
			return nil, err
		}
	default:
		return nil, errors.New("Unknown entry type")
	}

	if err := entry.setTime(cols[5:8]); err != nil {
		return nil, err
	}

	// Prefer the name as it appears in the raw line; fall back to the
	// space-joined fields when it cannot be located there.
	name := parseLsListLineName(line, cols, 8)
	if name == "" {
		name = strings.Join(cols[8:], " ")
	}
	entry.Name = name
	return entry, nil
}
// dirTimeFormats lists the time layouts that parseDirListLine tries, in
// order, against the beginning of a line.
var dirTimeFormats = []string{
"01-02-06 03:04PM",
"2006-01-02 15:04",
}
// parseDirListLine parses a directory line in a format based on the output of
// the MS-DOS DIR command: a timestamp in one of dirTimeFormats, then either
// "<DIR>" or a decimal size, then the name.
//
// It returns errUnsupportedListLine when the line does not start with a
// recognised timestamp or when the size column is missing or not a number.
func parseDirListLine(line string) (*Entry, error) {
	e := &Entry{}

	// Try various time formats that DIR might use, and stop when one works.
	// Success is tracked explicitly: a line too short for every layout never
	// reaches time.Parse and must not be mistaken for a successful parse.
	parsed := false
	for _, format := range dirTimeFormats {
		if len(line) <= len(format) {
			continue
		}
		t, err := time.Parse(format, line[:len(format)])
		if err != nil {
			continue
		}
		e.Time = t
		line = line[len(format):]
		parsed = true
		break
	}
	if !parsed {
		// None of the time formats worked.
		return nil, errUnsupportedListLine
	}

	line = strings.TrimLeft(line, " ")
	if strings.HasPrefix(line, "<DIR>") {
		e.Type = EntryTypeFolder
		line = strings.TrimPrefix(line, "<DIR>")
	} else {
		space := strings.Index(line, " ")
		if space == -1 {
			return nil, errUnsupportedListLine
		}
		size, err := strconv.ParseUint(line[:space], 10, 64)
		if err != nil {
			return nil, errUnsupportedListLine
		}
		e.Size = size
		e.Type = EntryTypeFile
		line = line[space:]
	}

	e.Name = strings.TrimLeft(line, " ")
	return e, nil
}
// listLineParsers holds the LIST line parsers in the order in which
// parseListLine tries them.
var listLineParsers = []func(line string) (*Entry, error){
parseRFC3659ListLine,
parseLsListLine,
parseDirListLine,
}
// parseListLine decodes one line returned by the LIST FTP command by handing
// it to each parser in listLineParsers in turn. The first result that is not
// errUnsupportedListLine, whether an entry or another error, is returned.
// errUnsupportedListLine is returned when every parser rejects the line.
func parseListLine(line string) (*Entry, error) {
	for i := range listLineParsers {
		entry, err := listLineParsers[i](line)
		if err != errUnsupportedListLine {
			return entry, err
		}
		// Unsupported by this parser; fall through to the next one.
	}
	return nil, errUnsupportedListLine
}
// setSize parses str as an unsigned decimal number and stores it in e.Size.
// It returns the strconv error when str is not a valid 64-bit unsigned value.
func (e *Entry) setSize(str string) (err error) {
	// Base 10 is explicit: with base 0 a size written with a leading zero
	// would be read as octal and a "0x" prefix as hexadecimal.
	e.Size, err = strconv.ParseUint(str, 10, 64)
	return err
}
// setTime sets e.Time from the three date fields of an ls-style line:
// fields[0] is the month name, fields[1] the day of the month and fields[2]
// either a clock time ("15:04") or a four-digit year.
//
// A clock time is combined with the current year; a year is combined with
// midnight. The time is parsed with a "GMT" zone suffix. An error is returned
// when fewer than three fields are given, when the year is not four
// characters long, or when the assembled string does not parse.
func (e *Entry) setTime(fields []string) (err error) {
	if len(fields) < 3 {
		return errors.New("Invalid time fields")
	}
	month, day, yearOrClock := fields[0], fields[1], fields[2]

	// The full four-digit year is used on purpose: a two-digit year would be
	// expanded by time.Parse around a fixed pivot and land in the wrong
	// century for old or far-future dates.
	var timeStr string
	if strings.Contains(yearOrClock, ":") { // this year
		timeStr = day + " " + month + " " + strconv.Itoa(time.Now().Year()) + " " + yearOrClock + " GMT"
	} else { // not this year
		if len(yearOrClock) != 4 {
			return errors.New("Invalid year format in time string")
		}
		timeStr = day + " " + month + " " + yearOrClock + " 00:00 GMT"
	}

	e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
	return err
}
// NameList issues an NLST FTP command.
func (c *ServerConn) NameList(path string) (entries []string, err error) {
conn, err := c.cmdDataConnFrom(0, "NLST %s", path)

213
parse.go Normal file
View File

@ -0,0 +1,213 @@
package ftp
import (
"errors"
"strconv"
"strings"
"time"
)
// errUnsupportedListLine is returned by a LIST line parser when the line is
// not in the format that parser handles. parseListLine reacts to it by trying
// the next parser.
var errUnsupportedListLine = errors.New("Unsupported LIST line")
// listLineParsers holds the LIST line parsers in the order in which
// parseListLine tries them.
var listLineParsers = []func(line string) (*Entry, error){
parseRFC3659ListLine,
parseLsListLine,
parseDirListLine,
}
// dirTimeFormats lists the time layouts that parseDirListLine tries, in
// order, against the beginning of a line.
var dirTimeFormats = []string{
"01-02-06 03:04PM",
"2006-01-02 15:04",
}
// parseRFC3659ListLine parses the style of directory line defined in RFC 3659:
// a list of "name=value;" facts, one space, and then the entry name.
//
// It returns errUnsupportedListLine when the line does not start with such a
// fact list, and the underlying parse error when a "modify" or "size" fact
// carries a malformed value. Facts other than "modify", "type" and "size" are
// ignored, as are unrecognised "type" values.
func parseRFC3659ListLine(line string) (*Entry, error) {
	iSemicolon := strings.Index(line, ";")
	iWhitespace := strings.Index(line, " ")

	// A fact list has at least one ';' and it must appear before the space
	// that separates the facts from the name.
	if iSemicolon < 0 || iSemicolon > iWhitespace {
		return nil, errUnsupportedListLine
	}

	e := &Entry{
		Name: line[iWhitespace+1:],
	}

	// Remove only the ';' that terminates the last fact, so a fact list
	// without the terminator does not lose the last byte of its value.
	facts := strings.TrimSuffix(line[:iWhitespace], ";")
	for _, field := range strings.Split(facts, ";") {
		i := strings.Index(field, "=")
		if i < 1 {
			return nil, errUnsupportedListLine
		}

		// RFC 3659 fact names are case-insensitive.
		key := strings.ToLower(field[:i])
		value := field[i+1:]

		switch key {
		case "modify":
			t, err := time.Parse("20060102150405", value)
			if err != nil {
				return nil, err
			}
			e.Time = t
		case "type":
			// The type values are case-insensitive as well.
			switch strings.ToLower(value) {
			case "dir", "cdir", "pdir":
				e.Type = EntryTypeFolder
			case "file":
				e.Type = EntryTypeFile
			}
		case "size":
			// Report a malformed size instead of silently leaving it at 0.
			if err := e.setSize(value); err != nil {
				return nil, err
			}
		}
	}
	return e, nil
}
// parseLsListLine parses a directory line in a format based on the output of
// the UNIX ls command. Three column layouts are recognised, checked in this
// order:
//
//   - field 1 is "folder" and field 2 is "0": a folder, date in fields 3-5;
//   - field 1 is "0": a file, size in field 2, date in fields 4-6;
//   - otherwise: the entry type comes from the first byte of field 0, the
//     size (files only) from field 4 and the date from fields 5-7.
//
// In every layout the name is whatever the scanner has left after the last
// consumed field. errUnsupportedListLine is returned when the first field
// does not look like a mode string or when the line has too few fields.
func parseLsListLine(line string) (*Entry, error) {
	// The first field must be a 10-byte mode string. ls may append a single
	// flag character to it ('+' for an alternate access method such as an
	// ACL, '.' for a security context), so 11 bytes are accepted when the
	// last one is such a flag.
	switch i := strings.IndexByte(line, ' '); {
	case i == 10:
	case i == 11 && (line[10] == '+' || line[10] == '.'):
	default:
		return nil, errUnsupportedListLine
	}

	scanner := NewScanner(line)
	fields := scanner.NextFields(6)
	if len(fields) < 6 {
		return nil, errUnsupportedListLine
	}

	if fields[1] == "folder" && fields[2] == "0" {
		e := &Entry{
			Type: EntryTypeFolder,
			Name: scanner.Remaining(),
		}
		if err := e.setTime(fields[3:6]); err != nil {
			return nil, err
		}
		return e, nil
	}

	if fields[1] == "0" {
		// This layout has one more column before the name.
		fields = append(fields, scanner.Next())
		e := &Entry{
			Type: EntryTypeFile,
			Name: scanner.Remaining(),
		}
		if err := e.setSize(fields[2]); err != nil {
			return nil, err
		}
		if err := e.setTime(fields[4:7]); err != nil {
			return nil, err
		}
		return e, nil
	}

	// Read two more fields
	fields = append(fields, scanner.NextFields(2)...)
	if len(fields) < 8 {
		return nil, errUnsupportedListLine
	}

	e := &Entry{
		Name: scanner.Remaining(),
	}
	switch fields[0][0] {
	case '-':
		e.Type = EntryTypeFile
		if err := e.setSize(fields[4]); err != nil {
			return nil, err
		}
	case 'd':
		e.Type = EntryTypeFolder
	case 'l':
		e.Type = EntryTypeLink
	default:
		return nil, errors.New("Unknown entry type")
	}

	if err := e.setTime(fields[5:8]); err != nil {
		return nil, err
	}
	return e, nil
}
// parseDirListLine parses a directory line in a format based on the output of
// the MS-DOS DIR command: a timestamp in one of dirTimeFormats, then either
// "<DIR>" or a decimal size, then the name.
//
// It returns errUnsupportedListLine when the line does not start with a
// recognised timestamp or when the size column is missing or not a number.
func parseDirListLine(line string) (*Entry, error) {
	e := &Entry{}

	// Try various time formats that DIR might use, and stop when one works.
	// Success is tracked explicitly: a line too short for every layout never
	// reaches time.Parse and must not be mistaken for a successful parse.
	parsed := false
	for _, format := range dirTimeFormats {
		if len(line) <= len(format) {
			continue
		}
		t, err := time.Parse(format, line[:len(format)])
		if err != nil {
			continue
		}
		e.Time = t
		line = line[len(format):]
		parsed = true
		break
	}
	if !parsed {
		// None of the time formats worked.
		return nil, errUnsupportedListLine
	}

	line = strings.TrimLeft(line, " ")
	if strings.HasPrefix(line, "<DIR>") {
		e.Type = EntryTypeFolder
		line = strings.TrimPrefix(line, "<DIR>")
	} else {
		space := strings.Index(line, " ")
		if space == -1 {
			return nil, errUnsupportedListLine
		}
		size, err := strconv.ParseUint(line[:space], 10, 64)
		if err != nil {
			return nil, errUnsupportedListLine
		}
		e.Size = size
		e.Type = EntryTypeFile
		line = line[space:]
	}

	e.Name = strings.TrimLeft(line, " ")
	return e, nil
}
// parseListLine decodes one line returned by the LIST FTP command by handing
// it to each parser in listLineParsers in turn. The first result that is not
// errUnsupportedListLine, whether an entry or another error, is returned.
// errUnsupportedListLine is returned when every parser rejects the line.
func parseListLine(line string) (*Entry, error) {
	for _, parse := range listLineParsers {
		entry, err := parse(line)
		if err == errUnsupportedListLine {
			// Not this parser's format; try the next one.
			continue
		}
		return entry, err
	}
	return nil, errUnsupportedListLine
}
// setSize parses str as an unsigned decimal number and stores it in e.Size.
// It returns the strconv error when str is not a valid 64-bit unsigned value.
func (e *Entry) setSize(str string) (err error) {
	// Base 10 is explicit: with base 0 a size written with a leading zero
	// would be read as octal and a "0x" prefix as hexadecimal.
	e.Size, err = strconv.ParseUint(str, 10, 64)
	return err
}
// setTime sets e.Time from the three date fields of an ls-style line:
// fields[0] is the month name, fields[1] the day of the month and fields[2]
// either a clock time ("15:04") or a four-digit year.
//
// A clock time is combined with the current year; a year is combined with
// midnight. The time is parsed with a "GMT" zone suffix. An error is returned
// when fewer than three fields are given, when the year is not four
// characters long, or when the assembled string does not parse.
func (e *Entry) setTime(fields []string) (err error) {
	if len(fields) < 3 {
		return errors.New("Invalid time fields")
	}
	month, day, yearOrClock := fields[0], fields[1], fields[2]

	// The full four-digit year is used on purpose: a two-digit year would be
	// expanded by time.Parse around a fixed pivot and land in the wrong
	// century for old or far-future dates.
	var timeStr string
	if strings.Contains(yearOrClock, ":") { // this year
		timeStr = day + " " + month + " " + strconv.Itoa(time.Now().Year()) + " " + yearOrClock + " GMT"
	} else { // not this year
		if len(yearOrClock) != 4 {
			return errors.New("Invalid year format in time string")
		}
		timeStr = day + " " + month + " " + yearOrClock + " 00:00 GMT"
	}

	e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
	return err
}

58
scanner.go Normal file
View File

@ -0,0 +1,58 @@
package ftp
// Scanner reads fields separated by one or more space characters (' ') from
// a string and keeps track of how far it has read.
type Scanner struct {
	bytes    []byte
	position int
}

// NewScanner returns a Scanner positioned at the start of str.
func NewScanner(str string) *Scanner {
	s := new(Scanner)
	s.bytes = []byte(str)
	return s
}

// NextFields reads up to count fields. It stops early, returning the fields
// gathered so far, as soon as Next yields an empty field.
func (s *Scanner) NextFields(count int) []string {
	fields := make([]string, 0, count)
	for len(fields) < count {
		field := s.Next()
		if field == "" {
			break
		}
		fields = append(fields, field)
	}
	return fields
}

// Next reads the next field and returns it, or "" when no field is left.
// The single space that ends the field is consumed together with it; any
// further spaces are left in place for Remaining.
func (s *Scanner) Next() string {
	end := len(s.bytes)

	// Step over the spaces in front of the field.
	for s.position < end && s.bytes[s.position] == ' ' {
		s.position++
	}
	start := s.position

	// Advance to the first space after the field, or to the end of input.
	for s.position < end && s.bytes[s.position] != ' ' {
		s.position++
	}
	field := string(s.bytes[start:s.position])

	// Consume the one space that terminated the field, if there was one.
	if s.position < end {
		s.position++
	}
	return field
}

// Remaining returns the part of the input that has not been read yet,
// without advancing the scanner.
func (s *Scanner) Remaining() string {
	return string(s.bytes[s.position:])
}

28
scanner_test.go Normal file
View File

@ -0,0 +1,28 @@
package ftp
import "testing"
import "github.com/stretchr/testify/assert"
// TestScanner walks an input whose fields are separated by both single and
// double spaces. After every Next call it checks the returned field and the
// unread remainder: Next consumes exactly one space after a field, so a
// double space leaves one leading space behind.
func TestScanner(t *testing.T) {
	assert := assert.New(t)

	// The double spaces are significant: with single spaces the remainders
	// asserted below (" bar x  y", " y") could not have a leading space.
	s := NewScanner("foo  bar x  y")

	assert.Equal("foo", s.Next())
	assert.Equal(" bar x  y", s.Remaining())

	assert.Equal("bar", s.Next())
	assert.Equal("x  y", s.Remaining())

	assert.Equal("x", s.Next())
	assert.Equal(" y", s.Remaining())

	assert.Equal("y", s.Next())
	assert.Equal("", s.Next())
	assert.Equal("", s.Remaining())
}
// TestScannerEmpty checks that a scanner over the empty string keeps
// returning an empty field and has nothing remaining.
func TestScannerEmpty(t *testing.T) {
	check := assert.New(t)
	scanner := NewScanner("")

	// Next must stay empty on repeated calls.
	for i := 0; i < 2; i++ {
		check.Equal("", scanner.Next())
	}
	check.Equal("", scanner.Remaining())
}