Merge pull request #110 from digineo/time-parsing

Fix time parsing and reduce syscalls
This commit is contained in:
Julien Laffaye 2018-01-05 08:56:08 +01:00 committed by GitHub
commit 83891dbe00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 128 additions and 50 deletions

9
ftp.go
View File

@ -348,14 +348,14 @@ func (c *ServerConn) NameList(path string) (entries []string, err error) {
// List issues a LIST FTP command. // List issues a LIST FTP command.
func (c *ServerConn) List(path string) (entries []*Entry, err error) { func (c *ServerConn) List(path string) (entries []*Entry, err error) {
var cmd string var cmd string
var parseFunc func(string) (*Entry, error) var parser parseFunc
if c.mlstSupported { if c.mlstSupported {
cmd = "MLSD" cmd = "MLSD"
parseFunc = parseRFC3659ListLine parser = parseRFC3659ListLine
} else { } else {
cmd = "LIST" cmd = "LIST"
parseFunc = parseListLine parser = parseListLine
} }
conn, err := c.cmdDataConnFrom(0, "%s %s", cmd, path) conn, err := c.cmdDataConnFrom(0, "%s %s", cmd, path)
@ -367,8 +367,9 @@ func (c *ServerConn) List(path string) (entries []*Entry, err error) {
defer r.Close() defer r.Close()
scanner := bufio.NewScanner(r) scanner := bufio.NewScanner(r)
now := time.Now()
for scanner.Scan() { for scanner.Scan() {
entry, err := parseFunc(scanner.Text()) entry, err := parser(scanner.Text(), now)
if err == nil { if err == nil {
entries = append(entries, entry) entries = append(entries, entry)
} }

View File

@ -2,6 +2,7 @@ package ftp
import ( import (
"errors" "errors"
"fmt"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -9,7 +10,9 @@ import (
var errUnsupportedListLine = errors.New("Unsupported LIST line") var errUnsupportedListLine = errors.New("Unsupported LIST line")
var listLineParsers = []func(line string) (*Entry, error){ type parseFunc func(string, time.Time) (*Entry, error)
var listLineParsers = []parseFunc{
parseRFC3659ListLine, parseRFC3659ListLine,
parseLsListLine, parseLsListLine,
parseDirListLine, parseDirListLine,
@ -22,7 +25,7 @@ var dirTimeFormats = []string{
} }
// parseRFC3659ListLine parses the style of directory line defined in RFC 3659. // parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
func parseRFC3659ListLine(line string) (*Entry, error) { func parseRFC3659ListLine(line string, now time.Time) (*Entry, error) {
iSemicolon := strings.Index(line, ";") iSemicolon := strings.Index(line, ";")
iWhitespace := strings.Index(line, " ") iWhitespace := strings.Index(line, " ")
@ -66,7 +69,7 @@ func parseRFC3659ListLine(line string) (*Entry, error) {
// parseLsListLine parses a directory line in a format based on the output of // parseLsListLine parses a directory line in a format based on the output of
// the UNIX ls command. // the UNIX ls command.
func parseLsListLine(line string) (*Entry, error) { func parseLsListLine(line string, now time.Time) (*Entry, error) {
// Has the first field a length of 10 bytes? // Has the first field a length of 10 bytes?
if strings.IndexByte(line, ' ') != 10 { if strings.IndexByte(line, ' ') != 10 {
@ -85,7 +88,7 @@ func parseLsListLine(line string) (*Entry, error) {
Type: EntryTypeFolder, Type: EntryTypeFolder,
Name: scanner.Remaining(), Name: scanner.Remaining(),
} }
if err := e.setTime(fields[3:6]); err != nil { if err := e.setTime(fields[3:6], now); err != nil {
return nil, err return nil, err
} }
@ -102,7 +105,7 @@ func parseLsListLine(line string) (*Entry, error) {
if err := e.setSize(fields[2]); err != nil { if err := e.setSize(fields[2]); err != nil {
return nil, errUnsupportedListLine return nil, errUnsupportedListLine
} }
if err := e.setTime(fields[4:7]); err != nil { if err := e.setTime(fields[4:7], now); err != nil {
return nil, err return nil, err
} }
@ -132,7 +135,7 @@ func parseLsListLine(line string) (*Entry, error) {
return nil, errors.New("Unknown entry type") return nil, errors.New("Unknown entry type")
} }
if err := e.setTime(fields[5:8]); err != nil { if err := e.setTime(fields[5:8], now); err != nil {
return nil, err return nil, err
} }
@ -141,7 +144,7 @@ func parseLsListLine(line string) (*Entry, error) {
// parseDirListLine parses a directory line in a format based on the output of // parseDirListLine parses a directory line in a format based on the output of
// the MS-DOS DIR command. // the MS-DOS DIR command.
func parseDirListLine(line string) (*Entry, error) { func parseDirListLine(line string, now time.Time) (*Entry, error) {
e := &Entry{} e := &Entry{}
var err error var err error
@ -185,7 +188,7 @@ func parseDirListLine(line string) (*Entry, error) {
// by hostedftp.com // by hostedftp.com
// -r-------- 0 user group 65222236 Feb 24 00:39 UABlacklistingWeek8.csv // -r-------- 0 user group 65222236 Feb 24 00:39 UABlacklistingWeek8.csv
// (The link count is inexplicably 0) // (The link count is inexplicably 0)
func parseHostedFTPLine(line string) (*Entry, error) { func parseHostedFTPLine(line string, now time.Time) (*Entry, error) {
// Has the first field a length of 10 bytes? // Has the first field a length of 10 bytes?
if strings.IndexByte(line, ' ') != 10 { if strings.IndexByte(line, ' ') != 10 {
return nil, errUnsupportedListLine return nil, errUnsupportedListLine
@ -199,14 +202,14 @@ func parseHostedFTPLine(line string) (*Entry, error) {
} }
// Set link count to 1 and attempt to parse as Unix. // Set link count to 1 and attempt to parse as Unix.
return parseLsListLine(fields[0] + " 1 " + scanner.Remaining()) return parseLsListLine(fields[0]+" 1 "+scanner.Remaining(), now)
} }
// parseListLine parses the various non-standard format returned by the LIST // parseListLine parses the various non-standard format returned by the LIST
// FTP command. // FTP command.
func parseListLine(line string) (*Entry, error) { func parseListLine(line string, now time.Time) (*Entry, error) {
for _, f := range listLineParsers { for _, f := range listLineParsers {
e, err := f(line) e, err := f(line, now)
if err != errUnsupportedListLine { if err != errUnsupportedListLine {
return e, err return e, err
} }
@ -219,17 +222,34 @@ func (e *Entry) setSize(str string) (err error) {
return return
} }
func (e *Entry) setTime(fields []string) (err error) { func (e *Entry) setTime(fields []string, now time.Time) (err error) {
var timeStr string if strings.Contains(fields[2], ":") { // contains time
if strings.Contains(fields[2], ":") { // this year thisYear, _, _ := now.Date()
thisYear, _, _ := time.Now().Date() timeStr := fmt.Sprintf("%s %s %d %s GMT", fields[1], fields[0], thisYear, fields[2])
timeStr = fields[1] + " " + fields[0] + " " + strconv.Itoa(thisYear)[2:4] + " " + fields[2] + " GMT" e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
} else { // not this year
/*
On unix, `info ls` shows:
10.1.6 Formatting file timestamps
---------------------------------
A timestamp is considered to be recent if it is less than six
months old, and is not dated in the future. If a timestamp dated today
is not listed in recent form, the timestamp is in the future, which
means you probably have clock skew problems which may break programs
like make that rely on file timestamps.
*/
if !e.Time.Before(now.AddDate(0, 6, 0)) {
e.Time = e.Time.AddDate(-1, 0, 0)
}
} else { // only the date
if len(fields[2]) != 4 { if len(fields[2]) != 4 {
return errors.New("Invalid year format in time string") return errors.New("Invalid year format in time string")
} }
timeStr = fields[1] + " " + fields[0] + " " + fields[2][2:4] + " 00:00 GMT" timeStr := fmt.Sprintf("%s %s %s 00:00 GMT", fields[1], fields[0], fields[2])
e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
} }
e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
return return
} }

View File

@ -1,11 +1,18 @@
package ftp package ftp
import ( import (
"strings"
"testing" "testing"
"time" "time"
) )
var thisYear, _, _ = time.Now().Date() var (
// now is the current time for all tests
now = newTime(2017, time.March, 10, 23, 00)
thisYear, _, _ = now.Date()
previousYear = thisYear - 1
)
type line struct { type line struct {
line string line string
@ -22,45 +29,45 @@ type unsupportedLine struct {
var listTests = []line{ var listTests = []line{
// UNIX ls -l style // UNIX ls -l style
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 pub", "pub", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"drwxr-xr-x 3 110 1002 3 Dec 02 2009 pub", "pub", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 p u b", "p u b", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"drwxr-xr-x 3 110 1002 3 Dec 02 2009 p u b", "p u b", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"-rw-r--r-- 1 marketwired marketwired 12016 Mar 16 2016 2016031611G087802-001.newsml", "2016031611G087802-001.newsml", 12016, EntryTypeFile, time.Date(2016, time.March, 16, 0, 0, 0, 0, time.UTC)}, {"-rw-r--r-- 1 marketwired marketwired 12016 Mar 16 2016 2016031611G087802-001.newsml", "2016031611G087802-001.newsml", 12016, EntryTypeFile, newTime(2016, time.March, 16)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 fileName", "fileName", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 fileName", "fileName", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
{"lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin", "bin -> usr/bin", 0, EntryTypeLink, time.Date(thisYear, time.January, 25, 0, 17, 0, 0, time.UTC)}, {"lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin", "bin -> usr/bin", 0, EntryTypeLink, newTime(thisYear, time.January, 25, 0, 17)},
// Another ls style // Another ls style
{"drwxr-xr-x folder 0 Aug 15 05:49 !!!-Tipp des Haus!", "!!!-Tipp des Haus!", 0, EntryTypeFolder, time.Date(thisYear, time.August, 15, 5, 49, 0, 0, time.UTC)}, {"drwxr-xr-x folder 0 Aug 15 05:49 !!!-Tipp des Haus!", "!!!-Tipp des Haus!", 0, EntryTypeFolder, newTime(thisYear, time.August, 15, 5, 49)},
{"drwxrwxrwx folder 0 Aug 11 20:32 P0RN", "P0RN", 0, EntryTypeFolder, time.Date(thisYear, time.August, 11, 20, 32, 0, 0, time.UTC)}, {"drwxrwxrwx folder 0 Aug 11 20:32 P0RN", "P0RN", 0, EntryTypeFolder, newTime(thisYear, time.August, 11, 20, 32)},
{"-rw-r--r-- 0 18446744073709551615 18446744073709551615 Nov 16 2006 VIDEO_TS.VOB", "VIDEO_TS.VOB", 18446744073709551615, EntryTypeFile, time.Date(2006, time.November, 16, 0, 0, 0, 0, time.UTC)}, {"-rw-r--r-- 0 18446744073709551615 18446744073709551615 Nov 16 2006 VIDEO_TS.VOB", "VIDEO_TS.VOB", 18446744073709551615, EntryTypeFile, newTime(2006, time.November, 16)},
// Microsoft's FTP servers for Windows // Microsoft's FTP servers for Windows
{"---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z", "ls-lR.Z", 1803128, EntryTypeFile, time.Date(thisYear, time.July, 10, 10, 18, 0, 0, time.UTC)}, {"---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z", "ls-lR.Z", 1803128, EntryTypeFile, newTime(thisYear, time.July, 10, 10, 18)},
{"d--------- 1 owner group 0 May 9 19:45 Softlib", "Softlib", 0, EntryTypeFolder, time.Date(thisYear, time.May, 9, 19, 45, 0, 0, time.UTC)}, {"d--------- 1 owner group 0 Nov 9 19:45 Softlib", "Softlib", 0, EntryTypeFolder, newTime(previousYear, time.November, 9, 19, 45)},
// WFTPD for MSDOS // WFTPD for MSDOS
{"-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp", "message.ftp", 322, EntryTypeFile, time.Date(1996, time.August, 19, 0, 0, 0, 0, time.UTC)}, {"-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp", "message.ftp", 322, EntryTypeFile, newTime(1996, time.August, 19)},
// RFC3659 format: https://tools.ietf.org/html/rfc3659#section-7 // RFC3659 format: https://tools.ietf.org/html/rfc3659#section-7
{"modify=20150813224845;perm=fle;type=cdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; .", ".", 0, EntryTypeFolder, time.Date(2015, time.August, 13, 22, 48, 45, 0, time.UTC)}, {"modify=20150813224845;perm=fle;type=cdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; .", ".", 0, EntryTypeFolder, newTime(2015, time.August, 13, 22, 48, 45)},
{"modify=20150813224845;perm=fle;type=pdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; ..", "..", 0, EntryTypeFolder, time.Date(2015, time.August, 13, 22, 48, 45, 0, time.UTC)}, {"modify=20150813224845;perm=fle;type=pdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; ..", "..", 0, EntryTypeFolder, newTime(2015, time.August, 13, 22, 48, 45)},
{"modify=20150806235817;perm=fle;type=dir;unique=1B20F360U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; movies", "movies", 0, EntryTypeFolder, time.Date(2015, time.August, 6, 23, 58, 17, 0, time.UTC)}, {"modify=20150806235817;perm=fle;type=dir;unique=1B20F360U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; movies", "movies", 0, EntryTypeFolder, newTime(2015, time.August, 6, 23, 58, 17)},
{"modify=20150814172949;perm=flcdmpe;type=dir;unique=85A0C168U4;UNIX.group=0;UNIX.mode=0777;UNIX.owner=0; _upload", "_upload", 0, EntryTypeFolder, time.Date(2015, time.August, 14, 17, 29, 49, 0, time.UTC)}, {"modify=20150814172949;perm=flcdmpe;type=dir;unique=85A0C168U4;UNIX.group=0;UNIX.mode=0777;UNIX.owner=0; _upload", "_upload", 0, EntryTypeFolder, newTime(2015, time.August, 14, 17, 29, 49)},
{"modify=20150813175250;perm=adfr;size=951;type=file;unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, time.Date(2015, time.August, 13, 17, 52, 50, 0, time.UTC)}, {"modify=20150813175250;perm=adfr;size=951;type=file;unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, newTime(2015, time.August, 13, 17, 52, 50)},
// Format and types have first letter UpperCase // Format and types have first letter UpperCase
{"Modify=20150813175250;Perm=adfr;Size=951;Type=file;Unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, time.Date(2015, time.August, 13, 17, 52, 50, 0, time.UTC)}, {"Modify=20150813175250;Perm=adfr;Size=951;Type=file;Unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, newTime(2015, time.August, 13, 17, 52, 50)},
// DOS DIR command output // DOS DIR command output
{"08-07-15 07:50PM 718 Post_PRR_20150901_1166_265118_13049.dat", "Post_PRR_20150901_1166_265118_13049.dat", 718, EntryTypeFile, time.Date(2015, time.August, 7, 19, 50, 0, 0, time.UTC)}, {"08-07-15 07:50PM 718 Post_PRR_20150901_1166_265118_13049.dat", "Post_PRR_20150901_1166_265118_13049.dat", 718, EntryTypeFile, newTime(2015, time.August, 7, 19, 50)},
{"08-10-15 02:04PM <DIR> Billing", "Billing", 0, EntryTypeFolder, time.Date(2015, time.August, 10, 14, 4, 0, 0, time.UTC)}, {"08-10-15 02:04PM <DIR> Billing", "Billing", 0, EntryTypeFolder, newTime(2015, time.August, 10, 14, 4)},
// dir and file names that contain multiple spaces // dir and file names that contain multiple spaces
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 spaces dir name", "spaces dir name", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"drwxr-xr-x 3 110 1002 3 Dec 02 2009 spaces dir name", "spaces dir name", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 file name", "file name", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 file name", "file name", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 foo bar ", " foo bar ", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)}, {"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 foo bar ", " foo bar ", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
// Odd link count from hostedftp.com // Odd link count from hostedftp.com
{"-r-------- 0 user group 65222236 Feb 24 00:39 RegularFile", "RegularFile", 65222236, EntryTypeFile, time.Date(thisYear, time.February, 24, 0, 39, 0, 0, time.UTC)}, {"-r-------- 0 user group 65222236 Feb 24 00:39 RegularFile", "RegularFile", 65222236, EntryTypeFile, newTime(thisYear, time.February, 24, 0, 39)},
} }
// Not supported, we expect a specific error message // Not supported, we expect a specific error message
@ -77,7 +84,7 @@ var listTestsFail = []unsupportedLine{
func TestParseValidListLine(t *testing.T) { func TestParseValidListLine(t *testing.T) {
for _, lt := range listTests { for _, lt := range listTests {
entry, err := parseListLine(lt.line) entry, err := parseListLine(lt.line, now)
if err != nil { if err != nil {
t.Errorf("parseListLine(%v) returned err = %v", lt.line, err) t.Errorf("parseListLine(%v) returned err = %v", lt.line, err)
continue continue
@ -91,7 +98,7 @@ func TestParseValidListLine(t *testing.T) {
if entry.Size != lt.size { if entry.Size != lt.size {
t.Errorf("parseListLine(%v).Size = %v, want %v", lt.line, entry.Size, lt.size) t.Errorf("parseListLine(%v).Size = %v, want %v", lt.line, entry.Size, lt.size)
} }
if entry.Time.Unix() != lt.time.Unix() { if !entry.Time.Equal(lt.time) {
t.Errorf("parseListLine(%v).Time = %v, want %v", lt.line, entry.Time, lt.time) t.Errorf("parseListLine(%v).Time = %v, want %v", lt.line, entry.Time, lt.time)
} }
} }
@ -99,7 +106,7 @@ func TestParseValidListLine(t *testing.T) {
func TestParseUnsupportedListLine(t *testing.T) { func TestParseUnsupportedListLine(t *testing.T) {
for _, lt := range listTestsFail { for _, lt := range listTestsFail {
_, err := parseListLine(lt.line) _, err := parseListLine(lt.line, now)
if err == nil { if err == nil {
t.Errorf("parseListLine(%v) expected to fail", lt.line) t.Errorf("parseListLine(%v) expected to fail", lt.line)
} }
@ -108,3 +115,53 @@ func TestParseUnsupportedListLine(t *testing.T) {
} }
} }
} }
// TestSettime checks how setTime resolves the year of ls-style timestamps
// relative to the fixed package-level test time now: lines with a clock time
// stay in the current year unless that would place them six months or more
// after now, and lines with an explicit year are taken verbatim.
func TestSettime(t *testing.T) {
	tests := []struct {
		line     string
		expected time.Time
	}{
		// this year, in the past
		{"Feb 10 23:00", newTime(thisYear, time.February, 10, 23)},

		// this year, less than six months in the future
		{"Sep 10 22:59", newTime(thisYear, time.September, 10, 22, 59)},

		// previous year, otherwise it would be more than 6 months in the future
		{"Sep 10 23:00", newTime(previousYear, time.September, 10, 23)},

		// far in the future
		{"Jan 23 2019", newTime(2019, time.January, 23)},
	}

	for _, test := range tests {
		entry := &Entry{}
		// Surface parse failures directly rather than as a zero-time mismatch.
		if err := entry.setTime(strings.Fields(test.line), now); err != nil {
			t.Errorf("setTime(%v) returned err = %v", test.line, err)
			continue
		}

		if !entry.Time.Equal(test.expected) {
			t.Errorf("setTime(%v).Time = %v, want %v", test.line, entry.Time, test.expected)
		}
	}
}
// newTime returns the UTC time for the given year, month and day. Up to three
// optional trailing values supply the hour, minute and second, in that order;
// any that are omitted default to zero. It panics when more than three
// optional values are passed.
func newTime(year int, month time.Month, day int, hourMinSec ...int) time.Time {
	if len(hourMinSec) > 3 {
		panic("too many arguments")
	}

	// clock holds hour, minute, second; positions not supplied stay zero.
	var clock [3]int
	copy(clock[:], hourMinSec)

	return time.Date(year, month, day, clock[0], clock[1], clock[2], 0, time.UTC)
}