diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..f4d412f --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module github.com/mpl/gphotos-cdp + +go 1.12 + +require ( + github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d + github.com/chromedp/chromedp v0.4.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2da1a9c --- /dev/null +++ b/go.sum @@ -0,0 +1,16 @@ +github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d h1:00kLGv5nKzpFchNhGDXDRbKtYx/WoT983Ka2t8/pzRE= +github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d/go.mod h1:0YChpVzuLJC5CPr+x3xkHN6Z8KOSXjNbL7qV8Wc4GW0= +github.com/chromedp/chromedp v0.4.0 h1:0AJC5ejETuh/6n7Tcsw4u4G0eKZkI9aVRwckWaImLUE= +github.com/chromedp/chromedp v0.4.0/go.mod h1:DC3QUn4mJ24dwjcaGQLoZrhm4X/uPHZ6spDbS2uFhm4= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307 h1:vl4eIlySbjertFaNwiMjXsGrFVK25aOWLq7n+3gh2ls= +github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307/go.mod h1:BjPj+aVjl9FW/cCGiF3nGh5v+9Gd3VCgBQbod/GlMaQ= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e h1:hB2xlXdHp/pmPZq0y3QnmWAArdw9PqbmotexnWx/FU8= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/main.go b/main.go index d0b1b69..df55c6d 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,6 @@ import ( "sync" "time" - "github.com/chromedp/cdproto/cdp" "github.com/chromedp/cdproto/input" "github.com/chromedp/cdproto/page" "github.com/chromedp/chromedp" @@ -101,6 +100,9 @@ type Session struct { // really) that was downloaded. If set, it is used as a sentinel, to indicate that // we should skip dowloading all items older than this one. lastDone string + // firstItem is the most recent item in the feed. It is determined at the + // beginning of the run, and is used as the final sentinel. + firstItem string } // getLastDone returns the URL of the most recent item that was downloaded in @@ -248,6 +250,10 @@ func (s *Session) login(ctx context.Context) error { // 2) if the last session marked what was the most recent downloaded photo, it navigates to it // 3) otherwise it jumps to the end of the timeline (i.e. the oldest photo) func (s *Session) firstNav(ctx context.Context) error { + if err := s.setFirstItem(ctx); err != nil { + return err + } + if *startFlag != "" { chromedp.Navigate(*startFlag).Do(ctx) chromedp.WaitReady("body", chromedp.ByQuery).Do(ctx) @@ -270,29 +276,43 @@ func (s *Session) firstNav(ctx context.Context) error { return nil } -// navToEnd waits for the page to be ready to receive scroll key events, by -// trying to select an item with the right arrow key, and then scrolls down to the -// end of the page, i.e. to the oldest items. -func navToEnd(ctx context.Context) error { +// setFirstItem looks for the first item, and sets it as s.firstItem. +// We always run it first even for code paths that might not need s.firstItem, +// because we also run it for the side-effect of waiting for the first page load to +// be done, and to be ready to receive scroll key events. +func (s *Session) setFirstItem(ctx context.Context) error { // wait for page to be loaded, i.e. that we can make an element active by using // the right arrow key. for { chromedp.KeyEvent(kb.ArrowRight).Do(ctx) time.Sleep(tick) - var ids []cdp.NodeID + attributes := make(map[string]string) if err := chromedp.Run(ctx, - chromedp.NodeIDs(`document.activeElement`, &ids, chromedp.ByJSPath)); err != nil { + chromedp.Attributes(`document.activeElement`, &attributes, chromedp.ByJSPath)); err != nil { return err } - if len(ids) > 0 { - if *verboseFlag { - log.Printf("We are ready, because element %v is selected", ids[0]) - } - break + if len(attributes) == 0 { + time.Sleep(tick) + continue } - time.Sleep(tick) + + photoHref, ok := attributes["href"] + if !ok || !strings.HasPrefix(photoHref, "./photo/") { + time.Sleep(tick) + continue + } + + s.firstItem = strings.TrimPrefix(photoHref, "./photo/") + break + } + if *verboseFlag { + log.Printf("Page loaded, most recent item in the feed is: %s", s.firstItem) } + return nil +} +// navToEnd scrolls down to the end of the page, i.e. to the oldest items. +func navToEnd(ctx context.Context) error { // try jumping to the end of the page. detect we are there and have stopped // moving when two consecutive screenshots are identical. var previousScr, scr []byte @@ -593,6 +613,9 @@ func (s *Session) navN(N int) func(context.Context) error { if N > 0 && n >= N { break } + if strings.HasSuffix(location, s.firstItem) { + break + } if err := navLeft(ctx); err != nil { return fmt.Errorf("error at %v: %v", location, err)