package main import ( "bytes" "errors" "fmt" "os" "strings" "github.com/antchfx/htmlquery" "github.com/playwright-community/playwright-go" "golang.org/x/net/html" ) // recurseNodes - walk HtmlNodes of a Html document func recurseNodes(top *html.Node, sb *strings.Builder) { if top.Type == html.ElementNode && top.Data == "span" { sb.WriteString(htmlquery.InnerText(top) + "\n") } for c := top.FirstChild; c != nil; c = c.NextSibling { recurseNodes(c, sb) } } // searchGenius - print to file scraped song from google search func searchGoogle(song string) (err error) { runOption := &playwright.RunOptions{ SkipInstallBrowsers: true, } tempErr := playwright.Install(runOption) if tempErr != nil { err = fmt.Errorf("could not install playwright dependencies: %v", tempErr) return err } pw, tempErr := playwright.Run() if tempErr != nil { err = fmt.Errorf("could not start playwright: %v", tempErr) return err } defer func(pw *playwright.Playwright) { tempErr := pw.Stop() if tempErr != nil { e := fmt.Errorf("could not stop Playwright: %v", tempErr) err = errors.Join(err, e) } }(pw) option := playwright.BrowserTypeLaunchOptions{ Channel: playwright.String("chrome"), Headless: playwright.Bool(false), } browser, tempErr := pw.Chromium.Launch(option) if tempErr != nil { err = fmt.Errorf("could not launch browser: %v", tempErr) return err } defer func(browser playwright.Browser) { tempErr = browser.Close() if tempErr != nil { e := fmt.Errorf("could not close browser: %v", tempErr) err = errors.Join(err, e) } }(browser) page, tempErr := browser.NewPage() if tempErr != nil { err = fmt.Errorf("could not create page: %v", tempErr) return err } if _, tempErr := page.Goto(fmt.Sprintf("https://www.google.com/search?q=%ss+lyrics", song), playwright.PageGotoOptions{ WaitUntil: playwright.WaitUntilStateLoad, }); tempErr != nil { err = fmt.Errorf("could not goto: %v", tempErr) return err } tempErr = page.Locator("body").WaitFor(playwright.LocatorWaitForOptions{ State: playwright.WaitForSelectorStateVisible, }) if tempErr != nil { err = fmt.Errorf("could not wait for body: %v", tempErr) return err } html, tempErr := page.Locator("html").InnerHTML() if tempErr != nil { err = fmt.Errorf("could not get innerHtml: %v", tempErr) return err } doc, tempErr := htmlquery.Parse(bytes.NewReader([]byte(html))) if tempErr != nil { err = fmt.Errorf("could not parse the innerHtml: %v", tempErr) return err } nodes, tempErr := htmlquery.QueryAll(doc, "//div[@data-lyricid]/div") if tempErr != nil { err = fmt.Errorf("could not get the nodes: %v", tempErr) return err } var sb strings.Builder for _, node := range nodes { recurseNodes(node, &sb) } if sb.Len() > 0 { if printDebug { mainLog.Println("Writing lyrics from Google...") } filename := fmt.Sprintf("%s_google.txt", outputFile) tempErr = os.WriteFile(filename, []byte(sb.String()), os.ModePerm) if tempErr != nil { err = fmt.Errorf("could not write to %s: %v", filename, err) return err } } else { mainLog.Println("Lyrics cannot be found...") } return nil }