package main

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/http/httputil"
	"net/url"
	"os"
	"regexp"
	"strings"
	"time"

	"github.com/antchfx/htmlquery"
	"github.com/gocolly/colly/v2"
	"golang.org/x/net/html"
)

var (
	// client is the shared HTTP client used for Genius API calls; the timeout
	// bounds the whole request so a stalled connection cannot hang the caller.
	client *http.Client = &http.Client{Timeout: 10 * time.Second}
	// genuisAPI is the Genius search endpoint queried by getLyricUrl.
	genuisAPI = "https://api.genius.com/search"
	// GENIUS_API_TOKEN is sent as the bearer token on every API request.
	// It is empty here; presumably it is assigned elsewhere before use.
	GENIUS_API_TOKEN = ""
)

var (
	// brTagPattern matches <br>, <br/> and <br /> in any letter case.
	// NOTE(review): the original pattern literal was unreadable in the source;
	// this one is reconstructed from the "replace all br with a new line"
	// comment that accompanied it. Confirm against the intended markup.
	brTagPattern = regexp.MustCompile(`(?i)<br\s*/?>`)
	// htmlTagPattern matches an opening angle bracket, the shortest run of
	// non-newline characters after it, and the closing angle bracket.
	htmlTagPattern = regexp.MustCompile(`(?i)\<(.*?)\>`)
)

// makeRequest - generic function for making Http request
//
// It issues a GET to endpoint with apiToken as a bearer token and decodes the
// JSON body into a GeniusSearchOtherResponse. On success it returns the decoded
// value together with the response headers. On any failure (building the
// request, transport error, non-2xx status, undecodable body) it returns a
// zero response, nil headers and a wrapped error.
func makeRequest(endpoint string, apiToken string) (GeniusSearchOtherResponse, http.Header, error) {
	var geniusApiResponse GeniusSearchOtherResponse

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return GeniusSearchOtherResponse{}, nil, fmt.Errorf("error creating request: %w", err)
	}
	req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", apiToken))

	// Dump only when debugging: the dump costs an extra serialization and, as
	// it includes the Authorization header, writes the token to the log.
	if printDebug {
		reqDump, err := httputil.DumpRequestOut(req, true)
		if err != nil {
			return GeniusSearchOtherResponse{}, nil, fmt.Errorf("error printing request: %w", err)
		}
		mainLog.Printf("REQUEST:\n%s\n", string(reqDump))
	}

	resp, err := client.Do(req)
	if err != nil {
		return GeniusSearchOtherResponse{}, nil, fmt.Errorf("error receiving response: %w", err)
	}
	defer resp.Body.Close()

	if printDebug {
		// DumpResponse replaces resp.Body with an in-memory copy, so the body
		// is still readable by the decoder below.
		respDump, err := httputil.DumpResponse(resp, true)
		if err != nil {
			return GeniusSearchOtherResponse{}, nil, fmt.Errorf("error printing response: %w", err)
		}
		mainLog.Printf("RESPONSE:\n%s\n", string(respDump))
	}

	// Reject non-2xx answers explicitly; otherwise an error payload would be
	// decoded into an empty result and reported as "no song found".
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return GeniusSearchOtherResponse{}, nil, fmt.Errorf("unexpected response status: %s", resp.Status)
	}

	if err := json.NewDecoder(resp.Body).Decode(&geniusApiResponse); err != nil {
		return GeniusSearchOtherResponse{}, nil, fmt.Errorf("error decoding response: %w", err)
	}

	if printDebug {
		mainLog.Printf("Decoded => %+v\n", geniusApiResponse)
	}

	return geniusApiResponse, resp.Header, nil
}

// getLyricUrl - get the url of the html page of the song to scrape
//
// It searches the Genius API for searchTerm and returns the URL of the first
// hit whose type is "song". An error is returned when the request fails, when
// the search yields no hits, or when none of the hits is a song.
func getLyricUrl(searchTerm string) (string, error) {
	// Escape the term so spaces, '&', '#', etc. cannot corrupt the query string.
	endpoint := fmt.Sprintf("%s?q=%s", genuisAPI, url.QueryEscape(searchTerm))

	ret, _, err := makeRequest(endpoint, GENIUS_API_TOKEN)
	if err != nil {
		return "", err
	}

	if len(ret.Response.Hits) == 0 {
		return "", fmt.Errorf("there is no song on the Genius Api for the song: %s", searchTerm)
	}

	// Take the first song hit, as that is most of the time the correct one.
	for _, item := range ret.Response.Hits {
		if item.Type == "song" {
			return item.Result.URL, nil
		}
	}

	// Hits were returned but none of them is a song.
	return "", fmt.Errorf("there is no hit of type song on the Genius Api for the song: %s", searchTerm)
}

// stripUnneccessaryChars - unescape all escaped characters
//
// It turns an HTML fragment into plain text: every <br> tag becomes a newline,
// every remaining tag is removed, and HTML entities are unescaped.
func stripUnneccessaryChars(nodeHtml string) string {
	// Replace all br with a new line
	convertedBr := brTagPattern.ReplaceAllString(nodeHtml, "\n")

	// Remove all content between the angle brackets and the angle brackets themselves
	removedAngleBrackets := htmlTagPattern.ReplaceAllString(convertedBr, "")

	// Unescape all escaped string
	return html.UnescapeString(removedAngleBrackets)
}

// searchGenius - print to file scraped song from Genius Search API
//
// It resolves the lyric page for searchTerm, visits it, collects the text of
// every div whose class contains "Lyrics__Container", and writes the result to
// "<searchTerm>_genius.txt" in the current directory. The file is written even
// if parsing reported errors; those errors are joined into the return value.
func searchGenius(searchTerm string) error {
	lyricUrl, err := getLyricUrl(searchTerm)
	if err != nil {
		return err
	}

	if printDebug {
		mainLog.Printf("lyricUrl: %s", lyricUrl)
	}

	var (
		errs error // errors raised inside the scraping callback
		sb   strings.Builder
	)

	c := colly.NewCollector()
	c.OnHTML("body", func(e *colly.HTMLElement) {
		doc, errT := htmlquery.Parse(bytes.NewReader(e.Response.Body))
		if errT != nil {
			errs = errors.Join(errs, fmt.Errorf("error parsing html : %w", errT))
			return
		}

		// Use XPath to find the lyric container nodes.
		nodes, errT := htmlquery.QueryAll(doc, "//div[contains(@class, 'Lyrics__Container')]")
		if errT != nil {
			errs = errors.Join(errs, fmt.Errorf("error in query: %w", errT))
			return
		}

		for _, node := range nodes {
			sb.WriteString(stripUnneccessaryChars(htmlquery.OutputHTML(node, true)))
		}
	})

	if err := c.Visit(lyricUrl); err != nil {
		return errors.Join(err, errs)
	}

	filename := fmt.Sprintf("%s_genius.txt", searchTerm)
	// 0o644: a plain text file does not need to be executable or world-writable.
	if err := os.WriteFile(filename, []byte(sb.String()), 0o644); err != nil {
		return errors.Join(errs, fmt.Errorf("could not write to %s: %w", filename, err))
	}

	return errs
}