lyricdownloader/usegenius.go

157 lines
4.0 KiB
Go
Raw Permalink Normal View History

package main
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httputil"
"os"
"regexp"
"strings"
"time"
"github.com/antchfx/htmlquery"
"github.com/gocolly/colly/v2"
"golang.org/x/net/html"
)
var (
client *http.Client = &http.Client{Timeout: 10 * time.Second}
genuisAPI = "https://api.genius.com/search"
GENIUS_API_TOKEN = ""
)
// makeRequest - generic function for making Http request
func makeRequest(endpoint string, apiToken string) (GeniusSearchOtherResponse, http.Header, error) {
var geniusApiResponse GeniusSearchOtherResponse
req, _ := http.NewRequest(http.MethodGet, endpoint, nil)
bearerHeader := fmt.Sprintf("Bearer %s", apiToken)
req.Header.Add("Authorization", bearerHeader)
reqDump, err := httputil.DumpRequestOut(req, true)
if err != nil {
return geniusApiResponse, nil, fmt.Errorf("error printing request: %+v", err)
}
if printDebug {
mainLog.Printf("REQUEST:\n%s\n", string(reqDump))
}
resp, err := client.Do(req)
if err != nil {
return geniusApiResponse, nil, fmt.Errorf("error receiving response: %+v", err)
}
defer resp.Body.Close()
respDump, err := httputil.DumpResponse(resp, true)
if err != nil {
return geniusApiResponse, nil, fmt.Errorf("error printing response: %+v", err)
}
if printDebug {
mainLog.Printf("RESPONSE:\n%s\n", string(respDump))
}
err = json.NewDecoder(resp.Body).Decode(&geniusApiResponse)
if err != nil {
return geniusApiResponse, nil, fmt.Errorf("error decoding response: %+v", err)
}
if printDebug {
mainLog.Printf("Decoded => %+v\n", geniusApiResponse)
}
return geniusApiResponse, resp.Header, nil
}
// getLyricUrl - get the url of the html page of the song to scrape
func getLyricUrl(searchTerm string) (string, error) {
ret, _, err := makeRequest(fmt.Sprintf("%s?q=%s", genuisAPI, searchTerm), GENIUS_API_TOKEN)
if err != nil {
return "", err
}
if len(ret.Response.Hits) == 0 {
return "", fmt.Errorf("there is no song on the Genius Api for the song: %s", searchTerm)
}
var songs []GeniusSearchResponseHit
for _, item := range ret.Response.Hits {
if item.Type == "song" {
songs = append(songs, item.Result)
}
}
// Extract the first one as that is most of the time the correct one
return songs[0].URL, nil
}
// stripUnneccessaryChars - unescape all escaped characters
func stripUnneccessaryChars(nodeHtml string) string {
captureBr := regexp.MustCompile("<br/>")
// Replace all br with a new line
convertedBr := captureBr.ReplaceAllString(nodeHtml, "\n")
captureAngleBrackets := regexp.MustCompile(`(?i)\<(.*?)\>`)
// Remove all content between the angle brackets and the angle brackets themselves
removedAngleBrackets := captureAngleBrackets.ReplaceAllString(convertedBr, "")
// Unescape all escaped string
return html.UnescapeString(removedAngleBrackets)
}
// searchGenius - print to file scraped song from Genius Search API
func searchGenius(searchTerm string) error {
var errs error
lyricUrl, err := getLyricUrl(searchTerm)
if err != nil {
return errors.Join(err, errs)
}
if printDebug {
mainLog.Printf("lyricUrl: %s", lyricUrl)
}
var sb strings.Builder
c := colly.NewCollector()
c.OnHTML("body", func(e *colly.HTMLElement) {
doc, errT := htmlquery.Parse(bytes.NewReader(e.Response.Body))
if errT != nil {
errs = fmt.Errorf("error parsing html : %+v", errT)
return
}
// fmt.Println("doc: ", htmlquery.OutputHTML(doc, true))
// Use XPath to find nodes
nodes, errT := htmlquery.QueryAll(doc, "//div[contains(@class, 'Lyrics__Container')]") // Example XPath query
if err != nil {
errs = fmt.Errorf("error in query: %+v", errT)
return
}
for _, node := range nodes {
sb.WriteString(stripUnneccessaryChars(htmlquery.OutputHTML(node, true)))
}
})
err = c.Visit(lyricUrl)
if err != nil {
return errors.Join(err, errs)
}
filename := fmt.Sprintf("%s_genius.txt", searchTerm)
err = os.WriteFile(filename, []byte(sb.String()), os.ModePerm)
if err != nil {
g := fmt.Errorf("could not write to %s: %v", filename, err)
return errors.Join(errs, g)
}
return errs
}