Refactor extractor
This commit is contained in:
parent
1a72eaeaaf
commit
4e09ea6add
3 changed files with 11 additions and 13 deletions
2
.env
2
.env
|
@ -1 +1 @@
|
|||
URL=http://vk.gy/database
|
||||
URL=https://vk.gy/database
|
|
@ -25,7 +25,7 @@ pub struct Informations {
|
|||
pub releases: Vec<releases::Release>,
|
||||
}
|
||||
|
||||
pub fn extract() {
|
||||
pub fn extract() -> Informations {
|
||||
let response = reqwest::blocking::get(env::var("URL").unwrap())
|
||||
.unwrap()
|
||||
.text()
|
||||
|
@ -37,36 +37,33 @@ pub fn extract() {
|
|||
|
||||
let divs = document.select(&div_selector);
|
||||
|
||||
let mut categories = Informations::default();
|
||||
let mut informations = Informations::default();
|
||||
|
||||
for div in divs {
|
||||
if let Some(category) = div.select(&scraper::Selector::parse(CATEGORY_TITLE_SELECTOR).unwrap()).next() {
|
||||
|
||||
let category_title = category.inner_html();
|
||||
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
match category_title.as_str() {
|
||||
"artists" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
informations.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
},
|
||||
"labels" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
informations.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
"musicians" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
informations.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
"releases" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
informations.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("{:#?}", categories);
|
||||
informations
|
||||
}
|
||||
|
||||
pub(self) fn trim_whitespace(s: &str) -> String {
|
||||
|
|
|
@ -3,5 +3,6 @@ mod extractor;
|
|||
fn main() {
|
||||
dotenvy::dotenv().unwrap();
|
||||
|
||||
extractor::extract();
|
||||
let informations = extractor::extract();
|
||||
println!("{:#?}", informations);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue