1
0
Fork 0

Refactor extractor

This commit is contained in:
Florian RICHER 2022-06-16 23:19:00 +02:00
parent 1a72eaeaaf
commit 4e09ea6add
3 changed files with 11 additions and 13 deletions

2
.env
View file

@@ -1 +1 @@
URL=http://vk.gy/database
URL=https://vk.gy/database

View file

@@ -25,7 +25,7 @@ pub struct Informations {
pub releases: Vec<releases::Release>,
}
pub fn extract() {
pub fn extract() -> Informations {
let response = reqwest::blocking::get(env::var("URL").unwrap())
.unwrap()
.text()
@@ -37,36 +37,33 @@ pub fn extract() {
let divs = document.select(&div_selector);
let mut categories = Informations::default();
let mut informations = Informations::default();
for div in divs {
if let Some(category) = div.select(&scraper::Selector::parse(CATEGORY_TITLE_SELECTOR).unwrap()).next() {
let category_title = category.inner_html();
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
match category_title.as_str() {
"artists" => {
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
categories.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
informations.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
},
"labels" => {
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
categories.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
informations.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
}
"musicians" => {
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
categories.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
informations.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
}
"releases" => {
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
categories.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
informations.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
}
_ => {}
}
}
}
println!("{:#?}", categories);
informations
}
pub(self) fn trim_whitespace(s: &str) -> String {

View file

@@ -3,5 +3,6 @@ mod extractor;
fn main() {
dotenvy::dotenv().unwrap();
extractor::extract();
let informations = extractor::extract();
println!("{:#?}", informations);
}