- Main sitemap index at /sitemap.xml referencing 4 sub-sitemaps
- /sidemap/static: fixed routes (homepage, auth, marketing pages)
- /sidemap/users: public user profiles sorted alphabetically
- /sidemap/projects: public projects sorted alphabetically
- /sidemap/repos: public repos sorted alphabetically
- Redis cache with 8h TTL (no refresh on access), key: sidemap:{type}
- robots.txt Sitemap URL uses main_domain() with https:// forced
- All sitemap loc entries use https:// base URL
329 lines
9.6 KiB
Rust
329 lines
9.6 KiB
Rust
use actix_web::{web, HttpResponse};
|
|
use db::cache::AppCache;
|
|
use models::projects::project::{Column as PCol, Entity as PEntity};
|
|
use models::repos::repo::{Column as RCol, Entity as REntity};
|
|
use models::users::user::{Column as UCol, Entity as UEntity};
|
|
use sea_orm::*;
|
|
use service::AppService;
|
|
|
|
/// Namespace prepended to every Redis key written by this module (`sidemap:{type}`).
const CACHE_KEY_PREFIX: &str = "sidemap";

/// Lifetime of a cached sitemap in seconds (8 hours); it is set on write only.
const CACHE_TTL_SECS: u64 = 8 * 60 * 60;
|
|
|
|
/// Returns the base URL, forcing https:// prefix for public sitemap crawlers.
|
|
fn public_base(config: &config::AppConfig) -> String {
|
|
let fallback = "https://gitdata.ai".to_string();
|
|
let base = match config.main_domain() {
|
|
Ok(b) => b.trim_end_matches('/').to_string(),
|
|
Err(_) => fallback,
|
|
};
|
|
if base.starts_with("https://") {
|
|
base
|
|
} else if base.starts_with("http://") {
|
|
base.replacen("http://", "https://", 1)
|
|
} else {
|
|
format!("https://{base}")
|
|
}
|
|
}
|
|
|
|
// ── Handlers ──────────────────────────────────────────────────────────────────
|
|
|
|
/// Main sitemap index referencing all sub-sitemaps.
|
|
pub async fn sitemap(service: web::Data<AppService>) -> HttpResponse {
|
|
let base = public_base(&service.config);
|
|
|
|
let xml = format!(
|
|
r#"<?xml version="1.0" encoding="UTF-8"?>
|
|
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
<sitemap>
|
|
<loc>{base}/sidemap/static</loc>
|
|
</sitemap>
|
|
<sitemap>
|
|
<loc>{base}/sidemap/users</loc>
|
|
</sitemap>
|
|
<sitemap>
|
|
<loc>{base}/sidemap/projects</loc>
|
|
</sitemap>
|
|
<sitemap>
|
|
<loc>{base}/sidemap/repos</loc>
|
|
</sitemap>
|
|
</sitemapindex>"#
|
|
);
|
|
|
|
HttpResponse::Ok()
|
|
.content_type("application/xml; charset=utf-8")
|
|
.body(xml)
|
|
}
|
|
|
|
/// Static routes (no DB, no cache).
|
|
pub async fn sitemap_static(service: web::Data<AppService>) -> HttpResponse {
|
|
let base = public_base(&service.config);
|
|
|
|
HttpResponse::Ok()
|
|
.content_type("application/xml; charset=utf-8")
|
|
.body(build_static_xml(&base))
|
|
}
|
|
|
|
/// User profiles sitemap.
|
|
pub async fn sitemap_users(service: web::Data<AppService>) -> HttpResponse {
|
|
let base = public_base(&service.config);
|
|
|
|
let xml = cached_or_build(&service.cache, "users", || async {
|
|
let db = service.db.reader();
|
|
let users: Vec<(String, String)> = UEntity::find()
|
|
.filter(UCol::Username.ne(""))
|
|
.order_by_asc(UCol::Username)
|
|
.all(db)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.map(|u| (u.username, u.updated_at.to_rfc3339()))
|
|
.collect();
|
|
Ok(build_users_xml(&base, &users))
|
|
})
|
|
.await
|
|
.unwrap_or_else(|_| build_users_xml(&base, &[]));
|
|
|
|
HttpResponse::Ok()
|
|
.content_type("application/xml; charset=utf-8")
|
|
.body(xml)
|
|
}
|
|
|
|
/// Public projects sitemap.
|
|
pub async fn sitemap_projects(service: web::Data<AppService>) -> HttpResponse {
|
|
let base = public_base(&service.config);
|
|
|
|
let xml = cached_or_build(&service.cache, "projects", || async {
|
|
let db = service.db.reader();
|
|
let projects: Vec<(String, String, String)> = PEntity::find()
|
|
.filter(PCol::IsPublic.eq(true))
|
|
.order_by_asc(PCol::Name)
|
|
.all(db)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.map(|p| (p.name, p.id.to_string(), p.updated_at.to_rfc3339()))
|
|
.collect();
|
|
Ok(build_projects_xml(&base, &projects))
|
|
})
|
|
.await
|
|
.unwrap_or_else(|_| build_projects_xml(&base, &[]));
|
|
|
|
HttpResponse::Ok()
|
|
.content_type("application/xml; charset=utf-8")
|
|
.body(xml)
|
|
}
|
|
|
|
/// Public repos sitemap.
|
|
pub async fn sitemap_repos(service: web::Data<AppService>) -> HttpResponse {
|
|
let base = public_base(&service.config);
|
|
|
|
let xml = cached_or_build(&service.cache, "repos", || async {
|
|
let db = service.db.reader();
|
|
|
|
let project_map: std::collections::HashMap<String, String> = PEntity::find()
|
|
.filter(PCol::IsPublic.eq(true))
|
|
.all(db)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.map(|p| (p.id.to_string(), p.name))
|
|
.collect();
|
|
|
|
let repos: Vec<(String, String)> = REntity::find()
|
|
.filter(RCol::IsPrivate.eq(false))
|
|
.order_by_asc(RCol::RepoName)
|
|
.all(db)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.filter_map(|r| {
|
|
let ns = project_map.get(&r.project.to_string())?;
|
|
Some((format!("{ns}/{}", r.repo_name), r.updated_at.to_rfc3339()))
|
|
})
|
|
.collect();
|
|
|
|
Ok(build_repos_xml(&base, &repos))
|
|
})
|
|
.await
|
|
.unwrap_or_else(|_| build_repos_xml(&base, &[]));
|
|
|
|
HttpResponse::Ok()
|
|
.content_type("application/xml; charset=utf-8")
|
|
.body(xml)
|
|
}
|
|
|
|
// ── Cache helpers ────────────────────────────────────────────────────────────────
|
|
|
|
async fn cached_or_build<F, Fut>(cache: &AppCache, key: &str, build: F) -> Result<String, ()>
|
|
where
|
|
F: FnOnce() -> Fut,
|
|
Fut: std::future::Future<Output = Result<String, ()>>,
|
|
{
|
|
let cache_key = format!("{CACHE_KEY_PREFIX}:{key}");
|
|
|
|
if let Ok(xml) = get_cached(cache, &cache_key).await {
|
|
return Ok(xml);
|
|
}
|
|
|
|
let xml = build().await?;
|
|
|
|
let _ = set_cached(cache, &cache_key, &xml).await;
|
|
|
|
Ok(xml)
|
|
}
|
|
|
|
async fn get_cached(cache: &AppCache, key: &str) -> Result<String, ()> {
|
|
let mut conn = cache.redis_pool().get().await.map_err(|e| {
|
|
tracing::debug!("sidemap redis get pool error: {}", e);
|
|
})?;
|
|
redis::cmd("GET")
|
|
.arg(key)
|
|
.query_async::<String>(&mut conn)
|
|
.await
|
|
.map_err(|e| {
|
|
tracing::debug!("sidemap redis get error: {}", e);
|
|
})
|
|
}
|
|
|
|
async fn set_cached(cache: &AppCache, key: &str, value: &str) -> Result<(), ()> {
|
|
let mut conn = cache.redis_pool().get().await.map_err(|e| {
|
|
tracing::debug!("sidemap redis set pool error: {}", e);
|
|
})?;
|
|
redis::cmd("SETEX")
|
|
.arg(key)
|
|
.arg(CACHE_TTL_SECS)
|
|
.arg(value)
|
|
.query_async::<()>(&mut conn)
|
|
.await
|
|
.map_err(|e| {
|
|
tracing::debug!("sidemap redis set error: {}", e);
|
|
})
|
|
}
|
|
|
|
// ── XML builders ────────────────────────────────────────────────────────────────
|
|
|
|
fn build_static_xml(base: &str) -> String {
|
|
let mut xml = xml_header();
|
|
for loc in [
|
|
"/",
|
|
"/auth/login",
|
|
"/auth/register",
|
|
"/auth/password/reset",
|
|
"/auth/reset-password",
|
|
"/auth/verify-email",
|
|
"/about",
|
|
"/pricing",
|
|
"/pricing/enterprise",
|
|
"/pricing/faq",
|
|
"/skills",
|
|
"/skills/publish",
|
|
"/skills/docs",
|
|
"/solutions",
|
|
"/solutions/rooms",
|
|
"/solutions/memory",
|
|
"/solutions/governance",
|
|
"/network",
|
|
"/network/rooms",
|
|
"/network/api",
|
|
"/docs",
|
|
] {
|
|
xml.push_str(&url_entry(&format!("{base}{loc}"), 0.9, "daily", None));
|
|
}
|
|
xml.push_str("</urlset>");
|
|
xml
|
|
}
|
|
|
|
fn build_users_xml(base: &str, users: &[(String, String)]) -> String {
|
|
let mut xml = xml_header();
|
|
for (username, updated) in users {
|
|
xml.push_str(&url_entry(
|
|
&format!("{base}/user/{username}"),
|
|
0.6,
|
|
"weekly",
|
|
Some(updated),
|
|
));
|
|
}
|
|
xml.push_str("</urlset>");
|
|
xml
|
|
}
|
|
|
|
fn build_projects_xml(base: &str, projects: &[(String, String, String)]) -> String {
|
|
let mut xml = xml_header();
|
|
for (name, _, updated) in projects {
|
|
xml.push_str(&url_entry(
|
|
&format!("{base}/project/{name}"),
|
|
0.7,
|
|
"weekly",
|
|
Some(updated),
|
|
));
|
|
for sub in [
|
|
"/activity",
|
|
"/repositories",
|
|
"/issues",
|
|
"/members",
|
|
"/articles",
|
|
"/resources",
|
|
] {
|
|
xml.push_str(&url_entry(
|
|
&format!("{base}/project/{name}{sub}"),
|
|
0.6,
|
|
"weekly",
|
|
Some(updated),
|
|
));
|
|
}
|
|
}
|
|
xml.push_str("</urlset>");
|
|
xml
|
|
}
|
|
|
|
fn build_repos_xml(base: &str, repos: &[(String, String)]) -> String {
|
|
let mut xml = xml_header();
|
|
for (path, updated) in repos {
|
|
xml.push_str(&url_entry(
|
|
&format!("{base}/repository/{path}"),
|
|
0.7,
|
|
"daily",
|
|
Some(updated),
|
|
));
|
|
for sub in [
|
|
"/files",
|
|
"/commits",
|
|
"/branches",
|
|
"/tags",
|
|
"/contributors",
|
|
"/pull-requests",
|
|
] {
|
|
xml.push_str(&url_entry(
|
|
&format!("{base}/repository/{path}{sub}"),
|
|
0.6,
|
|
"daily",
|
|
Some(updated),
|
|
));
|
|
}
|
|
}
|
|
xml.push_str("</urlset>");
|
|
xml
|
|
}
|
|
|
|
/// Returns the XML declaration followed by the opening `<urlset>` tag, each
/// terminated by a newline. Callers append entries and the closing tag.
fn xml_header() -> String {
    concat!(
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n",
    )
    .to_owned()
}
|
|
|
|
/// Renders one `<url>` element.
///
/// * `loc` - absolute URL of the page. It is entity-escaped here, so callers
///   pass the raw URL. Percent-encoding is NOT applied by this function.
/// * `priority` - written as-is into `<priority>`.
/// * `changefreq` - written as-is into `<changefreq>`.
/// * `updated` - when `Some`, emitted as `<lastmod>` right after `<loc>`.
///
/// The URL is built from user-chosen names (usernames, project and repo
/// names); without escaping, a single `&` or `<` in a name would make the
/// whole sitemap document malformed.
fn url_entry(loc: &str, priority: f32, changefreq: &str, updated: Option<&str>) -> String {
    let loc = escape_xml_text(loc);
    let updated_xml = updated
        .map(|d| format!("\n <lastmod>{d}</lastmod>"))
        .unwrap_or_default();
    format!(
        r#" <url>
<loc>{loc}</loc>{updated_xml}
<changefreq>{changefreq}</changefreq>
<priority>{priority}</priority>
</url>
"#,
    )
}

/// Replaces the five XML-reserved characters (`&`, `<`, `>`, `"`, `'`) with
/// their predefined entities; every other character is copied unchanged.
fn escape_xml_text(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            other => out.push(other),
        }
    }
    out
}
|