1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
From 75920321062d682437f3fb0319dad227d8b18f6c Mon Sep 17 00:00:00 2001
From: Gabriele Musco <gabmus@disroot.org>
Date: Sat, 25 Mar 2023 14:13:44 +0100
Subject: [PATCH] add author extraction for feed item
---
src/feed_item.cpp | 12 +++++++++---
src/feed_item.hpp | 14 ++++++++++++++
src/pybind.cpp | 4 ++++
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/src/feed_item.cpp b/src/feed_item.cpp
index a08cd71..e0dbb8c 100644
--- a/src/feed_item.cpp
+++ b/src/feed_item.cpp
@@ -1,5 +1,5 @@
-#include "feed_item.hpp"
-#include "utils.hpp"
+#include "./feed_item.hpp"
+#include "./utils.hpp"
std::string FeedItem::extract_url() {
std::string res = item_node.child("link").text().as_string();
@@ -75,6 +75,10 @@ void FeedItem::parse() {
// pub_date
pub_date = SynDomUtils::extract_from_node(item_node, __PUB_DATE_PARAMS);
+ // author
+ author_name = SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS);
+ author_url = SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS);
+
// img_url
img_url = extract_img_url();
fix_url(img_url);
@@ -87,6 +91,8 @@ std::string FeedItem::to_json() {
" \"url\": \"" + url + "\",\n"
" \"media_url\": \"" + media_url + "\",\n"
" \"pub_date\": \"" + pub_date + "\",\n"
- " \"img_url\": \"" + img_url + "\"\n"
+ " \"img_url\": \"" + img_url + "\",\n"
+ " \"author_name\": \"" + author_name + "\",\n"
+ " \"author_url\": \"" + author_url + "\"\n"
" }";
}
diff --git a/src/feed_item.hpp b/src/feed_item.hpp
index 5d7105e..fd1259f 100644
--- a/src/feed_item.hpp
+++ b/src/feed_item.hpp
@@ -28,6 +28,8 @@ private:
std::string media_url;
std::string pub_date;
std::string img_url;
+ std::string author_name; // Assigned in FeedItem::parse() from SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS).
+ std::string author_url; // Assigned in FeedItem::parse() from SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS). NOTE(review): the visible parse() hunk does not run fix_url() on it as it does for img_url - confirm that is intended.
/**
* Tries to extract the item url and returns it.
@@ -73,6 +75,16 @@ private:
{ExtractionParam::ParamType::CHILD, {"date"}},
{ExtractionParam::ParamType::CHILD, {"dc:date"}}
};
+ static inline const std::vector<ExtractionParam> __AUTHOR_NAME_PARAMS{ // Lookup candidates that FeedItem::parse() hands to SynDomUtils::extract_from_node() to fill author_name.
+ {ExtractionParam::ParamType::CHILD, {"author", "name"}}, // presumably the nested <author><name> form - TODO confirm how multi-element paths are resolved
+ {ExtractionParam::ParamType::CHILD, {"author"}}, // bare "author" child
+ {ExtractionParam::ParamType::CHILD, {"dc:creator"}}, // "dc:"-prefixed creator tag (presumably Dublin Core)
+ {ExtractionParam::ParamType::CHILD, {"creator"}}, // same tag name without a prefix
+ {ExtractionParam::ParamType::CHILD, {"itunes:author"}}, // "itunes:"-prefixed author tag (presumably podcast feeds)
+ }; // NOTE(review): listing order presumably sets lookup priority - verify in extract_from_node(). Identifiers containing "__" are reserved in C++; the name is kept to match __PUB_DATE_PARAMS.
+ static inline const std::vector<ExtractionParam> __AUTHOR_URL_PARAMS{ // Single lookup candidate that FeedItem::parse() hands to SynDomUtils::extract_from_node() to fill author_url.
+ {ExtractionParam::ParamType::CHILD, {"author", "uri"}} // presumably the nested <author><uri> form - TODO confirm how multi-element paths are resolved
+ }; // NOTE(review): identifiers containing "__" are reserved in C++; the name is kept to match __PUB_DATE_PARAMS.
/**
* Entry point of the class, parses all the relevant content. Called by
* the constructor.
@@ -101,6 +113,8 @@ public:
std::string get_media_url() { return media_url; }
std::string get_pub_date() { return pub_date; }
std::string get_img_url() { return img_url; }
+ std::string get_author_name() { return author_name; } // Returns the author_name member by value (a copy); non-const, like the neighbouring getters.
+ std::string get_author_url() { return author_url; } // Returns the author_url member by value (a copy); non-const, like the neighbouring getters.
/**
* Represents the FeedItem object (itself) as a json, returned as a string.
diff --git a/src/pybind.cpp b/src/pybind.cpp
index bef72f9..1d5a58d 100644
--- a/src/pybind.cpp
+++ b/src/pybind.cpp
@@ -19,11 +19,15 @@ PYBIND11_MODULE(syndom, m) {
.def_property_readonly("media_url", &FeedItem::get_media_url)
.def_property_readonly("pub_date", &FeedItem::get_pub_date)
.def_property_readonly("img_url", &FeedItem::get_img_url)
+ .def_property_readonly("author_name", &FeedItem::get_author_name)
+ .def_property_readonly("author_url", &FeedItem::get_author_url)
.def("get_title", &FeedItem::get_title)
.def("get_content", &FeedItem::get_content)
.def("get_url", &FeedItem::get_url)
.def("get_media_url", &FeedItem::get_media_url)
.def("get_pub_date", &FeedItem::get_pub_date)
+ .def("get_author_name", &FeedItem::get_author_name)
+ .def("get_author_url", &FeedItem::get_author_url)
.def("get_img_url", &FeedItem::get_img_url);
py::class_<Feed>(m, "Feed")
.def(py::init<std::string>())
--
GitLab
|