1 : #ifndef EPT_DEBTAGS_VOCABULARY_H
2 : #define EPT_DEBTAGS_VOCABULARY_H
3 :
4 : /** @file
5 : * @author Enrico Zini <enrico@enricozini.org>
6 : * Debtags vocabulary access
7 : */
8 :
9 : /*
10 : * Copyright (C) 2003,2004,2005,2006,2007 Enrico Zini <enrico@debian.org>
11 : *
12 : * This program is free software; you can redistribute it and/or modify
13 : * it under the terms of the GNU General Public License as published by
14 : * the Free Software Foundation; either version 2 of the License, or
15 : * (at your option) any later version.
16 : *
17 : * This program is distributed in the hope that it will be useful,
18 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 : * GNU General Public License for more details.
21 : *
22 : * You should have received a copy of the GNU General Public License
23 : * along with this program; if not, write to the Free Software
24 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 : */
26 :
27 : #include <ept/debtags/tag.h>
28 : #include <tagcoll/diskindex/mmap.h>
29 :
30 : #include <string>
31 : #include <vector>
32 : #include <map>
33 :
34 : namespace ept {
35 : namespace debtags {
36 :
37 : class Vocabulary
38 : {
39 : public:
40 : class FacetIndex : public tagcoll::diskindex::MMap // Accessor for the facet index data held in a tagcoll::diskindex::MMap
41 : {
42 : protected:
43 : // Layout of one facet record in the index; records are addressed by casting into the mapped buffer (see item())
44 : struct Item {
45 : int offset; // returned by offset(id): offset of the facet data in the vocabulary
46 : int size; // returned by size(id): size of the facet data in the vocabulary
47 : int firsttag; // returned by firsttag(id): id of the first tag for this facet
48 : int lasttag; // returned by lasttag(id): id of the last tag for this facet
49 : const char name[]; // flexible array member holding the facet name; presumably NUL-terminated -- TODO confirm
50 : };
51 2393 : inline Item* item(int id) const // Record for facet `id', or NULL when `id' is negative or >= size()
52 : {
53 2393 : if (id >= 0 && (unsigned)id < size())
54 2392 : return (Item*)(m_buf + ((int*)m_buf)[id]); // the buffer starts with a table of ints; entry `id' is the record's offset from m_buf
55 1 : return NULL;
56 : }
57 :
58 : public:
59 31 : FacetIndex() : tagcoll::diskindex::MMap() {} // default-constructs the MMap base
60 : FacetIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx) // forwards `master' and `idx' to the MMap base
61 : : tagcoll::diskindex::MMap(master, idx) {}
62 :
63 : /// Get the number of facets in the index: 0 when m_size is 0, otherwise the first int of the buffer divided by sizeof(int) (presumably the first table entry equals the table's byte length -- TODO confirm)
64 2501 : size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); }
65 : /// Get the offset of the facet data in the vocabulary for this facet (0 if the id is out of range)
66 59 : size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
67 : /// Get the size of the facet data in the vocabulary for this facet (0 if the id is out of range)
68 59 : size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
69 : /// Get the id of the first tag for this facet (-1 if the id is out of range)
70 91 : int firsttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->firsttag; }
71 : /// Get the id of the last tag for this facet (-1 if the id is out of range)
72 2043 : int lasttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->lasttag; }
73 : /// Get the name of this facet (empty string if the id is out of range)
74 88 : const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
75 : /// Get the ID of the facet with this name (defined out of line; Vocabulary::hasFacet treats -1 as "not found")
76 : int id(const char* name) const;
77 7 : int id(const std::string& name) const { return id(name.c_str()); } // std::string convenience overload of id(const char*)
78 : };
79 :
80 : class TagIndex : public tagcoll::diskindex::MMap // Accessor for the tag index data held in a tagcoll::diskindex::MMap
81 : {
82 : protected:
83 : // Layout of one tag record in the index; records are addressed by casting into the mapped buffer (see item())
84 : struct Item {
85 : int offset; // returned by offset(id)
86 : int size; // returned by size(id)
87 : int facet; // returned by facet(id): id of the facet for this tag
88 : const char name[]; // flexible array member holding the tag name; presumably NUL-terminated -- TODO confirm
89 : };
90 1112004 : inline Item* item(int id) const // Record for tag `id', or NULL when `id' is negative or >= size()
91 : {
92 1112004 : if (id >= 0 && (unsigned)id < size())
93 1112004 : return (Item*)(m_buf + ((int*)m_buf)[id]); // the buffer starts with a table of ints; entry `id' is the record's offset from m_buf
94 0 : return NULL;
95 : }
96 :
97 : public:
98 31 : TagIndex() : tagcoll::diskindex::MMap() {} // default-constructs the MMap base
99 : TagIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx) // forwards `master' and `idx' to the MMap base
100 : : tagcoll::diskindex::MMap(master, idx) {}
101 :
102 : /// Get the number of tags in the index: 0 when m_size is 0, otherwise the first int of the buffer divided by sizeof(int) (presumably the first table entry equals the table's byte length -- TODO confirm)
103 1327864 : size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); }
104 : /// Get the offset of this tag's data in the vocabulary (0 if the id is out of range)
105 1242 : size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
106 : /// Get the size of this tag's data in the vocabulary (0 if the id is out of range)
107 1242 : size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
108 : /// Get the id of the facet for this tag (-1 if the id is out of range)
109 0 : int facet(int id) const { Item* i = item(id); return i == NULL ? -1 : i->facet; }
110 : /// Get the name of this tag (empty string if the id is out of range)
111 3767 : const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
112 : /// Get the ID of the tag with this name (defined out of line; Vocabulary::hasTag treats -1 as "not found")
113 : int id(const char* name) const;
114 105755 : int id(const std::string& name) const { return id(name.c_str()); } // std::string convenience overload of id(const char*)
115 : };
116 :
117 : protected:
118 : // Master MMap index container
119 : tagcoll::diskindex::MasterMMap mastermmap;
120 :
121 : time_t m_timestamp; // reported by timestamp(); hasData() treats 0 as "no data"
122 :
123 : // Mmapped vocabulary file
124 : std::string voc_fname; // presumably the path of the vocabulary file -- TODO confirm
125 : int voc_fd; // presumably the descriptor of the open vocabulary file -- TODO confirm
126 : size_t voc_size; // presumably the size in bytes of voc_buf -- TODO confirm
127 : const char* voc_buf; // presumably the start of the mapped file contents -- TODO confirm
128 :
129 : // Facet and tag indexes
130 : FacetIndex findex; // exposed read-only through facetIndex()
131 : TagIndex tindex; // exposed read-only through tagIndex()
132 :
133 : // Cached parsed facet and tag records (mutable; presumably filled on demand by the const facetData()/tagData() accessors -- TODO confirm)
134 : mutable std::vector< std::map<std::string, std::string> > m_facetData;
135 : mutable std::vector< std::map<std::string, std::string> > m_tagData;
136 : // Empty parsed data to return when data is asked for IDs == -1
137 : std::map<std::string, std::string> emptyData;
138 :
139 : void parseVocBuf(std::map<std::string, std::string>& res, size_t ofs, size_t len) const; // presumably parses `len' bytes at offset `ofs' of the vocabulary buffer into `res' -- TODO confirm against the implementation
140 :
141 : public:
142 : Vocabulary(); // defined out of line
143 : ~Vocabulary(); // defined out of line. NOTE(review): a destructor is declared and the class holds voc_fd/voc_buf, but no copy constructor or assignment operator is declared here -- confirm instances are never copied
144 :
145 : /// Get the timestamp of when the index was last updated (returns m_timestamp)
146 : time_t timestamp() const { return m_timestamp; }
147 :
148 : /// Return true if this data source has data, false if it's empty (i.e. whether m_timestamp is non-zero)
149 1 : bool hasData() const { return m_timestamp != 0; }
150 :
151 : const FacetIndex& facetIndex() const { return findex; } // Read-only access to the facet index
152 : const TagIndex& tagIndex() const { return tindex; } // Read-only access to the tag index
153 :
154 : /**
155 : * Check if the vocabulary contains the facet `name' (true when findex.id() does not return -1)
156 : */
157 2 : bool hasFacet(const std::string& name) const
158 : {
159 2 : return findex.id(name.c_str()) != -1;
160 : }
161 :
162 : /**
163 : * Check if the vocabulary contains the tag `fullname' (true when tindex.id() does not return -1)
164 : */
165 622 : bool hasTag(const std::string& fullname) const
166 : {
167 622 : return tindex.id(fullname.c_str()) != -1;
168 : }
169 :
170 : /**
171 : * Return the facet with the given ID
172 : */
173 : Facet facetByID(int id) const;
174 :
175 : /**
176 : * Return the tag with the given ID
177 : */
178 : Tag tagByID(int id) const;
179 :
180 : template<typename IDS> // IDS must provide const_iterator, begin() and end(); each element is passed to tagByID(int)
181 84590 : std::set<Tag> tagsByID(const IDS& ids) const // Return the set of tags obtained by calling tagByID() on every element of `ids'
182 : {
183 84590 : std::set<Tag> res; // NOTE(review): <set> is not included directly by this header; presumably it comes in through ept/debtags/tag.h -- confirm
184 507017 : for (typename IDS::const_iterator i = ids.begin();
185 : i != ids.end(); ++i)
186 422427 : res.insert(tagByID(*i));
187 0 : return res;
188 : }
189 :
190 : /**
191 : * Return the facet for the tag with the given ID (tindex.facet() yields -1 for an out-of-range tag id, which is then passed to facetByID())
192 : */
193 0 : Facet facetByTag(int id) const { return facetByID(tindex.facet(id)); }
194 :
195 : /**
196 : * Return the facet with the given name (looks the name up with findex.id() and passes the result to facetByID())
197 : */
198 4 : Facet facetByName(const std::string& name) const { return facetByID(findex.id(name)); }
199 :
200 : /**
201 : * Return the tag with the given full name (looks the name up with tindex.id() and passes the result to tagByID())
202 : */
203 105755 : Tag tagByName(const std::string& fullname) const { return tagByID(tindex.id(fullname)); }
204 :
205 : /**
206 : * Return all the facets in the vocabulary, built by calling facetByID() for every id in [0, findex.size())
207 : */
208 4 : std::set< Facet > facets() const
209 : {
210 4 : std::set< Facet > res;
211 91 : for (size_t i = 0; i < findex.size(); i++)
212 87 : res.insert(facetByID(i)); // the size_t index is converted to the int id that facetByID() takes
213 0 : return res;
214 : }
215 :
216 : /**
217 : * Return all the tags in the vocabulary, built by calling tagByID() for every id in [0, tindex.size())
218 : */
219 6 : std::set< Tag > tags() const
220 : {
221 6 : std::set< Tag > res;
222 3106 : for (size_t i = 0; i < tindex.size(); i++)
223 3100 : res.insert(tagByID(i)); // the size_t index is converted to the int id that tagByID() takes
224 0 : return res;
225 : }
226 :
227 : /**
228 : * Return the tags in the given facet: the tag ids from findex.firsttag(facet) to findex.lasttag(facet), inclusive
229 : */
230 91 : std::set< Tag > tags(int facet) const
231 : {
232 91 : std::set< Tag > res;
233 2044 : for (int i = findex.firsttag(facet); i != -1 && i <= findex.lasttag(facet); i++) // firsttag() returns -1 for an out-of-range facet id, so the loop is skipped and the result is empty
234 1953 : res.insert(tagByID(i));
235 0 : return res;
236 : }
237 :
238 3 : std::set< Tag > tags(const std::string& facetName) const // Return the tags in the facet with the given name
239 : {
240 3 : return tags(findex.id(facetName)); // an id of -1 makes tags(int) return an empty set
241 : }
242 :
243 : std::set< Tag > tags(const Facet& facet) const // Return the tags in the given facet, identified by facet.id()
244 : {
245 : return tags(facet.id());
246 : }
247 :
248 : #if 0 // compiled out: `equations' and `getFiltered' are not declared in the class as shown here
249 : /// Get the DerivedTagList with the Equates: expressions read from the vocabulary
250 : const DerivedTagList& getEquations() const throw () { return equations; }
251 :
252 : /// Get a set with all the facets present in the vocabulary that are matched by `filter'
253 : FacetSet facets(const FacetMatcher& filter) const throw () { return getFiltered(filter); }
254 : #endif
255 :
256 : #if 0 // compiled out: `m_facets' and `m_tags' are not declared in the class as shown here
257 : // These functions are here just to be used by Facet and Tag. I'm not
258 : // making them private because I don't want Facet and Tag to access other
259 : // Vocabulary members, and other classes can't use these anyway as Facet::Data and
260 : // Tag::Data are protected
261 : const Facet::Data& facetData(int idx) { return m_facets[idx]; }
262 : const Tag::Data& tagData(int idx) { return m_tags[idx]; }
263 : #endif
264 :
265 : /// Get the facet name given the facet id (empty string if the id is out of range)
266 88 : std::string facetName(int id) const { return findex.name(id); }
267 :
268 : /// Get the tag name given the tag id (empty string if the id is out of range)
269 1904 : std::string tagName(int id) const { return tindex.name(id); }
270 :
271 : /// Get the short tag name given the tag id (presumably the tag name without its facet prefix -- TODO confirm against the implementation)
272 : std::string tagShortName(int id) const;
273 :
274 : const std::map<std::string, std::string>& facetData(int id) const; // Parsed record for the facet with this id; presumably returns emptyData for id -1 and caches results in m_facetData -- TODO confirm
275 : const std::map<std::string, std::string>& tagData(int id) const; // Parsed record for the tag with this id; presumably returns emptyData for id -1 and caches results in m_tagData -- TODO confirm
276 : };
277 :
278 : }
279 : }
280 :
281 : // vim:set ts=4 sw=4:
282 : #endif
|