1    | /***************************************
2    |   $Revision: 1.23 $
3    | 
4    |   which_keytypes:  Determine which keys to look for.
5    |   
6    |   This is based on the existing Perl code. 
7    | 
8    |   Authors: ottrey, marek
9    | 
10   |   ******************/ /******************
11   |   Copyright (c) 1999,2000,2001                    RIPE NCC
12   |  
13   |   All Rights Reserved
14   |   
15   |   Permission to use, copy, modify, and distribute this software and its
16   |   documentation for any purpose and without fee is hereby granted,
17   |   provided that the above copyright notice appear in all copies and that
18   |   both that copyright notice and this permission notice appear in
19   |   supporting documentation, and that the name of the author not be
20   |   used in advertising or publicity pertaining to distribution of the
21   |   software without specific, written prior permission.
22   |   
23   |   THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24   |   ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
25   |   AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
26   |   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
27   |   AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
28   |   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29   |   ***************************************/
30   | #include <stdio.h>
31   | #include <stdlib.h>
32   | #include <strings.h>
33   | #include <glib.h>
34   | #include <pthread.h>
35   | 
36   | #include "bitmask.h"
37   | #include "memwrap.h"
38   | 
39   | #define  WK_IMPL
40   | #include "which_keytypes.h"
41   | #include <regex.h>
42   | 
/*
 * Domain names are recognised in two steps.
 *
 * DOMAINNAME fixes the overall shape: optional blanks at either end, a first
 * label of letters, digits and hyphens (which may be empty), then any number
 * of dot-prefixed, non-empty labels.  On its own this also matches an empty
 * key.
 *
 * DOMAINALPHA is unanchored and only demands one alphabetic character
 * somewhere in the key.  It exists because the TLD must not be composed of
 * digits only.
 */
#define DOMAINALPHA     "[a-zA-Z]"
#define DOMAINNAME      "^[ ]*[a-zA-Z0-9-]*(\\.[a-zA-Z0-9-]+)*[ ]*$"

/*
 * Loose IPv6 address/prefix shape: upper-case hex digits and colons with at
 * least one colon; a '/' can only appear after that first colon.
 *
 * A stricter full-address form, not in use:
 *     "^[0-9A-F]{1,4}(:[0-9A-F]{1,4}){7}$"
 */
#define VALIDIP6PREFIX  "^[0-9A-F:]*:[0-9A-F:/]*$"
50   | 
/*
 * AS number: "AS" followed by one to ten digits with no leading zero
 * (wide enough for 32-bit AS numbers).
 */
#define ASNUM       "^AS[1-9][0-9]{0,9}$"

/*
 * AS range: one AS number, optionally followed by a hyphen and a second
 * AS number; blanks are allowed on both sides of the hyphen.  A lone AS
 * number therefore matches this pattern as well.
 */
#define ASRANGE     "^AS[1-9][0-9]{0,9}[ ]*([-][ ]*AS[1-9][0-9]{0,9}){0,1}$"

/*
 * Network names and maintainer names share one shape: an upper-case letter
 * followed by any number of upper-case letters, digits and hyphens.
 */
#define NETNAME     "^[A-Z][A-Z0-9-]*$"
#define MAINTAINER  "^[A-Z][A-Z0-9-]*$"

/* "LIM-" followed by at least one upper-case letter, digit or hyphen */
#define LIMERICK    "^LIM-[A-Z0-9-]+$"

/* "PGPKEY-" followed by exactly eight upper-case hexadecimal digits */
#define KEYCERT     "^PGPKEY-[0-9A-F]{8}$"
64   | 
/*
 * Route-set and AS-set names.
 *
 * Both patterns look for one colon-delimited component of the key: it must
 * start at the beginning of the key or right after a ':', carry the "RS-"
 * or "AS-" prefix, end in an upper-case letter or digit, and be followed by
 * a ':' or the end of the key.  The rest of the key is not constrained.
 *
 * History (shane): made less restrictive to be consistent with the other
 * set patterns, and to match what is actually being looked for.  The
 * earlier definitions anchored the whole key:
 *     ROUTESETNAME  "^RS-[A-Z0-9_:-]*$"
 *     ASSETNAME     "^AS-[A-Z0-9_:-]*$"
 */
#define ASSETNAME     "(^|:)AS-[A-Z0-9_-]*[A-Z0-9](:|$)"
#define ROUTESETNAME  "(^|:)RS-[A-Z0-9_-]*[A-Z0-9](:|$)"
74   | 
/* key begins with the literal text "AUTO-" */
#define AUTONICPREFIXREGULAR  "^AUTO-"

/*
 * IPv4-style patterns.  These only check the shape of the key; the numeric
 * value of each component is not range-checked.
 *
 *   IPADDRESS  any non-empty run of digits and dots
 *   IPPREFIX   digits and dots, a '/', then one or more digits
 *   IPRANGE    two dotted groups of one to four components (one to three
 *              digits each) joined by a hyphen, with optional blanks
 *              around the hyphen
 */
#define IPADDRESS             "^[0-9.]+$"
#define IPPREFIX              "^[0-9.]+/[0-9]+$"
#define IPRANGE               "^[0-9]{1,3}(\\.[0-9]{1,3}){0,3}[ ]*-[ ]*[0-9]{1,3}(\\.[0-9]{1,3}){0,3}$"
82   | 
/*
 * Peering-set, filter-set and router-set names.
 *
 * Each pattern looks for one colon-delimited component of the key that
 * carries the "PRNG-", "FLTR-" or "RTRS-" prefix and ends in an upper-case
 * letter or digit; the component must start at the beginning of the key or
 * after a ':' and be followed by a ':' or the end of the key.
 *
 * The earlier definitions were plain prefix tests:
 *     PEERINGSET  "^PRNG-"
 *     FILTERSET   "^FLTR-"
 *     RTRSET      "^RTRS-"
 */
#define PEERINGSET  "(^|:)PRNG-[A-Z0-9_-]*[A-Z0-9](:|$)"
#define FILTERSET   "(^|:)FLTR-[A-Z0-9_-]*[A-Z0-9](:|$)"
#define RTRSET      "(^|:)RTRS-[A-Z0-9_-]*[A-Z0-9](:|$)"
91   | 
/* NIC handle: a non-empty run of upper-case letters, digits and hyphens */
#define NICHANDLE  "^[A-Z0-9-]+$"

/*
 * Name component: a letter followed by any number of characters from the
 * bracketed set below.
 *
 * XXX (open question kept from the original author): this is meant to mirror
 * the Perl code, whose pattern was
 *     ^[a-zA-Z][\w\-\.\'\|\`]*$
 * but it is not clear where a " " is allowed for, nor whether \w was
 * expected to include [ ;:,?/}{()+*#].  The narrower definition that was
 * tried first:
 *     "^[a-zA-Z][a-zA-Z_0-9.'|`-]*$"
 */
#define NAME_B     "^[a-zA-Z][a-zA-Z_0-9.'|`;:,?/}{()+*#&-]*$"

/*
 * E-mail: only the part from '@' to the end of the key is examined, and it
 * must look like dot-separated labels of letters, digits and hyphens.  The
 * pattern is not anchored at the start, so whatever precedes the '@' is
 * accepted as is.
 */
#define EMAIL      "@[a-zA-Z0-9-]+(\\.[a-zA-Z0-9-]+)*$"
103  | 
104  | /* structure for simple keys, with a single regular expression to match */
105  | /* NOTE: the WK_NAME, WK_DOMAIN, and WK_HOSTNAME are not handled here   */
106  | struct {
107  |     int key_type;		/* identifier for key, e.g. WK_RTRSET */
108  |     char *pattern;		/* string for regular expression */
109  |     regex_t regex;		/* regular expression */
110  | } wk_regex_list[] = {
111  |     { WK_NIC_HDL,       NICHANDLE },
112  |     { WK_EMAIL,         EMAIL },
113  |     { WK_MNTNER,        MAINTAINER },
114  |     { WK_KEY_CERT,      KEYCERT },
115  |     { WK_IPRANGE,       IPRANGE },
116  |     { WK_IPADDRESS,     IPADDRESS },
117  |     { WK_IPPREFIX,      IPPREFIX },
118  |     { WK_IP6PREFIX,     VALIDIP6PREFIX },
119  |     { WK_NETNAME,       NETNAME },
120  |     { WK_NET6NAME,      NETNAME },
121  |     { WK_AUTNUM,        ASNUM },
122  |     { WK_ASSETNAME,     ASSETNAME },
123  |     { WK_ROUTESETNAME,  ROUTESETNAME },
124  |     { WK_LIMERICK,      LIMERICK },
125  |     { WK_ASRANGE,       ASRANGE },
126  |     { WK_PEERINGSET,    PEERINGSET },
127  |     { WK_FILTERSET,     FILTERSET },
128  |     { WK_RTRSET,        RTRSET }
129  | };
130  | #define WK_REGEX_LIST_LEN  (sizeof(wk_regex_list)/sizeof(wk_regex_list[0]))
131  | 
/*
 * Compiled patterns kept outside wk_regex_list.  All of them are compiled
 * by wk_regex_init().
 */

/* address shapes: tested by wk_is_name(); ipaddress is also tested by
   wk_is_hostname() */
static regex_t ipaddress, ipprefix, validip6prefix;

/* domain-name shapes: both tested by wk_is_domain() */
static regex_t domainname, domainalpha;
140  | 
141  | /* initialize regular expressions */
142  | static void 
143  | wk_regex_init ()
144  | {
145  |     int i;
146  |     int errcode;
147  | 
148  |     /* initialize our table */
149  |     for (i=0; i<WK_REGEX_LIST_LEN; i++) {
150  |         errcode = regcomp(&wk_regex_list[i].regex, 
151  | 	                  wk_regex_list[i].pattern, 
152  | 		          REG_EXTENDED|REG_NOSUB);
153  |         dieif(errcode != 0);
154  |     }
155  | 
156  |     /* add some special cases used by our other functions */
157  |     errcode = regcomp(&ipaddress, IPADDRESS, REG_EXTENDED|REG_NOSUB);
158  |     dieif(errcode != 0);
159  |     errcode = regcomp(&ipprefix, IPPREFIX, REG_EXTENDED|REG_NOSUB);
160  |     dieif(errcode != 0);
161  |     errcode = regcomp(&validip6prefix, VALIDIP6PREFIX, REG_EXTENDED|REG_NOSUB);
162  |     dieif(errcode != 0);
163  |     errcode = regcomp(&domainname, DOMAINNAME, REG_EXTENDED|REG_NOSUB);
164  |     dieif(errcode != 0);
165  |     errcode = regcomp(&domainalpha, DOMAINALPHA, REG_EXTENDED|REG_NOSUB);
166  |     dieif(errcode != 0);
167  | }
168  | 
169  | 
170  | /* see if the key looks like it could be a name */
171  | static unsigned int 
172  | wk_is_name (char *key) 
173  | {
174  |     /* if it's an address, it cannot be a name */
175  |     if (regexec(&ipaddress, key, 0, NULL, 0) == 0) { 
176  |         return 0;
177  |     }
178  |     if (regexec(&ipprefix, key, 0, NULL, 0) == 0) { 
179  |         return 0;
180  |     }
181  |     if (regexec(&validip6prefix, key, 0, NULL, 0) == 0) { 
182  |         return 0;
183  |     }
184  | 
185  |     /* Everything apart from addresses matches to name */
186  |     return 1;
187  | } /* wk_is_name() */
188  | 
189  | /* check for domain name */
190  | static unsigned int 
191  | wk_is_domain (char *key) 
192  | {
193  |     /* if it matches the general domain name search, and contains an */
194  |     /* alphabetic character, consider it a possible domain name */
195  |     if (regexec(&domainname, key, 0, NULL, 0) == 0) {
196  |         if (regexec(&domainalpha, key, 0, NULL, 0) == 0) {
197  | 	    return 1;
198  | 	}
199  |     }
200  |     return 0;
201  | } 
202  | 
203  | /* check for a host name (could be a domain, or an IP) */
204  | static unsigned int 
205  | wk_is_hostname (char *key) 
206  | {
207  |     /* Fix - should check for IPADDRESS, not IPRANGE.  - Shane */
208  |     return (wk_is_domain(key) || (regexec(&ipaddress, key, 0, NULL, 0) == 0));
209  | } /* wk_is_hostname() */
210  | 
211  | /* WK_to_string() */
212  | /*++++++++++++++++++++++++++++++++++++++
213  |   Convert the which keytypes bitmap into a string.
214  | 
215  |   mask_t wk The which keytypes mask to be converted.
216  | 
217  |   More:
218  |   +html+ <PRE>
219  |   Authors:
220  |         ottrey
221  |   +html+ </PRE><DL COMPACT>
222  |   +html+ <DT>Online References:
223  |   +html+ <DD><UL>
224  |   +html+ </UL></DL>
225  | 
226  |   ++++++++++++++++++++++++++++++++++++++*/
227  | char *
228  | WK_to_string (mask_t wk) 
229  | {
230  | 
231  |   return MA_to_string(wk, Keytypes);
232  | 
233  | } /* WK_to_string() */
234  | 
235  | /* WK_new() */
236  | /*++++++++++++++++++++++++++++++++++++++
237  |   Create a new which keytypes bitmap.
238  | 
239  |   This checks the string to see which keys it looks like.  This helps 
240  |   us decide what SQL tables (or radix trees) we need to query for a
241  |   match.
242  | 
243  |   char *key The key to be examined.
244  | 
245  |   More:
246  |   +html+ <PRE>
247  |   Authors:
248  |         ottrey
249  | 	shane
250  |   +html+ </PRE><DL COMPACT>
251  |   +html+ <DT>Online References:
252  |   +html+ <DD><UL>
253  |   +html+ </UL></DL>
254  | 
255  |   ++++++++++++++++++++++++++++++++++++++*/
256  | mask_t 
257  | WK_new (char *key) 
258  | {
259  |   static pthread_once_t once_control = { PTHREAD_ONCE_INIT };
260  | 
261  |   mask_t wk; 
262  |   int i;
263  | 
264  |   /* initialize our regular expressions on the first call */
265  |   pthread_once(&once_control, wk_regex_init);
266  | 
267  |   /* empty bitmask */
268  |   wk = MA_new(MA_END);
269  | 
270  |   /* search regular expressions in the list */
271  |   for (i=0; i<WK_REGEX_LIST_LEN; i++) {
272  |       if (regexec(&wk_regex_list[i].regex, key, 0, NULL, 0) == 0) { 
273  |           MA_set(&wk, wk_regex_list[i].key_type, 1);
274  |       }
275  |   }
276  | 
277  |   /* check our more complicated key patterns */
278  |   MA_set(&wk, WK_NAME,         wk_is_name(key));
279  |   MA_set(&wk, WK_DOMAIN,       wk_is_domain(key));
280  |   MA_set(&wk, WK_HOSTNAME,     wk_is_hostname(key));
281  |   
282  |   /* return resulting bitmask */
283  |   return wk;
284  | 
285  | } /* WK_new() */