hvn-network

My Blog List

Sunday, September 11, 2016

[C programming] Structure

We cover below parts.

- Basics of structures
- Structures and functions
- Arrays of structures
- Pointers to structures
- Self-referential structures
- Table lookup
- Typedef
- Unions
- Bit-fields

1. Basics of structures 
Define structure assignment - structures may be copied and assigned to, passed to functions, and returned by functions.

A struct declaration defines a type. The right brace that terminates the list of members may be followed by a list of variables, just as for any basic type. That is,
struct { ... } x, y, z;
is syntactically analogous to

int x, y, z;

Structures can be nested.

struct point {
 int x;
 int y;
};

struct rect {
 struct point pt1;
 struct point pt2;
};
The rect structure contains two point structures. If we declare screen as
struct rect screen;
then
screen.pt1.x
refers to the x coordinate of the pt1 member of screen.
2. Structures and functions

The only legal operations on a structure are copying it or assigning to it as a unit, taking its address with &, and accessing its members.


/* makepoint: make a point from x and y components */
struct point makepoint(int x, int y)
{
 struct point temp;
    temp.x = x;
    temp.y = y;
    return temp;
}

struct rect screen;
struct point middle;
struct point makepoint(int, int);
screen.pt1 = makepoint(0,0);
screen.pt2 = makepoint(XMAX, YMAX);
middle = makepoint((screen.pt1.x + screen.pt2.x)/2,

(screen.pt1.y + screen.pt2.y)/2);

If a large structure is to be passed to a function, it is generally more efficient to pass a pointer than to copy the whole structure.

struct point *pp;
struct point origin, *pp;
pp = &origin;

printf("origin is (%d,%d)\n", (*pp).x, (*pp).y);


If using *pp.x means *(pp.x), which is illegal here because x is not a pointer.

Pointers to structures are so frequently used that an alternative notation is provided as a shorthand. If p is a pointer to a structure, then
 
p->member-of-structure
refers to the particular member. So we could write instead
printf("origin is (%d,%d)\n", pp->x, pp->y);


Both . and -> associate from left to right, so if we have

 
struct rect r, *rp = &r;
then these four expressions are equivalent:
r.pt1.x
rp->pt1.x
(r.pt1).x
(rp->pt1).x

Given the declaration
struct {
     int len;
     char *str;
} *p;
then
++p->len

increments len, not p, because the implied parenthesization is ++(p->len). Parentheses can be used to alter binding: (++p)->len increments p before accessing len, and (p++)->len increments p afterward. (This last set of parentheses is unnecessary.)

In the same way, *p->str fetches whatever str points to; *p->str++ increments str after accessing whatever it points to (just like *s++); (*p->str)++ increments whatever str points to; and *p++->str increments p after accessing whatever str points to.

3. Arrays of structure 
Consider writing a program to count the occurrences of each C keyword. We need an array of character strings to hold the names, and an array of integers for the counts. One possibility is to use two parallel arrays, keyword and keycount, as in


 
char *keyword[NKEYS];
int keycount[NKEYS];
But the very fact that the arrays are parallel suggests a different organization, an array of structures. Each keyword is a pair:

 
char *word;
int cout;
and there is an array of pairs. The structure declaration


 
struct key {
      char *word;
      int count;
} keytab[NKEYS];

declares a structure type key, defines an array keytab of structures of this type, and sets aside storage for them. Each element of the array is a structure. This could also be written

 
struct key {
     char *word;
     int count;
};
struct key keytab[NKEYS];

Check how to implement this example.
 
#define MAXWORD 100

/* All keywords, see A2.4, p192 of the book. */

struct key {
 char *word;
 int count;
} keytab[] = {
 "auto", 0,
 "break", 0,
 "case", 0,
 "char", 0, 
 "const", 0,
 "continue", 0,
 "default", 0,
 "do", 0,
 "double", 0,
 "else", 0,
 "enum", 0,
 "extern", 0,
 "float", 0,
 "for", 0,
 "goto", 0,
 "if", 0,
 "int", 0,
 "long", 0,
 "register", 0,
 "return", 0, 
 "short", 0,
 "signed", 0,
 "sizeof", 0, 
 "static", 0, 
 "struct", 0,
 "switch", 0,
 "typedef", 0, 
 "union", 0, 
 "unsigned", 0, 
 "void", 0, 
 "volatile", 0, 
 "while", 0,
};

#define NKEYS (sizeof keytab / sizeof keytab[0])

int binsearch (char *word, struct key tab[], int n);
int  getword (char *word, int lim);

int main ()
{
 int n;
 char word[MAXWORD];

 while (getword(word, MAXWORD) != EOF)
 {
  printf("GET:%s\n", word);
  if ((n = binsearch(word, keytab, NKEYS)) >= 0)
   keytab[n].count++;
    }
 for (n = 0; n < NKEYS; n++)
  if (keytab[n].count > 0)
   printf("%4d %s\n", keytab[n].count, keytab[n].word);
  
 return 0;
}

int binsearch (char *word, struct key tab[], int n)
{
 int cond;
 int low, high, mid;

 low = 0;
 high = n - 1;
 while (low <= high) {
  mid = (low + high) / 2;
  if ((cond = strcmp(word, tab[mid].word)) < 0)
   high = mid - 1;
  else if (cond > 0)
   low = mid + 1;
  else
   return mid;
 }

 return -1;
}

int  getch (void);
void ungetch (int c);

/* getword() now only returns words starting with a letter and followed by 
 * letters, digits, or underscores.  Everything else is skipped and ignored.  
 * It returns EOF if an error is detected or if the end of file is reached. 
 */

int getword (char *word, int lim)
{
 int c, d;
 char *w = word;

    while (isspace(c = getch()))
 ;
 
 if (c != EOF)
  *w++ = c;
  if (!isalpha(c)) {
  *w = '\0';
  return c;
 }
 
 for ( ; --lim > 0; w++)
 if (!isalnum(*w = getch())) {
 ungetch(*w);
 break;
 }
 *w = '\0';
 return word[0]; 
}

#define BUFSIZE 100
char    buf[BUFSIZE];
int     bufp = 0;

int getch (void)
{
        return bufp > 0 ? buf[--bufp] : getchar();
}

void ungetch (int c)
{
        if (bufp >= BUFSIZE)
            printf("Error: Too many characters!\n");
        else
            buf[bufp++] = c;
}
4. Pointers to structures
To illustrate some of the considerations involved with pointers to and arrays of structures, let us write the keyword-counting program again, this time using pointers instead of array indices.

 
struct key *binsearch(char *, struct key *, int);

/* count C keywords; pointer version */
main()
{
 char word[MAXWORD];
 struct key *p;
 while (getword(word, MAXWORD) != EOF)
  if (isalpha(word[0]))
  if ((p=binsearch(word, keytab, NKEYS)) != NULL)
  p->count++;

 for (p = keytab; p < keytab + NKEYS; p++)
 if (p->count > 0)
 printf("%4d %s\n", p->count, p->word);

 return 0;
}
/* binsearch: find word in tab[0]...tab[n-1] */
struct key *binsearch(char *word, struct key *tab, int n)
{
 int cond;
 struct key *low = &tab[0];
 struct key *high = &tab[n];
 struct key *mid;
 
 while (low < high) {
 mid = low + (high-low) / 2;
  if ((cond = strcmp(word, mid->word)) < 0)
   high = mid;
   else if (cond > 0)
   low = mid + 1;
   else
   return mid;
 }
 return NULL;
}

the elements of keytab are now accessed by pointers.

The initializers for low and high are now pointers to the beginning and just past the end of the table.
The computation of the middle element can no longer be simply
mid = (low+high) / 2 /* WRONG */
because the addition of pointers is illegal. Subtraction is legal, however, so high-low is the number of elements, and thus
mid = low + (high-low) / 2
sets mid to the element halfway between low and high.
The most important change is to adjust the algorithm to make sure that it does not generate an illegal pointer or attempt to access an element outside the array. The problem is that &tab[-1] and &tab[n] are both outside the limits of the array tab. The former is strictly illegal, and it is illegal to dereference the latter. The language definition does guarantee, however, that pointer arithmetic that involves the first element beyond the end of an array (that is, &tab[n]) will work correctly.


Don't assume, however, that the size of a structure is the sum of the sizes of its members. Because of alignment requirements for different objects, there may be unnamed ``holes'' in a structure. Thus, for instance, if a char is one byte and an int four bytes, the structure
struct {
     char c;
     int i;
};
might well require eight bytes, not five.

5. Self-referential structures
It is illegal for a structure to contain an instance of itself, but is possible to make a pointer to the structure.
Below example uses a data structure called binary tree to count the occurrences of all the words in some input. Since the list of words isn't known in advance, we can't conveniently sort it and use a binary search.


#define MAXWORD   100

// binary tree 
struct tnode {  
 char *word; // point to text
 int count; // number of occurences
 struct tnode *left; // left child
 struct tnode *right; // right child
};

struct tnode *addtree (struct tnode *p, char *w);
void  treeprint (struct tnode *p);
int   getword (char *word, int lim);

int main (int argc, char *argv[])

{
 struct tnode *root;
 char  word[MAXWORD];

 /* Keywords are not inserted in the binary tree. */

 root = NULL;
 while (getword(word, MAXWORD) != EOF)
  if (isalpha(word[0]))
  root = addtree(root, word);

 treeprint(root);

 return 0;
}

struct tnode *talloc (void);
char  *_strdup (char *s);

struct tnode *addtree (struct tnode *p, char *w)
{
 int cond;

 if (p == NULL) {

  p = talloc();
  p->word = _strdup(w);
  p->count = 1;
  p->left = p->right = NULL;

 } else if ((cond = strcmp(w, p->word)) == 0)
  p->count++; // repeated word
 else if (cond <0 p-="">left = addtree(p->left, w);
 else 
  p->right = addtree(p->right, w);

 return p;
}

struct tnode *talloc (void)
{
 return (struct tnode *) malloc(sizeof(struct tnode));
}

char *_strdup (char *s)
{
 char *p;

 if ((p = (char *) malloc(strlen(s) + 1)) != NULL)
  strcpy(p, s);

 return p;
}

void treeprint (struct tnode *p)
{
 if (p != NULL) {
  treeprint(p->left);
  printf("%d %s\n",p->count, p->word);
  treeprint(p->right);
 }
}

int getch (void);
void ungetch (int c);

/* See exercise 6-1 for a description of the updated getword() function. */

int getword (char *word, int lim)
{
    int c, d;
 char *w = word;

    while (isspace(c = getch()))
 ;
 
 if (c != EOF)
  *w++ = c;
  if (!isalpha(c)) {
  *w = '\0';
  return c;
 }
 
 for ( ; --lim > 0; w++)
 if (!isalnum(*w = getch())) {
 ungetch(*w);
 break;
 }
 *w = '\0';
 
 return word[0]; 
}

#define BUFSIZE 100

char    buf[BUFSIZE];
int     bufp = 0;

int getch (void)
{
    return bufp > 0 ? buf[--bufp] : getchar();
}

void ungetch (int c)
{
    if (bufp >= BUFSIZE)
  printf("Error: Too many characters!\n");
 else
  buf[bufp++] = c;
}


6. Table lookup 

7. Typedef

8. Unions
9. Bit-fields
Consider bitmask below.
unsigned char flags;

// initialising the flags
// note that assignming a value will clobber any other flags, so you
// should generally only use the = operator when initialising vars.
flags = LOG_ERRORS;
// sets to 1 i.e. bit 0

//initialising to multiple values with OR (|)
flags = LOG_ERRORS | LOG_WARNINGS | LOG_INCOMING;
// sets to 1 + 2 + 8 i.e. bits 0, 1 and 3

// setting one flag on, leaving the rest untouched
// OR bitmask with the current value
flags |= LOG_INCOMING;

// testing for a flag
// AND with the bitmask before testing with ==
if ((flags & LOG_WARNINGS) == LOG_WARNINGS)
   ...

// testing for multiple flags
// as above, OR the bitmasks
if ((flags & (LOG_INCOMING | LOG_OUTGOING))
         == (LOG_INCOMING | LOG_OUTGOING))
   ...

// removing a flag, leaving the rest untouched
// AND with the inverse (NOT) of the bitmask
flags &= ~LOG_OUTGOING;

// toggling a flag, leaving the rest untouched
flags ^= LOG_LOOPBACK;

Use bit-field for alternative solution
/* bit-field */

struct {
unsigned int is_keyword : 1;
unsigned int is_extern : 1;
unsigned int is_static : 1;
} flags;

// to turn the bits on;
flags.is_extern = flags.is_static = 1;

// to turn them off;
flags.is_extern = flags.is_static = 0;

// to test them
if (flags.is_extern == 0 && flags.is_static == 0)

No comments:

Post a Comment