(define_expand "movqi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "")
	(match_operand:QI 1 "general_operand" ""))]
  ""
{
     /* Before reload, rewrite any QImode move that touches memory into
      * whole-word (SImode) accesses:
      *   - a load becomes: load the containing word, shift the wanted byte
      *     to the top of the register, arithmetic-shift it back down;
      *   - a store becomes a read-modify-write of the containing word.
      * A mem := mem move is handled as a load followed by a store.
      * The byte-lane arithmetic below follows big-endian numbering
      * (address offset 0 is the most significant byte of the word).
      * The expander does not end in DONE, so the final SET is emitted
      * from the rewritten operands[0] and operands[1]. */
     if (!reload_completed &&
         (MEM == GET_CODE(operands[0]) || MEM == GET_CODE(operands[1])))
     {
       rtx address;
       rtx wordAddress;
       rtx const3;
       rtx shiftVal;
       rtx loadedValue;
       rtx addressMask;

       /* Load the constant 3 into a register. */
       const3 = gen_reg_rtx(SImode);
       emit_insn(gen_rtx_SET(SImode, const3, GEN_INT(3)));

       /* Load the address mask with the bitwise complement of 3. */
       addressMask = gen_reg_rtx(SImode);
       emit_insn(gen_rtx_SET(SImode, addressMask, GEN_INT(-4)));

       /* Handle loads first, in case we are dealing with a mem := mem
        * instruction. */
       if (MEM == GET_CODE(operands[1]))
       {
         rtx topByteValue;
         rtx signShift;
         rtx signExtendedValue;

         /* Ensure the address is in a register. */
         address = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, address, XEXP(operands[1], 0)));

         /* Compute the word address by masking out the two bottom bits. */
         wordAddress = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(wordAddress, address, addressMask));

         /* Compute the shift value: the two bottom address bits
          * multiplied by 8.  No inversion is needed for big-endian,
          * since a left shift by this amount moves the addressed byte
          * into the most significant byte. */
         shiftVal = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(shiftVal, address, const3));
         emit_insn(gen_ashlsi3(shiftVal, shiftVal, GEN_INT(3)));

         /* Emit the memory load of the whole containing word. */
         loadedValue = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, loadedValue, gen_rtx_MEM(SImode, wordAddress)));

         /* Shift the desired byte to the most significant byte. */
         topByteValue = gen_reg_rtx(SImode);
         emit_insn(gen_ashlsi3(topByteValue, loadedValue, shiftVal));

         /* Sign extend the top byte back into the bottom byte.  The byte
          * always sits in bits 31..24 at this point, so the shift is a
          * fixed 24 regardless of the original byte offset.  The amount
          * is kept in a register, matching how ashrsi3 was already
          * being invoked. */
         signShift = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, signShift, GEN_INT(24)));
         signExtendedValue = gen_reg_rtx(SImode);
         emit_insn(gen_ashrsi3(signExtendedValue, topByteValue, signShift));

         /* Final extraction of the QImode value.  gen_lowpart picks the
          * least significant byte on either endianness, whereas a
          * hard-coded SUBREG byte offset of 0 names the most significant
          * byte on a big-endian target. */
         operands[1] = gen_lowpart(QImode, signExtendedValue);
       }

       if (MEM == GET_CODE(operands[0]) && GET_CODE(operands[1]) != MEM)
       {
         rtx zeroingByteMask;
         rtx temp;
         rtx tempSiMode;
         rtx lsbByteMask;

         /* Get the address. */
         address = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, address, XEXP(operands[0], 0)));

         /* Compute the word aligned address. */
         wordAddress = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(wordAddress, address, addressMask));

         /* Compute the shift value.  Inverted for big-endian: byte
          * offset 0 lives in bits 31..24, so it needs a shift of 24,
          * and byte offset 3 needs a shift of 0. */
         shiftVal = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(shiftVal, address, const3));
         emit_insn(gen_xorsi3(shiftVal, shiftVal, const3));
         emit_insn(gen_ashlsi3(shiftVal, shiftVal, GEN_INT(3)));

         /* Emit the memory load of the word being modified. */
         loadedValue = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, loadedValue, gen_rtx_MEM(SImode, wordAddress)));

         /* Zero out the destination byte by AND-ing with the complement
          * of 0x000000FF shifted left into the destination lane.  The
          * mask uses the same shift as the value inserted below, so the
          * cleared lane and the written lane always coincide. */
         zeroingByteMask = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, zeroingByteMask, GEN_INT(0xFF)));
         emit_insn(gen_ashlsi3(zeroingByteMask, zeroingByteMask, shiftVal));
         emit_insn(gen_one_cmplsi2(zeroingByteMask, zeroingByteMask));
         emit_insn(gen_andsi3(loadedValue, loadedValue, zeroingByteMask));

         /* Grab the incoming QI value as an SImode value and ensure
          * that the top bits are zeroed out.  The register may be
          * holding a signed value, in which case the top bits will be
          * sign bits.  These must be removed so that the OR below does
          * not overwrite the neighbouring bytes of the original memory
          * word. */
         tempSiMode = simplify_gen_subreg(SImode, operands[1], QImode, 0);
         temp = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, temp, tempSiMode));
         lsbByteMask = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, lsbByteMask, GEN_INT(0xFF)));
         emit_insn(gen_andsi3(temp, temp, lsbByteMask));

         /* Shift the incoming byte value by the appropriate amount,
          * and OR into the load value. */
         emit_insn(gen_ashlsi3(temp, temp, shiftVal));
         emit_insn(gen_iorsi3(loadedValue, loadedValue, temp));

         /* Rewrite the original assignment, to assign the new value
          * to the word address. */
         operands[0] = gen_rtx_MEM(SImode, wordAddress);
         operands[1] = loadedValue;
       }

     }
})

(define_expand "movhi"
  [(set (match_operand:HI 0 "nonimmediate_operand" "")
	(match_operand:HI 1 "general_operand" ""))]
  ""
{
     /* Before reload, rewrite any HImode move that touches memory into
      * whole-word (SImode) accesses:
      *   - a load becomes: load the containing word, shift the wanted
      *     halfword to the top of the register, arithmetic-shift it
      *     back down;
      *   - a store becomes a read-modify-write of the containing word.
      * A mem := mem move is handled as a load followed by a store.
      * NOTE(review): the lane arithmetic assumes big-endian numbering
      * and halfword addresses that are 2-byte aligned, so that the
      * halfword never straddles a word boundary -- confirm for this
      * target.
      * The expander does not end in DONE, so the final SET is emitted
      * from the rewritten operands[0] and operands[1]. */
     if (!reload_completed &&
         (MEM == GET_CODE(operands[0]) || MEM == GET_CODE(operands[1])))
     {
       rtx address;
       rtx wordAddress;
       rtx const3;
       rtx shiftVal;
       rtx loadedValue;
       rtx addressMask;

       /* Load the constant 3 into a register. */
       const3 = gen_reg_rtx(SImode);
       emit_insn(gen_rtx_SET(SImode, const3, GEN_INT(3)));

       /* Load the address mask with the bitwise complement of 3. */
       addressMask = gen_reg_rtx(SImode);
       emit_insn(gen_rtx_SET(SImode, addressMask, GEN_INT(-4)));

       /* Handle loads first, in case we are dealing with a mem := mem
        * instruction. */
       if (MEM == GET_CODE(operands[1]))
       {
         rtx topHalfValue;
         rtx signShift;
         rtx signExtendedValue;

         /* Ensure the address is in a register. */
         address = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, address, XEXP(operands[1], 0)));

         /* Compute the word address by masking out the two bottom bits. */
         wordAddress = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(wordAddress, address, addressMask));

         /* Compute the shift value: the two bottom address bits
          * multiplied by 8.  No inversion is needed for big-endian,
          * since a left shift by this amount moves the addressed
          * halfword into the upper 16 bits. */
         shiftVal = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(shiftVal, address, const3));
         emit_insn(gen_ashlsi3(shiftVal, shiftVal, GEN_INT(3)));

         /* Emit the memory load of the whole containing word. */
         loadedValue = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, loadedValue, gen_rtx_MEM(SImode, wordAddress)));

         /* Shift the desired halfword to the upper half of the word. */
         topHalfValue = gen_reg_rtx(SImode);
         emit_insn(gen_ashlsi3(topHalfValue, loadedValue, shiftVal));

         /* Sign extend the upper half back into the lower half.  The
          * halfword always sits in bits 31..16 at this point, so the
          * shift is a fixed 16 regardless of the original offset.  The
          * amount is kept in a register, matching how ashrsi3 was
          * already being invoked. */
         signShift = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, signShift, GEN_INT(16)));
         signExtendedValue = gen_reg_rtx(SImode);
         emit_insn(gen_ashrsi3(signExtendedValue, topHalfValue, signShift));

         /* Final extraction of the HImode value.  gen_lowpart picks the
          * least significant halfword on either endianness, whereas a
          * hard-coded SUBREG byte offset of 0 names the most significant
          * halfword on a big-endian target. */
         operands[1] = gen_lowpart(HImode, signExtendedValue);
       }

       if (MEM == GET_CODE(operands[0]) && GET_CODE(operands[1]) != MEM)
       {
         rtx zeroingHalfMask;
         rtx temp;
         rtx tempSiMode;
         rtx lowerHalfMask;
         rtx const2;

         /* Get the address. */
         address = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, address, XEXP(operands[0], 0)));

         /* Compute the word aligned address. */
         wordAddress = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(wordAddress, address, addressMask));

         /* Compute the shift value.  Inverted for big-endian so that it
          * mirrors the load path: offset 0 is the upper half (shift 16)
          * and offset 2 is the lower half (shift 0).  XOR with 2 flips
          * only the halfword-select bit. */
         const2 = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, const2, GEN_INT(2)));
         shiftVal = gen_reg_rtx(SImode);
         emit_insn(gen_andsi3(shiftVal, address, const3));
         emit_insn(gen_xorsi3(shiftVal, shiftVal, const2));
         emit_insn(gen_ashlsi3(shiftVal, shiftVal, GEN_INT(3)));

         /* Emit the memory load of the word being modified. */
         loadedValue = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, loadedValue, gen_rtx_MEM(SImode, wordAddress)));

         /* Zero out the destination halfword by AND-ing with the
          * complement of 0x0000FFFF shifted left into the destination
          * lane.  ashlsi3 is used for the left shift, as everywhere
          * else in this expander. */
         zeroingHalfMask = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, zeroingHalfMask, GEN_INT(0xffff)));
         emit_insn(gen_ashlsi3(zeroingHalfMask, zeroingHalfMask, shiftVal));
         emit_insn(gen_one_cmplsi2(zeroingHalfMask, zeroingHalfMask));
         emit_insn(gen_andsi3(loadedValue, loadedValue, zeroingHalfMask));

         /* Grab the incoming HI value as an SImode value and ensure
          * that the top bits are zeroed out.  The register may be
          * holding a signed value, in which case the top bits will be
          * sign bits.  These must be removed so that the OR below does
          * not overwrite the neighbouring halfword of the original
          * memory word. */
         tempSiMode = simplify_gen_subreg(SImode, operands[1], HImode, 0);
         temp = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, temp, tempSiMode));
         lowerHalfMask = gen_reg_rtx(SImode);
         emit_insn(gen_rtx_SET(SImode, lowerHalfMask, GEN_INT(0xFFFF)));
         emit_insn(gen_andsi3(temp, temp, lowerHalfMask));

         /* Shift the incoming halfword value by the appropriate amount,
          * and OR into the load value. */
         emit_insn(gen_ashlsi3(temp, temp, shiftVal));
         emit_insn(gen_iorsi3(loadedValue, loadedValue, temp));

         /* Rewrite the original assignment, to assign the new value
          * to the word address. */
         operands[0] = gen_rtx_MEM(SImode, wordAddress);
         operands[1] = loadedValue;
       }

     }
})